qdyxmas's Room

Done is better than perfect...


# coding=utf-8
import json
import os
import re
import sys
import time

try:
    # Python 2 location of urljoin.
    from urlparse import urljoin
except ImportError:
    # Python 3 location of urljoin.
    from urllib.parse import urljoin

from bs4 import BeautifulSoup
from requests_html import HTMLSession
def cur_file_dir():
    """Return the directory derived from the first ``sys.path`` entry.

    Returns:
        ``sys.path[0]`` itself when it is a directory, its parent directory
        when it is a file, and the current working directory otherwise
        (for example when the entry is the empty string or ``sys.path`` is
        empty).  The fallback guarantees callers always get a usable path
        instead of ``None``.
    """
    path = sys.path[0] if sys.path else ""
    if os.path.isdir(path):
        return path
    if os.path.isfile(path):
        return os.path.dirname(path)
    # An empty or stale entry stands for "wherever we were started from".
    return os.getcwd()
def get_now(key):
    """Build a timestamped JSON file name.

    Args:
        key: Value appended after the timestamp (formatted with ``str.format``).

    Returns:
        A string of the form ``YYYYmmddHHMMSS<key>.json`` using local time.
    """
    stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(int(time.time())))
    return "{}{}.json".format(stamp, key)
# Base directory computed once at import time; write_to_json joins its
# output file name onto this path.
path = cur_file_dir()
class GetCities():
    """Scrape a five-level administrative-division tree from linked HTML pages.

    The pages are fetched relative to ``base_url``.  Each level is found by
    the CSS class of its table rows (``provincetr``, ``citytr``,
    ``countytr``, ``towntr``, ``villagetr``).  Every ``get_*`` method stores
    its result on the instance and the next level reads it, so they must be
    called in order: provinces, cities, counties, towns, villages.
    """

    # Matches the trailing "<digits>.html" page name of a relative link.
    _PAGE_NAME_RE = re.compile(r"[0-9]+\.html")

    def __init__(self,base_url=''):
        """Create the HTTP session.

        Args:
            base_url: Root URL of the page tree; an empty value selects the
                built-in default.
        """
        if not base_url:
            base_url = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2018/"
        self.base_url = base_url
        self.headers = {
            "User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36"
        }
        self.session = HTMLSession()

    @staticmethod
    def _parse_row(row_html):
        """Parse the inner HTML of one table row into a BeautifulSoup tree."""
        # The fragment normally starts at the first cell; wrap it so the
        # parser sees a complete row.
        if not row_html.startswith("<tr"):
            row_html = "<tr>" + row_html + "</tr>"
        # Name the parser explicitly so results do not depend on which
        # optional parser libraries happen to be installed.
        return BeautifulSoup(row_html, "html.parser")

    @staticmethod
    def _label(entry):
        """Format an entry as ``name(code)``."""
        return "{}({})".format(entry["name"], entry["code"])

    @staticmethod
    def _nested_get(mapping, *keys):
        """Walk ``mapping`` along ``keys``; return ``{}`` if any level is missing."""
        current = mapping
        for step in keys:
            if not isinstance(current, dict) or step not in current:
                return {}
            current = current[step]
        return current

    def get_provinces(self):
        """Fetch the root page and collect every province link.

        Returns:
            Dict mapping each link's ``href`` to ``{"name": link text,
            "code": href}``.  Also stored as ``self.provinces_dict``.
        """
        all_provinces_dict = {}
        r = self.session.get(self.base_url,headers=self.headers)
        all_provinces = r.html.pq(".provincetr")
        for x in range(len(all_provinces)):
            province_html = all_provinces.eq(x).html()
            if not province_html:
                continue
            s = BeautifulSoup(province_html, "html.parser")
            for p in s.find_all("a"):
                province_code = p["href"]
                all_provinces_dict[province_code] = {"name":p.text,"code":province_code}
        self.provinces_dict = all_provinces_dict
        return all_provinces_dict

    def get_cities(self, province_keys=None):
        """Fetch the city table of each requested province page.

        Args:
            province_keys: Iterable of province page names relative to
                ``base_url``.  Defaults to ``["43.html"]``, the single page
                the original code fetched.

        Returns:
            Dict mapping province page name to the dict produced by
            ``parser_data``.  Also stored as ``self.all_cities``.
        """
        if province_keys is None:
            province_keys = ["43.html"]
        all_cities = {}
        for k in province_keys:
            url = self.base_url + k
            req = self.session.get(url,headers=self.headers)
            all_cities[k] = self.parser_data("citytr",req)
        self.all_cities = all_cities
        return all_cities

    def get_counties(self,**kargs):
        """Fetch the county table of every city in ``self.all_cities``.

        Keyword arguments are accepted for backward compatibility and ignored.

        Returns:
            ``{province: {city_link: {county_link: {"name", "code"}}}}``.
            Also stored as ``self.all_counties``.
        """
        all_counties = {}
        for k,v in self.all_cities.items():
            all_counties[k] = {}
            # County hrefs are relative to the province folder, i.e. the
            # province page name without its ".html" suffix.
            province_prefix = k[:-5]+"/"
            for k1 in v:
                url = self.base_url + k1
                req = self.session.get(url,headers=self.headers)
                all_counties[k][k1] = self.parser_data("countytr",req,province_prefix)
        self.all_counties = all_counties
        return all_counties

    def parser_data(self,class_name,reqhtml,province_code=""):
        """Extract (code, name) link pairs from rows with the given CSS class.

        Anchors are consumed in pairs: the first carries the code as its
        text and the link as its ``href``, the second carries the name.
        Rows without a complete pair contribute nothing.

        Args:
            class_name: CSS class of the table rows to read.
            reqhtml: Response object exposing ``.html.pq(selector)``.
            province_code: Prefix prepended to each ``href``.

        Returns:
            Dict mapping prefixed link to ``{"name": ..., "code": ...}``.
        """
        html = reqhtml.html.pq(".{}".format(class_name))
        return_list = {}
        for x in range(len(html)):
            sub_html = html.eq(x).html()
            if not sub_html:
                continue
            all_list = self._parse_row(sub_html).find_all("a")
            # Pair anchors with zip so a trailing unpaired anchor is skipped
            # instead of raising IndexError.
            for code_tag, name_tag in zip(all_list[0::2], all_list[1::2]):
                link = (province_code+code_tag["href"]).replace("//","/")
                return_list[link] = {"name":name_tag.text,"code":code_tag.text}
        return return_list

    def parser_villagetr(self,class_name,reqhtml,province_code=""):
        """Extract village entries from rows with the given CSS class.

        Cells are consumed in groups of three: code, a second code (stored
        as ``vill_code``; presumably a classification code, to be confirmed
        against the page) and name.  Incomplete groups are skipped.

        Args:
            class_name: CSS class of the table rows to read.
            reqhtml: Response object exposing ``.html.pq(selector)``.
            province_code: Unused; kept for signature parity with
                ``parser_data``.

        Returns:
            Dict mapping code to ``{"name", "code", "vill_code"}``.
        """
        html = reqhtml.html.pq(".{}".format(class_name))
        return_list = {}
        for x in range(len(html)):
            sub_html = html.eq(x).html()
            if not sub_html:
                continue
            all_list = self._parse_row(sub_html).find_all("td")
            for code_td, vill_td, name_td in zip(all_list[0::3], all_list[1::3], all_list[2::3]):
                code = code_td.text
                # write_msg reads vill_code, so it has to be stored here.
                return_list[code] = {
                    "name":name_td.text,
                    "code":code,
                    "vill_code":vill_td.text,
                }
        return return_list

    def get_towns(self):
        """Fetch the town table of every county in ``self.all_counties``.

        Returns:
            ``{province: {city_link: {county_link: {town_link: {...}}}}}``.
            Also stored as ``self.all_towns``.
        """
        all_towns = {}
        for k,v in self.all_counties.items():
            all_towns[k] = {}
            for k1,v1 in v.items():
                all_towns[k][k1] = {}
                for k2 in v1:
                    # Town hrefs are relative to the county page's folder;
                    # parser_data collapses the doubled slash.
                    prefix = self._PAGE_NAME_RE.sub("",k2)
                    url = self.base_url + k2
                    req = self.session.get(url,headers=self.headers)
                    all_towns[k][k1][k2] = self.parser_data("towntr",req,prefix+"/")
        self.all_towns = all_towns
        return all_towns

    def get_villagetr(self,*kargs):
        """Fetch the village table of every town in ``self.all_towns``.

        Positional arguments are accepted for backward compatibility and
        ignored.

        Returns:
            The ``self.all_towns`` nesting with one more level: each town
            link maps to the dict produced by ``parser_villagetr``.  Also
            stored as ``self.all_villagetr``.
        """
        all_villagetr = {}
        # The original looped over an undefined name; the four-level loop
        # matches the shape of self.all_towns.
        for k,v in self.all_towns.items():
            all_villagetr[k] = {}
            for k1,v1 in v.items():
                all_villagetr[k][k1] = {}
                for k2,v2 in v1.items():
                    all_villagetr[k][k1][k2] = {}
                    for k3 in v2:
                        url = self.base_url + k3
                        req = self.session.get(url,headers=self.headers,timeout=None)
                        all_villagetr[k][k1][k2][k3] = self.parser_villagetr("villagetr",req)
        self.all_villagetr = all_villagetr
        return all_villagetr

    def write_msg(self,key="43.html"):
        """Assemble the scraped levels of one province into a single tree.

        Every level is keyed by ``name(code)``; the innermost value is the
        village's ``vill_code`` (empty string when absent).  Levels that
        were not fetched show up as empty dicts.

        Args:
            key: Province page name as used in ``self.provinces_dict``.

        Returns:
            ``{province: {city: {county: {town: {village: vill_code}}}}}``.

        Raises:
            KeyError: If ``key`` is missing from ``self.provinces_dict`` or
                ``self.all_cities``.
        """
        province = self.provinces_dict[key]
        province_tree = {}
        for k,v in self.all_cities[key].items():
            county_tree = {}
            counties = self._nested_get(self.all_counties, key, k)
            for k1,v1 in counties.items():
                town_tree = {}
                towns = self._nested_get(self.all_towns, key, k, k1)
                for k2,v2 in towns.items():
                    # Only dict entries carry name/code.
                    if not isinstance(v2,dict):
                        continue
                    village_tree = {}
                    villages = self._nested_get(self.all_villagetr, key, k, k1, k2)
                    for v3 in villages.values():
                        if not isinstance(v3,dict):
                            continue
                        village_tree[self._label(v3)] = v3.get("vill_code", "")
                    town_tree[self._label(v2)] = village_tree
                county_tree[self._label(v1)] = town_tree
            province_tree[self._label(v)] = county_tree
        return {self._label(province): province_tree}

    def read_json(self,filename):
        """Load and return the JSON document stored in ``filename`` (UTF-8)."""
        with open(filename,'r',encoding='utf-8') as load_f:
            return json.load(load_f)
def write_to_json(key,**tcase):
    """Dump the keyword arguments to a timestamped JSON file.

    The file name comes from ``get_now(key)`` and is created inside the
    module-level ``path`` directory.

    Args:
        key: Suffix passed to ``get_now`` to build the file name.
        **tcase: Data to serialize; written as one JSON object.

    Returns:
        The full path of the file that was written.
    """
    curtime = get_now(key)
    filename = os.path.join(path,curtime)
    with open(filename,'w',encoding='utf-8') as json_file:
        # The file is opened as UTF-8, so keep non-ASCII text readable
        # instead of escaping it.
        json.dump(tcase, json_file, ensure_ascii=False)
    return filename
if __name__ == "__main__":
    # Each stage reads the result the previous stage stored on the instance,
    # so the calls must run in this order.
    c = GetCities()
    all_provinces = c.get_provinces()
    all_cities = c.get_cities()
    all_counties = c.get_counties()
    all_towns = c.get_towns()
    all_vill = c.get_villagetr()
    # write_msg defaults to the "43.html" province page.
    hunan = c.write_msg()


805 评论

  1. Lino

    sildenafil citrate 100mg pills best viagra pills https://www.water-handbook.com/index.php?title=Black_Viagra_21383 - sildenafil 20 mg tablet coupon where can i buy viagra online safely where can you buy generic viagra buy online viagra capsules

  2. JosephAmorb

    тягачи цена http://deti.taminfo.ru/index.php?subaction=userinfo&user=uguhipeg купить сцепку продажа тягачей на авито по россии

  3. Herbertjoymn

    сервер раскрытия информации интерфакс http://www.spvzlet.ru/forum/messages/forum2/topic5976/message9722/?result=new#message9722 при выключении компьютера вся информация стирается конфиденциальность информации это информация для человека это http://kolokolchik237.ru/index.php?subaction=userinfo&user=igixoguno устройство вывода информации росреестр справочная информация по объектам недвижимости онлайн

  4. BillyJef

    доступность информации это https://subscribe.ru/digest/cookery/salad/n369674964.html хранение информации средства защиты информации

  5. MartyKek

    при отключении компьютера информация обработка информации это http://ya.listbb.ru/viewtopic.php?f=14&t=217&p=1001 информация это в информатике

  6. BillyJef

    справочная информация росреестра http://baby-best.ru/forum/topic_39577/1 алфавитный подход к измерению информации информация и информационные процессы

  7. MartyKek

    способы передачи информации какое устройство обладает наибольшей скоростью обмена информацией http://ya.iboards.ru/viewtopic.php?f=13&t=356&p=1205 материальным носителем наследственной информации в клетке является

  8. new homes minnesota

    It's the best time to make some plans for the future and it's time to be happy. I've read this post and if I could I wish to suggest you few interesting things or suggestions. Maybe you could write next articles referring to this article. I wish to read even more things about it! https://www.eternityhomesllc.com/

  9. Michaelemoth

    The most popular and convenient Cryptocurrency Exchange in 16 languages. Everything is made for people. Earning is now easier. No restrictions. Huge selection of tools Come and earn now! http://bit.ly/3bAtK2O *** Самая ТОПОВАЯ и удобная Биржа криптовалют на 16 языках. Все создано для людей. Зарабатывать теперь проще. Никаких ограничений. Огромный выбор инструментов Заходи и зарабатывай сейчас! http://bit.ly/3bAtK2O

  10. Jefferykip

    porno kim kardashian [url=http://irshansk-rada.com.ua/]teens porno[/url] porno drunk porno drunk

  11. Jefferykip

    free porno video <a href=http://irshansk-rada.com.ua/>pkf porno</a> lenkino porno porno free

  12. Stephenabept

    vr porno <a href=http://king-pinup642.od.ua/kosmolot/>online porno</a> porno hardcore www porno

  13. Joshuachese

    porno hd 1080 <a href=http://school24.if.ua/>pmv porno</a> porno hub porno webcam

  14. Charlesmop

    loli porno <a href=https://www.rugbyclubs.info/>porno hub зеркало</a> porno vk good porno

  15. Stephenabept

    gey porno <a href=http://king-pinup642.od.ua/kosmolot/>russian gay porno</a> porus porno porno magazine

  16. EdwardIcoma

    porno 1080 hd <a href=https://cosmolot-24.com.ua/ua/>porno hardcore</a> porno video hd porno movies

  17. ErrolGex

    porno family <a href=https://cosmolot-24.com.ua/ru/>vr porno</a> porno lisa ann porno big tits

  18. Joshuachese

    porno dp <a href=http://school24.if.ua/>porno ipad</a> porno girl porno big tits

  19. Jefferykip

    dancing bear porno <a href=http://irshansk-rada.com.ua/>porno lisa ann</a> playboy porno sasha grey porno

  20. Joshuachese

    porno dog <a href=http://school24.if.ua/>you porno</a> mother porno porno tracker

  21. Joshuachese

    porno iznasilovanie <a href=http://school24.if.ua/>porno comics</a> porno nd porno drunk

  22. ErrolGex

    porno lisa ann <a href=https://cosmolot-24.com.ua/ru/>porno stars</a> porno magazine anal porno

  23. EdwardIcoma

    porno iznasilovanie <a href=https://cosmolot-24.com.ua/ua/>kisankanna porno</a> porno hardcore porno onlayn

  24. Charlesmop

    porno zvezda <a href=https://www.rugbyclubs.info/>sex porno film</a> dredd porno porno webcam

  25. Joshuachese

    sasha grey porno <a href=http://school24.if.ua/>undertale porno</a> porno pictures porno film online

  26. Jefferykip

    best porno <a href=http://irshansk-rada.com.ua/>porno ipad</a> sfm porno sex porno video

  27. Joshuachese

    mother porno <a href=http://school24.if.ua/>hd porno</a> kisankanna porno yaoi porno

  28. RichardBut

    porno zvezda <a href=https://brody.com.ua/>porno hub</a> porno swingers mother porno

  29. Jefferykip

    black porno <a href=http://irshansk-rada.com.ua/>porno online</a> porno photo vk porno

  30. Michaelemoth

    The most popular and convenient Cryptocurrency Exchange in 16 languages. Everything is made for people. Earning is now easier. No restrictions. Huge selection of tools Come and earn now! http://bit.ly/3bAtK2O *** Самая ТОПОВАЯ и удобная Биржа криптовалют на 16 языках. Все создано для людей. Зарабатывать теперь проще. Никаких ограничений. Огромный выбор инструментов Заходи и зарабатывай сейчас! http://bit.ly/3bAtK2O