import requests
#from lxml import etree
from pyquery import PyQuery as pq
import csv


# Root directory of the 2019 statistical division-code pages; relative links
# found on the pages are joined onto this prefix.
baseUrl = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/'

# Entry page of the crawl (the index page under baseUrl).
url = baseUrl + 'index.html'

# Header dict carrying a desktop-browser User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/85.0.4183.83 Safari/537.36'
    ),
}

# Output CSV file, relative to the current working directory.
filePath = './location.csv'

# Counter incremented once per province row by the crawl loop.
num = 0


def getContent(url):
    """Download *url* and return the page parsed as a PyQuery document.

    The response body is decoded as GBK (undecodable bytes are dropped) and
    the resulting text is handed to PyQuery.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        A PyQuery object wrapping the parsed page.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Pass the module-level `headers` (browser User-Agent) and bound the wait
    # so a stalled connection cannot hang the whole crawl forever.
    ret = requests.get(url, headers=headers, timeout=30)
    # bytes.decode() returns a new str; it must be kept and parsed, otherwise
    # the decode step has no effect and the raw bytes are parsed instead.
    html = ret.content.decode('gbk', 'ignore')
    return pq(html)



def create_csv(path, csv_head):
    """Append the header row *csv_head* to the CSV file at *path*.

    The file is opened in append mode, so it is created when missing and an
    existing file is extended rather than truncated.

    Args:
        path: Path of the CSV file.
        csv_head: Sequence of column names written as a single row.
    """
    # newline='' lets the csv module control line endings itself; without it
    # every row is followed by an empty line on Windows ('\r\r\n').
    with open(path, 'a+', encoding='UTF-8', newline='') as f:
        csv.writer(f).writerow(csv_head)


def write_csv(path, data_row):
    """Append one data row to the CSV file at *path*.

    Args:
        path: Path of the CSV file; created if it does not exist.
        data_row: Sequence of values written as a single row.
    """
    # newline='' lets the csv module control line endings itself; without it
    # every row is followed by an empty line on Windows ('\r\r\n').
    with open(path, 'a+', encoding='UTF-8', newline='') as f:
        csv.writer(f).writerow(data_row)


# Create the output file and write the header row.
create_csv(filePath, ["level", "code", "name", "p_code"])

# Number of leading digits of a division code that identify each level:
# province 2, city 4, county 6, town 9 (per the official coding rules, codes
# are 12 digits laid out as 2 + 2 + 2 + 3 + 3). Shorter slices make distinct
# counties/towns collapse to the same code.
CITY_CODE_LEN = 4
COUNTY_CODE_LEN = 6
TOWN_CODE_LEN = 9


def _row_code_and_name(row, code_len):
    """Return (code prefix, name) read from the first two links of a table row."""
    return row('a:eq(0)').text()[:code_len], row('a:eq(1)').text()


doc = getContent(url)

# Level 1: provinces, taken from the links on the index page.
for a in doc('.provincetr > td > a').items():
    # num counts the provinces visited; handy for stopping early while testing.
    num += 1
    tmpUrl = a.attr.href
    if not tmpUrl:
        continue
    # The province code is the page file name without its extension.
    shengId = tmpUrl.split('.', 1)[0]
    write_csv(filePath, (1, shengId, a.text(), 0))

    # Level 2: cities listed on the province page.
    shiList = getContent(baseUrl + tmpUrl)
    for shi in shiList('.citytr').items():
        tmpUrlShi = shi.find('a').attr.href
        if tmpUrlShi is None:
            # A row without a link has no code/name links and no sub-page.
            continue
        shiId, shiName = _row_code_and_name(shi, CITY_CODE_LEN)
        write_csv(filePath, (2, shiId, shiName, shengId))

        # Level 3: counties / districts listed on the city page.
        xianList = getContent(baseUrl + tmpUrlShi)
        for xian in xianList('.countytr').items():
            tmpUrlXian = xian.find('a').attr.href
            if tmpUrlXian is None:
                # Rows without a link are skipped entirely.
                continue
            xianId, xianName = _row_code_and_name(xian, COUNTY_CODE_LEN)
            write_csv(filePath, (3, xianId, xianName, shiId))

            # Level 4: towns / sub-districts listed on the county page.
            # County links are resolved under the province directory
            # (presumably because they are relative to the city page).
            jieList = getContent(baseUrl + shengId + '/' + tmpUrlXian)
            for jie in jieList('.towntr').items():
                jieId, jieName = _row_code_and_name(jie, TOWN_CODE_LEN)
                write_csv(filePath, (4, jieId, jieName, xianId))

# PS: please install the required libraries yourself (requests, pyquery).