对中国境内所有地区KFC门店基本信息的统计(简略版)

在这里插入图片描述
我们要获取每个地区的kfc信息就要先获取中国一共有哪些地区

中国所有城市名称获取

import requests
from lxml import etreewith open(f'./省份.txt', 'w') as fp:fp.write('')
with open(f'./城市.txt', 'w') as fp:fp.write('')url1='http://www.kfc.com.cn/kfccda/storelist/index.aspx'#页面网址
city=[]
res1=requests.get(url1).text
tre=etree.HTML(res1)
sheng=tre.xpath('//*[@id="container"]/div[1]/div[2]/div/div[1]/div[1]/div/div[3]/ul/li/strong/text()')
#得到页面源码
n=len(sheng)
for i in range(1,n+1):shi=tre.xpath(f'//*[@id="container"]/div[1]/div[2]/div/div[1]/div[1]/div/div[3]/ul/li[{i}]/div/a/text()')#使用xpath对信息获取for s in shi:city.append(sheng[i-1]+'-'+s)
print(city)
for i in city:with open(f'./城市.txt', 'a') as fp:fp.write(i+'\n')
for i in sheng:with open(f'./省份.txt', 'a') as fp:fp.write(i+'\n')

现在我们已经有了每个城市的名字我们只需要将他们替换到请求接口的data中即可获取每个地区的kfc门店基本信息

每个地区门店基本信息获取

import shutil
import time
import requests
import os
from multiprocessing.dummy import Pooldef RemoveDir():    #用来删除文件夹中的所有内容try:shutil.rmtree('./city')#删除文件夹except:passos.mkdir('./city')#创建文件夹amelist = []file = open('./省份.txt', "r", encoding="GBK")file = file.readlines() #读取所有文件按行for line in file:line = line.strip('\n')#删除掉line里的\namelist.append(line)#将line添加到列表amelist中for i in amelist:filepath=f'./city/{i}'os.mkdir(filepath)#创建名字为filepath的文件夹def zhu(name):s=str(name).split('-')#将name以-切片print(1)data={'cname': f'{s[1]}','pid': '','pageIndex': '1','pageSize': '900'}res=requests.post(url,data=data).json()#post方法获取数据url为网址data为负载.json为以json格式获取数据print(res)if str(res['Table'][0]['rowcount'])!='0':#如果res['Table'][0]['rowcount']==0说明这个城市没有店不在进行下面操作fort=[]#保存店铺的名称for i in res['Table1']:if str(i['addressDetail']) not in fort:#如果当前店铺名称已经保存过说明重复不在进行以下程序with open(f'./city/{s[0]}/{s[1]}.txt', 'a+') as fp:print(str(i['storeName'])+'%'+str(i['addressDetail'])+'%'+str(i['pro']))fp.write(str(i['storeName'])+'%'+str(i['addressDetail'])+'%'+str(i['pro'])+'\n')fort.append(str(i['addressDetail']))with open(f'./city/{s[0]}/{s[1]}.txt', 'a+') as fp:#以a+打开文件追加模式fp.write(str(len(fort)))time.sleep(0.5)RemoveDir()
number=[]
url='http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
namelist=[]
file = open('./城市.txt', "r", encoding="GBK")#r代表以只读模式打开文件 encoding="GBK"以GBK格式读取文件
file = file.readlines()
for line in file:line = line.strip('\n')namelist.append(line)
print(namelist)
pool = Pool(3)                                     #开启线程池# 定义循环数
origin_num = [x for x in namelist]                #每一个货物开启一个线程# 利用map让线程池中的所有线程‘同时’执行calc_power2函数
#zhu为函数zhu origin_num为任务列表
pool.map(zhu, origin_num)