天气预报获取模块
基于http://www.weather.com.cn/做了代码优化,每个函数功能更加明确,更加适合单独集成。接口也进行了优化,比网络上博主抄来抄去的源码要好一些。
改进点:获取天气拆成了多个函数,多次保存csv文件不再出现标题栏重写的情况。
天气获取模块
1.1 依赖表
包名与含义:系统包——os、csv、json;第三方依赖——requests(爬虫模块)、bs4.BeautifulSoup(网页解析对象)、bs4.element.Tag(网页标签对象)。
1.2 全局变量表
1.3 函数
函数名与含义(以下均非接口函数):get_html_text——请求网页内容,无代理无 header 模式;get_today_weather——获取当天天气;get_1_7_weather——获取当周天气;get_today_and_week——获取当周和当天的天气;get_8_14_weather——获取下一周天气;write_to_csv——保存文件模块。
1.4 接口函数
函数名 含义 get_two_weak_weather 获取两周天气 入参 含义 类型 city_code 城市代码 字符串 出参 None
1.5 代码
import os
import csv
import jsonimport requests
from bs4 import BeautifulSoup
from bs4. element import Tagdef get_html_text ( url) : """请求获得网页内容""" try : r = requests. get( url, timeout= 30 ) r. raise_for_status( ) r. encoding = r. apparent_encodingprint ( "成功访问" ) return r. textexcept Exception as e: print ( e) print ( "访问错误" ) return " " def get_today_weather ( body_tag: Tag) : td_wea_list = [ ] count = 0 def get_today_json ( _tag: Tag) : weather_div = _tag. find_all( 'div' , { 'class' : 'left-div' } ) observe24h_data = weather_div[ 2 ] . find( 'script' ) . stringbegin_index = observe24h_data. index( '=' ) + 1 end_index = - 2 observe24h_data = observe24h_data[ begin_index: end_index] observe24h_json = json. loads( observe24h_data) t_json = observe24h_json[ 'od' ] [ 'od2' ] if t_json[ 0 ] [ 'od28' ] == "" : t_json[ 0 ] [ 'od28' ] = t_json[ 1 ] [ 'od28' ] return t_jsontoday_json = get_today_json( body_tag) for i in today_json: od_wea_list = [ ] if count <= 23 : od_wea_list. append( i[ 'od21' ] ) od_wea_list. append( i[ 'od22' ] ) od_wea_list. append( i[ 'od24' ] ) od_wea_list. append( i[ 'od25' ] ) od_wea_list. append( i[ 'od26' ] ) od_wea_list. append( i[ 'od27' ] ) od_wea_list. append( i[ 'od28' ] ) print ( od_wea_list) td_wea_list. append( od_wea_list) count = count + 1 print ( td_wea_list) return td_wea_listdef get_1_7_weather ( body_tag: Tag) : week_wea_list = list ( ) data = body_tag. find( 'div' , { 'id' : '7d' } ) ul = data. find_all( 'ul' ) [ 0 ] li = ul. find_all( 'li' ) for day in li: od_wea_list = list ( ) date = day. find( 'h1' ) . stringdate = date[ 0 : date. index( '日' ) ] od_wea_list. append( date) inf = day. find_all( 'p' ) od_wea_list. append( inf[ 0 ] . string) if inf[ 1 ] . find( 'i' ) is None : tem_low = None else : tem_low = inf[ 1 ] . find( 'i' ) . string if inf[ 1 ] . find( 'span' ) is None : tem_high = None else : tem_high = inf[ 1 ] . find( 'span' ) . string if tem_low[ - 1 ] == '℃' : od_wea_list. append( tem_low[ : - 1 ] ) else : od_wea_list. append( tem_low) if tem_high[ - 1 ] == '℃' : od_wea_list. 
append( tem_high[ : - 1 ] ) else : od_wea_list. append( tem_high) wind = inf[ 2 ] . find_all( 'span' ) for j in wind: od_wea_list. append( j[ 'title' ] ) wind_scale = inf[ 2 ] . find( 'i' ) . string index1 = wind_scale. index( '级' ) od_wea_list. append( int ( wind_scale[ index1 - 1 : index1] ) ) week_wea_list. append( od_wea_list) return week_wea_listdef get_today_and_week ( html: str ) : """处理得到有用信息保存数据文件""" bs = BeautifulSoup( html, "html.parser" ) body = bs. bodytd_wea_list = get_today_weather( body) week_wea_list = get_1_7_weather( body) return td_wea_list, week_wea_listdef get_8_14_weather ( html) : week_wea_list = [ ] i = 0 bs = BeautifulSoup( html, "html.parser" ) body = bs. bodydata = body. find( 'div' , { 'id' : '15d' } ) ul = data. find( 'ul' ) li = ul. find_all( 'li' ) for day in li: if i < 7 : od_wea_list = list ( ) date = day. find( 'span' , { 'class' : 'time' } ) . string date = date[ date. index( '(' ) + 1 : - 2 ] od_wea_list. append( date) weather = day. find( 'span' , { 'class' : 'wea' } ) . string print ( day. find( 'span' , { 'class' : 'wea' } ) . string) print ( day. find( 'span' , { 'class' : 'wea' } ) . text) od_wea_list. append( weather) tem = day. find( 'span' , { 'class' : 'tem' } ) . text print ( tem) od_wea_list. append( tem[ tem. index( '/' ) + 1 : - 1 ] ) od_wea_list. append( tem[ : tem. index( '/' ) - 1 ] ) wind = day. find( 'span' , { 'class' : 'wind' } ) . string if '转' in wind: od_wea_list. append( wind[ : wind. index( '转' ) ] ) od_wea_list. append( wind[ wind. index( '转' ) + 1 : ] ) else : od_wea_list. append( wind) od_wea_list. append( wind) wind_scale = day. find( 'span' , { 'class' : 'wind1' } ) . string index1 = wind_scale. index( '级' ) od_wea_list. append( int ( wind_scale[ index1 - 1 : index1] ) ) week_wea_list. append( od_wea_list) return week_wea_listdef write_to_csv ( file_name, data, day= 14 ) : """保存为csv文件""" if not os. path. 
exists( file_name) : with open ( file_name, 'w' , errors= 'ignore' , newline= '' ) as f: if day == 14 : header = [ '日期' , '天气' , '最低气温' , '最高气温' , '风向1' , '风向2' , '风级' ] else : header = [ '小时' , '温度' , '风力方向' , '风级' , '降水量' , '相对湿度' , '空气质量' ] f_csv = csv. writer( f) f_csv. writerow( header) f_csv. writerows( data) else : with open ( file_name, 'a' , errors= 'ignore' , newline= '' ) as f: f_csv = csv. writer( f) f_csv. writerows( data) def get_two_weak_weather ( city_code: str ) : url_head = "http://www.weather.com.cn/weather" url_1_7 = "" . join( [ url_head, "/" , city_code, ".shtml" ] ) url_8_14 = "" . join( [ url_head, "15d" , "/" , city_code, ".shtml" ] ) html_1_7 = get_html_text( url_1_7) data1, data1_7 = get_today_and_week( html_1_7) html8_14 = get_html_text( url_8_14) data8_14 = get_8_14_weather( html8_14) data14 = data1_7 + data8_14write_to_csv( 'weather14.csv' , data14, 14 ) write_to_csv( 'weather1.csv' , data1, 1 ) if __name__ == '__main__' : get_two_weak_weather( "101280701" )