天气预报获取模块
基于http://www.weather.com.cn/做了代码优化,每个函数功能更加明确,更加适合单独集成。接口也进行了优化,比网络上博主抄来抄去的源码要好一些。
改进点:获取天气拆成了多个函数,多次保存csv文件不再出现标题栏重写的情况。
天气获取模块
1.1 依赖表
包名与含义:系统包——os、csv、json;第三方依赖——requests(爬虫模块)、bs4.BeautifulSoup(网页解析对象)、bs4.element.Tag(网页标签对象)。
1.2 全局变量表
1.3 函数
函数名与含义(以下均非接口函数):get_html_text——请求网页内容,无代理无 header 模式;get_today_weather——获取当天天气;get_1_7_weather——获取当周天气;get_today_and_week——获取当周和当天的天气;get_8_14_weather——获取下一周天气;write_to_csv——保存文件模块。
1.4 接口函数
函数名 含义 get_two_weak_weather 获取两周天气 入参 含义 类型 city_code 城市代码 字符串 出参 None
1.5 代码
import os
import csv
import jsonimport requests
from bs4 import BeautifulSoup
from bs4. element import Tagdef get_html_text ( url) : """请求获得网页内容""" try : r = requests. get( url, timeout= 30 ) r. raise_for_status( ) r. encoding = r. apparent_encodingprint ( "成功访问" ) return r. textexcept Exception as e: print ( e) print ( "访问错误" ) return " " def get_today_weather ( body_tag: Tag) : td_wea_list = [ ] count = 0 def get_today_json ( _tag: Tag) : weather_div = _tag. find_all( 'div' , { 'class' : 'left-div' } ) observe24h_data = weather_div[ 2 ] . find( 'script' ) . stringbegin_index = observe24h_data. index( '=' ) + 1 end_index = - 2 observe24h_data = observe24h_data[ begin_index: end_index] observe24h_json = json. loads( observe24h_data) t_json = observe24h_json[ 'od' ] [ 'od2' ] if t_json[ 0 ] [ 'od28' ] == "" : t_json[ 0 ] [ 'od28' ] = t_json[ 1 ] [ 'od28' ] return t_jsontoday_json = get_today_json( body_tag) for i in today_json: od_wea_list = [ ] if count <= 23 : od_wea_list. append( i[ 'od21' ] ) od_wea_list. append( i[ 'od22' ] ) od_wea_list. append( i[ 'od24' ] ) od_wea_list. append( i[ 'od25' ] ) od_wea_list. append( i[ 'od26' ] ) od_wea_list. append( i[ 'od27' ] ) od_wea_list. append( i[ 'od28' ] ) print ( od_wea_list) td_wea_list. append( od_wea_list) count = count + 1 print ( td_wea_list) return td_wea_listdef get_1_7_weather ( body_tag: Tag) : week_wea_list = list ( ) data = body_tag. find( 'div' , { 'id' : '7d' } ) ul = data. find_all( 'ul' ) [ 0 ] li = ul. find_all( 'li' ) for day in li: od_wea_list = list ( ) date = day. find( 'h1' ) . stringdate = date[ 0 : date. index( '日' ) ] od_wea_list. append( date) inf = day. find_all( 'p' ) od_wea_list. append( inf[ 0 ] . string) if inf[ 1 ] . find( 'i' ) is None : tem_low = None else : tem_low = inf[ 1 ] . find( 'i' ) . string if inf[ 1 ] . find( 'span' ) is None : tem_high = None else : tem_high = inf[ 1 ] . find( 'span' ) . string if tem_low[ - 1 ] == '℃' : od_wea_list. append( tem_low[ : - 1 ] ) else : od_wea_list. append( tem_low) if tem_high[ - 1 ] == '℃' : od_wea_list. 
append( tem_high[ : - 1 ] ) else : od_wea_list. append( tem_high) wind = inf[ 2 ] . find_all( 'span' ) for j in wind: od_wea_list. append( j[ 'title' ] ) wind_scale = inf[ 2 ] . find( 'i' ) . string index1 = wind_scale. index( '级' ) od_wea_list. append( int ( wind_scale[ index1 - 1 : index1] ) ) week_wea_list. append( od_wea_list) return week_wea_listdef get_today_and_week ( html: str ) : """处理得到有用信息保存数据文件""" bs = BeautifulSoup( html, "html.parser" ) body = bs. bodytd_wea_list = get_today_weather( body) week_wea_list = get_1_7_weather( body) return td_wea_list, week_wea_listdef get_8_14_weather ( html) : week_wea_list = [ ] i = 0 bs = BeautifulSoup( html, "html.parser" ) body = bs. bodydata = body. find( 'div' , { 'id' : '15d' } ) ul = data. find( 'ul' ) li = ul. find_all( 'li' ) for day in li: if i < 7 : od_wea_list = list ( ) date = day. find( 'span' , { 'class' : 'time' } ) . string date = date[ date. index( '(' ) + 1 : - 2 ] od_wea_list. append( date) weather = day. find( 'span' , { 'class' : 'wea' } ) . string print ( day. find( 'span' , { 'class' : 'wea' } ) . string) print ( day. find( 'span' , { 'class' : 'wea' } ) . text) od_wea_list. append( weather) tem = day. find( 'span' , { 'class' : 'tem' } ) . text print ( tem) od_wea_list. append( tem[ tem. index( '/' ) + 1 : - 1 ] ) od_wea_list. append( tem[ : tem. index( '/' ) - 1 ] ) wind = day. find( 'span' , { 'class' : 'wind' } ) . string if '转' in wind: od_wea_list. append( wind[ : wind. index( '转' ) ] ) od_wea_list. append( wind[ wind. index( '转' ) + 1 : ] ) else : od_wea_list. append( wind) od_wea_list. append( wind) wind_scale = day. find( 'span' , { 'class' : 'wind1' } ) . string index1 = wind_scale. index( '级' ) od_wea_list. append( int ( wind_scale[ index1 - 1 : index1] ) ) week_wea_list. append( od_wea_list) return week_wea_listdef write_to_csv ( file_name, data, day= 14 ) : """保存为csv文件""" if not os. path. 
exists( file_name) : with open ( file_name, 'w' , errors= 'ignore' , newline= '' ) as f: if day == 14 : header = [ '日期' , '天气' , '最低气温' , '最高气温' , '风向1' , '风向2' , '风级' ] else : header = [ '小时' , '温度' , '风力方向' , '风级' , '降水量' , '相对湿度' , '空气质量' ] f_csv = csv. writer( f) f_csv. writerow( header) f_csv. writerows( data) else : with open ( file_name, 'a' , errors= 'ignore' , newline= '' ) as f: f_csv = csv. writer( f) f_csv. writerows( data) def get_two_weak_weather ( city_code: str ) : url_head = "http://www.weather.com.cn/weather" url_1_7 = "" . join( [ url_head, "/" , city_code, ".shtml" ] ) url_8_14 = "" . join( [ url_head, "15d" , "/" , city_code, ".shtml" ] ) html_1_7 = get_html_text( url_1_7) data1, data1_7 = get_today_and_week( html_1_7) html8_14 = get_html_text( url_8_14) data8_14 = get_8_14_weather( html8_14) data14 = data1_7 + data8_14write_to_csv( 'weather14.csv' , data14, 14 ) write_to_csv( 'weather1.csv' , data1, 1 ) if __name__ == '__main__' : get_two_weak_weather( "101280701" )