爬取汽车之家期末作业:
代码如下所示:
import csv
import random
import time

import parsel
import requests  # sends the HTTP requests
# Crawl listing pages 1-100 of che168.com and append one CSV row per car
# listing to lmy-1.csv.

# 1. Request setup
# Headers sent with every request: a desktop-browser User-Agent plus a
# hard-coded cookie string.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62','cookie' :'cna=4PnlF84bLHECATzVzKSn1QVt; sca=affb7120; atpsidas=0812ac0d0153e7414eefc32b_1634996188_1; atpsida=341b48c86ac6a1e14421aa46_1640613502_50'}

# The `with` block guarantees the CSV file is flushed and closed even when a
# request raises part-way through the crawl.
with open('lmy-1.csv', mode='a', encoding='utf-8-sig', newline='') as csv_lmy:
    csv_write = csv.writer(csv_lmy)  # rows are written in CSV format
    # The file is opened in append mode, so the header row is only written
    # when the file is still empty; otherwise every rerun would insert a
    # duplicate header row in the middle of the data.
    if csv_lmy.tell() == 0:
        csv_write.writerow(['品牌','里程(万公里)','车龄','城市','认证','售价(万元)','原价(万元)','链接','车辆图片',])

    for page in range(1, 101):
        print(f'------------正在爬取第{page}页----------------------')
        url = f'https://www.che168.com/china/a0_0msdgscncgpi1ltocsp{page}exx0/'
        # A timeout keeps a single stalled connection from hanging the crawl.
        response = requests.get(url=url, headers=headers, timeout=10)

        # 2. Get the data: the page source (also echoed to stdout).
        print(response.text)
        lmy_html = response.text

        # 3. Parse the data: one <li> per entry in the listing.
        selector = parsel.Selector(lmy_html)
        lis = selector.css('.viewlist_ul li')
        for li in lis:
            name = li.css('.card-name::text').get()  # car name
            unit = li.css('.cards-unit::text').get()  # '/'-separated info string
            if unit is None:
                # Entry without an info string: skip it instead of failing.
                continue
            fields = unit.split('/')
            if len(fields) < 4:
                # Too few fields to fill the four info columns: skip it.
                continue
            # The first four fields fill the mileage, age, city and
            # certification columns, in that order.
            kemNumber, years, city, business = fields[:4]
            pirce = li.css('.pirce em::text').get()  # asking price
            yprice = li.css('s::text').get()  # original price
            carinfo = li.css('.carinfo::attr(href)').get()  # detail-page link
            img = li.css('img::attr(src)').get()  # image link
            csv_write.writerow([name,kemNumber,years,city,business,pirce,yprice,carinfo,img])