1.创建scrapy项目
首先创建python项目,在项目命令行中执行
#安装依赖
pip3 install scrapy
#创建scrapy项目
scrapy startproject scrapy_guazi_demo
cd scrapy_guazi_demo
scrapy genspider guazi guazi.com
2.items.py
声明字段
class ScrapyGuaziDemoItem(scrapy.Item):
    """Item holding the fields collected for one Guazi car listing.

    Every field must be an *instance* of ``scrapy.Field`` (note the call
    parentheses); assigning the bare ``scrapy.Field`` class leaves the name
    unregistered and ``item["name"] = value`` then raises ``KeyError``.
    """

    # Car source number. This is the key the spider populates.
    car_id = scrapy.Field()
    # Original (misspelled) name of the car source number field; kept so any
    # code that still refers to "card_id" continues to work.
    card_id = scrapy.Field()
    # Car name.
    car_name = scrapy.Field()
    # Engine displacement.
    pailiang = scrapy.Field()
    # Gearbox / transmission.
    bianshuxiang = scrapy.Field()
    # Price.
    price = scrapy.Field()
3.spiders/guazi.py
import json
from typing import Iterable

import scrapy
from scrapy import Request

from ..items import ScrapyGuaziDemoItem


class GuaziSpider(scrapy.Spider):
    """Spider that walks the Guazi list API and scrapes car detail pages.

    Flow: ``start_requests`` builds a list-API URL from the local
    ``minor.txt`` file, ``parse`` reads the JSON list response and requests a
    detail page, and ``parse_detail`` extracts the item fields from that page.
    """

    name = "guazi"
    allowed_domains = ["guazi.com"]
    start_urls = ["https://guazi.com"]

    # List API URL; the single ``{}`` is filled with the ``minor`` filter value.
    LIST_URL_TEMPLATE = "https://mapi.guazi.com/car-source/carList/wapList?versionId=0.0.0.0&sourceFrom=wap&deviceId=76f7395f-04e0-4d6c-8aa8-53d8347052c5&guid=76f7395f-04e0-4d6c-8aa8-53d8347052c5&userId=&orgUserId=&p_key=mguazicom_list&unit=&guazi_city=103&location_city=103&selectedCity=103&osv=IOS16.6&city_filter=103&page=1&incident_id=454034440138702918&pageSize=10&order=0&minor={}&tag=&license_date=0,-1&auto_type=&driving_type=&gearbox=&road_haul=0,-1&air_displacement=0,-1&emission=&car_color=&guobie=&seat=&fuel_type=&key_word=&priceRange=0,-1&tag_types=&finance_types=&diff_city=&initialPriceRange=0,-1&monthlyPriceRange=0,-1&transfer_num=&car_year=&carid_qigangshu=&carid_jinqixingshi=&cheliangjibie=&horsepower=0,-1&voyage=0,-1&platfromSource=wap"

    # Detail page URL; the single ``{}`` is filled with the listing's clue id.
    # The original string hard-coded ``clueId=132465431`` and had no
    # placeholder, so ``.format(...)`` silently did nothing.
    DETAIL_URL_TEMPLATE = "https://m.guazi.com/detail?incident_id=1709734006486&clueId={}&hideTitlebar=1&h5Ready=1&cpc_ad=-1&ad_location=zero&rank=1&qpres=454043988656259095&storeId=2046695&carListRecommendId=c3865b2f-a61c-4d10-9385-f625533c4672&tk_p_mti=5.2.guazi_mall.list.feed-car.0"

    def start_requests(self) -> Iterable[Request]:
        """Yield list-page API requests built from ``minor.txt``.

        ``minor.txt`` is opened relative to the current working directory and
        must contain JSON whose ``data[1]["filterValue"]["common"]`` maps to
        lists of dicts carrying a ``"value"`` key.
        """
        with open("minor.txt", "r", encoding="utf-8") as f:
            minor = json.load(f)
        data = minor.get("data")
        for value in data[1].get("filterValue").get("common").values():
            for v in value:
                url = self.LIST_URL_TEMPLATE.format(v.get("value"))
                yield scrapy.Request(url=url, callback=self.parse)
                # Only the first entry of each group is requested.
                # NOTE(review): the original indentation was lost, so it is
                # unclear whether this break belonged to the inner or the
                # outer loop -- confirm the intended crawl scope.
                break

    def parse(self, response) -> Iterable[Request]:
        """Handle the first-page list response and request a detail page.

        :param response: JSON response of the list API.
        :return: detail-page requests handled by ``parse_detail``.
        """
        # Tolerate a missing/null "data" or "postList" instead of crashing.
        data = response.json().get("data") or {}
        guazi_items = data.get("postList") or []
        for item in guazi_items:
            detail_url = self.DETAIL_URL_TEMPLATE.format(item.get("clue_id"))
            yield scrapy.Request(url=detail_url, callback=self.parse_detail)
            # Only the first listing is followed, as in the original code.
            break

    def parse_detail(self, response) -> Iterable[ScrapyGuaziDemoItem]:
        """Extract the car source number and car name from a detail page.

        A field whose XPath matches nothing is stored as an empty string
        rather than raising ``AttributeError`` on ``None.strip()``.
        """
        guazi_info = ScrapyGuaziDemoItem()
        # Car source number
        guazi_info["car_id"] = response.xpath(
            "//div[@class='base-info__main__items'][2]/div[@class='item-list']/div[@class='item-list__items'][4]/p[1]/text()"
        ).extract_first(default="").strip()
        # Car name
        guazi_info["car_name"] = response.xpath(
            "//div[@class='base-info__title']/text()"
        ).extract_first(default="").strip()
        yield guazi_info