任务需求:输入关键字下载100个图片保存到本地,每个关键字单独存放一个文件夹(GUI版)
任务描述:输入关键字后,程序会爬取100张与该关键字相关的图片并保存到本地,每个关键字的图片单独存放在一个以该关键字命名的文件夹中。例如:输入"黑客"会下载100张与黑客相关的图片,保存到"黑客"文件夹;再输入"python"则会爬取100张与python相关的图片,保存到"python"文件夹。
pip install 模块名 [-i <镜像源地址>]  安装模块(-i 为可选参数,用于指定 PyPI 镜像源的 simple 索引地址)
pip install pyinstaller 打包程序
pip install PyQt5
import logging
from pathlib import Path
from pdb import set_trace
from pprint import pprint
from urllib.parse import quote

import requests

logger = logging.getLogger(__name__)


class 批量爬取百度图片:
    """Download one page of Baidu image-search thumbnails for a keyword.

    Instantiating the class runs the crawl immediately: it queries the
    ``acjson`` search endpoint, collects every ``hoverURL`` in the response
    and saves each image as ``<n>.jpg`` (``n`` starts at 1).

    Args:
        word: Search keyword, passed *unencoded*; ``requests`` percent-encodes
            query parameters itself.
        pn: Value sent as the ``pn`` query parameter (presumably the result
            offset -- confirm against the endpoint).
        save_dir: Directory that receives the images. ``None`` keeps the
            original behaviour of writing into the current working directory;
            pass e.g. ``save_dir=word`` to get one folder per keyword.
    """

    headers = {
        'Accept': 'text/plain, */*; q=0.01',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        # 'Cookie': 'BDqhfp=%E9%BB%91%E5%AE%A2%26%26NaN-1undefined%26%265916%26%265; BIDUPSID=E695E9B2AF2F6BFFED9BD684584A8956; PSTM=1712380467; BAIDUID=34F3B544DDD48A4C76CCDD75A6DB9841:FG=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=40299_40377_40416_40459_40439_40510_40446_60026_60032_60046_40080; BAIDUID_BFESS=34F3B544DDD48A4C76CCDD75A6DB9841:FG=1; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; userFrom=null; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; ab_sr=1.0.1_Y2E1NjA2ODI1NmIyMWE0OGY3Y2Y5YzA1ZWE1MzBkY2YwMGQ0M2RjYWE5Mjk3YjdiNTUwMmEwZTk2ZGNiODZkNGI0NWVmYzAxODEwNTk5ZjA2NTA4ZTg0OTZhZjAzYjcwNjM3NjU5M2Y2MzY5YTRjNzJhY2MxNDc5MmMzN2ZhMTUwYTQ4MDVlZDViNWZlNDNhZGE1NjRlYjMyOWYwMzY1Mw==',
        'Referer': 'https://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&dyTabStr=MCwxLDMsMiw2LDQsNSw4LDcsOQ%3D%3D&word=%E9%BB%91%E5%AE%A2',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'sec-ch-ua': '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    # Seconds to wait for any single HTTP request before giving up, so a
    # stalled connection cannot hang the crawl forever.
    timeout = 10

    # Default output directory (current working directory); __init__
    # overrides it per instance when a save_dir argument is supplied.
    save_dir = Path('.')

    def __init__(self, word='黑客', pn=30, save_dir=None):
        # Sequence number used as the next image's file name.
        self.cont = 1
        if save_dir:
            self.save_dir = Path(save_dir)
        self.save_dir.mkdir(parents=True, exist_ok=True)

        url = 'https://image.baidu.com/search/acjson'
        # The keyword is passed raw: requests encodes `params` on its own, so
        # quoting it here as well would double-encode the search word.
        # A failed search yields None/False; treat that as "no results".
        links = self.发送请求(url, word, pn) or []
        for link in links:
            # Each entry is an image URL. Download it directly instead of
            # going back through 发送请求, whose 'JPEG' heuristic would treat
            # other image URLs as search queries and drop them.
            self._下载图片(link)

    def 发送请求(self, url, word='黑客', pn=30):
        """Fetch ``url`` as either an image or a search query.

        A URL containing ``'JPEG'`` is treated as an image: it is downloaded
        and stored, and ``None`` is returned. Any other URL is queried as the
        search endpoint with ``word`` and ``pn``.

        Returns:
            For a search: the list of image URLs produced by ``解析源代码``
            (``False`` for an empty response), or ``None`` when the request
            or JSON decoding fails. For an image URL: ``None``.
        """
        if 'JPEG' in url:
            self._下载图片(url)
            return None

        params = {
            "tn": "resultjson_com",
            "logid": "7871683271133482576",
            "ipn": "rj",
            "ct": "201326592",
            "is": "",
            "fp": "result",
            "fr": "",
            "word": word,
            "queryWord": word,
            "cl": "2",
            "lm": "-1",
            "ie": "utf-8",
            "oe": "utf-8",
            "adpicid": "",
            "st": "",
            "z": "",
            "ic": "",
            "hd": "",
            "latest": "",
            "copyright": "",
            "s": "",
            "se": "",
            "tab": "",
            "width": "",
            "height": "",
            "face": "",
            "istype": "",
            "qc": "",
            "nc": "1",
            "expermode": "",
            "nojc": "",
            "isAsync": "",
            "pn": pn,
            "rn": "30",
        }
        try:
            response = requests.get(
                url, params=params, headers=self.headers, timeout=self.timeout
            )
            response.raise_for_status()
            res = response.json()
        except (requests.RequestException, ValueError) as err:
            # ValueError covers a body that is not valid JSON. The search
            # stays best-effort, but the failure is now reported instead of
            # being silently swallowed.
            logger.warning('Search request failed for %r: %s', word, err)
            return None
        return self.解析源代码(res)

    def 解析源代码(self, res):
        """Extract the non-empty ``hoverURL`` values from a search response.

        Args:
            res: Decoded JSON response; expected to be a mapping with a
                ``data`` list of per-image mappings.

        Returns:
            A list of image URLs, or ``False`` when ``res`` is empty/falsy.
        """
        if not res:
            return False
        # A missing or null 'data' entry means "no results", not an error.
        entries = res.get('data') if isinstance(res, dict) else None
        return [
            entry['hoverURL']
            for entry in entries or []
            if isinstance(entry, dict) and entry.get('hoverURL')
        ]

    def 存储数据(self, res):
        """Write image bytes ``res`` to ``<cont>.jpg`` and advance the counter."""
        target = self.save_dir / f'{self.cont}.jpg'
        with open(target, 'wb') as f:
            f.write(res)
        # Advance the counter so the next image does not overwrite this one.
        self.cont += 1

    def _下载图片(self, url):
        """Download one image from ``url`` and store it; skip it on failure."""
        try:
            response = requests.get(
                url, headers=self.headers, timeout=self.timeout
            )
            # Do not save an HTTP error page under a .jpg name.
            response.raise_for_status()
        except requests.RequestException as err:
            logger.warning('Image download failed for %s: %s', url, err)
            return
        self.存储数据(response.content)


# Script entry: constructing the crawler runs the download with the defaults.
批量爬取百度图片()