# 只需要把原来的 Cookie 和 token 替换成自己的即可使用；对应链接地址的 sign 会自动计算，直接使用即可。需要注意的是，同一个账号爬取过多会出现验证码。
import hashlib
import json
import random
import time

import pandas as pd
import requests
# Rows collected by taobao(); main() writes them to the Excel file.
results = []

# Seconds to wait for an HTTP response. requests waits indefinitely when no
# timeout is given, which would hang the whole crawl on one stalled request.
REQUEST_TIMEOUT = 20

# Placeholder credentials for the review request -- replace with your own.
REVIEW_COOKIE = "自己的"
REVIEW_TOKEN = "自己的token"
REVIEW_APP_KEY = "12574478"


def md5_encrypt(data):
    """Return the hexadecimal MD5 digest of *data* encoded as UTF-8."""
    return hashlib.md5(data.encode('utf-8')).hexdigest()


def compact_json(obj):
    """Serialize *obj* as JSON without whitespace, keeping non-ASCII text as-is.

    Using ``json.dumps`` instead of string concatenation guarantees that
    quotes and backslashes inside values are escaped correctly.
    """
    return json.dumps(obj, separators=(",", ":"), ensure_ascii=False)


def strip_jsonp(text, callback):
    """Return the JSON payload of a JSONP response ``callback(<json>)``.

    Only the leading ``callback(`` and the final ``)`` (optionally followed by
    ``;``) are removed, so payload text that happens to contain ``})`` is left
    untouched. Text that is not wrapped in *callback* is returned stripped of
    surrounding whitespace but otherwise unchanged.
    """
    body = text.strip()
    prefix = callback + "("
    if not body.startswith(prefix):
        return body
    body = body[len(prefix):].rstrip()
    if body.endswith(";"):
        body = body[:-1].rstrip()
    if body.endswith(")"):
        body = body[:-1]
    return body


def build_search_data(keyword, page):
    """Build the ``data`` JSON string for the search request.

    The result is an object with ``appId`` and ``params``, where ``params`` is
    itself a JSON document encoded as a string.

    Args:
        keyword: Search text, sent as ``q``.
        page: Page number, sent as the integer ``page``.

    Returns:
        The compact JSON string to send and to sign.
    """
    search_params = {
        "isBeta": "false",
        "grayHair": "false",
        "appId": "29859",
        "from": "",
        "brand": "HUAWEI",
        "info": "wifi",
        "index": "4",
        "ttid": "600000@taobao_android_10.7.0",
        "needTabs": "true",
        "rainbow": "",
        "areaCode": "CN",
        "vm": "nw",
        "schemaType": "auction",
        "elderHome": "false",
        "device": "HMA-AL00",
        "isEnterSrpSearch": "true",
        "newSearch": "false",
        "network": "wifi",
        "subtype": "",
        "hasPreposeFilter": "false",
        "client_os": "Android",
        "gpsEnabled": "false",
        "searchDoorFrom": "srp",
        "debug_rerankNewOpenCard": "false",
        "homePageVersion": "v7",
        "searchElderHomeOpen": "false",
        "style": "wf",
        "page": page,
        "n": "10",
        "q": keyword,
        "search_action": "initiative",
        "sugg": "_4_1",
        "m": "h5",
        "sversion": "13.6",
        "prepositionVersion": "v2",
        "tab": "all",
        "channelSrp": "",
        "tagSearchKeyword": None,
        "sort": "_sale",
        "filterTag": "",
        "prop": "",
        "item_id": "",
    }
    return compact_json({"appId": "29859", "params": compact_json(search_params)})


def fetch_review_list(datas, md5_hash, t, cookie=REVIEW_COOKIE):
    """Request a review list and return the review texts it contains.

    Args:
        datas: JSON string sent as the ``data`` query parameter.
        md5_hash: Signature sent as the ``sign`` query parameter.
        t: Timestamp string sent as the ``t`` query parameter; it must be the
            same value the caller used when computing *md5_hash*.
        cookie: Value of the ``Cookie`` request header.

    Returns:
        A list with the ``reviewWordContent`` of every entry in
        ``data.module.reviewVOList`` of the response.

    Raises:
        requests.RequestException: The HTTP request failed or timed out.
        json.JSONDecodeError: The response body is not valid JSON/JSONP.
        KeyError: The response lacks the expected keys.
    """
    url = "https://h5api.m.tmall.com/h5/mtop.alibaba.review.list.for.new.pc.detail/1.0/"
    callback = "mtopjsonp6"
    params = {
        "jsv": "2.7.2",
        "appKey": REVIEW_APP_KEY,
        "t": t,
        "sign": md5_hash,
        "api": "mtop.alibaba.review.list.for.new.pc.detail",
        "v": "1.0",
        "isSec": "0",
        "ecode": "0",
        "timeout": "20000",
        "ttid": "2022@taobao_litepc_9.17.0",
        "AntiFlood": "true",
        "AntiCreep": "true",
        "preventFallback": "true",
        "type": "jsonp",
        "dataType": "jsonp",
        "callback": callback,
        "data": datas,
    }
    headers = {
        "Cookie": cookie,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        "Referer": "https://detail.tmall.com/",
        "Accept": "*/*",
        "Connection": "keep-alive",
    }
    response = requests.get(url, params=params, headers=headers, timeout=REQUEST_TIMEOUT)
    print(response.text)
    payload = json.loads(strip_jsonp(response.text, callback))
    review_entries = payload['data']['module']['reviewVOList']
    return [entry['reviewWordContent'] for entry in review_entries]


def taobao(sign, datas, appkey, t, coci):
    """Run one search request and append a row per returned item to ``results``.

    For every item in ``data.itemsArray`` of the response, the item's reviews
    are fetched with fetch_review_list() and a row
    ``[item_id, url, picture, title, price, tag, sales, reviews]`` is appended
    to the module-level ``results`` list. Failures are reported with ``print``;
    the first parsing/lookup/network error stops processing of this page.

    Args:
        sign: Signature sent as the ``sign`` query parameter.
        datas: JSON string sent as the ``data`` query parameter.
        appkey: Value of the ``appKey`` query parameter.
        t: Timestamp string used when computing *sign*.
        coci: Value of the ``Cookie`` request header.
    """
    url = "https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/"
    callback = "mtopjsonp2"
    params = {
        "jsv": "2.7.0",
        "appKey": appkey,
        "t": t,
        "sign": sign,
        "api": "mtop.relationrecommend.WirelessRecommend.recommend",
        "v": "2.0",
        "H5Request": "true",
        "preventFallback": "true",
        "type": "jsonp",
        "dataType": "jsonp",
        "callback": callback,
        "data": datas,
    }
    headers = {
        "Cookie": coci,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Referer": "https://h5.m.taobao.com/",
        "Accept": "*/*",
        "Connection": "keep-alive",
    }

    try:
        response = requests.get(url, headers=headers, params=params, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        print(f"请求异常: {e}")
        return

    if response.status_code != 200:
        print(f"请求失败,状态码: {response.status_code}")
        return

    content = strip_jsonp(response.text, callback)
    if not content:
        print("响应内容为空")
        return

    try:
        items = json.loads(content)['data']['itemsArray']
        for item in items:
            product_url = item['auctionURL']
            pic_path = item['pic_path']
            title = item['title']
            print(title)
            price = item['priceShow']['price']
            # Shop tag: text of the first entry in the shop info list.
            tag = item['structuredShopInfo']['infoList'][0]['text']
            sales = item['realSales']
            # Random pause between requests.
            time.sleep(random.randint(1, 5))

            item_id = item['item_id']
            # Sign with the current time, as the search request does, instead
            # of a fixed timestamp that goes stale.
            review_t = str(int(time.time() * 1000))
            review_data = compact_json({
                "itemId": str(item_id),
                "bizCode": "ali.china.tmall",
                "channel": "pc_detail",
                "pageSize": 20,
                "pageNum": 1,
            })
            review_sign = md5_encrypt(
                f"{REVIEW_TOKEN}&{review_t}&{REVIEW_APP_KEY}&{review_data}"
            )
            print("获取评论区")
            reviews = fetch_review_list(review_data, review_sign, review_t)
            results.append(
                [item_id, product_url, pic_path, title, price, tag, sales, reviews]
            )
            time.sleep(random.randint(1, 5))
    except json.JSONDecodeError as e:
        print(f"JSON解析错误: {e}")
    except KeyError as e:
        print(f"键错误: {e}")
    except IndexError as e:
        print(f"索引错误: {e}")
    except requests.RequestException as e:
        print(f"请求异常: {e}")


def main():
    """Crawl search pages 30-49 for the keyword and save the rows to Excel."""
    keyword = "碎花裙"
    token = "自己的token"
    appKey = "12574478"
    coci = "自己的Cookie"

    for page in range(30, 50):
        # Take a fresh timestamp per page; the crawl sleeps between items, so
        # a single timestamp taken up front would age considerably.
        t = str(int(time.time() * 1000))
        data = build_search_data(keyword, page)
        md5_hash = md5_encrypt(f"{token}&{t}&{appKey}&{data}")
        print(page)
        taobao(md5_hash, data, appKey, t, coci)

    # Reviews are collected for the products returned by the search, which
    # presumably lists best sellers first (the request sends sort "_sale").
    df = pd.DataFrame(
        results,
        columns=['商品id', '商品链接', '图片路径', '标题', '价格', '标签', '销量', '用户评论'],
    )
    df.to_excel('淘宝1.xlsx', index=False, engine='openpyxl')


if __name__ == '__main__':
    main()