快手数据获取相对简单:访问地址固定,且不需要登录 token。
获取视频列表的固定接口地址:
https://www.kuaishou.com/graphql
向该接口发送 POST 请求,注意请求参数中需要带上每个快手账号对应的 id(即 userId)。
import time
from datetime import datetime
import logging
import json
import pymysql
import requests# 创建一个logger
# Logging setup: a dedicated logger that writes every record (DEBUG and up)
# to the UTF-8 encoded file ks.log.
LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

logger = logging.getLogger('my_logger')

# File handler that persists the log records, with its output layout.
fh = logging.FileHandler('ks.log', encoding='utf-8')
formatter = logging.Formatter(LOG_FORMAT)
fh.setFormatter(formatter)

# Attach the handler and open the logger up to the most verbose level.
logger.addHandler(fh)
logger.setLevel(logging.DEBUG)

# NOTE: the database connection settings that follow must be adjusted
# for the target environment.
# Open the MySQL connection used for both reading the account list and
# writing the scraped videos.
# NOTE: host and credentials are hard-coded; adjust them for your environment.
mydatabase = pymysql.connect(
    host='localhost',
    user='root',
    password='123456',
    database='ry',
    charset='utf8mb4',
)
cursor = mydatabase.cursor()

# Select the accounts to scrape. Each row is (id, base_media_name, dy_url);
# the rest of the script passes dy_url to the API as the Kuaishou userId.
ACCOUNT_QUERY = 'SELECT id,base_media_name,dy_url FROM `media_account_manager2` where `type_id` = 484 AND `status` = 3 and dy_url is not null'
cursor.execute(ACCOUNT_QUERY)
result = cursor.fetchall()
# HTTP headers sent with every Kuaishou request: a fixed web cookie and a
# desktop Chrome User-Agent string.
_COOKIE_PARTS = (
    'kpf=PC_WEB',
    'clientid=3',
    'did=web_8239e5591749f85a281700fcf0834715',
    'didv=1719032992223',
    'kpn=KUAISHOU_VISION',
)
_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/120.0.0.0 Safari/537.36'
)

headers = {
    'Cookie': '; '.join(_COOKIE_PARTS),
    'User-Agent': _USER_AGENT,
}
now = datetime.now()
now_formatted_date = now.strftime("%Y-%m-%d")

# Fixed GraphQL endpoint that serves a profile's video list.
KS_GRAPHQL_URL = 'https://www.kuaishou.com/graphql'
# Seconds to wait for the endpoint before giving up (avoids hanging forever).
KS_REQUEST_TIMEOUT = 30
# Pause, in seconds, before each account's request.
KS_REQUEST_INTERVAL = 5

# GraphQL document for the visionProfilePhotoList operation.
PROFILE_PHOTO_LIST_QUERY = (
    "fragment photoContent on PhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n}\n\n"
    "fragment recoPhotoFragment on recoPhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n}\n\n"
    "fragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n ...recoPhotoFragment\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\n"
    "query visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"
)

# Parameterized insert for one scraped video.
INSERT_CONTENT_SQL = "INSERT INTO `ry`.`media_content`(`title`, `pub_date`, `url`, `content`, `media_id`, `media_name`,`type_id`,`platform`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"


def _build_profile_payload(user_id):
    """Return the JSON body requesting the first page of *user_id*'s videos."""
    return {
        "operationName": "visionProfilePhotoList",
        "variables": {"userId": user_id, "pcursor": "", "page": "profile"},
        "query": PROFILE_PHOTO_LIST_QUERY,
    }


def _extract_feeds(payload):
    """Return the list of feeds in a decoded response, or [] if absent.

    Uses ``or`` fallbacks because ``dict.get(key, default)`` still returns
    None when the key is present with a JSON ``null`` value.
    """
    if not isinstance(payload, dict):
        return []
    data = payload.get('data') or {}
    photo_list = data.get('visionProfilePhotoList') or {}
    return photo_list.get('feeds') or []


def _parse_feed(feed):
    """Return ``(title, pub_date, url)`` for one feed, or None if unusable.

    ``pub_date`` is formatted as ``%Y-%m-%d %H:%M:%S`` from the photo's
    millisecond timestamp, interpreted in the local time zone. A feed with a
    missing or non-numeric timestamp cannot be dated and yields None.
    """
    if not isinstance(feed, dict):
        return None
    photo = feed.get('photo') or {}
    timestamp = photo.get('timestamp')
    if isinstance(timestamp, bool) or not isinstance(timestamp, (int, float)):
        return None
    published = datetime.fromtimestamp(timestamp / 1000)
    return (
        photo.get('caption', 'null'),
        published.strftime("%Y-%m-%d %H:%M:%S"),
        photo.get('photoUrl', 'null'),
    )


# Bound up front so the except handler below can always log them, even when
# the failure happens before the first account or feed is reached.
ks_name = None
item = None
try:
    for row in result:
        m_id, ks_name, ks_url = row[0], row[1], row[2]
        item = None  # do not report a previous account's feed on failure
        time.sleep(KS_REQUEST_INTERVAL)
        print(ks_name)
        print('***************************************')
        response = requests.post(
            url=KS_GRAPHQL_URL,
            json=_build_profile_payload(ks_url),
            headers=headers,
            timeout=KS_REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            # Previously ignored silently; record it and move on.
            logger.warning(
                'Kuaishou request for account %s returned HTTP %s',
                ks_name, response.status_code,
            )
            continue
        data = _extract_feeds(response.json())
        print(data)
        for item in data:
            parsed = _parse_feed(item)
            if parsed is None:
                logger.warning(
                    'Skipping feed without a usable timestamp for account %s: %s',
                    ks_name, item,
                )
                continue
            itemName, create_time_str, photoUrl = parsed
            print(itemName)  # video title
            print(photoUrl)  # video URL
            print(create_time_str)  # video publish time
            text = ''
            cursor.execute(
                INSERT_CONTENT_SQL,
                (itemName, create_time_str, photoUrl, text, m_id, ks_name, '483', '快手自动抓取'),
            )
            # Commit per video so rows already stored survive a later failure.
            mydatabase.commit()
except Exception as e:
    # Any failure stops the run; the remaining accounts are not processed.
    logger.info('******快手获取发生错误********')
    logger.exception(e)  # same message as before, now with the traceback
    logger.info(item)
    # Lazy %-formatting: cannot raise when ks_name is None.
    logger.info('******快手账号:%s,数据获取异常******', ks_name)
else:
    logger.info('******快手数据结束********')
finally:
    mydatabase.close()