爬取热门网站的热榜,集中展示;Aggregate and display the hot lists and trending topics of popular Chinese websites
抓取知乎热榜数据存入列表
#!/usr/bin/env python
# encoding: utf-8
"""Fetch the Zhihu billboard page and print the hot list embedded in it."""
__author__ = 'HZT'

import json
import re

# Placeholder request headers; presumably real User-Agent / Cookie values
# must be filled in before the site returns a usable page (TODO confirm).
headers = {"User-Agent": "", "Cookie": ""}
zh_url = "https://www.zhihu.com/billboard"

# The hot list is the value that sits between the "hotList" key and the
# "guestFeeds" key of the page's embedded initial-state script.
HOT_LIST_RULE = re.compile(r'"hotList":(.*?),"guestFeeds"')


def parse_hot_list(script_text):
    """Extract and decode the ``hotList`` value from the initial-state text.

    Args:
        script_text: Text content of the ``<script id="js-initialData">``
            element of the billboard page.

    Returns:
        The decoded ``hotList`` value, as produced by ``json.loads``.

    Raises:
        ValueError: If the ``"hotList":...,"guestFeeds"`` fragment is not
            present, or if the fragment is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    match = HOT_LIST_RULE.search(script_text)
    if match is None:
        raise ValueError('no "hotList" fragment found in the page data')
    # The fragment is JSON, so decode it as JSON. The previous approach
    # (textually replacing true/false and calling eval) executed text taken
    # from a remote page, corrupted titles containing the words "true" or
    # "false", and failed on null.
    return json.loads(match.group(1))


def fetch_hot_list(url, request_headers):
    """Download the billboard page at *url* and return its hot list.

    Args:
        url: Address of the billboard page.
        request_headers: HTTP headers sent with the request.

    Returns:
        The decoded hot list (see ``parse_hot_list``).

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        ValueError: If the page lacks the expected script tag or data.
    """
    # Third-party imports are local so that parse_hot_list can be imported
    # and used with only the standard library available.
    import requests
    from bs4 import BeautifulSoup

    # A timeout keeps the script from hanging forever on a stalled connection.
    zh_response = requests.get(url, headers=request_headers, timeout=10)
    zh_response.raise_for_status()
    soup = BeautifulSoup(zh_response.text, "html.parser")
    script_tag = soup.find("script", id="js-initialData")
    if script_tag is None:
        raise ValueError('no <script id="js-initialData"> element in the page')
    return parse_hot_list(script_tag.get_text())


if __name__ == "__main__":
    hot_list = fetch_hot_list(zh_url, headers)
    print(hot_list)
抓取微博热门数据存入列表
#!/usr/bin/env python
# encoding: utf-8
"""Fetch the Weibo hot-search summary page and print its topics."""
from urllib.parse import quote

url = "https://s.weibo.com/top/summary"
# Placeholder request headers; presumably real User-Agent / Cookie values
# must be filled in before the site returns a usable page (TODO confirm).
headers = {"User-Agent": "", "Cookie": ""}


def build_topic(item_index, item_title, item_mark, item_level):
    """Assemble one topic record from the text of a table row.

    Args:
        item_index: Text of the row's ``td-01`` cell; an empty string is
            stored as ``"0"``.
        item_title: Text of the link inside the row's ``td-02`` cell.
        item_mark: Text of the ``<span>`` inside the ``td-02`` cell, or
            ``None`` when the cell has no span (stored as ``"置顶"``).
        item_level: Text of the row's ``td-03`` cell.

    Returns:
        A dict with the keys ``index``, ``title``, ``mark``, ``level`` and
        ``link``.
    """
    # Percent-encode the title so characters such as "&", "#" or spaces
    # cannot break the query string of the search link.
    encoded_title = quote(item_title, safe="")
    return {
        "index": item_index if item_index != "" else "0",
        "title": item_title,
        "mark": item_mark if item_mark is not None else "置顶",
        "level": item_level,
        "link": f"https://s.weibo.com/weibo?q=%23{encoded_title}%23&Refer=top",
    }


def parse_topics(webcontent):
    """Parse the summary page HTML into a list of topic records.

    Args:
        webcontent: HTML text of the hot-search summary page.

    Returns:
        A list of dicts as produced by ``build_topic``, in page order.
    """
    # Imported locally so build_topic stays usable with only the standard
    # library available.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(webcontent, "html.parser")
    index_list = soup.find_all("td", class_="td-01")
    title_list = soup.find_all("td", class_="td-02")
    level_list = soup.find_all("td", class_="td-03")

    topic_list = []
    # zip() walks the three columns in lockstep and stops at the shortest
    # one, so a mismatch in cell counts cannot raise IndexError.
    for index_cell, title_cell, level_cell in zip(index_list, title_list, level_list):
        if title_cell.a is None:
            # A title cell without a link has no topic to record.
            continue
        span = title_cell.span
        topic_list.append(
            build_topic(
                index_cell.get_text(strip=True),
                title_cell.a.get_text(strip=True),
                span.get_text(strip=True) if span is not None else None,
                level_cell.get_text(strip=True),
            )
        )
    return topic_list


def fetch_topics(page_url, request_headers):
    """Download the summary page at *page_url* and return its topics.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
    """
    import requests

    # A timeout keeps the script from hanging forever on a stalled connection.
    wb_response = requests.get(page_url, headers=request_headers, timeout=10)
    wb_response.raise_for_status()
    return parse_topics(wb_response.text)


if __name__ == "__main__":
    topic_list = fetch_topics(url, headers)
    print(topic_list)