1、爬取百度热搜页面的 body 内容并存入 txt 文件
def get_baidu_hot():
    """Fetch the Baidu realtime hot-search board and save its <body> markup.

    Downloads the page with a browser-like User-Agent, parses it with
    BeautifulSoup, prints every ``<body>`` element found and hands the
    result to ``my_utils.write_file``.

    Raises:
        requests.RequestException: if the request times out, cannot be
            completed, or the server answers with an HTTP error status.
    """
    url = "https://top.baidu.com/board?tab=realtime"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly instead of saving an error page as if it were the board.
    response.raise_for_status()
    # Force UTF-8 so the Chinese text is decoded correctly.
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "html.parser")
    txt = soup.find_all("body")
    print(txt)
    my_utils.write_file(txt)
2、读取 txt 文件,用正则匹配提取其中的 JSON 数据
# Load the saved text through my_utils.read_file(); presumably this is the
# file written by get_baidu_hot() — confirm against my_utils.
data=my_utils.read_file()
3、将json存入数据库
# Parse the loaded text with my_utils.ana_baidu(). The result is iterated
# further down as a sequence of dict-like records (each is read via .items()).
json2=my_utils.ana_baidu(data)
def _sql_identifier(name):
    """Quote *name* as a backtick-delimited identifier, doubling embedded backticks."""
    return "`" + str(name).replace("`", "``") + "`"


def _sql_literal(value):
    """Render *value* as a single-quoted SQL string literal.

    Backslashes and single quotes are escaped so scraped text cannot
    terminate the literal early.
    NOTE(review): assumes MySQL with backslash escapes enabled (the
    backtick-quoted identifiers suggest MySQL) — confirm. Prefer
    parameterized queries if my_sql.exe_sql supports them.
    """
    text = str(value).replace("\\", "\\\\").replace("'", "''")
    return "'" + text + "'"


def _build_insert_sql(table, record):
    """Return one INSERT statement that stores every key/value of *record* in *table*."""
    columns = ", ".join(_sql_identifier(key) for key in record)
    values = ", ".join(_sql_literal(value) for value in record.values())
    return f"INSERT INTO {table} ({columns}) VALUES ({values});"


# Assume the table is named "users".
table_name = "users"

# Walk the parsed records, generating and executing one INSERT per record.
# Local names deliberately avoid rebinding the builtin ``str``, which code
# further down the script still needs to call.
insert_statements = []
for record in json2:
    if not record:
        # An empty record would produce a malformed statement; skip it.
        continue
    statement = _build_insert_sql(table_name, record)
    print(statement)
    insert_statements.append(statement)
    my_sql.exe_sql(statement)
4、读取数据库信息生成词云
# Pull the 50 most recent `desc` values, flatten the query result to text,
# drop the particle "的" and feed the remaining text to the word-cloud builder.
result_content = str(
    my_sql.query_sql("select `desc` from users order by create_time desc limit 50")
).replace("的", "")
my_wcloud.create_cy(result_content)
生成词云:
代码:
javaDev/public_python
ssh:
git@gitee.com:wangchao_1/public_python.git