1、html标记语言了解
<html>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<h1>我的祖国</h1>
<h1 align="center">我的祖国</h1>
# h1 标签
# align 属性
# center 属性值
<标签 属性="属性值">被标记的内容</标签>
<img src="xxx.jpg"/>
<a href="http://www.baidu.com">百度</a>
</html>
2、BeautifulSoup模块介绍
# 1.拿到页面源代码
# 2.使用bs4进行解析 拿到数据
import requests
from bs4 import BeautifulSoup
import csv
# Scrape the price table from the page at `url` and save every data row
# to "菜价.csv" (one CSV row per table row).
url = "http://www.xinfadi.com.cn/marketanalysis/0/list/1.shtml"

# Step 1: fetch the page source. The response is used as a context manager
# so the connection is released even if parsing raises.
with requests.get(url) as resp:
    # Hand the page source to BeautifulSoup to build the parsed document.
    # The parser is named explicitly; BeautifulSoup(resp.text) alone makes
    # bs4 guess a parser and emit a warning.
    page = BeautifulSoup(resp.text, "html.parser")

# Step 2: look up elements in the parsed document.
#   find(tag, attribute=value)     -> first match, or None
#   find_all(tag, attribute=value) -> list of all matches
# `class` is a Python keyword, hence the `class_` spelling; the equivalent
# form is page.find("table", attrs={"class": "hq_table"}).
table = page.find("table", class_="hq_table")
if table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise RuntimeError(f"no <table class='hq_table'> found at {url}")

# All rows of the table except the first one (the header row).
trs = table.find_all("tr")[1:]

# newline="" is required by the csv module so that no blank line is
# inserted between rows on platforms that translate line endings.
with open("菜价.csv", mode="w", encoding="utf-8", newline="") as f:
    csvwriter = csv.writer(f)
    for tr in trs:
        tds = tr.find_all("td")  # every cell of the current row
        if len(tds) < 7:
            # Not a full data row (seven cells are read below); skip it
            # rather than crash with an IndexError.
            continue
        # Seven consecutive cells, one per field. The last field is the
        # seventh cell (index 6); reading index 5 again would just repeat
        # `kind`.
        name, low, average, high, gui, kind, date = (
            td.text for td in tds[:7]
        )
        print(name, low, average, high, gui, kind, date)
        csvwriter.writerow([name, low, average, high, gui, kind, date])