1、爬取豆瓣电影top250
import requests
from bs4 import BeautifulSoup

# Script: print the numbered titles of the Douban Movie Top 250 list,
# fetching one 25-entry page at a time.

# Browser-like User-Agent sent with every request.
# NOTE(review): presumably needed because the site rejects the default
# python-requests User-Agent -- confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# Pages are selected with the `start` query parameter: 0, 25, ..., 225.
for offset in range(0, 250, 25):
    print(f"--------第{offset + 1}到{offset + 25}个电影------------")
    response = requests.get(
        f"https://movie.douban.com/top250?start={offset}",
        headers=headers,
        # Without a timeout a stalled connection would block forever.
        timeout=10,
    )
    if not response.ok:
        print("请求失败")
        continue

    soup = BeautifulSoup(response.text, "html.parser")
    # get_text() always returns a str, whereas .string is None for a span
    # with nested tags and would make the "/" test below raise TypeError.
    span_texts = (
        span.get_text()
        for span in soup.find_all("span", attrs={"class": "title"})
    )
    # Spans whose text contains "/" are skipped.
    # NOTE(review): these look like the alternate-language titles -- confirm.
    primary_titles = (text for text in span_texts if "/" not in text)
    # Numbering continues across pages: the first kept title on a page is
    # ranked offset + 1.
    for rank, movie_title in enumerate(primary_titles, start=offset + 1):
        print(f"{rank}、{movie_title}")
2、爬取价格
import requests
from bs4 import BeautifulSoup

# Script: print the price of every book listed on the books.toscrape.com
# landing page, without the currency symbol.

# Without a timeout a stalled connection would block forever.
response = requests.get("http://books.toscrape.com/", timeout=10)
if response.ok:
    # Pass the raw bytes so BeautifulSoup picks the encoding from the
    # document itself instead of relying on requests' header-based guess.
    soup = BeautifulSoup(response.content, "html.parser")
    all_prices = soup.find_all("p", attrs={"class": "price_color"})
    # Dump of the raw matched tags, kept from the original script.
    print(all_prices)
    for price in all_prices:
        price_text = price.get_text(strip=True)
        # The old `[2:]` slice assumed exactly two characters precede the
        # amount (what a mis-decoded pound sign looks like). Splitting on
        # the symbol works whether or not the page was decoded correctly;
        # if no symbol is present the text is printed unchanged.
        print(price_text.partition("£")[2] or price_text)
else:
    print("请求失败")
3、爬取书名
import requests
from bs4 import BeautifulSoup

# Script: print the title of every book listed on the books.toscrape.com
# landing page. Each book's link lives inside an <h3> element.

# Without a timeout a stalled connection would block forever.
response = requests.get("http://books.toscrape.com/", timeout=10)
if response.ok:
    # Pass the raw bytes so BeautifulSoup picks the encoding from the
    # document itself instead of relying on requests' header-based guess.
    soup = BeautifulSoup(response.content, "html.parser")
    for heading in soup.find_all("h3"):
        for link in heading.find_all("a"):
            # NOTE(review): on this site the visible link text appears to
            # be truncated for long names while the full name is stored in
            # the anchor's `title` attribute -- confirm. Fall back to the
            # visible text when the attribute is missing.
            print(link.get("title") or link.string)
else:
    print("请求失败")