1、模块介绍
BeautifulSoup模块用于解析html源代码,并从中筛选出需要的内容,作用类似于用正则表达式提取文本。
2、代码
import os
import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
from urllib.parse import urljoin

# Download every <img> referenced on the Douban front page into a local
# folder, then delete any file that Pillow cannot validate as an image.

page_url = 'https://www.douban.com/'

# requests expects headers as a mapping of header name -> value, so the
# browser string must sit under the 'User-Agent' key (a bare {'...'} literal
# is a set and cannot be used as headers).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0'
    )
}

# A timeout keeps the script from hanging forever on a stalled connection.
res = requests.get(page_url, headers=headers, timeout=10)
# Fail loudly on an error status instead of silently parsing an error page.
res.raise_for_status()

output_dir = 'downloaded_images'
os.makedirs(output_dir, exist_ok=True)

# res.text is the decoded response body; 'html.parser' selects the parser
# that ships with the Python standard library.
soup = BeautifulSoup(res.text, 'html.parser')
img_tags = soup.find_all('img')

for idx, img in enumerate(img_tags):
    img_url = img.get('src')
    if not img_url:
        continue

    # Resolve relative ("/a.png") and protocol-relative ("//host/a.png")
    # sources against the page URL; absolute URLs pass through unchanged.
    img_url = urljoin(page_url, img_url)

    # NOTE: every file is saved with a .jpg suffix regardless of its real format.
    img_name = f'image_{idx}.jpg'
    img_path = os.path.join(output_dir, img_name)

    try:
        response = requests.get(img_url, headers=headers, timeout=10)
        response.raise_for_status()  # make sure the request succeeded
        with open(img_path, 'wb') as file:
            file.write(response.content)
        print(f"图片 {img_name} 已下载")

        try:
            image = Image.open(BytesIO(response.content))
            image.verify()  # check that the image data is not corrupted
            print(f"图片 {img_name} 通过自动检测")
        except Exception as e:
            print(f"图片 {img_name} 自动检测失败:{e}")
            # Discard the file that was just written: it is not a valid image.
            os.remove(img_path)
    except Exception as e:
        # Best-effort: one failed image must not abort the remaining downloads.
        print(f"下载图片失败:{e}")
具体就长这个样子(这里的res.text是把响应内容转化成文本,在其他的地方可能会直接把res的内容传进去;后面那个'html.parser'是指定的解析器)