使用Python调用ImageMagick生成PDF文件缩略图
ImageMagick使用Ghostscript作为其依赖项之一,以便能够处理和转换PDF相关的图像。
准备
- 安装Ghostscript(官方网站:https://www.ghostscript.com/)
- 安装ImageMagick(官方网站:https://imagemagick.org/)
安装完毕后,需要自行将两者的安装目录加入系统环境变量 PATH,确保在命令行中可以直接运行 magick 命令
脚本
使用示例:
python .\get_thumbnail.py --filepath .\paper.pdf --page [4-8,12-17,20,24,27-] --output-dir test-dir
输出:
支持灵活传入页面参数
例如 `[4-8,12-17,20,24,27-]`:
- 4-8、12-17:第4页到第8页、第12页到第17页
- 20、24:第20页、第24页
- 27-:从第27页到最后一页
import subprocess
import argparse
from pathlib import Path
from math import sqrt# 解析参数
# Command-line interface. The parsed result is stored in the module-level
# ``args`` object, which get_shape() and the __main__ section read directly.
parser = argparse.ArgumentParser()
# --filepath is required: without it the script would hand the literal text
# "None[...]" to ImageMagick instead of reporting a usage error.
parser.add_argument("--filepath", type=str, required=True, help="pdf文件路径")
parser.add_argument(
    "--page",
    type=str,
    default="all",
    help="指定要生成缩略图的pdf文件页面 e.[1-3,8,10,27-] 默认为全部页面",
)
parser.add_argument(
    "--shape",
    type=str,
    default="cube",
    help="指定缩略图的形状 e.[8x3] 默认为正方形",
)
parser.add_argument(
    "--output-dir",
    type=str,
    default="./images",
    help="指定输出文件夹",
)
args = parser.parse_args()


# Helper that calls an external program to get the total page count of the PDF
def get_file_page_num(filepath: str) -> int:
    """Return the number of pages in *filepath* as reported by ImageMagick.

    Runs ``magick identify -format %n <filepath>`` and parses its standard
    output. The parsing assumes the output is the page count printed once per
    page with no separator (for example ``"333"`` for a 3-page file, or
    ``"10"`` repeated ten times for a 10-page file), so the count is the
    number N whose decimal text repeated N times equals the whole output.

    Args:
        filepath: Path of the PDF file to inspect.

    Returns:
        The page count as a positive integer.

    Raises:
        ValueError: If the output is empty or does not have the expected
            repeated-count form (for example because ``magick`` failed).
    """
    result = subprocess.run(
        ["magick", "identify", "-format", "%n", filepath],
        stdout=subprocess.PIPE,
    )
    output = result.stdout.decode("utf-8").strip()

    # The count can never exceed len(output), so it cannot have more digits
    # than len(output) itself; this bounds the search for any page count.
    max_digits = len(str(len(output)))
    for digits in range(1, max_digits + 1):
        prefix = output[:digits]
        if not prefix.isdigit():
            break
        count = int(prefix)
        # Cheap length check first, then confirm the full repetition.
        if count > 0 and digits * count == len(output) and str(count) * count == output:
            return count

    raise ValueError(
        f"could not determine the page count of {filepath!r} "
        f"from ImageMagick output {output[:40]!r}"
    )


# Argument preprocessing
# Work out how many pages are selected and which page numbers they are
def get_dst_page(filepath: str, page_str: str):
    """Parse the ``--page`` argument into a page count and a page list.

    ``page_str`` is either ``"all"`` or a comma-separated list, optionally
    wrapped in square brackets, whose items are:

    * ``N``    - a single page,
    * ``A-B``  - the inclusive range A..B,
    * ``A-``   - page A through the last page of the file.

    Page numbers are not shifted here. The script later passes them to
    ImageMagick's ``file[n]`` selector unchanged, and the open-ended form uses
    ``page_count - 1`` as the last page, i.e. numbering starts at 0. The
    ``"all"`` form therefore covers ``0 .. page_count - 1`` as well.

    Args:
        filepath: PDF path; only read (via get_file_page_num) when the page
            count is needed, i.e. for ``"all"`` or an ``A-`` item.
        page_str: The page specification described above. Spaces are ignored.

    Returns:
        A ``(total_num, detail_page)`` tuple: the number of selected pages and
        a list whose entries are ``int`` (single page) or ``(start, end)``
        tuples (inclusive range), in the order given.

    Raises:
        ValueError: If an item is not numeric, or a range ends before it
            starts (including an ``A-`` item that starts past the last page).
    """
    if page_str == "all":
        page_count = get_file_page_num(filepath)
        return page_count, [(0, page_count - 1)]

    # Strip the surrounding brackets only when they are really there, so a
    # bare "4-8,20" does not lose its first and last characters.
    spec = page_str.replace(" ", "")
    if spec.startswith("["):
        spec = spec[1:]
    if spec.endswith("]"):
        spec = spec[:-1]

    total_num = 0
    detail_page = []
    page_count = None  # looked up lazily, at most once
    for item in spec.split(","):
        if not item:
            # Tolerate "[]", doubled commas and trailing commas.
            continue
        if "-" not in item:
            total_num += 1
            detail_page.append(int(item))
            continue
        if item.endswith("-"):
            start = int(item[:-1])
            if page_count is None:
                page_count = get_file_page_num(filepath)
            end = page_count - 1
        else:
            start, end = map(int, item.split("-"))
        if end < start:
            raise ValueError(f"invalid page range {item!r}: it ends before it starts")
        total_num += end - start + 1
        detail_page.append((start, end))
    return total_num, detail_page


def get_per_page_idx(detail_page_scope: list):
    """Expand a page list from get_dst_page into individual page numbers.

    Args:
        detail_page_scope: List of ``int`` pages and ``(start, end)``
            inclusive range tuples.

    Returns:
        A flat list of page numbers in the same order, with every range
        expanded to each page it contains.
    """
    pages = []
    for item in detail_page_scope:
        if isinstance(item, tuple):
            pages.extend(range(item[0], item[1] + 1))
        else:
            pages.append(item)
    return pages


# Automatically infer a reasonable grid shape
def infer_shape(num):
    """Pick a montage grid ``(columns, rows)`` able to hold *num* thumbnails.

    The row count is ``floor(sqrt(num / sqrt(2)))``, clamped to at least 1 so
    that a single thumbnail does not cause a division by zero. The column
    count is the smallest value for which ``columns * rows >= num``, so the
    grid has no fully empty column.

    Args:
        num: Number of thumbnails to place; must be at least 1.

    Returns:
        A ``(columns, rows)`` tuple of positive integers.

    Raises:
        ValueError: If *num* is less than 1.
    """
    if num < 1:
        raise ValueError(f"cannot infer a grid shape for {num} pages")
    rows = max(1, int(sqrt(num / sqrt(2))))
    cols = (num + rows - 1) // rows  # ceiling division
    return cols, rows


def get_shape(num):
    """Return the montage ``-tile`` value as a ``"<columns>x<rows>"`` string.

    When ``--shape`` is left at its default ``"cube"`` the grid is inferred
    from *num* with infer_shape(); otherwise the user-supplied shape is
    returned with spaces removed.

    Args:
        num: Number of thumbnails that will be tiled.
    """
    if args.shape == "cube":
        cols, rows = infer_shape(num)
        return f"{cols}x{rows}"
    return args.shape.replace(" ", "")


if __name__ == "__main__":
    output_dir = Path(args.output_dir)
    # parents/exist_ok: nested output paths work and reruns do not fail.
    output_dir.mkdir(parents=True, exist_ok=True)

    total_num, detail_page = get_dst_page(args.filepath, args.page)

    # Call the external program to turn each requested page into a thumbnail.
    # "-alpha remove" is required; otherwise the generated images always have
    # a black background.
    # The commands are passed as argument lists WITHOUT shell=True: with
    # shell=True on POSIX only "magick" would be executed and every other
    # argument silently dropped.
    for item in detail_page:
        if isinstance(item, tuple) and item[0] != item[1]:
            # Multi-page range: this relies on ImageMagick writing one file
            # per page named output-<page>.png, as the montage input list
            # below assumes.
            selector = f"[{item[0]}-{item[1]}]"
            target = output_dir / "output.png"
        else:
            # Single page (including a one-page range): name the file
            # explicitly, because a single image written to "output.png"
            # would not get the "-<page>" suffix montage looks for.
            page = item[0] if isinstance(item, tuple) else item
            selector = f"[{page}]"
            target = output_dir / f"output-{page}.png"
        subprocess.run(
            [
                "magick",
                "convert",
                "-thumbnail",
                "x800",
                "-alpha",
                "remove",
                f"{args.filepath}{selector}",
                str(target),
            ]
        )

    pages = get_per_page_idx(detail_page)
    print("参与生成缩略图的页面为:", pages)
    image_list = [str(output_dir / f"output-{page}.png") for page in pages]

    # Call the external program to merge the thumbnails into one image.
    subprocess.run(
        [
            "magick",
            "montage",
            *image_list,
            "-geometry",
            "+0+0",
            "-border",
            "3",
            "-bordercolor",
            "black",
            "-tile",
            get_shape(total_num),
            str(output_dir / "result.jpg"),
        ]
    )