此方法会将pdf的每一页转成一张图片
1、python代码
import fitz # pip install PyMuPDF
import sys
import io
import os
from PIL import Image


def convert_pdf_to_image(pdf_path, image_folder_path):
    """Render every page of a PDF to its own PNG file.

    Each page is written to ``image_folder_path`` as
    ``<pdf file name without extension>_<page number>.png``; page numbers
    start at 1. The output folder is created when it does not exist yet.

    Args:
        pdf_path: Path of the PDF file to convert.
        image_folder_path: Folder that receives the generated images.
    """
    pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]

    # The context manager closes the document even when a page fails to render.
    with fitz.open(pdf_path) as doc:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(image_folder_path, exist_ok=True)

        for page_num, page in enumerate(doc, start=1):
            pix = page.get_pixmap()

            # Hand the rendered page to Pillow through an in-memory PNG.
            img_bytes = io.BytesIO(pix.tobytes("png"))

            image_file_name = f"{pdf_name}_{page_num}.png"
            image_file_path = os.path.join(image_folder_path, image_file_name)
            with Image.open(img_bytes) as page_image:
                page_image.save(image_file_path)


if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: python pdf_to_image.py [PDF path] [Image output path]")
        sys.exit(1)

    pdf_path = sys.argv[1]
    image_path = sys.argv[2]
    convert_pdf_to_image(pdf_path, image_path)
    # The Go caller compares stdout against this exact line; keep it unchanged.
    print("Conversion completed")
2、golang代码
package pdfToImg

import (
	"bytes"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"sort"
	"strconv"
	"strings"
)

// PdfToImg converts a PDF into images by running the Python script.
//
//   - pdfPath: path of the PDF file.
//   - imgPath: where the images are saved (this should be a directory path).
//   - pythonPath: path of the Python script.
//
// It returns the image file names (<pdf file name>_<page number>), already
// sorted by page number; adjust as needed.
func PdfToImg(pdfPath, imgPath, pythonPath string) ([]string, error) {// 1、使用脚本转换pdfoutput, err :=executePythonScript(pdfPath, imgPath, pythonPath)if err != nil {return nil, err}output = strings.ReplaceAll(output, "\r\n", "")if output != "Conversion completed" {return nil, errors.New("脚本执行失败")}// 2、读取文件return readAndSortImageNames(imgPath)
}// 读取文件并跟进页数进行排序
func readAndSortImageNames(dir string) ([]string, error) {var imageNames []stringerr := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {if err != nil {return err}if !info.IsDir() {switch filepath.Ext(path) {case ".png", ".jpg", ".jpeg", ".gif", ".bmp":imageNames = append(imageNames, info.Name())}}return nil})if err != nil {return nil, err}// 根据数字排序sort.Slice(imageNames, func(i, j int) bool {return extractNumber(imageNames[i]) < extractNumber(imageNames[j])})return imageNames, nil
}// extractNumber 从文件名中提取数字
func extractNumber(filename string) int {
	// The number is the text after the last underscore with the file
	// extension removed, e.g. "report_12.png" yields 12. A name without an
	// underscore, or whose trailing segment is not an integer, yields -1.
	idx := strings.LastIndex(filename, "_")
	if idx < 0 {
		return -1
	}

	numberStr := strings.TrimSuffix(filename[idx+1:], filepath.Ext(filename))
	number, err := strconv.Atoi(numberStr)
	if err != nil {
		return -1
	}
	return number
}
注意:Linux 和 Windows 下调用 Python 解释器的命令不同(Linux 一般为 python3,Windows 一般为 python),请根据实际运行环境调整。
// executePythonScript runs the Python script at pythonPath with pdfPath and
// imgPath as its two arguments and returns everything the script wrote to
// standard output.
//
// The interpreter command depends on the operating system the program runs
// on: "python" on Windows and "python3" everywhere else. A single definition
// therefore serves both platforms; two functions with the same name cannot
// coexist in one package.
//
// When the interpreter cannot be started or the script exits with a non-zero
// status, the returned string is empty and the error wraps the underlying
// error, followed by whatever the script wrote to standard error.
func executePythonScript(pdfPath, imgPath, pythonPath string) (string, error) {
	interpreter := "python3"
	if runtime.GOOS == "windows" {
		interpreter = "python"
	}

	cmd := exec.Command(interpreter, pythonPath, pdfPath, imgPath)
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	// Capture stderr so a Python traceback is reported instead of discarded.
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		if msg := strings.TrimSpace(stderr.String()); msg != "" {
			return "", fmt.Errorf("running %s %q: %w: %s", interpreter, pythonPath, err, msg)
		}
		return "", fmt.Errorf("running %s %q: %w", interpreter, pythonPath, err)
	}
	return stdout.String(), nil
}