PDF2IMG
需要安装 Python 的 img2pdf 和 pdf2image 两个包(pip install img2pdf pdf2image)。此外还需要从 https://github.com/oschwartz10612/poppler-windows/releases/ 下载 poppler,解压后把解压目录下的 bin 目录(新版本位于 Library/bin)添加到环境变量 PATH。
import multiprocessing
import sys
from datetime import time
import time
from pdf2image import convert_from_path
import os
from tqdm import tqdm
def getFiles(path):
    """Return every file found below *path*, recursively.

    Args:
        path: Root directory to walk.

    Returns:
        A list of file paths (walk root joined with the file name) in which
        every backslash has been replaced by ``/``. The list is empty when
        the walk finds no files.
    """
    return [
        os.path.join(home, name).replace('\\', '/')
        for home, _dirs, names in os.walk(path)
        for name in names
    ]


def report(outputpath, file):
    """Append a "<file> has been converted." line to the report and print it.

    Args:
        outputpath: Directory holding ``convertreport.txt``.
        file: Name of the file that was converted.
    """
    message = file + " has been converted."
    report_path = os.path.join(outputpath, "convertreport.txt")
    # Explicit UTF-8 so non-ASCII file names never depend on the locale codec.
    with open(report_path, "a", encoding="utf-8") as log:
        log.write(message + " \n")
    print(message)


def convert2(outputpath, pages, file):
    """Save each page image as ``<file>_<n>.png`` inside *outputpath*.

    Args:
        outputpath: Existing directory that receives the PNG files.
        pages: Iterable of page objects exposing ``save(target, format)``.
        file: Base name used for the output files; page numbers start at 1.
    """
    for pagecount, page in enumerate(pages, start=1):
        outputname = '%s_%d.png' % (file, pagecount)
        page.save(os.path.join(outputpath, outputname), 'PNG')


def convert(path, outputpath, dpi=500, size=(1300, 1500)):
    """Render one PDF file to PNG images, one image per page.

    The images are named after the PDF's file name including its extension,
    so ``name.pdf`` produces ``name.pdf_1.png``, ``name.pdf_2.png``, ...

    Args:
        path: Path of the PDF file to render.
        outputpath: Directory for the PNG files; created when missing.
        dpi: Resolution handed to ``convert_from_path``.
        size: Output size handed to ``convert_from_path``.

    Returns:
        *outputpath*, unchanged.
    """
    os.makedirs(outputpath, exist_ok=True)
    pages = convert_from_path(str(path), dpi, size=size)
    filename = os.path.split(path)[1]
    convert2(outputpath, pages, filename)
    return outputpath


def main(path=r'C:\Users\Administrator\Desktop\1/'):
    """Convert every PDF below *path* to PNGs in ``<path>/pics`` and time it.

    Args:
        path: Directory searched recursively for PDF files.
    """
    time_start = time.perf_counter()
    file_finder = os.path.join(path, 'pics')
    # Match the extension case-insensitively so "scan.PDF" is not skipped,
    # and filter up front so the progress bar only counts real work.
    pdf_files = [
        file for file in getFiles(path) if file.lower().endswith('.pdf')
    ]
    for file in tqdm(pdf_files):
        # convert() creates the output directory on demand.
        convert(file, file_finder)
    time_end = time.perf_counter()
    print('use time', time_end - time_start)


if __name__ == '__main__':
    main()
IMG2PDF
import os
import img2pdf
from PIL import Image
def _natural_key(name):
    """Return a sort key that compares embedded numbers numerically.

    ``re.split`` with a capturing group alternates text runs and digit runs,
    so odd positions always hold the digit runs. Comparing those as integers
    orders ``x_2.png`` before ``x_10.png``.
    """
    import re  # function-scope import: `re` is not among the file's imports

    tokens = re.split(r'(\d+)', name)
    return [
        int(tok) if idx % 2 else tok.lower()
        for idx, tok in enumerate(tokens)
    ]


def doImg2Pdf(fileName, num=14):
    """Bundle the images in a directory into PDFs of *num* pages each.

    Files are taken in natural (number-aware) name order and written to the
    current working directory as ``sample1_1.pdf``, ``sample1_2.pdf``, ...
    The last PDF holds the remainder when the count is not a multiple of
    *num*.

    Args:
        fileName: Directory containing the images.
        num: Number of images (pages) per output PDF.

    Returns:
        The list of PDF file names written. The list is empty when the
        directory holds fewer than *num* files; a message is printed then.

    Raises:
        ValueError: If *num* is smaller than 1.
    """
    if num < 1:
        raise ValueError("num must be a positive integer")

    # os.listdir() gives no ordering guarantee, so sort explicitly; skip
    # subdirectories, which cannot be converted.
    fileList = sorted(
        (
            name for name in os.listdir(fileName)
            if os.path.isfile(os.path.join(fileName, name))
        ),
        key=_natural_key,
    )

    if num > len(fileList):
        print('num长度需小于:', len(fileList))
        return []

    num_file = -(-len(fileList) // num)  # ceiling division
    print(num_file)

    written = []
    for n, start in enumerate(range(0, len(fileList), num), start=1):
        pngList = [
            os.path.join(fileName, pngName)
            for pngName in fileList[start:start + num]
        ]
        # Convert before opening the target so a failed conversion does not
        # leave an empty PDF behind.
        pdf_bytes = img2pdf.convert(pngList)
        pdf_name = "sample1_%s.pdf" % n
        with open(pdf_name, "wb") as f:
            f.write(pdf_bytes)
        written.append(pdf_name)

    print("转换完成")
    return written


if __name__ == '__main__':
    doImg2Pdf(r'C:\Users\Administrator\Desktop\1\pics\\')