前言
有一次上班时,小伙伴向我吐槽用Java实现Word转PDF太麻烦。我灵机一动:让Java去调用Python脚本,由Python完成转换操作不就行了?
开发环境
Java:JDK1.8
python:3.12
代码
import docx2pdf
import sys
import glob
import os


def pdf_path_for(word_path: str) -> str:
    """Return the PDF output path that corresponds to *word_path*.

    Only the final extension is swapped. A ``.doc`` that appears elsewhere in
    the path (for example in a directory name) is left untouched, and
    extensions in any letter case (``.DOCX``) are replaced as well, so the
    output path never collides with the input document.
    """
    stem, _extension = os.path.splitext(word_path)
    return stem + ".pdf"


def w2ps(d: str) -> None:
    """Convert the Word document at path *d* to a PDF with the same base name.

    The conversion itself is delegated to ``docx2pdf.convert``; the resulting
    PDF path is printed once the call returns.
    """
    pdf_file = pdf_path_for(d)
    docx2pdf.convert(d, pdf_file)
    print(f"转换完成,PDF文件已保存为:{pdf_file}")


def each() -> None:
    """Convert every ``*.doc`` and ``*.docx`` file in the current working directory."""
    current_directory = os.getcwd()
    # ``.doc`` files are processed first, then ``.docx`` files.
    for pattern in ("*.doc", "*.docx"):
        for file_path in glob.glob(os.path.join(current_directory, pattern)):
            w2ps(file_path)


if __name__ == "__main__":
    # With a command-line argument, convert just that file; otherwise convert
    # everything found in the current working directory.
    if len(sys.argv) > 1:
        w2ps(sys.argv[1])
    else:
        each()
import java.io.IOException;

/**
 * Calls the external Word-to-PDF converter from Java, either through the
 * {@code python} command ({@link #main111}) or through the packaged
 * executable ({@link #main}).
 */
public class PythonCaller {

    /**
     * Runs the converter script with the {@code python} command and waits for
     * it to finish.
     *
     * @param args unused
     */
    public static void main111(String[] args) {
        String pythonScriptPath = "D:\\WorkSpace\\python\\pycorrector-master\\shany\\W2P.py"; // path of the Python script
        String wordFilePath = "E:\\新建文件夹 (22)\\问题排查.docx"; // path of the Word file to convert
        runConverter("python", pythonScriptPath, wordFilePath);
    }

    /**
     * Runs the packaged converter executable and waits for it to finish.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String executablePath = "D:\\新建文件夹\\a\\W2P.exe"; // replace with the actual path of your a.exe file
        String wordFilePath = "E:\\新建文件夹 (22)\\问题排查.docx"; // replace with the path of your Word file
        // The Word file path is passed to the executable as a command-line argument.
        runConverter(executablePath, wordFilePath);
    }

    /**
     * Starts {@code command}, waits for it to exit and prints its exit code.
     *
     * <p>The command is passed as separate arguments rather than one
     * concatenated string, so paths containing spaces need no manual quoting.
     * The child's output streams are inherited from this process so the child
     * cannot stall on a full, unread pipe.
     *
     * @param command the program followed by its arguments
     */
    private static void runConverter(String... command) {
        try {
            Process process = new ProcessBuilder(command).inheritIO().start();
            // Wait for the process to complete.
            int exitCode = process.waitFor();
            System.out.println("Process exited with code " + exitCode);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }
}
备注
后来为了避免运行环境中缺少第三方依赖,我把这个Python脚本单独打包成了exe文件,Java代码中也相应增加了直接调用该exe文件的main方法。
拓展
顺手还写了一个PDF转Word的脚本,功能和用法与上面的脚本基本一致。
from pdf2docx import Converter
import sys
import glob
import os


def docx_path_for(pdf_path: str) -> str:
    """Return the ``.docx`` output path that corresponds to *pdf_path*.

    Only the final extension is swapped. A ``.pdf`` that appears elsewhere in
    the path is left untouched, and extensions in any letter case (``.PDF``)
    are replaced as well, so the output path never equals the input PDF.
    """
    stem, _extension = os.path.splitext(pdf_path)
    return stem + ".docx"


def pdf_to_word(pdf_path, word_path):
    """Convert the PDF at *pdf_path* into a Word document at *word_path*.

    All pages are converted (``start=0, end=None``). The converter is closed
    even when the conversion raises.
    """
    cv = Converter(pdf_path)
    try:
        cv.convert(word_path, start=0, end=None)
    finally:
        cv.close()


def p2ws(file_path):
    """Convert the PDF at *file_path* to a ``.docx`` with the same base name."""
    word_file = docx_path_for(file_path)
    pdf_to_word(file_path, word_file)
    # Report the file that was written, not the input PDF.
    print(f"转换完成,WORD文件已保存为:{word_file}")


def each():
    """Convert every ``*.pdf`` file in the current working directory."""
    current_directory = os.getcwd()
    for file_path in glob.glob(os.path.join(current_directory, "*.pdf")):
        p2ws(file_path)


if __name__ == "__main__":
    # With a command-line argument, convert just that file; otherwise convert
    # everything found in the current working directory.
    if len(sys.argv) > 1:
        p2ws(sys.argv[1])
    else:
        each()