需要安装 python-docx 和 PyPDF2
pip install python-docx
pip install PyPDF2
注意：不要执行 pip install docx（应安装 python-docx），否则会报错 ImportError: No module named 'exceptions'
import PyPDF2
from docx import Document


def convert_pdf_to_doc(pdf_path, doc_path):
    """Copy the extracted text of every PDF page into a Word document.

    The PDF is read page by page; the text extracted from each page is
    appended to a new document as one paragraph, and the document is then
    saved. Only text is transferred. Progress is printed to stdout.

    Args:
        pdf_path: Path of the PDF file to read.
        doc_path: Path the resulting document is saved to.
    """
    doc = Document()
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        nums = len(pdf_reader.pages)
        print("total page:", nums)
        for page_num, page in enumerate(pdf_reader.pages):
            text = page.extract_text()
            doc.add_paragraph(text)
            print("current page:", page_num)
    # All text is already in memory, so save once the PDF handle is closed.
    doc.save(doc_path)


# Usage example
# Example run: the input PDF path and the output Word document path.
pdf_path, doc_path = 'in.pdf', 'out.docx'
convert_pdf_to_doc(pdf_path, doc_path)