有两种方法:
第一种方法:调用现成的第三方 API(例如 Spire.PDF for Java)读取 PDF 文件,或 doc、xlsx、pptx 文件的属性;商业使用可能需要付费。
https://www.e-iceblue.cn/pdf_java_document_operation/set-pdf-document-properties-in-java.html
Spire.PDF for Java
import com.spire.pdf.*;
import java.io.*;public class getPDFProperties {public static void main(String[] args) throws IOException {//创建 PdfDocument 类的对象PdfDocument pdf = new PdfDocument();//从磁盘加载PDF文档pdf.loadFromFile("" + "setPDFProperties.pdf");//创建 StringBuilder 的对象以储存获取的属性数据StringBuilder stringBuilder = new StringBuilder();//获取PDF文档的属性数据并储存于创建的 StringBuilderstringBuilder.append("标题:" + pdf.getDocumentInformation().getTitle() + "\r\n");stringBuilder.append("作者" + pdf.getDocumentInformation().getAuthor() + "\r\n");stringBuilder.append("主题:" + pdf.getDocumentInformation().getSubject() + "\r\n");stringBuilder.append("关键词:" + pdf.getDocumentInformation().getKeywords() + "\r\n");stringBuilder.append("创建者:" + pdf.getDocumentInformation().getCreator() + "\r\n");stringBuilder.append("创建时间:" + pdf.getDocumentInformation().getCreationDate() + "\r\n");stringBuilder.append("制作工具:" + pdf.getDocumentInformation().getProducer() + "\r\n");//创建一个TXT文件File file = new File("getPDFProperties.txt");file.createNewFile();//将 StringBuilder 写入TXT文件FileWriter fileWriter = new FileWriter(file, true);BufferedWriter bufferedWriter = new BufferedWriter(fileWriter);bufferedWriter.write(stringBuilder.toString());bufferedWriter.flush();}
}
第二种方法:
通过 Apache POI(读取 Office 文档)和 Apache PDFBox(读取 PDF 文档)实现;另外,不同版本的方法实现也会有所不同。
Apache POI 的不同版本
引入依赖
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.24</version> <!-- check for the latest version -->
</dependency>
<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.11.0</version> <!-- check whether a newer version is available -->
</dependency>
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-api</artifactId>
    <version>2.17.1</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>5.2.2</version> <!-- check for the latest version -->
    <exclusions>
        <exclusion>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
        </exclusion>
    </exclusions>
</dependency>
最后直接提供方法实现:
package com.ruoyi.project.backstage.pdf;import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.ooxml.POIXMLProperties;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.xmlbeans.XmlObject;
import org.apache.xmlbeans.XmlOptions;
import org.apache.xmlbeans.impl.values.XmlComplexContentImpl;import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;/*** @className: FileUtils* @author: 3.0* @date: 2024/10/16* @Version: 1.0* @description:*/public class FileUtils {// public static void main(String[] args) {
// try (PDDocument document = PDDocument.load(new File("E:\\project\\" + "1.pdf"))) {
// PDDocumentInformation info = document.getDocumentInformation();
// System.out.println("Title: " + info.getTitle());
// System.out.println("Author: " + info.getAuthor());
// System.out.println("Subject: " + info.getSubject());
// // 其他属性...
// } catch (IOException e) {
// e.printStackTrace();
// }
//
// }// public static void main(String[] args) throws Exception {
// XWPFDocument doc = new XWPFDocument(new FileInputStream(new File("E:\\project\\log.sh用法(1).docx")));
CTProperties coreProps = doc.getProperties().getCoreProperties();
// POIXMLProperties.CoreProperties coreProps = doc.getProperties().getCoreProperties();
// System.out.println("Title: " + coreProps.getTitle());
// System.out.println("Author: " + coreProps.getCreator());
// System.out.println("主题: " + coreProps.getSubject());
// // 更多属性...
// doc.close();
// }public static void main(String[] args) throws Exception {Workbook workbook = WorkbookFactory.create(new FileInputStream(new File("E:\\project\\工作簿1 - 副本.xls")));if (workbook instanceof POIXMLDocument) {POIXMLDocument poixmlDocument = (POIXMLDocument) workbook;POIXMLProperties properties = poixmlDocument.getProperties();POIXMLProperties.CoreProperties coreProperties = properties.getCoreProperties();// 现在你可以访问核心属性了String title = coreProperties.getTitle();String subject = coreProperties.getSubject();String creator = coreProperties.getCreator();// ... 其他属性// 打印属性到控制台System.out.println("Title: " + title);System.out.println("Subject: " + subject);System.out.println("Creator: " + creator);// ...} else {System.out.println("The workbook is not a POIXMLDocument (not an .xlsx file?).");}// 关闭工作簿(在try-with-resources中自动关闭fis,但这里显式关闭workbook以强调)workbook.close();}
//
// public static void main(String[] args) throws Exception {
// XMLSlideShow ppt = new XMLSlideShow(OPCPackage.open(new FileInputStream(new File("E:\\project\\演示文稿1.pptx"))));
// System.out.println("Title: " + ppt.getProperties().getCoreProperties().getTitle());
// System.out.println("Author: " + ppt.getProperties().getCoreProperties().getCreator());
// System.out.println("主题: " + ppt.getProperties().getCoreProperties().getSubject());
// // 更多属性...
// ppt.close();
// }}
如果需要读取 HTTPS 地址上的文件,可以参考下面的实现:
要从远程 HTTPS URL 读取文件并得到 FileInputStream 对象,可以先将远程文件下载到本地磁盘,然后再使用 FileInputStream 打开它。以下是实现这一过程的一种方法:
1、下载文件到本地:使用 Java 的 HttpURLConnection 或者 HttpClient 等工具来下载文件。
2、创建 FileInputStream:使用下载后的本地文件路径创建 FileInputStream。
public static void main(String[] args) throws Exception {String remoteUrl = "http://s3.api.com/diaoyun//survey/answer/.xlsx";String localPath = "E:\\project\\file11.xlsx"; // 本地临时文件路径downloadFileFromURL(remoteUrl, localPath);File file = new File(localPath);FileInputStream fileInputStream = new FileInputStream(file);Workbook workbook = WorkbookFactory.create(fileInputStream);
// Workbook workbook = WorkbookFactory.create(new FileInputStream(new File("E:\\project\\工作簿1 - 副本.xls")));if (workbook instanceof POIXMLDocument) {POIXMLDocument poixmlDocument = (POIXMLDocument) workbook;POIXMLProperties properties = poixmlDocument.getProperties();POIXMLProperties.CoreProperties coreProperties = properties.getCoreProperties();// 现在你可以访问核心属性了String title = coreProperties.getTitle();String subject = coreProperties.getSubject();String creator = coreProperties.getCreator();// ... 其他属性// 打印属性到控制台System.out.println("Title: " + title);System.out.println("Subject: " + subject);System.out.println("Creator: " + creator);// ...} else {System.out.println("The workbook is not a POIXMLDocument (not an .xlsx file?).");}// 关闭工作簿(在try-with-resources中自动关闭fis,但这里显式关闭workbook以强调)workbook.close();file.delete();}private static void downloadFileFromURL(String urlStr, String localPath) throws IOException {URL url = new URL(urlStr);HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();try (InputStream in = urlConnection.getInputStream();FileOutputStream out = new FileOutputStream(localPath)) {byte[] buffer = new byte[1024];int bytesRead;while ((bytesRead = in.read(buffer)) != -1) {out.write(buffer, 0, bytesRead);}}}