在 Maven 项目的 pom.xml 中引入以下依赖包:
<!-- Dependencies needed by the PDF merge utility class. -->
<dependencies>
    <!-- PDFBox examples artifact; the utility class imports PDFMergerExample from it. -->
    <dependency>
        <groupId>org.apache.pdfbox</groupId>
        <artifactId>pdfbox-examples</artifactId>
        <version>3.0.1</version>
    </dependency>
    <!-- Commons IO; the utility class uses FileUtils from it. -->
    <dependency>
        <groupId>commons-io</groupId>
        <artifactId>commons-io</artifactId>
        <version>2.9.0</version>
    </dependency>
</dependencies>
创建一个工具类
package org.apache.pdfbox.utils;import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.examples.util.PDFMergerExample;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.RandomAccessReadMemoryMappedFile;import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.function.Predicate;
import java.util.stream.Collectors;/*** @author: guanglai.zhou* @date: 2023/12/14 13:15*/
public class PdfMergerUtils {/*** 合并指定目录中的pdf文件** @param fromDir 指定目录* @param descFile 目标pdf文件* @return 目标pdf文件* @throws IOException*/public static File merge(String fromDir, String descFile) throws IOException {final File resultFile = new File(descFile);File file = new File(fromDir);List<File> files = new ArrayList<>();list(file, new Predicate<File>() {@Overridepublic boolean test(File file) {return true;}}, new Predicate<File>() {// 选择pdf文件@Overridepublic boolean test(File file) {return file.getPath().endsWith(".pdf");}}, files);if (files.isEmpty()) {throw new RuntimeException("源文件不存在pdf格式文档?");}
// files.sort(Comparator.comparing(File::getName));if (resultFile.exists()) {FileUtils.forceDelete(resultFile);}mergePdfs(resultFile, files);return resultFile;}/*** 针对文件进行遍历 如果文件夹满足directoryPredicate,则继续遍历文件夹,如果是文件,则判断是否满足filePredicate,如果满足则添加到* collector结果集当中** @param file 文件夹* @param directoryPredicate 文件夹预期 为null 则不针对文件夹做过滤* @param filePredicate 文件预期 为null 则不针对文件做过滤* @param collector 收集器 收集所有符合条件的文件*/public static void list(File file, Predicate<File> directoryPredicate, Predicate<File> filePredicate, List<File> collector) {File[] childFiles = file.listFiles();if (childFiles == null) {return;}// 根据脚本名称进行排序List<File> fileList = Arrays.stream(childFiles).sorted(Comparator.comparing(File::getName)).collect(Collectors.toList());for (File childFile : fileList) {if (childFile.isDirectory()) {boolean pass = directoryPredicate == null || directoryPredicate.test(childFile);if (pass) {// 继续遍历子文件夹目录list(childFile, directoryPredicate, filePredicate, collector);}} else {boolean pass = filePredicate == null || filePredicate.test(childFile);if (pass) {collector.add(childFile);}}}}private static void mergePdfs(File resultFile, List<File> files) throws IOException {PDFMergerExample example = new PDFMergerExample();List<RandomAccessRead> sources = new ArrayList<>();for (File currFile : files) {sources.add(new RandomAccessReadMemoryMappedFile(currFile));}InputStream inputStream = example.merge(sources);FileUtils.copyInputStreamToFile(inputStream, resultFile);}}
将需要合并的 PDF 文件都拷贝到指定目录(例如 a)中,然后调用该工具类的 merge 方法:第一个参数传入该目录的路径,第二个参数传入输出 PDF 文件的路径即可。