一、引入maven
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <!-- 注意：下方工具类使用了 org.apache.pdfbox.Loader（PDFBox 3.x 的 API）；若使用 2.0.x，请确认版本或改用 PDDocument.load(...) -->
    <version>2.0.25</version>
</dependency>
二、代码工具类
package com. jiayou. peis. utils ;
import com. google. common. collect. Lists ;
import com. jiayou. peis. entity. ImageObject ;
import org. apache. commons. io. FileUtils ;
import org. apache. pdfbox. Loader ;
import org. apache. pdfbox. cos. COSName ;
import org. apache. pdfbox. pdmodel. PDDocument ;
import org. apache. pdfbox. pdmodel. PDResources ;
import org. apache. pdfbox. pdmodel. common. PDStream ;
import org. apache. pdfbox. pdmodel. graphics. PDXObject ;
import org. apache. pdfbox. pdmodel. graphics. image. PDImage ;
import org. apache. pdfbox. pdmodel. graphics. image. PDImageXObject ;
import org. apache. pdfbox. text. PDFTextStripper ; import javax. imageio. ImageIO ;
import java. awt. image. BufferedImage ;
import java. io. * ;
import java. util. ArrayList ;
import java. util. List ;
/**
 * Static helpers for extracting text and embedded images from PDF documents
 * with Apache PDFBox, plus a helper for writing extracted images to disk.
 *
 * <p>NOTE(review): documents are opened through {@code Loader.loadPDF}, which
 * appears to be a PDFBox 3.x API, while the accompanying Maven snippet pins
 * 2.0.25 -- confirm which PDFBox version is actually on the classpath.
 */
public class PdfUtils {

    /**
     * Extracts the text of a PDF held in memory, sorting text by position.
     *
     * @param data raw PDF bytes
     * @return the extracted text after {@code StrUtils.removeReturnChar} has been applied
     * @throws IOException if the document cannot be loaded or parsed
     */
    public static String text(byte[] data) throws IOException {
        return PdfUtils.text(data, true);
    }

    /**
     * Extracts the text of a PDF held in memory.
     *
     * @param data           raw PDF bytes
     * @param sortByPosition forwarded to {@code PDFTextStripper.setSortByPosition}
     * @return the extracted text after {@code StrUtils.removeReturnChar} has been applied
     * @throws IOException if the document cannot be loaded or parsed
     */
    public static String text(byte[] data, boolean sortByPosition) throws IOException {
        ByteArrayInputStream inputStream = new ByteArrayInputStream(data);
        return PdfUtils.text(inputStream, sortByPosition);
    }

    /**
     * Extracts the text of a PDF file.
     *
     * @param file           the PDF file to read
     * @param sortByPosition forwarded to {@code PDFTextStripper.setSortByPosition}
     * @return the extracted text after {@code StrUtils.removeReturnChar} has been applied
     * @throws IOException if the file cannot be opened or the document cannot be parsed
     */
    public static String text(File file, boolean sortByPosition) throws IOException {
        // The stream is released by text(InputStream, boolean) in its finally block.
        InputStream inputStream = new FileInputStream(file);
        return PdfUtils.text(inputStream, sortByPosition);
    }

    /**
     * Extracts the text of a PDF file, sorting text by position.
     *
     * @param file the PDF file to read
     * @return the extracted text after {@code StrUtils.removeReturnChar} has been applied
     * @throws IOException if the file cannot be opened or the document cannot be parsed
     */
    public static String text(File file) throws IOException {
        return PdfUtils.text(file, true);
    }

    /**
     * Extracts the text of a PDF read from a stream, sorting text by position.
     *
     * @param inputStream stream positioned at the start of a PDF document
     * @return the extracted text after {@code StrUtils.removeReturnChar} has been applied
     * @throws IOException if the document cannot be loaded or parsed
     */
    public static String text(InputStream inputStream) throws IOException {
        return text(inputStream, true);
    }

    /**
     * Extracts the text of every page of a PDF read from a stream.
     *
     * <p>Both the loaded document and {@code inputStream} are handed to
     * {@code CloseUtils.closeQuietly} before this method returns, so callers
     * should not expect to reuse the stream afterwards.
     *
     * @param inputStream    stream positioned at the start of a PDF document
     * @param sortByPosition forwarded to {@code PDFTextStripper.setSortByPosition}
     * @return the stripper output passed through {@code StrUtils.removeReturnChar}
     *         (presumably removing line-break characters -- verify against that helper)
     * @throws IOException if the document cannot be loaded or parsed
     */
    public static String text(InputStream inputStream, boolean sortByPosition) throws IOException {
        PDDocument document = null;
        try {
            document = Loader.loadPDF(inputStream);
            PDFTextStripper textStripper = new PDFTextStripper();
            // Cover the whole document: pages are 1-based for the stripper.
            textStripper.setStartPage(1);
            textStripper.setEndPage(document.getNumberOfPages());
            textStripper.setSortByPosition(sortByPosition);
            textStripper.setShouldSeparateByBeads(true);
            return StrUtils.removeReturnChar(textStripper.getText(document));
        } finally {
            CloseUtils.closeQuietly(document, inputStream);
        }
    }

    /**
     * Extracts the images referenced by the first page of a PDF file.
     *
     * @param file the PDF file to read
     * @return the extracted images, possibly empty, never {@code null}
     * @throws IOException if the file cannot be opened or the document cannot be parsed
     */
    public static List<ImageObject> images(File file) throws IOException {
        // The stream is released by images(InputStream) in its finally block.
        InputStream inputStream = new FileInputStream(file);
        return PdfUtils.images(inputStream);
    }

    /**
     * Extracts the images referenced by the first page of a PDF held in memory.
     *
     * @param data raw PDF bytes
     * @return the extracted images, possibly empty, never {@code null}
     * @throws IOException if the document cannot be loaded or parsed
     */
    public static List<ImageObject> images(byte[] data) throws IOException {
        ByteArrayInputStream inputStream = null;
        try {
            inputStream = new ByteArrayInputStream(data);
            return PdfUtils.images(inputStream);
        } finally {
            CloseUtils.closeQuietly(inputStream);
        }
    }

    /**
     * Extracts the image XObjects listed in the resources of the first page.
     *
     * <p>Each image is decoded to a {@link BufferedImage} and wrapped in an
     * {@code ImageObject} carrying a zero-based index and a file suffix chosen
     * by {@link #imageSuffix(PDImageXObject)}. Both the loaded document and
     * {@code inputStream} are handed to {@code CloseUtils.closeQuietly} before
     * this method returns.
     *
     * <p>NOTE(review): only page index 0 is inspected; images on later pages
     * are not returned. Confirm this is the intended scope.
     *
     * @param inputStream stream positioned at the start of a PDF document
     * @return the extracted images; empty when the document has no pages or the
     *         first page has no resources
     * @throws IOException if the document cannot be loaded or an image cannot be decoded
     */
    public static List<ImageObject> images(InputStream inputStream) throws IOException {
        List<ImageObject> imageList = new ArrayList<>();
        PDDocument document = null;
        try {
            document = Loader.loadPDF(inputStream);
            // A document without pages has nothing to extract; avoid getPage(0) failing.
            if (document.getNumberOfPages() == 0) {
                return imageList;
            }
            PDResources pdResources = document.getPage(0).getResources();
            // A page may carry no resource dictionary at all.
            if (pdResources == null) {
                return imageList;
            }
            int index = 0;
            for (COSName xObjectName : pdResources.getXObjectNames()) {
                PDXObject pdxObject = pdResources.getXObject(xObjectName);
                if (pdxObject instanceof PDImageXObject) {
                    PDImageXObject image = (PDImageXObject) pdxObject;
                    ImageObject object = new ImageObject();
                    object.setIndex(index++);
                    object.setImage(image.getImage());
                    object.setSuffix(imageSuffix(image));
                    imageList.add(object);
                }
            }
        } finally {
            CloseUtils.closeQuietly(document, inputStream);
        }
        return imageList;
    }

    /**
     * Chooses the file suffix used when saving an extracted image.
     *
     * <p>A missing suffix or {@code "jb2"} maps to {@code "png"}, {@code "jpx"}
     * maps to {@code "jp2"}, and any image that has a mask or soft mask is
     * forced to {@code "png"}.
     *
     * @param pdImage the image XObject
     * @return the suffix to use, without a leading dot
     * @throws IOException if the mask information cannot be read
     */
    private static String imageSuffix(PDImageXObject pdImage) throws IOException {
        String suffix = pdImage.getSuffix();
        if (suffix == null || "jb2".equals(suffix)) {
            suffix = "png";
        } else if ("jpx".equals(suffix)) {
            suffix = "jp2";
        }
        if (hasMasks(pdImage)) {
            suffix = "png";
        }
        return suffix;
    }

    /**
     * Tells whether an image carries a mask or a soft mask.
     *
     * @param pdImage the image to inspect
     * @return {@code true} if {@code pdImage} is a {@code PDImageXObject} with a
     *         non-null mask or soft mask, {@code false} otherwise
     * @throws IOException if the mask information cannot be read
     */
    private static boolean hasMasks(PDImage pdImage) throws IOException {
        if (pdImage instanceof PDImageXObject) {
            PDImageXObject ximg = (PDImageXObject) pdImage;
            return ximg.getMask() != null || ximg.getSoftMask() != null;
        }
        return false;
    }

    /**
     * Writes each image to {@code dir} as {@code <prefixName>_<index>.<suffix>}.
     *
     * <p>The directory is created first via {@code FileUtils.forceMkdir}.
     *
     * @param imageList  images to write
     * @param dir        target directory path
     * @param prefixName file name prefix placed before the image index
     * @throws IOException if the directory cannot be created, a write fails, or
     *                     no ImageIO writer is available for an image's suffix
     */
    public static void saveImage(List<ImageObject> imageList, String dir, String prefixName) throws IOException {
        File imgDir = new File(dir);
        FileUtils.forceMkdir(imgDir);
        for (ImageObject image : imageList) {
            String suffix = image.getSuffix();
            File imgFile = new File(dir, prefixName + "_" + image.getIndex() + "." + suffix);
            // ImageIO.write reports a missing writer by returning false, not by throwing;
            // surface it so an image is never silently dropped.
            if (!ImageIO.write(image.getImage(), suffix, imgFile)) {
                throw new IOException("No ImageIO writer available for format '" + suffix + "': " + imgFile);
            }
        }
    }
}