POI读取Word文件(支持HWPF(.doc)和XWPF(.docx)两种方式)
参考:HSSF,XSSF,SXSSF三种方式
1.引入Maven依赖(poi、poi-ooxml、poi-scratchpad三个依赖的版本必须一致)
<dependency><groupId>org.apache.poi</groupId><artifactId>poi</artifactId><version>4.1.1</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml</artifactId><version>4.1.1</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi-scratchpad</artifactId><version>4.1.1</version></dependency>
2.读取word
public class POIUtil {/*** @Description: POI 读取 word* @create: 2019-07-27 9:48* @update logs*/public static String readWord(String path) throws Exception {// WordExtractor extractor = new WordExtractor(is);String content = null;File file = new File(path);if (file.exists() && file.isFile()) {InputStream is = null;XWPFDocument xwpfDocument = null;POIXMLTextExtractor extractor = null;HWPFDocument hwpfDocument = null;WordExtractor wordExtractor = null;try {is = new FileInputStream(file);xwpfDocument = new XWPFDocument(is);extractor = new XWPFWordExtractor(xwpfDocument);// 文档文本内容content = extractor.getText();
// // 文档图片内容
// List<XWPFPictureData> pictures = docx.getAllPictures();
// for (XWPFPictureData picture : pictures) {
// byte[] bytev = picture.getData();
// // 输出图片到磁盘
// FileOutputStream out = new FileOutputStream(
// "D:\\temp\\temp\\" + UUID.randomUUID() + picture.getFileName());
// out.write(bytev);
// out.close();
// }} catch (FileNotFoundException e) {} catch (IOException e) {} catch (OLE2NotOfficeXmlFileException e) {//较低版本的word文件is = new FileInputStream(file);hwpfDocument = new HWPFDocument(is);wordExtractor = new WordExtractor(hwpfDocument);// 文档文本内容content = wordExtractor.getText();} finally {try {if (extractor != null) {extractor.close();}if (xwpfDocument != null) {xwpfDocument.close();}if (wordExtractor != null) {wordExtractor.close();}if (hwpfDocument != null) {hwpfDocument.close();}if (is != null) {is.close();}} catch (IOException e) {}}}return content;}public static void main(String[] args) {String path = "/Users/jj/Desktop/胜多负少的范德萨.doc";
// String path = "/Users/jj/Desktop/测试1 2.doc";
// String path = "/Users/jj/Desktop/测试1.docx";try {System.out.println(readWord(path));} catch (Exception e) {e.printStackTrace();}}}