引入依赖
<dependency><groupId>cn.afterturn</groupId><artifactId>easypoi-spring-boot-starter</artifactId>
</dependency>
<!-- 下面的版本需要对应上面依赖中的版本 否则可能会起冲突 -->
<!-- 下面的依赖主要是为了使用Apache原生的WordExtractor对doc后缀文件的解析 -->
<dependency><groupId>org.apache.poi</groupId><artifactId>poi-scratchpad</artifactId><version>4.1.1</version>
</dependency>
<!-- 糊涂Api工具 -->
<dependency><groupId>cn.hutool</groupId><artifactId>hutool-core</artifactId><version>5.8.10</version>
</dependency>
工具类封装
public class WordDocumentUtil {/*** 解析文档文件** @param file 文档文件* @return 文档内容*/public static String parseWord(MultipartFile file) {String wordTxt = "";InputStream stream = null;try {if (file.getOriginalFilename().endsWith(".doc")) {stream = file.getInputStream();// Apache PoiWordExtractor ex = new WordExtractor(stream);wordTxt = ex.getText();} else if (file.getOriginalFilename().endsWith(".docx")) {stream = file.getInputStream();// EasyPoiXWPFDocument document = new XWPFDocument(stream);XWPFWordExtractor ex = new XWPFWordExtractor(document);wordTxt = ex.getText();}} catch (Exception e) {// 此处建议抛出异常 "文档解析有误"e.printStackTrace();} finally {if (stream != null) {try {stream.close();} catch (IOException e) {e.printStackTrace();}}}return wordTxt;}/*** 判断文档类型进行不同的分割方式* ".doc"后缀需要以"\r\n"切割 而".docx"后缀需要以"\n"切割** @param file 文件名:以file.getOriginalFilename()传入* @param wordTxt 文件内容* @return 内容数组*/public static String[] judgeType(String file, String wordTxt) {boolean suffixFlag = file.endsWith(".doc");return suffixFlag ? 
Arrays.stream(wordTxt.split("\r\n")).toArray(String[]::new): Arrays.stream(wordTxt.split("\n")).toArray(String[]::new);}/*** 导出resources下的word模板表** @param fileName 文件名* @param response {@link HttpServletResponse}*/public void exportTemplate(String fileName, HttpServletResponse response) {InputStream inputStream = null;try {String path = "/word/" + fileName;inputStream = this.getClass().getResourceAsStream(path);String newFileName = IdUtil.simpleUUID() + StrUtil.DOT + FileUtil.extName(fileName);byte[] bytes = new byte[1024 * 1024];// 输入流读取文件if (inputStream != null) {inputStream.read(bytes);}response.setCharacterEncoding("UTF-8");response.setContentType("application/msword");response.setHeader("Access-Control-Expose-Headers","Content-disposition");response.setHeader("Content-Disposition","attachment;filename=" + newFileName);response.getOutputStream().write(bytes);} catch (Exception e) {e.printStackTrace();} finally {if (inputStream != null) {try {inputStream.close();} catch (IOException e) {e.printStackTrace();}}}}
}
乱码问题
如果读取resources下的文件并返回给前端时出现乱码:除了检查HttpServletResponse响应中的字符编码设置之外,还有可能是文件在Maven构建阶段就已经被破坏了(资源过滤会把二进制的Word文件当作文本处理),所以需要在pom.xml中增加以下配置,将Word模板文件排除在资源过滤之外。
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-resources-plugin</artifactId>
            <version>2.6</version>
            <configuration>
                <encoding>UTF-8</encoding>
                <!-- Word templates are binary files: exclude them from resource filtering
                     so they are copied byte-for-byte. docx is listed as well as doc. -->
                <nonFilteredFileExtensions>
                    <nonFilteredFileExtension>doc</nonFilteredFileExtension>
                    <nonFilteredFileExtension>docx</nonFilteredFileExtension>
                </nonFilteredFileExtensions>
            </configuration>
        </plugin>
    </plugins>
</build>