一.pom引入依赖
<dependency>
    <groupId>com.aspose</groupId>
    <artifactId>aspose-words</artifactId>
    <version>15.12.0</version>
    <classifier>jdk16</classifier>
</dependency>
二.代码实现
package com. example. demo. handler ; import com. alibaba. fastjson. JSONObject ;
import com. aspose. words. HtmlSaveOptions ;
import org. jsoup. Jsoup ;
import org. jsoup. nodes. Document ;
import org. jsoup. nodes. Element ;
import org. jsoup. select. Elements ;
import org. springframework. stereotype. Component ;
import org. springframework. web. multipart. MultipartFile ;
import java. io. * ;
import java. util. * ; @Component
public class WordAnalysis { public List wordAnalysis ( MultipartFile multipartFile) throws IOException { byte [ ] byteArr = multipartFile. getBytes ( ) ; InputStream inputStream = new ByteArrayInputStream ( byteArr) ; List tableList = new ArrayList ( ) ; try { HtmlSaveOptions saveOptions = new HtmlSaveOptions ( ) ; saveOptions. setExportImagesAsBase64 ( false ) ; String property = System . getProperty ( "java.io.tmpdir" ) ; saveOptions. setImagesFolder ( property) ; ByteArrayOutputStream baos = new ByteArrayOutputStream ( ) ; com. aspose. words. Document doc = new com. aspose. words. Document( inputStream) ; doc. save ( baos, saveOptions) ; Document htmlDoc = Jsoup . parse ( baos. toString ( ) ) ; tableList = analysisDoc ( htmlDoc) ; } catch ( Exception e) { e. printStackTrace ( ) ; } finally { inputStream. close ( ) ; } return tableList; } public List analysisDoc ( Document htmlDoc) { Elements tables = htmlDoc. getElementsByTag ( "table" ) ; List tableList = new ArrayList ( ) ; for ( int i = 0 ; i < tables. size ( ) ; i++ ) { Map < String , Object > tableInfo = new HashMap < > ( ) ; UUID uuid = UUID . randomUUID ( ) ; Element table = tables. get ( i) ;
tableInfo. put ( "tableId" , uuid) ; tableInfo. put ( "tableName" , "表" + ( i+ 1 ) ) ; tableInfo. put ( "tableHtml" , tables. get ( i) . toString ( ) ) ; Elements rows = table. select ( "tr" ) ; List rowList = new ArrayList ( ) ; for ( Element row: rows) { if ( ! row. attributes ( ) . get ( "style" ) . contains ( "height:0pt" ) ) { List rowInfo = new ArrayList ( ) ; Elements cells = row. select ( "td" ) ; for ( Element cell: cells) { JSONObject cellInfo = new JSONObject ( ) ; String data = cell. text ( ) ; int rowspan = new Integer ( cell. attributes ( ) . get ( "rowspan" ) == "" ? "1" : cell. attributes ( ) . get ( "rowspan" ) ) ; int colspan = new Integer ( cell. attributes ( ) . get ( "colspan" ) == "" ? "1" : cell. attributes ( ) . get ( "colspan" ) ) ; System . out. print ( data + "\t" ) ; cellInfo. put ( "content" , data) ; cellInfo. put ( "rowspan" , rowspan) ; cellInfo. put ( "colspan" , colspan) ; rowInfo. add ( cellInfo) ; } System . out. println ( ) ; rowList. add ( rowInfo) ; } } tableInfo. put ( "tableContent" , rowList) ; tableList. add ( tableInfo) ; } return tableList; }
}