SpringBoot解压zip包,读取每个文件内容
一、运用场景
获取本地压缩包,解压后根据文件名称及类型,对读取的文件内容进行业务处理。
二、POM文件依赖
<!-- File reading (Apache POI: Word / Excel) -->
<!-- NOTE(review): readGsFile in section 3 uses PDDocument / PDFTextStripper (PDFBox), -->
<!-- which is not declared in this fragment; confirm it is provided elsewhere. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>

<!-- Aliyun OCR -->
<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>aliyun-java-sdk-core</artifactId>
    <version>3.4.0</version>
</dependency>
<dependency>
    <groupId>software.amazon.awssdk</groupId>
    <artifactId>aws-sdk-java</artifactId>
    <version>2.17.0</version>
</dependency>

<!-- Baidu OCR (its bundled slf4j-simple binding is excluded) -->
<dependency>
    <groupId>com.baidu.aip</groupId>
    <artifactId>java-sdk</artifactId>
    <version>4.11.3</version>
    <exclusions>
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
        </exclusion>
    </exclusions>
</dependency>
三、代码部分
1、控制层方法
@GetMapping(value = "/localZipFile")public Result localZipFile(){String filePath = "C:\\Users\\Administrator\\Desktop\\11.zip";List<String> list = new ArrayList<>();try {ZipFile zipFile = new ZipFile(filePath);Enumeration<? extends ZipEntry> entries = zipFile.getEntries();//获取zip包下文件名while (entries.hasMoreElements()) {list.add(entries.nextElement().getName());}String packFileStr = "C:\\Users\\Administrator\\Desktop\\zip";File file = new File(filePath);String packFilePath = packFileStr + File.separator;//解压到指定路径UnPackeUtil.unPackZip(file, null,packFilePath);File readFileDir = new File(packFilePath);List<String> strings = new ArrayList<>();//获取文件夹下文件集合File[] files = readFileDir.listFiles();for (File file1 : files) {String savePath = MeFileUtils.uploadLocal(file1,"");FileInputStream inputStream = new FileInputStream(file1);String fileExtension =MeFileUtils. getFileExtension(file1.getName());//返回读取的文件内容String fileContent = MeFileUtils.readGsFile(inputStream,fileExtension,file1);strings.add(savePath);strings.add(fileContent);}return Result.OK(strings);} catch (IOException e) {e.printStackTrace();return Result.error(e.getMessage());}
2、MeFileUtils工具类
//上传
public class MeFileUtils{public static String uploadLocal(File file,String bizPath){try {String ctxPath = uploadpath;String fileName = null;File file = new File(ctxPath + File.separator + bizPath + File.separator );if (!file.exists()) {file.mkdirs();// 创建文件根目录}// 获取文件名String orgName = mf.getName();orgName = CommonUtils.getFileName(orgName);if(orgName.indexOf(".")!=-1){fileName = orgName.substring(0, orgName.lastIndexOf(".")) + "_" + System.currentTimeMillis() + orgName.substring(orgName.lastIndexOf("."));}else{fileName = orgName+ "_" + System.currentTimeMillis();}String savePath = file.getPath() + File.separator + fileName;File savefile = new File(savePath);FileCopyUtils.copy(mf, savefile);String dbpath = null;if(oConvertUtils.isNotEmpty(bizPath)){dbpath = bizPath + File.separator + fileName;}else{dbpath = fileName;}if (dbpath.contains("\\")) {dbpath = dbpath.replace("\\", "/");}return dbpath;} catch (IOException e) {log.error(e.getMessage(), e);}return "";}//获取文件名public String getFileExtension(String filename) {int dotIndex = filename.lastIndexOf(".");if (dotIndex > 0 && dotIndex < filename.length() - 1) {return filename.substring(dotIndex + 1).toLowerCase();}return "";}//获取文件内容public String readGsFile(FileInputStream inputStream,String fileExtension,File txtFile) {try {if (fileExtension.equalsIgnoreCase("doc") || fileExtension.equalsIgnoreCase("docx")) {// 处理Word文档XWPFDocument document = new XWPFDocument(inputStream);// 读取每个段落的文本内容StringBuilder content = new StringBuilder();for (XWPFParagraph paragraph : document.getParagraphs()) {for (XWPFRun run : paragraph.getRuns()) {content.append(run.text());}}// 关闭文档document.close();return content.toString();}else if(fileExtension.equalsIgnoreCase("txt")){StringBuffer buffer = new StringBuffer();// 创建Scanner对象来读取文件内容Scanner scanner = new Scanner(txtFile);// 逐行读取文件内容并输出while (scanner.hasNextLine()) {String line = scanner.nextLine();System.out.println(line);buffer.append(line).append(",");}// 
关闭Scanner对象scanner.close();return buffer.toString();} else if (fileExtension.equalsIgnoreCase("xls") || fileExtension.equalsIgnoreCase("xlsx")) {// 处理Excel文档XSSFWorkbook workbook = new XSSFWorkbook(inputStream);return "";} else if (fileExtension.equalsIgnoreCase("pdf")) {// 处理PDF文档PDDocument document = PDDocument.load(inputStream);// 创建 PDFTextStripper 对象PDFTextStripper textStripper = new PDFTextStripper();// 读取文档内容String content = textStripper.getText(document);document.close();return content;}else if (fileExtension.equalsIgnoreCase("jpg") || fileExtension.equalsIgnoreCase("png")) {// 处理图片try {// 调用OCR工具类识别文件内容String result = BaiduOCRUtil.recognizeFile(txtFile.getAbsolutePath());return result;} catch (Exception e) {e.printStackTrace();return "";}} else {// 其他文件格式inputStream.close();return "";}} catch (IOException e) {e.printStackTrace();return "";}}
}
3、BaiduOCRUtil工具类
public class BaiduOCRUtil {private static final String APP_ID = "";private static final String API_KEY = "";private static final String SECRET_KEY = "";public static String recognizeFile(String filePath) {AipOcr client = new AipOcr(APP_ID, API_KEY, SECRET_KEY);// 设置可选参数HashMap<String, String> options = new HashMap<>();options.put("language_type", "CHN_ENG");options.put("detect_direction", "true");options.put("detect_language", "true");options.put("probability", "true");// 调用百度云OCR服务识别文件内容JSONObject response = client.basicGeneral(filePath, options);// 解析识别结果StringBuilder result = new StringBuilder();JSONArray wordsArray = response.getJSONArray("words_result");for (int i = 0; i < wordsArray.length(); i++) {JSONObject wordsObject = wordsArray.getJSONObject(i);String words = wordsObject.getString("words");result.append(words).append("\n");}return result.toString();}
}
一个在学习的开发者,勿喷,欢迎交流