基于 Java 的英文翻译字典,附有源代码和源数据库初始化文件。
源码地址
dict_demo: 提取一段英文对话中的英文词汇,输出为英文单词字典形式
解析 JSON 词条
private void readFile(String pathname) {long start = System.currentTimeMillis();
// String pathname = ;int count = 0;int errcount = 0;try (FileReader reader = new FileReader(pathname);BufferedReader br = new BufferedReader(reader)) {String line;while ((line = br.readLine()) != null) {EnglishDictVo englishDictVo = JSONObject.parseObject(line, EnglishDictVo.class);String headWord = englishDictVo.getHeadWord();String bookId = englishDictVo.getBookId();Object sentence = englishDictVo.getContent().getWord().getContent().getSentence();String ukphone = englishDictVo.getContent().getWord().getContent().getUkphone();String usphone = englishDictVo.getContent().getWord().getContent().getUsphone();String tranCn = englishDictVo.getContent().getWord().getContent().getTrans().get(0).getTranCn();EnglishDict englishDict = new EnglishDict();englishDict.setEnglishWord(headWord);englishDict.setBritishSound(ukphone);englishDict.setAmericanSound(usphone);englishDict.setChineseWord(tranCn);englishDict.setTag(bookId);if (sentence != null) {englishDict.setSentence(JSONObject.toJSONString(sentence));}if (headWord == null || bookId == null || ukphone == null || usphone == null || tranCn == null) {errcount++;continue;}// 这里可以不做单词唯一的校验。导入多本词汇。给出多个单词书的实例EnglishDict englishDictQuery = englishDictMapper.selectOne(Wrappers.<EnglishDict>lambdaQuery().eq(EnglishDict::getEnglishWord, headWord).last("limit 1"));if (englishDictQuery == null) {englishDictMapper.insert(englishDict);count++;}}} catch (IOException e) {e.printStackTrace();}long end = System.currentTimeMillis();System.out.println("====== 入库 " + count + " 错误" + errcount);System.out.println("====== 耗时 " + (end - start));}
从内容中提取词汇
public ReportVo extractKeywords(String contentString) {ReportVo reportVo = new ReportVo();String[] s = contentString.replace(" ", ",").split(",");HashSet<String> strings = new HashSet<String>(Arrays.asList(s));if (CollUtil.isNotEmpty(strings)) {Set<String> queryWords = strings.stream().map(vo -> {String s1 = vo.replaceAll("[^a-zA-Z]", ""); // 去掉无用的符号String lowerCase = s1.toLowerCase();return lowerCase;}).collect(Collectors.toSet());List<EnglishDict> englishDicts = englishDictMapper.selectList(Wrappers.<EnglishDict>lambdaQuery().in(EnglishDict::getEnglishWord, queryWords));reportVo.setWordNum(englishDicts.size());List<ReportVo.WordVo> words = englishDicts.stream().map(vo -> {ReportVo.WordVo wordVo = new ReportVo.WordVo();wordVo.setAmericanSound(vo.getAmericanSound());wordVo.setBritishSound(vo.getBritishSound());wordVo.setChinese(vo.getChineseWord());wordVo.setEnglish(vo.getEnglishWord());return wordVo;}).collect(Collectors.toList());reportVo.setWords(words);} else {reportVo.setWordNum(0);reportVo.setWords(Collections.EMPTY_LIST);}return reportVo;}
初始化接口如下
一千条耗时5秒
把剩下的也插入