背景:实现与百度搜索效果类似的全文搜索引擎,并支持关键词高亮显示
文章目录
- 1. 企业级搜索引擎解决方案
- 2. 创建索引规则
- 3. 数据拉取
- 4. 搜索高亮
- 5. 自定义词库
- 6. 效果图
- 7. 开源源码
1. 企业级搜索引擎解决方案
分词器:english、standard、ik_max_word、ik_smart、whitespace等
- 索引:创建索引时用ik_max_word进行分词,搜索时使用ik_smart分词器进行搜索
- 查询:精准匹配使用term;查询匹配使用match
- 类型:需要分词使用text,不分词、精准匹配用keyword,分数用double,经纬度用geo_point,数字用integer
2. 创建索引规则
PUT /jd_goods
{"settings" : {"number_of_shards" : 1,"number_of_replicas" : 1},"mappings": {"properties": {"id":{"type": "integer"},"title":{"type": "text","analyzer": "ik_max_word","search_analyzer": "ik_smart"},"desc":{"type": "text","analyzer": "ik_max_word","search_analyzer": "ik_smart"},"price":{"type": "text"},"img":{"type": "text"}}}
}
3. 数据拉取
http://localhost:9090/parse/vue
public List<Content> parseJD(String keyword) throws IOException {String baseUrl = "https://search.xxx.com/Search?keyword=";// 解析网页 在线预览中有处理Document document = Jsoup.parse(new URL(baseUrl + keyword), 30000);// 所有js中可以使用的方法,这里都能用Element element = document.getElementById("J_goodsList");//获取搜有的li元素Elements elements = element.getElementsByTag("li");ArrayList<Content> goodList = new ArrayList<>();// 获取元素中的内容,这里el 就是每一个li标签了for (Element el : elements) {//关于这种图片特别多的网站,所有的图片都是拉加载的String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img");String price = el.getElementsByClass("p-price").eq(0).text();String title = el.getElementsByClass("p-name").eq(0).text();Content content = new Content();content.setTitle(title);content.setImg(img);content.setPrice("内容标题 " + price);content.setDesc("内容描述 " + title);goodList.add(content);}return goodList;}
4. 搜索高亮
// 2. 获取es中的数据,实现基本搜索高亮功能public List<Map<String, Object>> searchPageHighlight(String keyword, int pageNo, int pageSize) throws IOException {if (pageNo <= 1) {pageNo = 1;}// 条件搜索SearchRequest searchRequest = new SearchRequest(ESConst.JD_SEARCH_INDEX);SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();// 分页searchSourceBuilder.from(pageNo);searchSourceBuilder.size(pageSize);// 精准匹配TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(ESConst.SEARCH_CONDITION_FIELD, keyword);searchSourceBuilder.query(termQueryBuilder);searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));//构建高亮HighlightBuilder highlightBuilder = new HighlightBuilder();highlightBuilder.field(ESConst.HIGHLIGHT_TITLE);highlightBuilder.field("desc");highlightBuilder.requireFieldMatch(true);//多个高亮 显示highlightBuilder.preTags(ESConst.HIGHLIGHT_PRE_TAGS);highlightBuilder.postTags(ESConst.HIGHLIGHT_POST_TAGS);searchSourceBuilder.highlighter(highlightBuilder);// 执行搜索searchRequest.source(searchSourceBuilder);SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);// 解析结果ArrayList<Map<String, Object>> list = new ArrayList<>();for (SearchHit hit : searchResponse.getHits().getHits()) {// 解析高亮的字段,将原来的字段置换为我们高亮的字段即可!Map<String, HighlightField> highlightFields = hit.getHighlightFields();HighlightField title = highlightFields.get(ESConst.HIGHLIGHT_TITLE);HighlightField desc = highlightFields.get("desc");// 获取原来的结果Map<String, Object> sourceAsMap = hit.getSourceAsMap();if (title != null) {Text[] fragments = title.fragments();String newTitle = "";for (Text text : fragments) {newTitle += text;}//高亮字段替换掉原来的内容即可sourceAsMap.put(ESConst.SEARCH_CONDITION_FIELD, newTitle);}if (desc != null) {Text[] fragments = desc.fragments();String newDesc = "";for (Text text : fragments) {newDesc += text;}//高亮字段替换掉原来的内容即可sourceAsMap.put("desc", newDesc);}// 将结果放入list容器返回list.add(sourceAsMap);}return list;}
5. 自定义词库
new_word.dic
女包
女士包
java语言
6. 效果图
http://localhost:9090/
7. 开源源码
https://gitee.com/gblfy/es7-jd-vue