Elasticsearch 的应用
本文使用的版本为:7.14.0
todo:前端部分
Kibana的开发工具
IK分词器粗粒度
# 请求,通过【ik_smart】最粗粒度划分
GET _analyze
{"analyzer": "ik_smart","text":"中国共产党"
}
# 返回
{"tokens" : [{"token" : "中国共产党","start_offset" : 0,"end_offset" : 5,"type" : "CN_WORD","position" : 0}]
}
IK分词器细粒度
# 请求,通过【ik_max_word】最细粒度划分
GET _analyze
{"analyzer": "ik_max_word","text":"中国共产党"
}
# 返回
{"tokens" : [{"token" : "中国共产党","start_offset" : 0,"end_offset" : 5,"type" : "CN_WORD","position" : 0},{"token" : "中国","start_offset" : 0,"end_offset" : 2,"type" : "CN_WORD","position" : 1},{"token" : "国共","start_offset" : 1,"end_offset" : 3,"type" : "CN_WORD","position" : 2},{"token" : "共产党","start_offset" : 2,"end_offset" : 5,"type" : "CN_WORD","position" : 3},{"token" : "共产","start_offset" : 2,"end_offset" : 4,"type" : "CN_WORD","position" : 4},{"token" : "党","start_offset" : 4,"end_offset" : 5,"type" : "CN_CHAR","position" : 5}]
}
Rest风格说明
method | url地址 | 描述 |
---|---|---|
PUT | localhost:9200/索引名称/类型名称/文档id | 创建文档(指定文档id) |
POST | localhost:9200/索引名称/类型名称 | 创建文档(随机文档id) |
POST | localhost:9200/索引名称/类型名称/文档id/_update | 修改文档 |
DELETE | localhost:9200/索引名称/类型名称/文档id | 删除文档 |
GET | localhost:9200/索引名称/类型名称/文档id | 查询文档通过文档id |
POST | localhost:9200/索引名称/类型名称/_search | 查询所有数据 |
关于索引的基本操作
# 创建索引并添加数据 反复提交则为覆盖修改
PUT bu/_doc/1
{"name":"张三","age":"12"
}
# 返回值
# 当多次提交后,版本信息(_version)则会随之改变
{"_index" : "bu","_type" : "_doc","_id" : "1","_version" : 1,"result" : "created","_shards" : {"total" : 2,"successful" : 1,"failed" : 0},"_seq_no" : 6,"_primary_term" : 1
}
# 创建索引规则
PUT /test1/
{"mappings": {"properties": {"name": {"type": "text"},"age": {"type": "long"},"birthday": {"type": "date"}}}
}
# 返回值
{"acknowledged" : true,"shards_acknowledged" : true,"index" : "test1"
}
# 查询
GET /bu/
# 返回值
{"bu" : {"aliases" : { },"mappings" : {"properties" : {"age" : {"type" : "text","fields" : {"keyword" : {"type" : "keyword","ignore_above" : 256}}},"name" : {"type" : "text","fields" : {"keyword" : {"type" : "keyword","ignore_above" : 256}}}}},"settings" : {"index" : {"routing" : {"allocation" : {"include" : {"_tier_preference" : "data_content"}}},"number_of_shards" : "1","provided_name" : "bu","creation_date" : "1700188636398","number_of_replicas" : "1","uuid" : "m-y5rTaqRQSyKr-a_sAdXw","version" : {"created" : "7140099"}}}}
}
# 通过_cat 获取ES更多信息
GET _cat/indices?v # 索引
GET _cat/aliases # 别名
GET _cat/allocation
GET _cat/count
GET _cat/fielddata
GET _cat/health
GET _cat/master
GET _cat/nodeattrs
GET _cat/nodes # 查看节点信息,docker容器即为容器信息
GET _cat/pending_tasks
GET _cat/plugins # 查看插件,如ik分词器
GET _cat/recovery
GET _cat/repositories
GET _cat/segments
GET _cat/shards
GET _cat/snapshots
GET _cat/tasks
GET _cat/templates
GET _cat/thread_pool
# 通过Post方法进行修改 【如果漏写字段,也不会把字段删除】
POST /test/_update/1/
{"doc":{"name":"李四"}
}
# 结果
{"_index" : "test","_type" : "_doc","_id" : "1","_version" : 2,"result" : "noop","_shards" : {"total" : 0,"successful" : 0,"failed" : 0},"_seq_no" : 1,"_primary_term" : 1
}
# 删除索引
DELETE test
关于文档的基本操作
# 通过条件查询
GET /test/user/_search?q=name:张三
# 通过条件查询
GET /test/user/_search
{"query":{"match": {"name": "张三"}}
}
# 返回值
# hits: 命中
{"took" : 0,"timed_out" : false,"_shards" : {"total" : 1,"successful" : 1,"skipped" : 0,"failed" : 0},"hits" : {"total" : {"value" : 2,"relation" : "eq"},"max_score" : 0.36464313,"hits" : [{"_index" : "test","_type" : "user","_id" : "1","_score" : 0.36464313,"_source" : {"name" : "张三","age" : 10,"desc" : "说明","tags" : ["1","2","3"]}},{"_index" : "test","_type" : "user","_id" : "2","_score" : 0.36464313,"_source" : {"name" : "张三","age" : 10,"desc" : "说明","tags" : ["1","2","3"]}}]}
}
# 过滤要查询的结果 【select name】
GET /test/_search
{"query":{"match": {"name": "张三"}}, "_source": ["name"] # 只显示name
}
# 返回结果
{"took" : 1,"timed_out" : false,"_shards" : {"total" : 1,"successful" : 1,"skipped" : 0,"failed" : 0},"hits" : {"total" : {"value" : 2,"relation" : "eq"},"max_score" : 0.36464313,"hits" : [{"_index" : "test","_type" : "user","_id" : "1","_score" : 0.36464313,"_source" : {"name" : "张三"}},{"_index" : "test","_type" : "user","_id" : "2","_score" : 0.36464313,"_source" : {"name" : "张三"}}]}
}
# 排序
GET /test/_search
{"query":{"match": {"name": "张三"}}, "sort": [{"age": {"order": "desc"}}]
}
# 分页
GET /test/_search
{"query":{"match": {"name": "张三"}}, "sort": [{"age": {"order": "desc"}}], "from": 0, "size": 1
}
# 查询 name 必须【must】 是张三的
GET /test/user/_search
{"query":{"bool": {"must": [{"match": {"name": "张三"}}]}}
}
# 查询 name 不能【must_not】 是张三的
GET /test/user/_search
{"query":{"bool": {"must_not": [{"match": {"name": "张三"}}]}}
}
# 查询 name=张三 or age=10 的
GET /test/user/_search
{"query":{"bool": {"should": [{"match": {"name": "张三"}},{"match": {"age": "10"}}]}}
}
# 查询 范围数据
# "gt": 大于
# "gte": 大于等于
# "lt": 小于
# "lte": 小于等于
GET /test/user/_search
{"query":{"bool": {"filter": [{"range": {"age": {"gte": 3,"lte": 10}}}]}}
}
# 通过 标签查询,多个值用空格分隔
GET /test/user/_search
{"query":{"match": {"tags": "1 2"}}
}
term 查询是直接通过倒排索引指定的词条进行精确的查找
关于分词
- term 直接精确查询
- match 会使用分词器解析!!(先分析文档,然后通过分析的文档进行查询!!)
两个类型 text keyword
- text 可分
- keyword 不可再分
# 创建测试数据
PUT testdb
{"mappings": {"properties": {"name":{"type": "text" },"desc":{"type": "keyword"}}}
}
PUT testdb/_doc/1
{"name":"测试","desc":""
}
GET /testdb/_doc/1
# 通过keyword【关键字】进行分析 分析结果:测试
GET _analyze
{"analyzer": "keyword","text": "测试"
}
# 通过standard【标准】进行分析 分析结果:测 试
GET _analyze
{"analyzer": "standard", "text": "测试"
}
# 通过term查询
GET testdb/_search
{"query": {"term": {"desc": {"value": ""}}}
}
GET testdb/_search
{"query": {"term": {"name": {"value": "测"}}}
}
# 实现高亮查询
GET /test/user/_search
{"query":{"bool": {"should": [{"match": {"name": "张三"}},{"match": {"age": "10"}}]}},"highlight":{"pre_tags": "<span class='key' style='color:red'>", "post_tags": "</span>", "fields": {"name": {}}}
}
# 结果
"_source" : {"name" : "张三","age" : 10,"desc" : "说明","tags" : ["1","2","3"]
},
"highlight" : {"name" : ["<span class='key' style='color:red'>张</span><span class='key' style='color:red'>三</span>"]
}
整合SpringBoot
ES 依赖版本
# 注意 ES 的依赖版本,需要与服务的版本保持一致
<properties><java.version>1.8</java.version><!-- 自定义版本,版本依赖需要保证和本地一致 --><elasticsearch.version>7.14.0</elasticsearch.version>
</properties>
爬取数据存入ES
# 第一步配置依赖
<dependency><groupId>org.jsoup</groupId><artifactId>jsoup</artifactId><version>1.10.2</version>
</dependency>
# 第二步编写方法
/**
 * Fetches the JD.com search result page for a keyword and turns every listed
 * product into a {@code Context} (image URL, price text, title text).
 *
 * @param keyword the search term; it is URL-encoded before being placed in the query string
 * @return the parsed products, or an empty list when the page has no
 *         {@code J_goodsList} element (for example a login or anti-bot page)
 * @throws IOException if the page cannot be fetched or parsed
 */
public static List<Context> getGoodsList(String keyword) throws IOException {
    // Encode the keyword so characters such as '&', '#', '+' or spaces cannot
    // break or alter the query string. The two-argument overload is used
    // because the project targets Java 8.
    String encodedKeyword = java.net.URLEncoder.encode(keyword, "UTF-8");
    String url = "https://search.jd.com/Search?keyword=" + encodedKeyword + "&enc=utf-8";

    // Second argument is the fetch timeout passed to Jsoup.
    Document document = Jsoup.parse(new URL(url), 3000);

    List<Context> goodsList = new ArrayList<>();
    Element goodsContainer = document.getElementById("J_goodsList");
    if (goodsContainer == null) {
        // The expected result container is missing; report "no goods" instead of failing with an NPE.
        return goodsList;
    }

    for (Element item : goodsContainer.getElementsByTag("li")) {
        // The image address is read from the "data-lazy-img" attribute of the first <img>.
        String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
        String price = item.getElementsByClass("p-price").eq(0).text();
        String title = item.getElementsByClass("p-name").eq(0).text();
        goodsList.add(new Context(img, price, title));
    }
    return goodsList;
}
ES Client配置类
package com.es.elasticsearch.config;import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;@Configuration
// Registers the Elasticsearch high-level REST client as a bean.
public class ElasticSearchClientConfig {

    // Connection settings of the single Elasticsearch node this client talks to.
    private static final String ES_HOST = "8.140.248.231";
    private static final int ES_PORT = 9200;
    private static final String ES_SCHEME = "http";

    /**
     * Creates the {@link RestHighLevelClient} bean pointing at the configured node.
     *
     * @return a client built on top of a low-level {@link RestClient} builder
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost(ES_HOST, ES_PORT, ES_SCHEME);
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
业务实现类
package com.es.elasticsearch.service;import com.alibaba.fastjson.JSON;
import com.es.elasticsearch.pojo.Context;
import com.es.elasticsearch.pojo.User;
import com.es.elasticsearch.util.HtmlParseUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Service;@Service
public class ContextService {@Autowired@Qualifier("restHighLevelClient")private RestHighLevelClient restHighLevelClient;// 1. 调用工具类,将预备的解析数据插入到索引public Boolean insert(String keyword) throws IOException {String index = "jd";GetIndexRequest getIndexRequest = new GetIndexRequest(index);boolean exists = restHighLevelClient.indices().exists(getIndexRequest, RequestOptions.DEFAULT);if(!exists){// 如果不存在则创建CreateIndexRequest createIndexRequest = new CreateIndexRequest(index);restHighLevelClient.indices().create(createIndexRequest,RequestOptions.DEFAULT);}List<Context> goodsList = HtmlParseUtil.getGoodsList(keyword);BulkRequest BulkRequest = new BulkRequest();BulkRequest.timeout("30s");for(Context context:goodsList){BulkRequest.add(new IndexRequest(index).source(JSON.toJSONString(context), XContentType.JSON));}BulkResponse bulkResponse = restHighLevelClient.bulk(BulkRequest, RequestOptions.DEFAULT);// 是否失败return !bulkResponse.hasFailures();}// 3. 获取这些数据,实现高亮的搜索功能public List<Map<String,Object>> searchPagehighLight(String keyword, int pageNo,int pageSize) throws IOException {if (pageNo <= 1)pageNo = 1;// 条件清晰SearchRequest searchRequest = new SearchRequest("jd");SearchSourceBuilder builder = new SearchSourceBuilder();builder.from(pageNo);builder.size(pageSize);// 精准匹配TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title",keyword);builder.query(termQueryBuilder);builder.timeout(new TimeValue(60, TimeUnit.SECONDS));// 高亮HighlightBuilder highlightBuilder = new HighlightBuilder();highlightBuilder.field("title");highlightBuilder.requireFieldMatch(false);highlightBuilder.preTags("<span style='color:red'>");highlightBuilder.postTags("</span>");builder.highlighter(highlightBuilder);// 执行搜索searchRequest.source(builder);SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);// 解析结果ArrayList<Map<String,Object>> list= new ArrayList<>();for (SearchHit hit: searchResponse.getHits().getHits()) {// 解析高亮的字段Map<String, HighlightField> 
highlightFields = hit.getHighlightFields();HighlightField title = highlightFields.get("title");Map<String,Object> sourceAsMap = hit.getSourceAsMap();// 原来的结果// 解析高亮字段,将原来的字段换成我们高亮的字段即可if (title != null) {Text[] fragments = title.fragments();StringBuilder nTitle = new StringBuilder();for (Text text:fragments) {nTitle.append(text);}sourceAsMap.put("title",nTitle);}list.add(hit.getSourceAsMap()); // 高亮的字段替换为原来的内容即可}return list;}// 2. 获取这些数据,实现基本的搜索功能public List<Map<String,Object>> searchPage (String keyword, int pageNo,int pageSize) throws IOException {if (pageNo <= 1)pageNo = 1;// 条件清晰SearchRequest searchRequest = new SearchRequest("jd");SearchSourceBuilder builder = new SearchSourceBuilder();builder.from(pageNo);builder.size(pageSize);// 精准匹配TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title",keyword);builder.query(termQueryBuilder);builder.timeout(new TimeValue(60, TimeUnit.SECONDS));// 执行搜索searchRequest.source(builder);SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);// 解析结果ArrayList<Map<String,Object>> list= new ArrayList<>();for (SearchHit hit: searchResponse.getHits().getHits()) {list.add(hit.getSourceAsMap()); // 高亮的字段替换为原来的内容即可}return list;}
}