IK分词器有几种模式
# 测试分词器
POST /_analyze
{"text":"黑马程序员学习java太棒了","analyzer": "standard"
}
# 测试分词器
POST /_analyze
{"text":"黑马程序员学习java太棒了","analyzer": "ik_max_word"
}
# 测试分词器
POST /_analyze
{"text":"黑马程序员学习java太棒了","analyzer": "ik_smart"
}
总结
ik_max_word最细切分, 占用内存多,但是分词多
ik_smart最少切分,占用内存少,但是分词少
standard中文没用
IK分词器如何拓展词条?如何停用词条
配置你的ext.dic和stopword.dic
ext.dic
mapping属性
文档操作
分词规则总结
CREATE TABLE `tb_hotel` (`id` bigint(20) NOT NULL COMMENT '酒店id',`name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '酒店名称',`address` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '酒店地址',`price` int(10) NOT NULL COMMENT '酒店价格',`score` int(2) NOT NULL COMMENT '酒店评分',`brand` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '酒店品牌',`city` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '所在城市',`star_name` varchar(16) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '酒店星级,1星到5星,1钻到5钻',`business` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '商圈',`latitude` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '纬度',`longitude` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '经度',`pic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '酒店图片',PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Compact;
# 酒店的mapping
PUT /hotel
{"mappings": {"properties": {"id":{"type": "keyword"},"name":{"type": "text","analyzer": "ik_max_word","copy_to": "all"},"address":{"type":"keyword","index": false},"price":{"type":"integer"},"score":{"type":"integer"},"brand":{"type": "keyword"},"city":{"type":"keyword"},"star_name":{"type": "keyword"},"business":{"type": "keyword","copy_to": "all"},"location":{"type":"geo_point"},"pic":{"type": "keyword","index": false},"all":{"type": "text","analyzer": "ik_max_word"}}}
Java的测试代码
package cn.itcast.hotel.constants;public class HotelIndexConstants {public static final String MAPPING_TEMPLATE = "{\n" +" \"mappings\": {\n" +" \"properties\": {\n" +" \"id\":{\n" +" \"type\": \"keyword\"\n" +" },\n" +" \"name\":{\n" +" \"type\": \"text\",\n" +" \"analyzer\": \"ik_max_word\",\n" +" \"copy_to\": \"all\"\n" +" },\n" +" \"address\":{\n" +" \"type\":\"keyword\",\n" +" \"index\": false\n" +" },\n" +" \"price\":{\n" +" \"type\":\"integer\"\n" +" },\n" +" \"score\":{\n" +" \"type\":\"integer\"\n" +" },\n" +" \"brand\":{\n" +" \"type\": \"keyword\"\n" +" },\n" +" \"city\":{\n" +" \"type\":\"keyword\"\n" +" },\n" +" \"star_name\":{\n" +" \"type\": \"keyword\"\n" +" },\n" +" \"business\":{\n" +" \"type\": \"keyword\",\n" +" \"copy_to\": \"all\"\n" +" },\n" +" \"location\":{\n" +" \"type\":\"geo_point\"\n" +" },\n" +" \"pic\":{\n" +" \"type\": \"keyword\",\n" +" \"index\": false\n" +" },\n" +" \"all\":{\n" +" \"type\": \"text\",\n" +" \"analyzer\": \"ik_max_word\"\n" +" }\n" +" }\n" +" }\n" +" ";
}
package cn.itcast.hotel;import cn.itcast.hotel.pojo.Hotel;
import cn.itcast.hotel.pojo.HotelDoc;
import cn.itcast.hotel.service.impl.HotelService;
import com.alibaba.fastjson.JSON;
import org.apache.http.HttpHost;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.common.xcontent.XContentType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;import java.io.IOException;import static cn.itcast.hotel.constants.HotelIndexConstants.MAPPING_TEMPLATE;/*** @author lst* @date 2023年11月23日 13:38*/
@SpringBootTest
public class HotelT {private RestHighLevelClient restHighLevelClient;@AutowiredHotelService hotelService;@BeforeEachpublic void before() {restHighLevelClient = new RestHighLevelClient(RestClient.builder(HttpHost.create("http://localhost:9200")));}@AfterEachvoid tearDown() throws IOException {restHighLevelClient.close();}@Testpublic void testCreateIndex() throws IOException {CreateIndexRequest request = new CreateIndexRequest("hotel");request.source(MAPPING_TEMPLATE, XContentType.JSON);restHighLevelClient.indices().create(request, RequestOptions.DEFAULT);}@Testpublic void testDeleteIndex() throws IOException {DeleteIndexRequest request = new DeleteIndexRequest("hotel");restHighLevelClient.indices().delete(request, RequestOptions.DEFAULT);}
}