Download
Elasticsearch: https://www.elastic.co/cn/elasticsearch
elasticsearch-analysis-ik: https://github.com/medcl/elasticsearch-analysis-ik
Start Elasticsearch: elasticsearch\bin\elasticsearch.bat, then verify at http://localhost:9200
Start Kibana: kibana\bin\kibana.bat, then verify at http://localhost:5601
Run the requests below in Kibana's Dev Tools - Console
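To confirm Elasticsearch is reachable from the console, you can run GET /. The response below is only a sketch of the typical shape; the actual node name, cluster_uuid, and version fields depend on your installation.

GET /

// Illustrative response shape; values vary per installation
{
  "name" : "your-node-name",
  "cluster_name" : "elasticsearch",
  "version" : {
    "number" : "7.6.2",
    ...
  },
  "tagline" : "You Know, for Search"
}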
Least splitting (ik_smart)
GET _analyze
{
  "analyzer": "ik_smart",
  "text": ["好好学习"]
}

// Analysis result
{
  "tokens" : [
    { "token" : "好好学习", "start_offset" : 0, "end_offset" : 4, "type" : "CN_WORD", "position" : 0 }
  ]
}
Finest-grained splitting (ik_max_word)
GET _analyze
{
  "analyzer": "ik_max_word",
  "text": ["好好学习"]
}

// Analysis result
{
  "tokens" : [
    { "token" : "好好学习", "start_offset" : 0, "end_offset" : 4, "type" : "CN_WORD", "position" : 0 },
    { "token" : "好好学", "start_offset" : 0, "end_offset" : 3, "type" : "CN_WORD", "position" : 1 },
    { "token" : "好好", "start_offset" : 0, "end_offset" : 2, "type" : "CN_WORD", "position" : 2 },
    { "token" : "好学", "start_offset" : 1, "end_offset" : 3, "type" : "CN_WORD", "position" : 3 },
    { "token" : "学习", "start_offset" : 2, "end_offset" : 4, "type" : "CN_WORD", "position" : 4 }
  ]
}
Custom dictionary configuration
elasticsearch-7.6.2\plugins\elasticsearch-analysis-ik-7.6.2\config\IKAnalyzer.cfg.xml
<!-- Create a new .dic file containing the custom terms, then register it in IKAnalyzer.cfg.xml -->
<entry key="ext_dict">春眠不觉晓.dic</entry>
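For reference, a full IKAnalyzer.cfg.xml usually looks like the sketch below (comments and ordering may differ slightly between plugin versions). The referenced .dic file is a plain UTF-8 text file with one term per line (here a single line containing 春眠不觉晓), and Elasticsearch must be restarted for a local dictionary change to take effect.

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer extension configuration (sketch; wording varies by version)</comment>
    <!-- local extension dictionaries, multiple files separated by semicolons -->
    <entry key="ext_dict">春眠不觉晓.dic</entry>
    <!-- local extension stopword dictionaries -->
    <entry key="ext_stopwords"></entry>
    <!-- remote extension dictionary (optional) -->
    <!-- <entry key="remote_ext_dict">words_location</entry> -->
    <!-- remote extension stopword dictionary (optional) -->
    <!-- <entry key="remote_ext_stopwords">words_location</entry> -->
</properties>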
GET _analyze
{
  "analyzer": "ik_max_word",
  "text": ["春眠不觉晓"]
}

// Before the custom dictionary
{
  "tokens" : [
    { "token" : "春", "start_offset" : 0, "end_offset" : 1, "type" : "CN_CHAR", "position" : 0 },
    { "token" : "眠", "start_offset" : 1, "end_offset" : 2, "type" : "CN_CHAR", "position" : 1 },
    { "token" : "不觉", "start_offset" : 2, "end_offset" : 4, "type" : "CN_WORD", "position" : 2 },
    { "token" : "晓", "start_offset" : 4, "end_offset" : 5, "type" : "CN_CHAR", "position" : 3 }
  ]
}

// After the custom dictionary
{
  "tokens" : [
    { "token" : "春眠不觉晓", "start_offset" : 0, "end_offset" : 5, "type" : "CN_WORD", "position" : 0 },
    { "token" : "不觉", "start_offset" : 2, "end_offset" : 4, "type" : "CN_WORD", "position" : 1 },
    { "token" : "晓", "start_offset" : 4, "end_offset" : 5, "type" : "CN_CHAR", "position" : 2 }
  ]
}
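To use the two analyzers on an index, a common pattern is to analyze with ik_max_word at index time (more tokens, better recall) and with ik_smart at search time. The sketch below assumes a hypothetical index named test_index with a single text field named content.

// Hypothetical index and field names, shown only as a usage sketch
PUT /test_index
{
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart"
      }
    }
  }
}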