常用命令
搜索
GET _search
{"query": {"match_all": {}}
}
获取全部模版
GET _index_template
GET _index_template/yst_crawler_template
获取全部索引
GET /_cat/indices?v
获取当前mapping
GET /yst_crawler/_mapping
创建一个索引(同时指定 settings 和 mapping)
PUT /yst_crawler
{"settings": {"number_of_shards": 1,"number_of_replicas": 1},"mappings": {"properties": {"id":{"type": "long"},"cover_url": {"type": "keyword"},"title": {"type": "text","analyzer": "english"},"create_at": {"type": "date","format": "epoch_second"},"article_time": {"type": "date","format": "epoch_second"},"milli_at": {"type": "date","format": "epoch_millis"}}}
}
GET /yst_crawler/_mapping
删除一个索引
DELETE /yst_crawler
查看分词效果(使用 standard 分析器)
POST /_analyze
{"analyzer": "standard","text": "In a polarized US, how to define a patriot increasingly depends on who’s being asked"}
查找数据
GET /yst_crawler/_search
{"query": { "match_all": {} },"sort": [{"milli_at": {"order": "desc"}}],"from": 0,"size": 20
}
精确查询
GET /yst_crawler/_search
{"query": {"term": {"id": {"value": "1"}}}
}
GET /yst_crawler/_search
{"query": {"bool": {"must": [{"match": {"title": "$20 million settlement"}},{"range": {"milli_at": {"gte": "now-30d/d","lte": "now/d"}}}]}},"sort": [{"milli_at": {"order": "desc"}}],"_source": ["id", "title", "milli_at", "s3_url", "article_time", "detail_url", "ref_id", "req_md5", "tags", "category", "content", "milli_at"],"from": 0,"size": 20
}
GET /yst_crawler/_search
{"query": {"match_all": {}},"sort": [{"milli_at": {"order": "desc"}}],"_source": ["id", "title","milli_at","s3_url","article_time","detail_url","ref_id","req_md5","tags","category","content","milli_at"],"from": 0,"size": 10
}
媒体和分页查询
GET /yst_crawler/_search
{"query": {"bool": {"must": [{"match": {"title": "$20 million settlement"}},{"terms": {"ref_id": ["8fac888ae06647c3a7d6093c60c9d9b5"]}},{"range": {"milli_at": {"gte": "now-30d/d","lte": "now/d"}}}]}},"sort": [{"milli_at": {"order": "desc"}}],"_source": ["id", "title", "milli_at", "s3_url", "article_time", "detail_url", "ref_id", "req_md5", "tags", "category", "content", "milli_at"],"from": 0,"size": 20
}
GET /yst_crawler/_search
{"query": {"match": {"id": "22111"}},"sort": [{"milli_at": {"order": "desc"}}],"_source": ["id", "title","milli_at","s3_url","article_time","detail_url","ref_id","req_md5","tags","category","content","milli_at"],"from": 0,"size": 20
}
GET /yst_crawler/_search
{"query": {"bool": {"must": [{"match": {"title": "$20 million settlement"}},{"range": {"milli_at": {"gte": "now-30d/d","lte": "now/d"}}}]}},"sort": [{"milli_at": {"order": "desc"}}],"_source": ["id", "title", "milli_at", "s3_url", "article_time", "detail_url", "ref_id", "req_md5", "tags", "category", "content", "milli_at","article_id"],"from": 0,"size": 19
}
GET /yst_crawler/_search
{"query": {"bool": {"must": [{"term": {"is_status": 0}},{"term": {"category": "for_you"}},{"range": {"milli_at": {"gte": "now-30d/d","lte": "1690362492000"}}}]}},"_source": ["id", "title", "milli_at", "s3_url", "article_time", "detail_url", "ref_id", "req_md5", "tags", "category", "content", "milli_at","article_id","desc","state"],"from": 0,"sort": [{"milli_at": {"order": "desc"}}],"size": 20
}
修改mapping
GET /yst_crawler/_mapping
PUT /yst_crawler/_mapping
{"properties": {"state": {"type": "keyword"}}
}
创建 yst_city 索引(含 settings 和 mappings)
PUT /yst_city
{"settings": {"index.max_ngram_diff": 2,"analysis": {"analyzer": {"comma_analyzer": {"type": "pattern","pattern": ","},"code_ngram_analyzer": {"tokenizer": "code_ngram_tokenizer"}},"tokenizer": {"code_ngram_tokenizer": {"type": "ngram","min_gram": 2,"max_gram": 4}}}},"mappings": {"properties": {"id": {"type": "integer"},"state": {"type": "keyword"},"country": {"type": "keyword"},"city": {"type": "text"},"code": {"type": "text","analyzer": "comma_analyzer","fields": {"ngram": {"type": "text","analyzer": "code_ngram_analyzer"}}},"create_at": {"type": "integer"},"is_popular": {"type": "byte"},"sort": {"type": "short"},"location": {"type": "geo_point"}}}
}
搜索 city 和 code
GET /yst_city/_search
{"query": {"match": {"code": "36310"}},"_source": ["id","state","city","code","is_popular","location"],"from": 0,"size": 20
}
GET /yst_city/_search
{"query": {"match": {"code.ngram": "31"}},"_source": ["id","state","city","code","is_popular","location"],"from": 0,"size": 20
}
GET /yst_city/_search
{"query": {"bool": {"filter": [{"term": {"is_popular": 1}}]}},"sort": [{"sort": {"order": "asc"}}],"_source": ["id","state","city","code","is_popular","location"],"from": 0,"size": 20
}
GET /yst_city/_search
{"query": {"function_score": {"query": {"query_string": {"default_field": "city","query": "e york"}},"field_value_factor": {"field": "is_popular","factor": 1.2,"modifier": "sqrt","missing": 1},"boost_mode": "multiply"}},"_source": ["id","state","city","is_popular","location"],"from": 0,"size": 20
}
GET /yst_crawler/_search
{"query": {"term": {"title.keyword": {"value": "Man kicks in door, assaults ex-girlfriend: Solon Police Blotter"}}}
}
GET /yst_crawler/_search
{"query": {"bool": {"must": [{"match": {"title": "110"}},{"range": {"milli_at": {"gte": "now-8d/d","lte": "now/d"}}}]}},"_source": ["id", "title", "milli_at", "s3_url", "article_time", "detail_url", "ref_id", "req_md5", "tags", "category", "content", "milli_at","article_id","desc"],"from": 0,"size": 10
}
验证分词
GET /yst_crawler/_analyze
{"analyzer": "my_custom_analyzer","text": "这里填入你想要分析的文本,例如:110"
}
GET /yst_crawler/_analyze
{"analyzer": "standard","text": "这里填入你想要分析的文本,例如:110"
}
POST /_analyze
{"tokenizer": "standard","filter": ["lowercase",{"type": "stop","stopwords": ["a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]}],"text": "hello 110th 10223"
}
删除一个模版
DELETE _index_template/yst_crawler_template
创建一个索引模版
凡是名称以 yst_crawler 为前缀的新建索引,都会自动套用这个模版
PUT _index_template/yst_crawler_template
{"index_patterns": ["yst_crawler*"],"template": {"settings": {"number_of_shards": 1,"number_of_replicas": 0,"analysis": {"filter": {"my_stop_filter": {"type": "stop","stopwords": ["a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]}},"analyzer": {"my_new_custom_analyzer": {"type": "custom","tokenizer": "standard","filter": ["lowercase","my_stop_filter"]}}}},"mappings": {"properties": {"id": {"type": "long"},"is_status": {"type": "byte"},"detail_url": {"type": "keyword"},"ref_id": {"type": "keyword"},"article_id": {"type": "keyword"},"tags": {"type": "text","analyzer": "english"},"s3_url": {"type": "keyword"},"title": {"type": "text","analyzer": "my_new_custom_analyzer","fields": {"keyword": {"type": "keyword"}}},"article_time": {"type": "date","format": "epoch_second"},"desc": {"type": "keyword"},"content": {"type": "keyword"},"milli_at": {"type": "date","format": "epoch_millis"},"category": {"type": "keyword"},"score": {"type": "integer"},"state": {"type": "keyword"}}}}
}
迁移工作
1.设置别名
POST /_aliases
{"actions": [{"add": {"index": "yst_crawler","alias": "yst_crawler_alias"}}]
}
2.查看状态,并创建新索引
GET yst_crawler/_stats
PUT /yst_crawler_v2
GET /yst_crawler_v2/_mapping
3.验证分词
GET /yst_crawler_v2/_analyze
{"analyzer": "my_new_custom_analyzer","text": "xxx 110 2323"
}
4.把数据同步到新的索引
4.1 同步方式迁移
POST /_reindex
{"source": {"index": "yst_crawler"},"dest": {"index": "yst_crawler_v2"}
}
4.2 异步方式迁移,设置了9个并发
POST /_reindex?slices=9&refresh&wait_for_completion=false
{"source": {"index": "yst_crawler","size": 10000},"dest": {"index": "yst_crawler_v2"}
}
4.3 异步方式迁移,查看完成状态
GET /_tasks
GET /_tasks/eiPlKw2_T3iKReTflMHkEQ:39797979
5.修改别名,去除别名
POST /_aliases
{"actions": [{"remove": {"index": "yst_crawler","alias": "yst_crawler_alias"}},{"add": {"index": "yst_crawler_v2","alias": "yst_crawler_alias"}}]
}
6.验证数据
主要看数据量total
GET yst_crawler/_stats
GET yst_crawler_v2/_stats
7.最后确认没问题后,删除老的索引
DELETE /yst_crawler
GET /_cat/indices?v
GET /yst_crawler_alias/_search
{"query": {"bool": {"must": [{"match": {"title": "Pass State Park"}},{"range": {"milli_at": {"gte": "now-4d/d","lte": "now/d"}}}]}},"_source": ["id", "title", "milli_at", "s3_url", "article_time", "detail_url", "ref_id", "req_md5", "tags", "category", "content", "milli_at","article_id","desc"],"from": 0,"size": 20
}