_search后面指定超时:
GET /_search?timeout=1s
从es7开始就已经不建议使用type的概念,es8更是直接移除了type的概念,因此之后的查询路径中都不再带中间的type了。
product索引,有数据:
GET /product/_search { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 4, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "product", "_type" : "_doc", "_id" : "3", "_score" : 1.0, "_source" : { "name" : "nfc phone", "desc" : "shouji zhong de hongzhaji", "price" : 2999, "tags" : [ "xingjiabi", "fashao", "menjinka" ] } }, { "_index" : "product", "_type" : "_doc", "_id" : "4", "_score" : 1.0, "_source" : { "name" : "xiaomi erji", "desc" : "erji zhong de huangmenji", "price" : 999, "tags" : [ "low", "bufangshui", "yinzhicha" ] } }, { "_index" : "product", "_type" : "_doc", "_id" : "5", "_score" : 1.0, "_source" : { "name" : "hongmi erji", "desc" : "erji zhong de kendeji", "price" : 399, "tags" : [ "lowbee", "xuhangduan", "zhiliangx" ] } }, { "_index" : "product", "_type" : "_doc", "_id" : "1", "_score" : 1.0, "_source" : { "name" : "xiaomi phone", "desc" : "shouji zhong de zhandouji", "price" : 99999 } } ] } }全文检索name字段包含xiaomi:
GET /product/_search?q=name:xiaomi { "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : 0.45618832, "hits" : [ { "_index" : "product", "_type" : "_doc", "_id" : "4", "_score" : 0.45618832, "_source" : { "name" : "xiaomi erji", "desc" : "erji zhong de huangmenji", "price" : 999, "tags" : [ "low", "bufangshui", "yinzhicha" ] } }, { "_index" : "product", "_type" : "_doc", "_id" : "1", "_score" : 0.45618832, "_source" : { "name" : "xiaomi phone", "desc" : "shouji zhong de zhandouji", "price" : 99999 } } ] } }全文检索所有字段内容拼在一起是否包含xiaomi:
GET /product/_search?q=xiaomi
ES默认按照_score的高低进行排序,如果需要指定排序方式,可通过sort进行指定: 根据price正序排序
GET /product/_search?sort=price:asc { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 4, "relation" : "eq" }, "max_score" : null, "hits" : [ { "_index" : "product", "_type" : "_doc", "_id" : "5", "_score" : null, "_source" : { "name" : "hongmi erji", "desc" : "erji zhong de kendeji", "price" : 399, "tags" : [ "lowbee", "xuhangduan", "zhiliangx" ] }, "sort" : [ 399 ] }, { "_index" : "product", "_type" : "_doc", "_id" : "4", "_score" : null, "_source" : { "name" : "xiaomi erji", "desc" : "erji zhong de huangmenji", "price" : 999, "tags" : [ "low", "bufangshui", "yinzhicha" ] }, "sort" : [ 999 ] }, { "_index" : "product", "_type" : "_doc", "_id" : "3", "_score" : null, "_source" : { "name" : "nfc phone", "desc" : "shouji zhong de hongzhaji", "price" : 2999, "tags" : [ "xingjiabi", "fashao", "menjinka" ] }, "sort" : [ 2999 ] }, { "_index" : "product", "_type" : "_doc", "_id" : "1", "_score" : null, "_source" : { "name" : "xiaomi phone", "desc" : "shouji zhong de zhandouji", "price" : 99999 }, "sort" : [ 99999 ] } ] } }发现,_score属性的值没了。。因为ES知道你使用了某种排序,因此_score就没啥意义了。
在此基础上,进行分页,返回索引为0开始,2条数据。
GET /product/_search?from=0&size=2&sort=price:asc { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 4, "relation" : "eq" }, "max_score" : null, "hits" : [ { "_index" : "product", "_type" : "_doc", "_id" : "5", "_score" : null, "_source" : { "name" : "hongmi erji", "desc" : "erji zhong de kendeji", "price" : 399, "tags" : [ "lowbee", "xuhangduan", "zhiliangx" ] }, "sort" : [ 399 ] }, { "_index" : "product", "_type" : "_doc", "_id" : "4", "_score" : null, "_source" : { "name" : "xiaomi erji", "desc" : "erji zhong de huangmenji", "price" : 999, "tags" : [ "low", "bufangshui", "yinzhicha" ] }, "sort" : [ 999 ] } ] } }匹配name中包含erji的记录:
GET /product/_search { "query": { "match": { "name": "erji" } } } { "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : 1.0630728, "hits" : [ { "_index" : "product", "_type" : "_doc", "_id" : "4", "_score" : 1.0630728, "_source" : { "name" : "xiaomi erji", "desc" : "erji zhong de huangmenji", "price" : 999, "tags" : [ "low", "bufangshui", "yinzhicha" ] } }, { "_index" : "product", "_type" : "_doc", "_id" : "5", "_score" : 1.0630728, "_source" : { "name" : "hongmi erji", "desc" : "erji zhong de kendeji", "price" : 399, "tags" : [ "lowbee", "xuhangduan", "zhiliangx" ] } } ] } }对上面全文检索出来的数据,进行根据price进行正序排序:
GET /product/_search { "query": { "match": { "name": "erji" } }, "sort": [ { "price": { "order": "asc" } } ] } { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : null, "hits" : [ { "_index" : "product", "_type" : "_doc", "_id" : "5", "_score" : null, "_source" : { "name" : "hongmi erji", "desc" : "erji zhong de kendeji", "price" : 399, "tags" : [ "lowbee", "xuhangduan", "zhiliangx" ] }, "sort" : [ 399 ] }, { "_index" : "product", "_type" : "_doc", "_id" : "4", "_score" : null, "_source" : { "name" : "xiaomi erji", "desc" : "erji zhong de huangmenji", "price" : 999, "tags" : [ "low", "bufangshui", "yinzhicha" ] }, "sort" : [ 999 ] } ] } }在name和desc中查询包含“zhong”的记录:
GET /product/_search { "query": { "multi_match": { "query": "zhong", "fields": ["name","desc"] } } }根据price倒序,从索引为0的数据开始,查询1条出来。
GET /product/_search { "query": { "match_all": {} }, "sort": [ { "price": { "order": "desc" } } ], "from": 0, "size": 1 }搜索关键词,不会进行分词
GET /product/_search { "query": { "term": { "name": { "value": "nfc phone" } } } }match会对关键词进行分词,例如上面的“nfc phone” 会分词为“nfc”和“phone”:
GET /_analyze { "analyzer":"standard", "text":"nfc phone" } { "tokens" : [ { "token" : "nfc", "start_offset" : 0, "end_offset" : 3, "type" : "<ALPHANUM>", "position" : 0 }, { "token" : "phone", "start_offset" : 4, "end_offset" : 9, "type" : "<ALPHANUM>", "position" : 1 } ] }
而term则不对关键词进行分词,直接拿“nfc phone”去匹配, 因此上面的term查询,实际是无法查询到数据的。因为虽然关键词不分词,但是es中的实际字段内容在建立索引时是会进行分词的: 也就是说,字段内容“nfc phone”在倒排索引中会分词为“nfc”和“phone”两个词项, 而此时term是拿着完整的“nfc phone”去匹配这些词项,自然匹配不到东西了。
有没有办法使用term查询记录为“nfc phone”呢? 当然,使用terms:
GET /product/_search { "query": { "terms": { "name": [ "nfc", "phone" ] } } }
terms就是去匹配字段内容分词为“nfc”或者“phone”的,类似于mysql的in操作,这样便可以实现类似match的效果了。。
{ "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "product", "_type" : "_doc", "_id" : "3", "_score" : 1.0, "_source" : { "name" : "nfc phone", "desc" : "shouji zhong de hongzhaji", "price" : 2999, "tags" : [ "xingjiabi", "fashao", "menjinka" ] } }, { "_index" : "product", "_type" : "_doc", "_id" : "1", "_score" : 1.0, "_source" : { "name" : "xiaomi phone", "desc" : "shouji zhong de zhandouji", "price" : 99999 } } ] } }类似于match,match_phrase还是分词后去搜的,但不同的是: 1、目标文档需要包含分词后的所有词 2、目标文档还要保持这些词的相对顺序和文档中的一致
效果有点像mysql的like查询。。
可以组合多个查询条件,bool查询也是采用more_matches_is_better的机制,因此满足must和should子句的文档将会合并起来计算分值。
必须满足 and 子句(查询)必须都能在文档中进行匹配成功
name必须包含xiaomi,且price大于等于2999的
GET /product/_search { "query": { "bool": { "must": [ { "match": { "name": "xiaomi" } }, { "range": { "price": { "gte": 2999 } } } ] } } }过滤器 不计算相关度分数,cache☆ 子句(查询)必须都能在文档中进行匹配成功。但是相比must查询,filter中的子句的分数将被忽略(不进行计算分数),因此相比must效率更高。
Filter子句在filter上下文中执行,这意味着计分被忽略,并且子句被考虑用于缓存。
price大于999的:
GET /product/_search { "query": { "bool": { "filter": [ {"range": { "price": { "gt": 999 } }} ] } } }
一般可以先通过filter筛选出数据集,然后再去match匹配这堆数据集,得到最终的结果,而实际上,我们的分数排名计算也只想针对于这些结果集,而非全部数据。 接下来看一个案例: 首先筛选name包含短语“xiaomi phone”并且价格大于1999的数据(不排序,不计算分数),然后在其中搜索name包含“xiaomi”并且desc包含“shouji”的数据:
GET /product/_search { "query": { "bool":{ "must": [ {"match": { "name": "xiaomi"}}, {"match": {"desc": "shouji"}} ], "filter": [ {"match_phrase":{"name":"xiaomi phone"}}, {"range": { "price": { "gt": 1999 } }} ] } } }可能满足 or 至少需要minimum_should_match个子句(查询)匹配成功。
name包含erji或者xiaomi:
GET /product/_search { "query": { "bool": { "should": [ {"match": { "name": "erji" }},{ "match": { "name": "xiaomi" } } ] } } }必须不满足 不计算相关度分数 not 子句(查询)全都不得出现在匹配的文档中。子句在过滤器上下文中执行,这意味着计分被忽略,并且子句被视为用于缓存。由于忽略计分,因此将返回所有文档的分数都为0。
匹配name既不包含“erji”也不包含“hongzhaji”的记录
GET /product/_search { "query": { "bool": { "must_not": [ { "match": { "name": "erji" } }, { "match": { "name": "hongzhaji" } } ] } } } { "took" : 4, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 5, "relation" : "eq" }, "max_score" : 0.0, "hits" : [ { "_index" : "product", "_type" : "_doc", "_id" : "3", "_score" : 0.0, "_source" : { "name" : "nfc phone", "desc" : "shouji zhong de hongzhaji", "price" : 2999, "tags" : [ "xingjiabi", "fashao", "menjinka" ] } }, { "_index" : "product", "_type" : "_doc", "_id" : "1", "_score" : 0.0, "_source" : { "name" : "xiaomi phone", "desc" : "shouji zhong de zhandouji", "price" : 99999 } }, { "_index" : "product", "_type" : "_doc", "_id" : "MQhVI3IBIohknZNBAh6y", "_score" : 0.0, "_source" : { "name" : "balala nfc phone", "desc" : "shouji zhong de hongzhaji", "price" : 12999, "tags" : [ "ff", "we", "asd" ] } }, { "_index" : "product", "_type" : "_doc", "_id" : "MghbI3IBIohknZNBIB4B", "_score" : 0.0, "_source" : { "name" : "phone nfc", "desc" : "shouji zhong de hongzhaji", "price" : 12999, "tags" : [ "ff", "we", "asd" ] } }, { "_index" : "product", "_type" : "_doc", "_id" : "MwheI3IBIohknZNB-R4G", "_score" : 0.0, "_source" : { "name" : " nfc bbb phone", "desc" : "shouji zhong de hongzhaji", "price" : 12999, "tags" : [ "ff", "we", "asd" ] } } ] } }should子句最少需要满足匹配的个数或百分比,如果bool查询包含至少一个should子句,而没有must或 filter子句,此时该值为1,否则,该值为0(此时should中的任何子句没有匹配都可以)
这里同时存在must和should,minimum_should_match默认值为0,此时即使should一条都没匹上,只要must匹上了,就可以匹上
GET /product/_search { "query": { "bool": { "must": [ { "match": { "name": "erji" } } ], "should": [ {"match": { "name": "sdgsdag" }},{ "match": { "name": "qweqwt" } } ] } } }但如果手动设置minimum_should_match,则此时需要should也要至少满足1个子句能够匹配:
GET /product/_search { "query": { "bool": { "filter": [ { "range": { "price": { "gt": 2999 } } } ], "should": [ {"match": { "name": "sdgsdag" }},{ "match": { "name": "qweqwt" } } ], "minimum_should_match": 1 } } }
bool 查询会为每个文档计算相关度评分 _score : 将所有匹配的 must 和 should 语句的分数 _score 求和,作为文档的最终得分(旧版本还会再乘以协调因子coord,即匹配的子句数除以子句总数,该因子在新版本中已被移除)。 filter 和 must_not 语句不会影响评分; must_not 的作用只是将不相关的文档排除
① 想要name包含“nfc”或者“xiaomi”的产品, 但是不要包含“erji”:
SELECT * FROM product WHERE (name LIKE '%xiaomi%' OR name LIKE '%nfc%') AND name NOT LIKE '%erji%'
这里使用了filter缓存查询的特性,如果整体都想使用filter做缓存查询,而不计算分数的话:
GET /product/_search { "query": { "constant_score":{ "filter": { "bool": { "should":[ {"term":{"name":"xiaomi"}}, {"term":{"name":"nfc"}} ], "must_not":[ {"term":{"name":"erji"}} ] } }, "boost": 1.2 } } }
(其中boost用于给所有匹配到的文档一个固定分数。)
② 搜索一台name包含“xiaomi nfc phone”的,或者一台name包含“phone”并且price小于等于2999的:
SELECT * FROM product WHERE NAME LIKE '%xiaomi nfc phone%' OR ( NAME LIKE '%phone%' AND price <= 2999);
这里用到了嵌套查询,也就是子句里可以继续放入bool来实现复杂查询:
GET /product/_search { "query": { "constant_score": { "filter": { "bool":{ "should":[ {"match_phrase":{"name":"xiaomi nfc phone"}}, { "bool":{ "must":[ {"term":{"name":"phone"}}, {"range":{"price":{"lte":"2999"}}} ] } } ] } } } } }匹配包含“nfc phone”句子的文档,并将匹配到的对应文档的name内容的关键词进行高亮“
GET /product/_search { "query" : { "match_phrase" : { "name" : "nfc phone" } }, "highlight":{ "fields":{ "name":{} } } } { "took" : 352, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : 0.94048226, "hits" : [ { "_index" : "product", "_type" : "_doc", "_id" : "3", "_score" : 0.94048226, "_source" : { "name" : "nfc phone", "desc" : "shouji zhong de hongzhaji", "price" : 2999, "tags" : [ "xingjiabi", "fashao", "menjinka" ] }, "highlight" : { "name" : [ "<em>nfc</em> <em>phone</em>" ] } }, { "_index" : "product", "_type" : "_doc", "_id" : "MQhVI3IBIohknZNBAh6y", "_score" : 0.79284847, "_source" : { "name" : "balala nfc phone", "desc" : "shouji zhong de hongzhaji", "price" : 12999, "tags" : [ "ff", "we", "asd" ] }, "highlight" : { "name" : [ "balala <em>nfc</em> <em>phone</em>" ] } } ] } }可以在查询url后面添加pretty参数,使得返回的json更易查看。
如不需要返回元数据,仅仅返回原始数据,可以这样:
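GET /index/_source/id
例子:
GET /product/_source/1
{ "name" : "xiaomi phone", "desc" : "shouji zhong de zhandouji", "price" : 99999 }
注意:在es7中如果沿用带type的旧写法 GET /product/_doc/1/_source,路径会被解析为 index/type/id(即type为“1”、id为“_source”),结果是查不到数据的: { "_index" : "product", "_type" : "1", "_id" : "_source", "found" : false }
只查询name,price字段信息:(DSL版本)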
GET /product/_search { "query": { "match_all": {} }, "_source": ["name","price"] }(query string版本):
GET /product/_search?_source=name,price
通常来说,要判断一条数据是否存在,我们需要先查询数据,如果能查到就说明存在。 那有没有更优的方法? 使用HEAD请求方式,去请求数据
HEAD /product/_doc/3如果存在返回200:
200 - OK如果不存在404:
404 - Not Found这样无需查询返回数据,只需通过状态码就可判断了。
为了减少网络传输次数,可以批量进行获取操作:
GET /product/_mget { "ids":[1,3,"balala"] }上面我指定了3个id,其中”balala“是不存在的
{ "docs" : [ { "_index" : "product", "_type" : "_doc", "_id" : "1", "_version" : 3, "_seq_no" : 6, "_primary_term" : 1, "found" : true, "_source" : { "name" : "xiaomi phone", "desc" : "shouji zhong de zhandouji", "price" : 99999 } }, { "_index" : "product", "_type" : "_doc", "_id" : "3", "_version" : 1, "_seq_no" : 2, "_primary_term" : 1, "found" : true, "_source" : { "name" : "nfc phone", "desc" : "shouji zhong de hongzhaji", "price" : 2999, "tags" : [ "xingjiabi", "fashao", "menjinka" ] } }, { "_index" : "product", "_type" : "_doc", "_id" : "balala", "found" : false } ] }通过查询结果可以发现,能匹配到的所有数据会返回,并且有一个found字段,而匹配不到,也会返回一条信息,但是found为false
当然,可以不仅仅那么简单,还可以指定每个文档的返回字段:
只包含某个字段:
除去某个字段外都要:
在Elasticsearch中,支持批量的插入、修改、删除操作,都是通过_bulk的api完成的。 请求格式如下:(请求格式不同寻常) 语法格式:
POST /index名称/_bulk { action: { metadata }}\n { request body }\n { action: { metadata }}\n { request body }\n…
批量操作之间可以组合使用,并且每一条的执行结果不会相互影响。
retry_on_conflict 更新发生冲突时,重试次数(ES使用的是乐观锁机制来处理并发下的数据问题,更新时使用自身version与目标数据version要进行比对,只有等于自身的version才能更新)
批量操作相比于json体语法操作,效率更高,json体语法会将查询体序列化成对象放到内存,而批量操作不需要。
有时我们只想得到失败的信息,而不关心成功的信息,此时可以加上filter_path来只过滤出错误信息:
exists 查询可以用于查找文档中包含指定字段(字段值不为null)的文档,配合 must_not 则可以查找没有某个字段的文档,类似于SQL语句中的 IS NOT NULL / IS NULL 条件
筛选出文档中tags字段不为null的数据:
GET /product/_search { "query": { "exists": { "field": "tags" } } }结合上面的bool查询,可以这样查询tags字段为null的数据:
GET /product/_search { "query": { "bool": { "must_not": [ {"exists": {"field": "tags"}} ] } } }默认来说,es对于match操作,会把查询内容分词后的 任一分词 命中的文档进行返回,也就是分词之间的关系是 or
如下: 查询name中包含xiaomi 或者 erji 的文档
GET /product/_search { "query": { "match": { "name": "xiaomi erji" } } }那如果想要分词之间的关系是and关系呢? 也就是必须都包含的才行 只需要这样写就行了:
GET /product/_search { "query": { "match": { "name": { "operator": "and", "query": "xiaomi erji" } } } }上面我们测试了全文检索中“OR” 和 “AND”搜索,这是两个极端,其实在实际场景中,并不会选取这2个极端,更有可能是选取只需要符合一定的相似度就可以查询到数据,在Elasticsearch中也支持这样的查询,通过 minimum_should_match来指定匹配度,如:70%;
GET /itcast2/_search { "query": { "match": { "hobby": { "minimum_should_match": "70%", "query": "羽毛球 乒乓球" } } } }
(minimum_should_match 这里可以写需要匹配的分词个数,也可以写匹配相似度百分比。)相似度应该多少合适,需要在实际的需求中进行反复测试,才可得到合理的值。
如果文档字段内容包含篮球,则额外增加5的评分权重:
GET /itcast2/_search { "query": { "bool": { "must": [ { "match": { "hobby": { "operator": "and", "query": "羽毛球足球" } } } ], "should": [ { "match": { "hobby": { "query": "篮球", "boost": 5 } } } ] } } }