# Seed the article index with four sample posts (bulk "index" actions with explicit _id 1-4).
# The bulk body is newline-delimited: each action line is followed by its document on the next line.
POST /article/_doc/_bulk
{"index": {"_id": 1}}
{"articleId": "XHDK-A-1293-#fJ3", "userId": 1, "hidden": false, "postDate": "2017-01-01"}
{"index": {"_id": 2}}
{"articleId": "KDKE-B-9947-#kL5", "userId": 1, "hidden": false, "postDate": "2017-01-02"}
{"index": {"_id": 3}}
{"articleId": "JODL-X-1937-#pV7", "userId": 2, "hidden": false, "postDate": "2017-01-01"}
{"index": {"_id": 4}}
{"articleId": "QQPX-R-3956-#aD8", "userId": 2, "hidden": true, "postDate": "2017-01-02"}
# Inspect the mapping of the article index.
# NOTE(review): the JSON that follows is presumably the response of this request, kept here for reference.
GET /article/_mapping/_doc
{
  "article": {
    "mappings": {
      "_doc": {
        "properties": {
          "articleId": {
            "type": "text",
            "fields": {
              # keyword sub-field: not analyzed; values longer than 256 characters are not indexed (ignore_above)
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "hidden": {
            "type": "boolean"
          },
          "postDate": {
            "type": "date"
          },
          "userId": {
            "type": "long"
          }
        }
      }
    }
  }
}
说明:测试数据会随着练习而不断加入新数据
term filter:输入的搜索文本不分词,直接拿去倒排索引中进行精确匹配
# Find posts by user ID (expected: 2 hits).
# constant_score + filter: pure filtering, no relevance scoring.
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": { "userId": 1 }
      }
    }
  }
}
# Find posts that are not hidden (expected: 3 hits).
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": { "hidden": false }
      }
    }
  }
}
# Find posts by post date (expected: 2 hits).
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": { "postDate": "2017-01-01" }
      }
    }
  }
}
# Find a post by article ID (no hits).
# A text field is analyzed when the inverted index is built.
# After analysis the original articleId no longer exists as one term; only the
# individual tokens (qqpx, r, 3956, ad8) are present in the inverted index.
# term does not analyze the search text, so the exact match finds nothing.
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": { "articleId": "QQPX-R-3956-#aD8" }
      }
    }
  }
}
# Find a post by article ID (1 hit).
# articleId.keyword is the sub-field Elasticsearch adds automatically; it is not analyzed.
# Each articleId value is therefore indexed twice:
#   - once analyzed, with the resulting tokens put into the inverted index;
#   - once through articleId.keyword, un-analyzed, with the whole string indexed as a single term.
# So a term filter on a text field can target the built-in <field>.keyword sub-field.
# Caveat: that sub-field has ignore_above 256, so values longer than 256 characters are
# not indexed in it. Prefer defining the mapping yourself with type keyword.
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": { "articleId.keyword": "QQPX-R-3956-#aD8" }
      }
    }
  }
}
# Rebuild the index: drop it first.
DELETE article
# Recreate it with articleId mapped explicitly as keyword.
# No ignore_above is set here, so the 256-character limit of the dynamic keyword sub-field does not apply.
PUT article
{
  "mappings": {
    "_doc": {
      "properties": {
        "articleId": { "type": "keyword" }
      }
    }
  }
}
# Re-insert the four sample posts into the recreated index.
POST /article/_doc/_bulk
{"index": {"_id": 1}}
{"articleId": "XHDK-A-1293-#fJ3", "userId": 1, "hidden": false, "postDate": "2017-01-01"}
{"index": {"_id": 2}}
{"articleId": "KDKE-B-9947-#kL5", "userId": 1, "hidden": false, "postDate": "2017-01-02"}
{"index": {"_id": 3}}
{"articleId": "JODL-X-1937-#pV7", "userId": 2, "hidden": false, "postDate": "2017-01-01"}
{"index": {"_id": 4}}
{"articleId": "QQPX-R-3956-#aD8", "userId": 2, "hidden": true, "postDate": "2017-01-02"}
# With articleId mapped as keyword, a term filter directly on articleId now returns a hit.
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": { "articleId": "QQPX-R-3956-#aD8" }
      }
    }
  }
}
知识点总结:
查询条件:假设查询"2017-02-02"这个日期,{"filter": {"term": {"postDate": "2017-02-02"}}}
且倒排索引中的数据如下,*代表存在于该文档中:
word | doc1 | doc2 | doc3 |
---|---|---|---|
2017-01-01 | * | * | |
2017-02-02 | * | * | |
2017-03-03 | * | * | * |
# Find posts whose post date is 2017-01-01 OR whose article ID is XHDK-A-1293-#fJ3,
# while the post date must never be 2017-01-02.
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "should": [
            { "term": { "postDate": "2017-01-01" } },
            { "term": { "articleId": "XHDK-A-1293-#fJ3" } }
          ],
          "must_not": [
            { "term": { "postDate": "2017-01-02" } }
          ]
        }
      }
    }
  }
}
# must:     every clause has to match
# should:   matching any one of the clauses is enough (as used here, without a must clause)
# must_not: none of the clauses may match
# Find posts whose article ID is XHDK-A-1293-#fJ3,
# OR whose article ID is JODL-X-1937-#pV7 AND whose post date is 2017-01-01.
# The AND branch is a bool/must nested inside the outer bool/should.
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "should": [
            { "term": { "articleId": "XHDK-A-1293-#fJ3" } },
            {
              "bool": {
                "must": [
                  { "term": { "articleId": { "value": "JODL-X-1937-#pV7" } } },
                  { "term": { "postDate": { "value": "2017-01-01" } } }
                ]
              }
            }
          ]
        }
      }
    }
  }
}
知识点总结:
# Add a tag field (array of strings) to each of the four posts via bulk partial updates.
POST /article/_doc/_bulk
{"update": {"_id": "1"}}
{"doc": {"tag": ["java", "hadoop"]}}
{"update": {"_id": "2"}}
{"doc": {"tag": ["java"]}}
{"update": {"_id": "3"}}
{"doc": {"tag": ["hadoop"]}}
{"update": {"_id": "4"}}
{"doc": {"tag": ["java", "elasticsearch"]}}
# Find posts whose articleId is KDKE-B-9947-#kL5 or QQPX-R-3956-#aD8.
# terms matches a document when the field equals any value in the list.
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "terms": {
          "articleId": [
            "KDKE-B-9947-#kL5",
            "QQPX-R-3956-#aD8"
          ]
        }
      }
    }
  }
}
# Find posts whose tag list contains java.
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "terms": { "tag": ["java"] }
      }
    }
  }
}
# Add a tag_cnt field holding the number of tags on each post.
POST /article/_doc/_bulk
{"update": {"_id": "1"}}
{"doc": {"tag_cnt": 2}}
{"update": {"_id": "2"}}
{"doc": {"tag_cnt": 1}}
{"update": {"_id": "3"}}
{"doc": {"tag_cnt": 1}}
{"update": {"_id": "4"}}
{"doc": {"tag_cnt": 2}}
# Find posts whose only tag is java: tag_cnt must be 1 AND tag must contain java.
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            { "term": { "tag_cnt": 1 } },
            { "terms": { "tag": ["java"] } }
          ]
        }
      }
    }
  }
}
知识点总结:
# Add a view_cnt (view count) field to each post.
POST /article/_doc/_bulk
{"update": {"_id": "1"}}
{"doc": {"view_cnt": 30}}
{"update": {"_id": "2"}}
{"doc": {"view_cnt": 50}}
{"update": {"_id": "3"}}
{"doc": {"view_cnt": 100}}
{"update": {"_id": "4"}}
{"doc": {"view_cnt": 80}}
# Find posts whose view count lies between 30 and 60.
# gt / lt are exclusive bounds, so exactly 30 or exactly 60 does not match;
# use gte / lte instead when the boundaries should be included.
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "range": {
          "view_cnt": { "gt": 30, "lt": 60 }
        }
      }
    }
  }
}
# Add one more test document.
# Field names are case-sensitive and must match the ones used everywhere else in this
# index (articleId, userId); otherwise new fields would be created dynamically and the
# document would never match filters on articleId / userId.
PUT /article/_doc/5
{
  "articleId": "DHJK-B-1395-#Ky5",
  "userId": 3,
  "hidden": false,
  "postDate": "2017-03-01",
  "tag": ["elasticsearch"],
  "tag_cnt": 1,
  "view_cnt": 10
}
# Find posts published within the last month, assuming today is 2017-03-10.
# "2017-03-10||-30d" is date math: an anchor date, then ||, then an offset of minus 30 days.
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "range": {
          "postDate": { "gt": "2017-03-10||-30d" }
        }
      }
    }
  }
}
# The current date can be referenced with now; now-30d means 30 days before the current time.
GET /article/_doc/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "range": {
          "postDate": { "gt": "now-30d" }
        }
      }
    }
  }
}
知识点总结: