本篇文章主要讲解Elasticsearch在业务中经常用到的字段类型,通过大量的范例来学习和理解不同字段类型的应用场景。范例使用的Elasticsearch版本为7.17.5。
在Elasticsearch的映射关系中,每个字段都对应一个数据类型或者字段类型,这些类型规范了字段存储的值和用途。
# Create the userinfo index together with its mapping.
# "aliasage" is an alias field whose path points at the "age" field.
PUT userinfo
{
  "mappings": {
    "properties": {
      "age": {
        "type": "long"
      },
      "aliasage": {
        "type": "alias",
        "path": "age"
      },
      "transit_mode": {
        "type": "keyword"
      }
    }
  }
}
# Index one document (id 1) into userinfo.
PUT userinfo/_doc/1
{
  "age": 39,
  "transit_mode": "transit_mode"
}
# Find users whose age is 30 or more (range query on the concrete "age" field).
# The path is typeless: placing a mapping type such as _doc in front of
# _search is deprecated in Elasticsearch 7.x and no longer accepted in 8.0.
GET userinfo/_search
{
  "query": {
    "range": {
      "age": {
        "gte": 30
      }
    }
  }
}
# Same range query, issued through the "aliasage" alias field instead of "age".
# The path is typeless: placing a mapping type such as _doc in front of
# _search is deprecated in Elasticsearch 7.x and no longer accepted in 8.0.
GET userinfo/_search
{
  "query": {
    "range": {
      "aliasage": {
        "gte": 30
      }
    }
  }
}
# Response returned for both searches (by "age" and by the "aliasage" alias).
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "userinfo",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 1.0,
        "_source" : {
          "age" : 39,
          "transit_mode" : "transit_mode"
        }
      }
    ]
  }
}
# Create the index and map the "blob" field as the binary type
# ("name" is a regular text field).
PUT myindex-2_02
{
  "mappings": {
    "properties": {
      "name": {
        "type": "text"
      },
      "blob": {
        "type": "binary"
      }
    }
  }
}
# Index a document; the value of "blob" is a Base64-encoded string.
PUT myindex-2_02/_doc/1
{
  "name": "Some binary blob",
  "blob": "c2FkZw=="
}
# Create the index and map the "user" field as the nested type.
PUT myindex-2_07
{
  "mappings": {
    "properties": {
      "user": {
        "type": "nested"
      }
    }
  }
}
# Index a document whose "user" field holds an array of two objects,
# each carrying its own first/last pair.
PUT myindex-2_07/_doc/1
{
  "group": "fans",
  "user": [
    {
      "first": "John",
      "last": "Smith"
    },
    {
      "first": "Alice",
      "last": "White"
    }
  ]
}
# Nested query on path "user": both conditions (user.first = Alice and
# user.last = Smith) must hold inside the SAME nested object. The indexed
# document only contains John/Smith and Alice/White, so this returns no hits.
GET myindex-2_07/_search
{
  "query": {
    "nested": {
      "path": "user",
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "user.first": "Alice"
              }
            },
            {
              "match": {
                "user.last": "Smith"
              }
            }
          ]
        }
      }
    }
  }
}
范围类型 | 说明 |
---|---|
integer_range | 表示有符号的32位整数范围 |
float_range | 表示单精度浮点数范围 |
long_range | 表示有符号的64位整数范围 |
double_range | 表示双精度浮点数范围 |
date_range | 表示日期范围,可以通过format映射参数支持各种日期格式。无论使用哪种格式,日期值都会被解析为一个无符号的64位整数,该整数为自Unix纪元(UTC)以来的毫秒数。 |
ip_range | 表示IPv4或IPv6地址的IP值范围 |
# Create the index: "expected_attendees" is an integer_range field and
# "time_frame" is a date_range field accepting any of the listed formats.
PUT myindex-2_08
{
  "mappings": {
    "properties": {
      "expected_attendees": {
        "type": "integer_range"
      },
      "time_frame": {
        "type": "date_range",
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
      }
    }
  }
}
# Index a document whose fields are ranges: attendees in [10, 20) and a
# time frame from 2021-10-31 12:00:00 through 2021-11-01.
# ?refresh is set on the request so the write is refreshed before the
# searches that follow.
PUT myindex-2_08/_doc/1?refresh
{
  "expected_attendees": {
    "gte": 10,
    "lt": 20
  },
  "time_frame": {
    "gte": "2021-10-31 12:00:00",
    "lte": "2021-11-01"
  }
}
# Term query against the integer_range field: a document matches when its
# stored range contains the value. 12 lies inside [10, 20), so document 1
# is returned. The value is sent as a JSON number rather than a string,
# matching the numeric type of the field.
GET myindex-2_08/_search
{
  "query": {
    "term": {
      "expected_attendees": {
        "value": 12
      }
    }
  }
}
# Range query on the date_range field. "relation": "within" keeps only
# documents whose stored range lies entirely inside the queried range.
GET myindex-2_08/_search
{
  "query": {
    "range": {
      "time_frame": {
        "gte": "2021-10-31",
        "lte": "2021-11-01",
        "relation": "within"
      }
    }
  }
}
# Create the index: "pagerank" and "url_length" are rank_feature fields and
# "topics" is a rank_features field. "url_length" sets
# positive_score_impact to false, i.e. it is declared as a feature that
# correlates negatively with the score.
PUT myindex-2_10
{
  "mappings": {
    "properties": {
      "pagerank": {
        "type": "rank_feature"
      },
      "url_length": {
        "type": "rank_feature",
        "positive_score_impact": false
      },
      "topics": {
        "type": "rank_features"
      }
    }
  }
}
# Index document 1.
PUT myindex-2_10/_doc/1
{
  "url": "http://en.wikipedia.org/wiki/2016_Summer_Olympics",
  "content": "Rio 2016",
  "pagerank": 50.3,
  "url_length": 42,
  "topics": {
    "sports": 50,
    "brazil": 30
  }
}
# Index document 2.
PUT myindex-2_10/_doc/2
{
  "url": "http://en.wikipedia.org/wiki/2016_Brazilian_Grand_Prix",
  "content": "Formula One motor race held on 13 November 2016 at the Autodromo Jose Carlos Pace in Sao Paulo,Brazil",
  "pagerank": 50.3,
  "url_length": 47,
  "topics": {
    "sports": 50,
    "brazil": 20,
    "formula one": 65
  }
}
# Index document 3.
PUT myindex-2_10/_doc/3
{
  "url": "http://en.wikipedia.org/wiki/Deadpool_(film)",
  "content": "Deadpool is a 2016 American superhero film",
  "pagerank": 50.3,
  "url_length": 37,
  "topics": {
    "movies": 60,
    "super hero": 65
  }
}
# Return documents whose "content" contains "2016" (the must clause).
# The should clauses are rank_feature queries on pagerank, url_length and
# topics.sports; they only contribute to _score, which orders the hits.
GET myindex-2_10/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "content": "2016"
          }
        }
      ],
      "should": [
        {
          "rank_feature": {
            "field": "pagerank"
          }
        },
        {
          "rank_feature": {
            "field": "url_length",
            "boost": 0.1
          }
        },
        {
          "rank_feature": {
            "field": "topics.sports",
            "boost": 0.4
          }
        }
      ]
    }
  }
}
# Create my-index (the index the following ip_field requests target) and
# map "ip_field" as the ip type.
PUT my-index
{
  "mappings": {
    "properties": {
      "ip_field": {
        "type": "ip"
      }
    }
  }
}
# Index a document holding an IPv4 address.
PUT my-index/_doc/1
{
  "ip_field": "192.168.1.1"
}
# Index a document holding an IPv6 address.
PUT my-index/_doc/2
{
  "ip_field": "2001:0db8:85a3:0000:0000:8a2e:0370:7334"
}
# Range query: addresses from 192.168.1.1 through 192.168.1.10 (both inclusive).
GET my-index/_search
{
  "query": {
    "range": {
      "ip_field": {
        "gte": "192.168.1.1",
        "lte": "192.168.1.10"
      }
    }
  }
}
# Term query using CIDR notation: matches addresses inside 192.168.1.0/24.
GET my-index/_search
{
  "query": {
    "term": {
      "ip_field": "192.168.1.0/24"
    }
  }
}
# Return all documents sorted by ip_field in ascending order.
GET my-index/_search
{
  "sort": [
    {
      "ip_field": {
        "order": "asc"
      }
    }
  ]
}
# Mapping body that declares "title" as a search_as_you_type field.
# NOTE(review): no request line is shown for this body; it is presumably
# sent with PUT <index> -- confirm the intended index name.
{
  "mappings": {
    "properties": {
      "title": {
        "type": "search_as_you_type"
      }
    }
  }
}
创建的字段 | 说明 |
---|---|
my_field | 按照映射中的配置进行分析,如果未配置分词器,则使用索引的默认分词器 |
my_field._2gram | 用大小为2的shingle token filter包装my_field的分词器 |
my_field._3gram | 用大小为3的shingle token filter包装my_field的分词器 |
my_field._index_prefix | 用edge ngram token filter包装my_field._3gram的分词器 |
# Search body: multi_match of type bool_prefix over "title" and its
# _2gram / _3gram subfields.
# NOTE(review): no request line is shown for this body; it is presumably
# sent with GET <index>/_search -- confirm the intended index name.
{
  "query": {
    "multi_match": {
      "query": "search text",
      "type": "bool_prefix",
      "fields": [
        "title",
        "title._2gram",
        "title._3gram"
      ]
    }
  }
}
# Create the index: "name" is a text field with a sub-field (multi-field)
# "length" of type token_count, analyzed with the standard analyzer.
PUT myindex-tokencount
{
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "fields": {
          "length": {
            "type": "token_count",
            "analyzer": "standard"
          }
        }
      }
    }
  }
}
# Index a two-word name.
PUT myindex-tokencount/_doc/1
{
  "name": "John Smith"
}
# Index a three-word name.
PUT myindex-tokencount/_doc/2?pretty
{
  "name": "Rachel Alice Williams"
}
# Index a two-character Chinese name.
# NOTE(review): the standard analyzer presumably emits one token per CJK
# character, making this count as 2 tokens -- verify with _analyze.
PUT myindex-tokencount/_doc/3
{
  "name": "长大"
}
# Find documents whose "name" was analyzed into exactly 2 tokens, by
# querying the name.length token_count sub-field. The count is sent as a
# JSON number rather than a string, matching the numeric type of the field.
GET myindex-tokencount/_search
{
  "query": {
    "term": {
      "name.length": {
        "value": 2
      }
    }
  }
}
# Run the standard analyzer over "John Smith" and return the tokens it produces.
GET myindex-tokencount/_analyze
{
  "analyzer": "standard",
  "text": [
    "John Smith"
  ]
}
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。