In day-to-day Elasticsearch administration there are many scenarios, such as system logs and behavioral data, where the data volume is very large and the number of indices keeps growing over time. In these scenarios, typically only the most recent data is valuable or frequently accessed (hot data), while historical data is rarely, if ever, used (cold data). Such indices need to be maintained, managed, and eventually deleted according to a policy; otherwise, as the data piles up, it not only wastes disk and memory but also seriously degrades Elasticsearch performance.
Elastic Stack 6.6 introduced Index Lifecycle Management (ILM), which manages the entire lifecycle of an index and also provides a UI in Kibana for configuring policies.
Note: the above are only common definitions of the index lifecycle phases; the actual policy should be defined according to your business requirements.
Deploy an Elasticsearch cluster consisting of 2 hot nodes, 2 warm nodes, and 2 cold nodes, together with Kibana and Cerebro for easier debugging and observation. The docker-compose.yaml file is as follows:
version: '2.2'
services:
  # Cerebro: a web admin UI for inspecting the cluster
  cerebro:
    image: lmenezes/cerebro:0.8.3
    container_name: hwc_cerebro
    ports:
      - "9000:9000"
    command:
      - -Dhosts.0.host=http://elasticsearch:9200
    networks:
      - hwc_es7net
  kibana:
    image: docker.elastic.co/kibana/kibana:7.1.0
    container_name: hwc_kibana7
    environment:
      #- I18N_LOCALE=zh-CN
      - XPACK_GRAPH_ENABLED=true
      - TIMELION_ENABLED=true
      - XPACK_MONITORING_COLLECTION_ENABLED="true"
    ports:
      - "5601:5601"
    networks:
      - hwc_es7net
  # hot node 1: tagged via node.attr.box_type=hot; the only node that exposes 9200 to the host
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.1.0
    container_name: es7_hot1
    environment:
      - cluster.name=cr7-hwc
      - node.name=es7_hot1
      - node.attr.box_type=hot
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - discovery.seed_hosts=es7_hot1,es7_warm1,es7_cold1,es7_hot2,es7_warm2,es7_cold2
      - cluster.initial_master_nodes=es7_hot1,es7_warm1,es7_cold1,es7_hot2,es7_warm2,es7_cold2
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - hwc_es7data_hot1:/usr/share/elasticsearch/data
    ports:
      - 9200:9200
    networks:
      - hwc_es7net
  # warm node 1: tagged via node.attr.box_type=warm
  elasticsearch2:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.1.0
    container_name: es7_warm1
    environment:
      - cluster.name=cr7-hwc
      - node.name=es7_warm1
      - node.attr.box_type=warm
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - discovery.seed_hosts=es7_hot1,es7_warm1,es7_cold1,es7_hot2,es7_warm2,es7_cold2
      - cluster.initial_master_nodes=es7_hot1,es7_warm1,es7_cold1,es7_hot2,es7_warm2,es7_cold2
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - hwc_es7data_warm1:/usr/share/elasticsearch/data
    networks:
      - hwc_es7net
  # cold node 1: tagged via node.attr.box_type=cold
  elasticsearch3:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.1.0
    container_name: es7_cold1
    environment:
      - cluster.name=cr7-hwc
      - node.name=es7_cold1
      - node.attr.box_type=cold
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - discovery.seed_hosts=es7_hot1,es7_warm1,es7_cold1,es7_hot2,es7_warm2,es7_cold2
      - cluster.initial_master_nodes=es7_hot1,es7_warm1,es7_cold1,es7_hot2,es7_warm2,es7_cold2
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - hwc_es7data_cold1:/usr/share/elasticsearch/data
    networks:
      - hwc_es7net
  # hot node 2
  elasticsearch4:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.1.0
    container_name: es7_hot2
    environment:
      - cluster.name=cr7-hwc
      - node.name=es7_hot2
      - node.attr.box_type=hot
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - discovery.seed_hosts=es7_hot1,es7_warm1,es7_cold1,es7_hot2,es7_warm2,es7_cold2
      - cluster.initial_master_nodes=es7_hot1,es7_warm1,es7_cold1,es7_hot2,es7_warm2,es7_cold2
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - hwc_es7data_hot2:/usr/share/elasticsearch/data
    networks:
      - hwc_es7net
  # warm node 2
  elasticsearch5:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.1.0
    container_name: es7_warm2
    environment:
      - cluster.name=cr7-hwc
      - node.name=es7_warm2
      - node.attr.box_type=warm
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - discovery.seed_hosts=es7_hot1,es7_warm1,es7_cold1,es7_hot2,es7_warm2,es7_cold2
      - cluster.initial_master_nodes=es7_hot1,es7_warm1,es7_cold1,es7_hot2,es7_warm2,es7_cold2
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - hwc_es7data_warm2:/usr/share/elasticsearch/data
    networks:
      - hwc_es7net
  # cold node 2
  elasticsearch6:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.1.0
    container_name: es7_cold2
    environment:
      - cluster.name=cr7-hwc
      - node.name=es7_cold2
      - node.attr.box_type=cold
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - discovery.seed_hosts=es7_hot1,es7_warm1,es7_cold1,es7_hot2,es7_warm2,es7_cold2
      - cluster.initial_master_nodes=es7_hot1,es7_warm1,es7_cold1,es7_hot2,es7_warm2,es7_cold2
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - hwc_es7data_cold2:/usr/share/elasticsearch/data
    networks:
      - hwc_es7net
volumes:
  hwc_es7data_hot1:
    driver: local
  hwc_es7data_warm1:
    driver: local
  hwc_es7data_cold1:
    driver: local
  hwc_es7data_hot2:
    driver: local
  hwc_es7data_warm2:
    driver: local
  hwc_es7data_cold2:
    driver: local
networks:
  hwc_es7net:
    driver: bridge
Run docker-compose up -d to start the Elasticsearch cluster (Docker and docker-compose must already be installed).
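Once the containers are up, one way to verify that every node carries the expected box_type attribute is the _cat/nodeattrs API (shown here purely as a sanity check; the column selection via h= is optional):

GET _cat/nodeattrs?v&h=node,attr,value

Each of the six nodes should list box_type as hot, warm, or cold; the same information is also visible in Cerebro.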
Set the ILM poll interval to 1 second so that the demo is easier to observe (the default is 10 minutes):
PUT _cluster/settings
{
  "persistent": {
    "indices.lifecycle.poll_interval": "1s"
  }
}
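If you want to confirm the setting took effect, it can simply be read back (keep in mind a 1-second poll interval is for demos only; the 10-minute default is usually fine in production):

GET _cluster/settings

The response should show indices.lifecycle.poll_interval under the persistent section.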
Define a policy with the four phases hot, warm, cold, and delete:
PUT /_ilm/policy/log_ilm_policy
{
  "policy": {
    "phases": {
      "hot": {
        "actions": {
          "rollover": {
            "max_docs": 5
          }
        }
      },
      "warm": {
        "min_age": "20s",
        "actions": {
          "allocate": {
            "include": {
              "box_type": "warm"
            }
          },
          "readonly": {}
        }
      },
      "cold": {
        "min_age": "40s",
        "actions": {
          "allocate": {
            "include": {
              "box_type": "cold"
            },
            "number_of_replicas": 0
          }
        }
      },
      "delete": {
        "min_age": "60s",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}
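The stored policy can be read back with the get-policy API, which is useful for confirming the JSON was accepted as intended:

GET _ilm/policy/log_ilm_policy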
Create the index on the hot nodes and attach the ILM policy log_ilm_policy defined above, with ilm_alias as the rollover alias, 1 primary shard, and 1 replica. Setting "is_write_index": true means that after a rollover the alias still covers all the rolled-over indices, while writes through the alias are routed to the newest one.
PUT ilm_index-000001
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1,
    "index.lifecycle.name": "log_ilm_policy",
    "index.lifecycle.rollover_alias": "ilm_alias",
    "index.routing.allocation.include.box_type": "hot"
  },
  "aliases": {
    "ilm_alias": {
      "is_write_index": true
    }
  }
}
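At any point you can ask ILM which phase, action, and step an index is currently in via the explain API; polling this output is the easiest way to follow the experiment below:

GET ilm_index-000001/_ilm/explain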
Execute the following POST five times to create 5 documents:
POST ilm_alias/_doc
{
  "name": "cr7",
  "age": 15
}
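To check the running total through the alias, the count API works on aliases as well (it counts across every index the alias covers):

GET ilm_alias/_count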
Initially you can see a total of 5 docs.
When the index ilm_index-000001 reaches 5 docs, a rollover is triggered and a new index ilm_index-000002 is created.
After 20s, the index ilm_index-000001 is moved to a warm node.
After 40s, the index ilm_index-000001 is moved to a cold node, and its replica count is reduced from 1 to 0.
After 60s, the index ilm_index-000001 is deleted.
Each of these transitions can be observed with the _cat/shards request shown below.
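To watch the shards physically move from hot to warm to cold nodes, the _cat/shards API can be polled for this index:

GET _cat/shards/ilm_index-000001?v&h=index,shard,prirep,state,node

As the phases advance, the node column changes from es7_hot* to es7_warm* and then es7_cold*, and the replica row disappears once the cold phase sets number_of_replicas to 0.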
Because the warm phase applies the readonly action, any attempt to write to or modify ilm_index-000001 during the warm or cold phase returns the following error:
{
  "error": {
    "root_cause": [
      {
        "type": "cluster_block_exception",
        "reason": "blocked by: [FORBIDDEN/8/index write (api)];"
      }
    ],
    "type": "cluster_block_exception",
    "reason": "blocked by: [FORBIDDEN/8/index write (api)];"
  },
  "status": 403
}
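For completeness: the readonly action works by putting a write block on the index, so if you ever needed to write to such an index again you could clear the block manually. This is a manual override outside the ILM policy, sketched here under the assumption that the index still exists:

PUT ilm_index-000001/_settings
{
  "index.blocks.write": null
}

Setting the value to null resets index.blocks.write to its default, after which writes are accepted again.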