--name #etcd集群中的节点名,这里可以随意,可区分且不重复就行
--initial-cluster-token #节点的token值,设置该值后集群将生成唯一id,并为每个节点也生成唯一id,当使用相同配置文件再启动一个集群时,只要该token值不一样,etcd集群就不会相互影响.
--initial-cluster #集群自举时的url
--initial-advertise-peer-urls #建议用于节点之间通信的url,节点间将以该值进行通信.
--listen-peer-urls #监听的用于节点之间通信的url,可监听多个,集群内部将通过这些url进行数据交互(如选举,数据同步等)
--listen-client-urls #监听的用于客户端通信的url,同样可以监听多个.
--advertise-client-urls #建议使用的客户端通信url,该值用于etcd代理或etcd成员与etcd节点通信.
/opt/app #etcd的根目录
/opt/app/etcd/create_etcd_first_node.sh #初始化集群时,第一个节点首次执行的脚本
/opt/app/etcd/run_etcd.sh #以后正常启动的脚本
/opt/app/etcd/bin/etcd #etcd的server程序
/opt/app/etcd/bin/etcdctl #etcd的管理程序
/opt/app/etcd/data/ #etcd的数据目录
# Bootstrap the very first etcd node (node1) of a brand-new cluster.
export NODE_IP="192.168.68.17"
export ETCD_INITIAL_CLUSTER_TOKEN="token-01"
export ETCD_NAME="node1"
export ETCD_DATA_DIR="/opt/app/etcd/data"

# ETCD_NAME and ETCD_DATA_DIR are read by etcd from the environment;
# the remaining options are passed explicitly, all in uniform --flag=value form.
/opt/app/etcd/bin/etcd \
  --initial-cluster-token="${ETCD_INITIAL_CLUSTER_TOKEN}" \
  --initial-cluster="${ETCD_NAME}=http://${NODE_IP}:2380" \
  --initial-advertise-peer-urls="http://${NODE_IP}:2380" \
  --listen-peer-urls="http://${NODE_IP}:2380" \
  --listen-client-urls="http://${NODE_IP}:2379" \
  --advertise-client-urls="http://${NODE_IP}:2379"
根据实际情况替换变量 NODE_IP、ETCD_INITIAL_CLUSTER_TOKEN、ETCD_NAME。
# List the cluster members and check overall cluster health.
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" member list
# NOTE(review): `cluster-health` is etcdctl v2 syntax — the v3 equivalent is `endpoint health`.
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" cluster-health
pkill etcd
，然后使用 /opt/app/etcd/run_etcd.sh 脚本正常启动第一个节点的 etcd。
登录需要加入到集群的那个节点机器，使用 etcdctl 命令把节点加入集群：
export NODE_IP="192.168.68.18"
export ETCD_NAME="node2"
# Register the new member with the existing cluster BEFORE starting it;
# the command prints the ETCD_INITIAL_CLUSTER* values needed at startup.
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" member add ${ETCD_NAME} http://${NODE_IP}:2380
根据实际情况替换变量 NODE_IP、ETCD_NAME。
然后根据屏幕提示把环境变量保存下来：ETCD_NAME、ETCD_INITIAL_CLUSTER、ETCD_INITIAL_CLUSTER_STATE。
# First start of node2, joining the already-running cluster.
export NODE_IP="192.168.68.18"
export ETCD_NAME="node2"
export ETCD_DATA_DIR="/opt/app/etcd/data"
# These two values MUST be the ones printed by `etcdctl member add`.
export ETCD_INITIAL_CLUSTER="node1=http://192.168.68.17:2380,node2=http://192.168.68.18:2380"
export ETCD_INITIAL_CLUSTER_STATE="existing"

# Cluster membership comes from the ETCD_* environment above, so only the
# URL options are passed as flags, all in uniform --flag=value form.
/opt/app/etcd/bin/etcd \
  --initial-advertise-peer-urls="http://${NODE_IP}:2380" \
  --listen-peer-urls="http://${NODE_IP}:2380" \
  --listen-client-urls="http://${NODE_IP}:2379" \
  --advertise-client-urls="http://${NODE_IP}:2379"
注意: ETCD_NAME、ETCD_INITIAL_CLUSTER、ETCD_INITIAL_CLUSTER_STATE 必须使用上面的 etcdctl member add 命令产生的输出! 根据实际情况替换变量 NODE_IP、ETCD_NAME。
pkill etcd
，然后执行 /opt/app/etcd/run_etcd.sh 命令正常启动新加入的节点，再次检查集群状态! 以下是 /opt/app/etcd/run_etcd.sh 文件内容:
#! /bin/sh
# run_etcd.sh — normal (non-bootstrap) start of an already-joined node.
# Resolve the script's own directory; quote expansions and use $() instead
# of backticks so paths containing spaces do not break the script.
basedir=$(dirname -- "$0")
echo "BASE DIR:$basedir"
# Abort if the directory cannot be entered instead of running from the wrong cwd.
cd "$basedir" || exit 1

export NODE_IP="192.168.68.18"
export ETCD_NAME="node2"
export ETCD_DATA_DIR="/opt/app/etcd/data"

# No --initial-cluster* flags: membership is already recorded in the data dir.
/opt/app/etcd/bin/etcd \
  --initial-advertise-peer-urls "http://${NODE_IP}:2380" \
  --listen-peer-urls="http://${NODE_IP}:2380" \
  --listen-client-urls "http://${NODE_IP}:2379" \
  --advertise-client-urls "http://${NODE_IP}:2379"
注意根据实际情况替换变量 NODE_IP、ETCD_NAME。
假设一个节点 node2 异常重启，可以执行 /opt/app/etcd/run_etcd.sh 脚本命令正常起来。
# Smoke-test the cluster with the v2 key API: write a key, then read it back.
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" set /message hello
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" get /message
# Permanently remove a failed member; NODE_ID is taken from `member list` output.
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" member remove ${NODE_ID}
${NODE_ID} 可以从 /opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" member list 的输出来查找!
当你节点所在的机器出现硬件故障,或者节点出现如数据目录损坏等问题,导致节点永久性的不可恢复时,就需要对节点进行迁移或者替换.当一个节点失效以后,必须尽快修复,因为etcd集群正常运行的必要条件是集群中多数节点都正常工作. 迁移一个节点需要进行四步操作:
# Point an existing member ID at a new peer URL (used when migrating/replacing a node).
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" member update ${NODE_ID} ${LISTEN_PEER_URLS}
${NODE_ID} 可以从 /opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" member list 的输出来查找!
当集群超过半数的节点都失效时，就需要通过手动的方式，强制性让某个节点以自己为Leader，利用原有数据启动一个新集群. 此时你需要进行以下操作.
export ETCD_DATA_DIR="/opt/app/etcd/data"
# Snapshot the data dir with the v2 `etcdctl backup` command. The backup is
# given fresh node/cluster IDs, detaching it from the old cluster identity.
/opt/app/etcd/bin/etcdctl \
backup \
--data-dir "${ETCD_DATA_DIR}" \
--backup-dir /tmp/etcd_backup
它首先将节点的元信息写入到备份区，但是节点的id、集群的id等将会被重写，这就意味着节点之前的集群信息就被抹掉.
# Disaster recovery: restart node1 from the backup directory as a brand-new
# single-member cluster. --force-new-cluster discards the old membership.
export NODE_IP="192.168.68.17"
export ETCD_INITIAL_CLUSTER_TOKEN="token-01"
export ETCD_NAME="node1"
export ETCD_DATA_DIR="/tmp/etcd_backup"

/opt/app/etcd/bin/etcd \
  --force-new-cluster \
  --initial-cluster-token="${ETCD_INITIAL_CLUSTER_TOKEN}" \
  --initial-cluster="${ETCD_NAME}=http://${NODE_IP}:2380" \
  --initial-advertise-peer-urls="http://${NODE_IP}:2380" \
  --listen-peer-urls="http://${NODE_IP}:2380" \
  --listen-client-urls="http://${NODE_IP}:2379" \
  --advertise-client-urls="http://${NODE_IP}:2379"
注意:强制性重启是一个迫不得已的选择,它会破坏一致性协议保证的安全性(如果操作时集群中尚有其它节点在正常工作,就会出错),所以在操作前请务必要保存好数据.
pkill etcd
执行 /opt/app/etcd/create_etcd_first_node.sh 脚本，启动第一个节点的 etcd。
pkill etcd
执行 /opt/app/etcd/run_etcd.sh 脚本正常启动 etcd，检查第一个节点是否运行正常!
# Create the root user (etcdctl prompts interactively for its password).
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" user add root
# Turn on authentication; from now on every request needs credentials.
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" auth enable
# NOTE(review): the commands below assume the root password was set to "root" — adjust to the real password.
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" --username root:root user add wjw
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" --username root:root user list
# Create role test1 and grant it read/write access to every key (v2 syntax).
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" --username root:root role add test1
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" --username root:root role grant --rw --path "/*" test1
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" --username root:root role list
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" --username root:root role get test1
# Attach role test1 to user wjw, then verify the assignment.
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" --username root:root user grant --roles test1 wjw
/opt/app/etcd/bin/etcdctl --endpoints "http://192.168.68.17:2379" --username root:root user get wjw