前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >dpvs源码分析(续二)

dpvs源码分析(续二)

原创
作者头像
榴莲其实还可以
发布2018-08-10 15:31:49
1.9K0
发布2018-08-10 15:31:49
举报

前言

在上一篇<dpvs源码分析(续)>中,我们以tcp为例,讲到了连接的建立,同时也提到了full-nat,snat这些术语。在该篇中,我们再来讲讲连接建立的过程。

建立连接

代码语言:c
复制
static int tcp_conn_sched(struct dp_vs_proto *proto, 
                          const struct dp_vs_iphdr *iph,
                          struct rte_mbuf *mbuf, 
                          struct dp_vs_conn **conn,
                          int *verdict)
{
    struct tcphdr *th, _tcph;
    struct dp_vs_service *svc;
    assert(proto && iph && mbuf && conn && verdict);

    th = mbuf_header_pointer(mbuf, iph->len, sizeof(_tcph), &_tcph);
    if (unlikely(!th)) {
        *verdict = INET_DROP;
        return EDPVS_INVPKT;
    }

    /* Syn-proxy step 2 logic: receive client's 3-handshacke ack packet */
    /* When synproxy disabled, only SYN packets can arrive here.
     * So don't judge SYNPROXY flag here! If SYNPROXY flag judged, and syn_proxy
     * got disbled and keepalived reloaded, SYN packets for RS may never be sent. */
    if (dp_vs_synproxy_ack_rcv(iph->af, mbuf, th, proto, conn, iph, verdict) == 0) {
        /* Attention: First ACK packet is also stored in conn->ack_mbuf */
        return EDPVS_PKTSTOLEN;
    }

   ..................
    svc = dp_vs_service_lookup(iph->af, iph->proto, 
                               &iph->daddr, th->dest, 0, mbuf, NULL);
    if (!svc) {
        *verdict = INET_ACCEPT;
        return EDPVS_NOSERV;
    }
 
    ..............................

    *conn = dp_vs_schedule(svc, iph, mbuf, false);
    if (!*conn) {
        dp_vs_service_put(svc);
        *verdict = INET_DROP;
        return EDPVS_RESOURCE;
    }

    dp_vs_service_put(svc);

    return EDPVS_OK;
}

代码依然是有删减的,删除的部分用"..................."替代了。但是主体逻辑依然存在。该函数中dp_vs_synproxy_ack_rcv

是tcp握手代理,也就是说client先和部署了dpvs的机器进行3次握手,完了之后,dpvs机器才会和真正的服务器RS去握手,此处细节就不深究了。然后第二个调用是dp_vs_service_lookup,这个函数干嘛呢?看名字大概就能猜出来了——通过传递的参数,协议,ip, port查找service。service是dest的集合,dest就是真正的服务器。找到了svc之后,就可以准备建立连接了。

2,dp_vs_schedule

代码语言:c
复制
/* select an RS by service's scheduler and create a connection */
struct dp_vs_conn *dp_vs_schedule(struct dp_vs_service *svc, 
                                  const struct dp_vs_iphdr *iph,
                                  struct rte_mbuf *mbuf,
                                  bool is_synproxy_on)
{
    uint16_t _ports[2], *ports; /* sport, dport */
    struct dp_vs_dest *dest;
    struct dp_vs_conn *conn;
    struct dp_vs_conn_param param;
    struct sockaddr_in daddr, saddr;
    int err;

    assert(svc && iph && mbuf);

    ports = mbuf_header_pointer(mbuf, iph->len, sizeof(_ports), _ports);
    if (!ports)
        return NULL;
        
    /* persistent service */
    if (svc->flags & DP_VS_SVC_F_PERSISTENT)
        return dp_vs_sched_persist(svc, iph,  mbuf, is_synproxy_on);

    dest = svc->scheduler->schedule(svc, mbuf);
    if (!dest) {
        RTE_LOG(WARNING, IPVS, "%s: no dest found.\n", __func__);
        return NULL;
    }
        
    if (dest->fwdmode == DPVS_FWD_MODE_SNAT) {
        if (unlikely(iph->proto == IPPROTO_ICMP)) {
            struct icmphdr *ich, _icmph;
            ich = mbuf_header_pointer(mbuf, iph->len, sizeof(_icmph), &_icmph);
            if (!ich)
                return NULL;

            ports = _ports;
            _ports[0] = icmp4_id(ich);
            _ports[1] = ich->type << 8 | ich->code;

            /* ID may confict for diff host,
             * need we use ID pool ? */
            dp_vs_conn_fill_param(iph->af, iph->proto,
                                  &iph->daddr, &dest->addr,
                                  ports[1], ports[0],
                                  0, &param);
        } else {
            /* we cannot inherit dest (host's src port),
             * that may confict for diff hosts,
             * and using dest->port is worse choice. */
            memset(&daddr, 0, sizeof(daddr));
            daddr.sin_family = AF_INET;
            daddr.sin_addr = iph->daddr.in;
            daddr.sin_port = ports[1];
            memset(&saddr, 0, sizeof(saddr));
            saddr.sin_family = AF_INET;
            saddr.sin_addr = dest->addr.in;
            saddr.sin_port = 0;

            err = sa_fetch(NULL, &daddr, &saddr);
            if (err != 0)
                return NULL;

            dp_vs_conn_fill_param(iph->af, iph->proto,
                                  &iph->daddr, &dest->addr,
                                  ports[1], saddr.sin_port,
                                  0, &param);
        }
    } else {
        if (unlikely(iph->proto == IPPROTO_ICMP)) {
            struct icmphdr *ich, _icmph;
            ich = mbuf_header_pointer(mbuf, iph->len, sizeof(_icmph), &_icmph);
            if (!ich)
                return NULL;

            ports = _ports;
            _ports[0] = icmp4_id(ich);
            _ports[1] = ich->type << 8 | ich->code;

            dp_vs_conn_fill_param(iph->af, iph->proto,
                                  &iph->saddr, &iph->daddr,
                                  ports[0], ports[1], 0, &param);
        } else {
            dp_vs_conn_fill_param(iph->af, iph->proto,
                                  &iph->saddr, &iph->daddr,
                                  ports[0], ports[1], 0, &param);
        }
    }

    conn = dp_vs_conn_new(mbuf, &param, dest,
            is_synproxy_on ? DPVS_CONN_F_SYNPROXY : 0);
    if (!conn) {
        if (dest->fwdmode == DPVS_FWD_MODE_SNAT && iph->proto != IPPROTO_ICMP)
            sa_release(NULL, &daddr, &saddr);
        return NULL;
    }

    dp_vs_stats_conn(conn);
    return conn;
}

恩,继续看代码,其实注释的也挺清楚的——通过service的scheduler函数,选择一个RS(real server), 然后新建一个连接。该函数首先调用了 dest = svc->scheduler->schedule(svc, mbuf); 这句话的意思也很简单,按照service的调度规则,从中选择一个dest(也就是一台RS)。svc的结构体叫struct dp_vs_service,其中有个成员struct dp_vs_scheduler *scheduler,看一下这个结构体:

代码语言:c
复制
struct dp_vs_scheduler {
    struct list_head    n_list;
    char                *name;
//    rte_atomic32_t      refcnt;

    struct dp_vs_dest *
        (*schedule)(struct dp_vs_service *svc, 
                    const struct rte_mbuf *mbuf);

    int (*init_service)(struct dp_vs_service *svc);
    int (*exit_service)(struct dp_vs_service *svc);
    int (*update_service)(struct dp_vs_service *svc);
} __rte_cache_aligned;

也就是说不同的dp_vs_scheduler 对象,是有不同的调度方式的。这里看起来是不是和前面两篇中的job,pkt_type有些相似呢?在dpvs中这个scheduler有3种:dp_vs_rr_scheduler、dp_vs_wlc_scheduler和dp_vs_wrr_scheduler,

代码语言:c
复制
static struct dp_vs_scheduler dp_vs_rr_scheduler = {
    .name = "rr",       /* name */
//    .refcnt = ATOMIC_INIT(0),
    .n_list = LIST_HEAD_INIT(dp_vs_rr_scheduler.n_list),
    .init_service = dp_vs_rr_init_svc,
    .update_service = dp_vs_rr_update_svc,
    .schedule = dp_vs_rr_schedule,
};

static struct dp_vs_scheduler dp_vs_wlc_scheduler = {
    .name = "wlc",
    .n_list = LIST_HEAD_INIT(dp_vs_wlc_scheduler.n_list),
    .schedule = dp_vs_wlc_schedule,
};

static struct dp_vs_scheduler dp_vs_wrr_scheduler = {
    .name = "wrr",
    .n_list = LIST_HEAD_INIT(dp_vs_wrr_scheduler.n_list),
    .init_service = dp_vs_wrr_init_svc,
    .exit_service = dp_vs_wrr_done_svc,
    .update_service = dp_vs_wrr_update_svc,
    .schedule = dp_vs_wrr_schedule,
};

这个注册调用是dpvs_init->dp_vs_sched_init,在dp_vs_sched_init中分别调用了dp_vs_rr_init, dp_vs_wrr_init以及dp_vs_wlc_init分别对这三个scheduler进行了注册。具体的调度策略我们就不详述了。之所以追溯这些代码,其实是想将dpvs的一些代码串联起来。一般在我们刚看dpvs的代码的时候,main函数就是一堆init,然后就是数据平面与控制平面线程启动。刚看起来可能会一脸懵逼,但是从数据平面线程看,往后一直追溯,就会发现,前面的初始化都会在后面有用到。

再回到dp_vs_schedule这个函数,当我们找到一个dest后,然后会有很多dp_vs_conn_fill_param这个函数的调用,dp_vs_conn_fill_param主要就是填充最后一个参数param。这个参数保存了源ip端口和目的ip端口信息。这个参数在dp_vs_conn_new会用到。

代码语言:c
复制
struct dp_vs_conn * dp_vs_conn_new(struct rte_mbuf *mbuf,
                                   struct dp_vs_conn_param *param,
                                   struct dp_vs_dest *dest, uint32_t flags)

dp_vs_conn_new这个函数比较长,就不都贴出来了,大体讲下其中的逻辑就行了。从这个函数的定义就可以看出,函数最后会构建一个struct dp_vs_conn对象,这个结构体里面保存有连接的源和目的ip,port。此外这个函数中还会保存inbound和outbound对应的地址端口信息。以snat为例,client发包给dpvs,dpvs转发给RS,处理完成之后RS会发包到dpvs,再由dpvs转发到client,那么在dpvs这个层面是需要保存这个对应关系的。还需说明dp_vs_conn这个结构体中,有两个成员变量packet_xmit,packet_out_xmit函数指针,一个是转发到RS,一个是转发给client。这两个函数指针真正的赋值是在dp_vs_conn_new中调用的conn_bind_dest中完成的。不同的转发模式赋值不同。

结语

到此,整个dpvs连接的建立过程就算讲完了。回到第二篇<dpvs源码分析(续)>最后的dp_vs_in函数,连接建立完了之后,如果是client发包过来,会调用xmit_inbound,xmit_inbound函数又会调用到dp_vs_conn::packet_xmit进行真正的转发。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 前言
  • 建立连接
  • 结语
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档