上篇讲解了Hqos基本知识点及vpp Hqos配置及基本流程框架,今天通过源码来分析Hqos实现逻辑。
网卡配置信息在dpdk模块配置,通过pci编号查询网卡配置结构体在配置pool池的索引。得到当前网卡的配置结构,将解析hqos信息存储到结构体中以供后续网卡设备启动时使用。
上图是管理dpdk配置信息的全局结构体,这里只关心HQOS相关的配置。
hqos_enable: 当前网卡是否使能hqos标志。
hqos_thread: 这里设置当前网卡与hqos线程id之间对应关系。
hqos_thread_valid: hqos_thread 索引是否有效。
|---dpdk_config /*dpdk 配置信息解析*/
| |---dpdk_device_config() /*解析网卡配置 dev 0000:02:00.0*/
| |---dpdk_device_config_hqos_default ();/*Hqos缺省配置初始化*/
| |---unformat_hqos() /*解析网卡Hqos配置 hqos {hqos-thread x}*/
dpdk_device_config_hqos_default 从代码分析是所有的网卡都会设置缺省Hqos资源
2、Hqos线程配置解析
线程注册相关的可以阅读前面的文章(创建自己的核绑定线程)
下面只是完成配置文件中cpu模块的核绑定情况解析:corelist-hqos-threads 5, 6 )
/* hqos-threads thread registration: added to the global vlib_thread_main
 * registration list when the dpdk plugin is loaded. The registered
 * function is the HQoS thread's entry point. */
VLIB_REGISTER_THREAD (hqos_thread_reg, static) =
{
.name = "hqos-threads",
.short_name = "hqos-threads",
.function = dpdk_hqos_thread_fn,
};
/* cpu_config: parse the "cpu" section of the startup configuration
 * (excerpt — elided parts shown as "...."). */
static clib_error_t *
cpu_config (vlib_main_t * vm, unformat_input_t * input)
{
....
/* 1. Walk the global thread-registration list and hash each thread
 * name to the address of its registration structure. */
while (tr)
{
hash_set_mem (tm->thread_registrations_by_name, tr->name, (uword) tr);
tr = tr->next;
}
....
/* 2. Parse "coremask-<name> <mask>" / "corelist-<name> <list>"
 * entries, e.g. "corelist-hqos-threads 5,6". */
else if (unformat (input, "coremask-%s %U", &name,
unformat_bitmap_mask, &bitmap) ||
unformat (input, "corelist-%s %U", &name,
unformat_bitmap_list, &bitmap))
{
/* Look up the thread registration structure by name. */
p = hash_get_mem (tm->thread_registrations_by_name, name);
if (p == 0)
return clib_error_return (0, "no such thread type '%s'", name);
tr = (vlib_thread_registration_t *) p[0];
if (tr->use_pthreads)/* core-pinned thread types must not have use_pthreads set */
return clib_error_return (0,
"corelist cannot be set for '%s' threads",
name);
/* Record the coremask and the resulting thread count. */
tr->coremask = bitmap;
tr->count = clib_bitmap_count_set_bits (tr->coremask);
}
Hqos线程资源初始化及创建流程:
vlib_main() /*main函数*/
| |---vlib_thread_init() /*线程资源初始化*/
| |---vlib_call_all_main_loop_enter_functions()/*调用所有进入主循环前的函数*/
| |---start_workers() /*完成线程的创建及线程私有数据的申请*/
| |---dpdk_hqos_thread_fn() /*Hqos线程处理函数*/
| |---dpdk_hqos_thread_internal /*Hqos线程业务处理函数*/
| |---vlib_main_loop() /*进入main核主循环函数*/
| |---dispatch_process() /*process类型node节点处理*/
| |---dpdk_process() /*dpdk process node 处理函数*/
| |---dpdk_lib_init() /*遍历网卡资源,完成网卡资源初始化及相关配置设置*/
| |---dpdk_port_setup_hqos() /*对使能Hqos功能网卡配置Hqos资源*/
| |---dpdk_update_link_state() /*网卡设备状态更新*/
| |---dpdk process协程定时任务主循环
| |---dpdk_update_counters()/*接口统计信息*/
| |---dpdk_update_link_state()/*状态更新*/
| |---other process 节点
1、vlib_thread_init 主要是完成对main线程亲核性设置及对所有worker线程设置的corelist有效性检查、注册worker线程的数量及起始线程的索引(first_index,这个对Hqos比较关键,后续代码细看) 2、start_workers函数注册宏是VLIB_MAIN_LOOP_ENTER_FUNCTION (start_workers);这里会遍历所有注册的线程,调用dpdk函数完成线程创建及核绑定设置。 3、dpdk_lib_init()函数遍历网卡设备,根据网卡的配置信息进行使能网卡功能。这里Hqos的相关配置都是在这里执行的。
/* vlib_thread_init: assign per-thread-type first_index/count and pin the
 * main thread to its lcore (excerpt — elided parts shown as "....."). */
clib_error_t *
vlib_thread_init (vlib_main_t * vm)
{
.....
u32 n_vlib_mains = 1;
u32 first_index = 1;
.....
/* Pin the main thread to main_lcore (main-thread CPU affinity). */
if (tm->cb.vlib_thread_set_lcore_cb)
{
tm->cb.vlib_thread_set_lcore_cb (0, tm->main_lcore);
}
.....
/* Walk all registered thread types and assign each one its starting
 * thread index and thread count. first_index is what later lets the
 * HQoS code locate its own threads. */
for (i = 0; i < vec_len (tm->registrations); i++)
{
int j;
tr = tm->registrations[i];
tr->first_index = first_index;
first_index += tr->count;
/* no_data_structure_clone: this thread type does not clone the
 * per-thread vlib_mains data structures. */
n_vlib_mains += (tr->no_data_structure_clone == 0) ? tr->count : 0;
....
}
/* Record the total number of vlib mains. */
tm->n_vlib_mains = n_vlib_mains;
/* Size the vlib_worker_threads vector to first_index entries. */
vec_validate_aligned (vlib_worker_threads, first_index - 1,CLIB_CACHE_LINE_BYTES);
return 0;
}
/* dpdk_lib_init: walk all DPDK devices, bind HQoS-enabled ports to HQoS
 * threads and initialize their HQoS resources (excerpt, "...." elided). */
static clib_error_t *
dpdk_lib_init (dpdk_main_t * dm)
{
....
/* Default: clear the HQoS thread first index and count. */
dm->hqos_cpu_first_index = 0;
dm->hqos_cpu_count = 0;
/* Look up the HQoS thread registration by its name. */
p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads");
tr_hqos = p_hqos ? (vlib_thread_registration_t *) p_hqos[0] : 0;
/* Record first thread index and count if HQoS threads are configured. */
if (tr_hqos && tr_hqos->count > 0)
{
dm->hqos_cpu_first_index = tr_hqos->first_index;
dm->hqos_cpu_count = tr_hqos->count;
}
/* Iterate over all DPDK ethernet devices. */
RTE_ETH_FOREACH_DEV(i)
{ ......
/* Look up the per-device configuration pool index by PCI address. */
p = hash_get (dm->conf->device_config_index_by_pci_addr,pci_addr.as_u32);
if (p)
{ /* Found: use the device-specific configuration. */
devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
xd->name = devconf->name;
} else { /* Not found: fall back to the default device configuration. */
devconf = &dm->conf->default_devconf;
}
/* HQoS is enabled on this device. */
if (devconf->hqos_enabled)
{ /* Mark the device as HQoS-enabled. */
xd->flags |= DPDK_DEVICE_FLAG_HQOS;
int cpu;
if (devconf->hqos.hqos_thread_valid)
{/* An HQoS thread was configured explicitly; bail out if the
  * configured id is not smaller than the HQoS thread count. */
if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count)
return clib_error_return (0, "invalid HQoS thread index");
/* thread index = first HQoS thread index + configured thread id */
cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread;
} else {
if (dm->hqos_cpu_count == 0)
return clib_error_return (0, "no HQoS threads available");
/* No explicit binding: assign devices to HQoS threads round-robin. */
cpu = dm->hqos_cpu_first_index + next_hqos_cpu;
next_hqos_cpu++;
if (next_hqos_cpu == dm->hqos_cpu_count)
{
next_hqos_cpu = 0;
} /* Remember the assignment (stored as an absolute thread index). */
devconf->hqos.hqos_thread_valid = 1;
devconf->hqos.hqos_thread = cpu;
}
/* Append this device/queue pair to the chosen HQoS thread's list. */
dpdk_device_and_queue_t *dq;
vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
dq->device = xd->device_index;
dq->queue_id = 0;
}
....
/* Initialize this device's HQoS resources. */
if (devconf->hqos_enabled)
{
clib_error_t *rv;
rv = dpdk_port_setup_hqos (xd, &devconf->hqos);
if (rv)
return rv;
}
}
....
}
从代码分析主要是创建了Hqos线程调度时存储报文数据的软件缓存区Swq队列;初始化port subport pipe tc等资源。 1、hqos_wt 和Hqos_ht之间的关系
Hqos_ht和hqos_wt 结构体swq关系
1、hqos_ht (hqos per hqos thread):这里每个网卡Xd只能申请一个hqos_ht(也就是一个网卡只能绑定一个hqos 线程,这样就是多核处理无锁化)。 2、hqos_wt(hqos per worker thread):每个workers核对应一个私有数据,在tx接口判断接口使能Hqos时,报文enqueue对应workers核心swq队列中。无需加锁设置。 3、pkts_enq: 用于在Hqos线程调度时从swq队列取出报文后临时缓存区。
vec_validate (xd->hqos_ht->pkts_enq, 2 * hqos->burst_enq - 1);
4、pkts_deq:用于Hqos线程调度dequeue时临时存储待发送报文的mbuf缓存区,
vec_validate (xd->hqos_ht->pkts_deq, hqos->burst_deq - 1);
.burst_enq = 256, .burst_deq = 220, 这里pkts_enq 是申请2个burst_enq大小,这里应该是为了防止pkts_enq时越界吧(burst_enq默认设置为256,和vpp一次PMD读取最大256个一致。)pkts_deq设置为220,小于256,这个数值大小意义不确定?疑问?
5、dpdk_main_t结构中devices_by_hqos_cpu 一个二级数组: 一级是一个基于n_vlib_main数量(目前就是worker核+hqos thread 核)的vector结构,存储的是dpdk_device_and_queue_t指针.
/*dpdk_lib_init 开头部分 devices_by_hqos_cpu[hqos_thread_index]*/
vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1,CLIB_CACHE_LINE_BYTES);
二级也是一个vector结构,存储当前Hqos线程绑定的网卡。
{
dpdk_device_and_queue_t *dq;
vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
/* Index of this device in the dm->devices vector (devices[device_index]). */
dq->device = xd->device_index;
dq->queue_id = 0;/* TX queue id fixed to 0 — rationale unclear; TODO confirm */
}
具体代码如下:
/*
 * dpdk_port_setup_hqos: allocate and configure all HQoS resources for one
 * HQoS-enabled device: per-worker SWQ rings, per-worker context (hqos_wt),
 * the single per-HQoS-thread context (hqos_ht), and the DPDK scheduler
 * hierarchy (port -> subports -> pipes).
 *
 * Returns 0 on success, or a clib error on failure.
 *
 * NOTE(review): on the error paths below, previously created rings, the
 * strdup'd port name and the sched port are not released — presumably
 * acceptable because init failure is fatal; confirm against caller.
 */
clib_error_t *
dpdk_port_setup_hqos (dpdk_device_t * xd, dpdk_device_config_hqos_t * hqos)
{
vlib_thread_main_t *tm = vlib_get_thread_main ();
char name[32];
u32 subport_id, i;
int rv;
/* Find the worker thread count and the first worker thread index. */
int worker_thread_first = 0;
int worker_thread_count = 0;
uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers");
vlib_thread_registration_t *tr = p ? (vlib_thread_registration_t *) p[0] : 0;
if (tr && tr->count > 0)
{
worker_thread_first = tr->first_index;
worker_thread_count = tr->count;
}
/* Allocate and zero the per-thread HQoS private-data array. */
vec_validate_aligned (xd->hqos_wt, tm->n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES);
clib_memset (xd->hqos_wt, 0, tm->n_vlib_mains * sizeof (xd->hqos_wt[0]));
/* Exactly one hqos_ht per device: a device is bound to a single HQoS
 * thread, which keeps the scheduling path lock-free. */
vec_validate_aligned (xd->hqos_ht, 0, CLIB_CACHE_LINE_BYTES);
clib_memset (xd->hqos_ht, 0, sizeof (xd->hqos_ht[0]));
/* One SWQ slot per worker thread (plus one for the main thread), so
 * enqueue from each worker needs no locking. */
vec_validate (xd->hqos_ht->swq, worker_thread_count);
/* Create a single-producer/single-consumer DPDK ring per worker, so
 * both enqueue and dequeue are lock-free. */
for (i = 0; i < worker_thread_count + 1; i++)
{
u32 swq_flags = RING_F_SP_ENQ | RING_F_SC_DEQ;
snprintf (name, sizeof (name), "SWQ-worker%u-to-device%u", i,xd->port_id);
xd->hqos_ht->swq[i] = rte_ring_create (name, hqos->swq_size, xd->cpu_socket, swq_flags);
if (xd->hqos_ht->swq[i] == NULL)
return clib_error_return (0, "SWQ-worker%u-to-device%u: rte_ring_create err", i, xd->port_id);
}
/* HQoS port configuration. */
snprintf (name, sizeof (name), "HQoS%u", xd->port_id);
hqos->port.name = strdup (name);
if (hqos->port.name == NULL)
return clib_error_return (0, "HQoS%u: strdup err", xd->port_id);
hqos->port.socket = rte_eth_dev_socket_id (xd->port_id);
if (hqos->port.socket == SOCKET_ID_ANY)
hqos->port.socket = 0;
xd->hqos_ht->hqos = rte_sched_port_config (&hqos->port);
if (xd->hqos_ht->hqos == NULL)
return clib_error_return (0, "HQoS%u: rte_sched_port_config err",
xd->port_id);
/* HQoS subport configuration: one port holds n_subports_per_port. */
for (subport_id = 0; subport_id < hqos->port.n_subports_per_port;
subport_id++)
{
u32 pipe_id;
rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &hqos->subport[subport_id]);
if (rv)
return clib_error_return (0, "HQoS%u subport %u: rte_sched_subport_config err (%d)", xd->port_id, subport_id, rv);
/* HQoS pipes: each subport holds multiple pipes, each mapped to a
 * pipe profile via hqos->pipe_map. */
for (pipe_id = 0; pipe_id < hqos->port.n_pipes_per_subport; pipe_id++)
{
u32 pos = subport_id * hqos->port.n_pipes_per_subport + pipe_id;
u32 profile_id = hqos->pipe_map[pos];
rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, profile_id);
if (rv)
return clib_error_return (0, "HQoS%u subport %u pipe %u: rte_sched_pipe_config err (%d)", xd->port_id, subport_id, pipe_id, rv);
}
}
/* Scratch buffers used by the HQoS thread while scheduling; pkts_enq
 * is sized to 2 * burst_enq so a full drain plus a full ring burst fit. */
xd->hqos_ht->hqos_burst_enq = hqos->burst_enq;
xd->hqos_ht->hqos_burst_deq = hqos->burst_deq;
vec_validate (xd->hqos_ht->pkts_enq, 2 * hqos->burst_enq - 1);
vec_validate (xd->hqos_ht->pkts_deq, hqos->burst_deq - 1);
xd->hqos_ht->pkts_enq_len = 0;
xd->hqos_ht->swq_pos = 0;
xd->hqos_ht->flush_count = 0;
/* Fill in the per-worker contexts: slot 0 is the main thread, the rest
 * map worker i-1 to vlib thread id worker_thread_first + (i - 1). */
for (i = 0; i < worker_thread_count + 1; i++)
{
u32 tid;
if (i)
tid = worker_thread_first + (i - 1);
else
tid = i;
xd->hqos_wt[tid].swq = xd->hqos_ht->swq[i];
xd->hqos_wt[tid].hqos_field0_slabpos = hqos->pktfield0_slabpos;
xd->hqos_wt[tid].hqos_field0_slabmask = hqos->pktfield0_slabmask;
xd->hqos_wt[tid].hqos_field0_slabshr = count_trailing_zeros (hqos->pktfield0_slabmask);
xd->hqos_wt[tid].hqos_field1_slabpos = hqos->pktfield1_slabpos;
xd->hqos_wt[tid].hqos_field1_slabmask = hqos->pktfield1_slabmask;
xd->hqos_wt[tid].hqos_field1_slabshr = count_trailing_zeros (hqos->pktfield1_slabmask);
xd->hqos_wt[tid].hqos_field2_slabpos = hqos->pktfield2_slabpos;
xd->hqos_wt[tid].hqos_field2_slabmask = hqos->pktfield2_slabmask;
xd->hqos_wt[tid].hqos_field2_slabshr = count_trailing_zeros (hqos->pktfield2_slabmask);
memcpy (xd->hqos_wt[tid].hqos_tc_table, hqos->tc_table, sizeof (hqos->tc_table));
}
return 0;
}
接口tx发包的Hqos处理流程如下:
|---VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) () /*通用dpdk网卡tx处理函数*/
| |---tx_burst_vector_internal() /*tx burst 发包函数*/
| |---rte_ring_sp_enqueue_burst() /*Hqos使能,报文入swq队列*/
下面是具体代码处理,比较简单:
/* Per-device TX burst path (excerpt — elided parts shown as "....").
 * When HQoS is enabled on the interface, packets are enqueued into the
 * per-worker SWQ instead of being handed directly to the PMD. */
static_always_inline
u32 tx_burst_vector_internal (vlib_main_t * vm,
dpdk_device_t * xd,
struct rte_mbuf **mb, u32 n_left)
{
....
do
{
/* Does this interface have HQoS enabled? */
if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */
{
/* Per-worker HQoS context for the current thread. */
dpdk_device_hqos_per_worker_thread_t *hqos = &xd->hqos_wt[vm->thread_index];
/* Derive subport/pipe/tc/queue from the packet and stash them into
 * the mbuf's sched metadata for the HQoS thread to consume. */
dpdk_hqos_metadata_set (hqos, mb, n_left);
/* Enqueue the burst into this worker's lock-free software queue. */
n_sent = rte_ring_sp_enqueue_burst (hqos->swq, (void **) mb,
n_left, 0);
} else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) { /* fixed: original excerpt was missing the closing ')' */
/* Normal path: hand the burst directly to the DPDK PMD. */
n_sent = rte_eth_tx_burst (xd->port_id, queue_id, mb, n_left);
} else { /* unexpected device state */
ASSERT (0);
n_sent = 0;
}
}
关于Hqos五级调度模型中subport、pipe、tc、queue、color的数值的获取可以用户根据需要自己定义。下面是dpdk qos sched 代码示例中的是使用(vpp中的使用自己阅读吧):
/*用户自定义五元组从报文各字段获取
* QoS parameters are encoded as follows:
* Outer VLAN ID defines subport
* Inner VLAN ID defines pipe
* Destination IP 0.0.XXX.0 defines traffic class
* Destination IP host (0.0.0.XXX) defines queue
* Values below define offset to each field from start of frame
*/
#define SUBPORT_OFFSET 7
#define PIPE_OFFSET 9
#define TC_OFFSET 20
#define QUEUE_OFFSET 20
#define COLOR_OFFSET 19
/* Extract the HQoS 5-tuple (subport, pipe, traffic class, queue, color)
 * from fixed offsets in the frame, per the field encoding defined above. */
static inline int
get_pkt_sched(struct rte_mbuf *m, uint32_t *subport, uint32_t *pipe,
	uint32_t *traffic_class, uint32_t *queue, uint32_t *color)
{
	/* View the frame as an array of 16-bit words from its first byte. */
	uint16_t *words = rte_pktmbuf_mtod(m, uint16_t *);

	/* Outer VLAN ID selects the subport, inner VLAN ID the pipe. */
	uint16_t outer_vlan = rte_be_to_cpu_16(words[SUBPORT_OFFSET]) & 0x0FFF;
	uint16_t inner_vlan = rte_be_to_cpu_16(words[PIPE_OFFSET]) & 0x0FFF;
	*subport = outer_vlan & (port_params.n_subports_per_port - 1);
	*pipe = inner_vlan & (port_params.n_pipes_per_subport - 1);

	/* Destination-IP bytes select traffic class, queue and color.
	 * NOTE(review): these reads are intentionally not byte-swapped,
	 * matching the dpdk qos_sched example this comes from. */
	*traffic_class = (words[QUEUE_OFFSET] & 0x0F) &
	    (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1);
	*queue = ((words[QUEUE_OFFSET] >> 8) & 0x0F) &
	    (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1);
	*color = words[COLOR_OFFSET] & 0x03;

	return 0;
}
/* Encode the HQoS 5-tuple into the mbuf's hash.sched field so the
 * scheduler can classify the packet at enqueue time. */
void
rte_sched_port_pkt_write(struct rte_mbuf *pkt,
	uint32_t subport, uint32_t pipe, uint32_t traffic_class,
	uint32_t queue, enum rte_meter_color color)
{
	struct rte_sched_port_hierarchy *h;

	h = (struct rte_sched_port_hierarchy *) &pkt->hash.sched;
	/* Compile-time guarantee that the hierarchy fits inside hash.sched. */
	RTE_BUILD_BUG_ON(sizeof(*h) > sizeof(pkt->hash.sched));

	h->subport = subport;
	h->pipe = pipe;
	h->traffic_class = traffic_class;
	h->queue = queue;
	h->color = (uint32_t) color;
}
Hqos线程调度代码逻辑不是特别复杂。 1、获取当前线程绑定网卡的数量,如果未绑定直接跳过。 2、遍历当前设备绑定的网卡,读取网卡的索引和队列索引 3、执行Hqos调度处理: 3.1 从Swq软件缓存队列收包。 3.2 调用dpdk hqos 调度入队操作 3.3 调用dpdk hqos 调度出队操作 3.4 调用dpdk tx发包函数报文从网卡发送出去。
/*
 * dpdk_hqos_thread_internal: main loop of an HQoS thread. Round-robins
 * over the devices bound to this thread; for each device it drains the
 * per-worker SWQs, enqueues full bursts into the DPDK scheduler, then
 * dequeues scheduled packets and transmits them via the PMD.
 */
static_always_inline void
dpdk_hqos_thread_internal (vlib_main_t * vm)
{
dpdk_main_t *dm = &dpdk_main;
u32 thread_index = vm->thread_index;
u32 dev_pos;
dev_pos = 0;
while (1)
{
vlib_worker_thread_barrier_check ();
/* 1. Re-read the device list each iteration: the CLI can rebind
 * devices to HQoS threads at runtime. Skip if nothing is bound. */
u32 n_devs = vec_len (dm->devices_by_hqos_cpu[thread_index]);
if (PREDICT_FALSE (n_devs == 0))
{
dev_pos = 0;
continue;
}
if (dev_pos >= n_devs)
dev_pos = 0;
/* 2. Pick the next bound device and fetch its HQoS-thread context. */
dpdk_device_and_queue_t *dq = vec_elt_at_index (dm->devices_by_hqos_cpu[thread_index], dev_pos);
dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device);
dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht;
u32 device_index = xd->port_id;
u16 queue_id = dq->queue_id;
struct rte_mbuf **pkts_enq = hqos->pkts_enq;
struct rte_mbuf **pkts_deq = hqos->pkts_deq;
u32 pkts_enq_len = hqos->pkts_enq_len;
u32 swq_pos = hqos->swq_pos;
u32 n_swq = vec_len (hqos->swq), i;
/* flush_count batches small accumulations into a full burst before
 * the scheduler enqueue — presumably a throughput optimization. */
u32 flush_count = hqos->flush_count;
/* 3. Drain this device's SWQs; enqueue into the scheduler once a
 * full burst has accumulated. */
for (i = 0; i < n_swq; i++)
{
/* Get current SWQ for this device */
struct rte_ring *swq = hqos->swq[swq_pos];
/* Lock-free single-consumer dequeue from the SWQ. */
pkts_enq_len += rte_ring_sc_dequeue_burst (swq,
(void **)&pkts_enq[pkts_enq_len], hqos->hqos_burst_enq, 0);
/* Get next SWQ for this device */
swq_pos++;
if (swq_pos >= n_swq)
swq_pos = 0;
hqos->swq_pos = swq_pos;
/* HQoS enqueue when burst available */
if (pkts_enq_len >= hqos->hqos_burst_enq)
{
rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len);
pkts_enq_len = 0;
flush_count = 0;
break;
}
}
/* Partial burst: flush it once it has lingered for
 * HQOS_FLUSH_COUNT_THRESHOLD iterations. */
if (pkts_enq_len)
{
flush_count++;
if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD))
{
rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len);
pkts_enq_len = 0;
flush_count = 0;
}
}
hqos->pkts_enq_len = pkts_enq_len;
hqos->flush_count = flush_count;
/* HQoS scheduler dequeue and transmit. */
{
u32 pkts_deq_len, n_pkts;
pkts_deq_len = rte_sched_port_dequeue (hqos->hqos,
pkts_deq,
hqos->hqos_burst_deq);
/* Loop until the whole dequeued burst has been handed to the PMD. */
for (n_pkts = 0; n_pkts < pkts_deq_len;)
n_pkts += rte_eth_tx_burst (device_index,
(uint16_t) queue_id,
&pkts_deq[n_pkts],
(uint16_t) (pkts_deq_len - n_pkts));
}
/* Advance to the next bound device. */
dev_pos++;
}
}
💫思考 1、获取当前Hqos线程绑定的网卡实例放在了while(1)循环内; vec_len (dm->devices_by_hqos_cpu[thread_index]); 具体原因是vpp 提供了cli命令行动态设置网卡与Hqos线程绑定关系,所以每次循环都需要重新获取。 set dpdk interface hqos placement <interface> thread <n> 2、queue_id字段, 等于 dq->queue_id字段用于tx接口时,报文从网卡的哪个队列发送。(全局搜索代码,只有dpdk_lib_init函数在处理网卡和Hqos线程绑定关系的时候赋值为0,所以默认都使用的main核队列) 2.1 正常的处理逻辑是每个核都对应一个tx队列,实现发包无锁化。 2.2 某些网卡不支持多队列,在发包时会有加锁操作(Hqos处理时貌似未考虑此场景)。 3、flush_count 字段,作用应该就是积攒报文,当达到256报文时,才进行Hqos调度入队操作。
每个HqoS调度器实例都是用配置hqos port、subport、pipe和队列所需的默认参数初始化的。一些参数可以通过CLI命令在运行时重新配置。 以下命令可用于配置QoS调度器参数。
如令牌桶速率(字节/秒)、令牌桶大小(字节)、流量类速率(字节/秒)和令牌更新周期(毫秒)。
set dpdk interface hqos subport <interface> subport <subport_id> [rate <n>] [bktsize <n>] [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] [period <n>]
set dpdk interface hqos pipe <interface> subport <subport_id> pipe <pipe_id> profile <profile_id>
set dpdk interface hqos placement <interface> thread <n>
分类过程将包字段信息映射为5个元组(subport, pipe, traffic class, queue, color),并存储在包mbuf中。
set dpdk interface hqos pktfield <interface> id subport|pipe|tc offset <n> mask <hex-mask>
set dpdk interface hqos tctbl <interface> entry <map_val> tc <tc_id> queue <queue_id>
vpp# show dpdk interface hqos TenGigabitEthernet2/0/0
Thread:
Input SWQ size = 4096 packets
Enqueue burst size = 256 packets
Dequeue burst size = 220 packets
Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport)
Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe)
Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc)
Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)
[ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
[16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
[32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
[48 .. 63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
Port:
Rate = 1250000000 bytes/second
MTU = 1514 bytes
Frame overhead = 24 bytes
Number of subports = 1
Number of pipes per subport = 4096
Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets
Number of pipe profiles = 1
Subport 0:
Rate = 120000000 bytes/second
Token bucket size = 1000000 bytes
Traffic class rate: TC0 = 120000000, TC1 = 120000000, TC2 = 120000000, TC3 = 120000000 bytes/second
TC period = 10 milliseconds
Pipe profile 0:
Rate = 305175 bytes/second
Token bucket size = 1000000 bytes
Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second
TC period = 40 milliseconds
TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
vpp# show dpdk interface hqos placement
Thread 5 (vpp_hqos-threads_0 at lcore 5):
TenGigabitEthernet2/0/0 queue 0
Thread 6 (vpp_hqos-threads_1 at lcore 6):
TenGigabitEthernet4/0/1 queue 0
本文基于vpp 19.01版本分析Hqos模块的实现流程,大致分为几个步骤: 1、Hqos配置文件相关设置解析 2、Hqos线程初始化(主要分配起始线程ID及core设置是否有效) 3、Hqos线程创建 4、遍历网卡设备,建立网卡与Hqos线程的绑定关系及初始化Hqos线程资源。 5、网卡Tx模块判断接口使能Hqos后,报文入Hqos swq队列 6、Hqos线程对报文进行调度处理。
本文分享自 DPDK VPP源码分析 微信公众号,前往查看
如有侵权,请联系 cloudcommunity@tencent.com 删除。
本文参与 腾讯云自媒体同步曝光计划 ,欢迎热爱写作的你一起参与!