在上一篇文章中我们通过一个简单的例子大概描述了如何创建SLUB缓存,如何分配一个object。本文详细描述下涉及的结构体,从结构体的描述中就可以大概理解slub的工作原理了。
首先就是kmem_cache结构体
/*
 * Slab cache management.
 *
 * One kmem_cache instance describes one slab cache: the object layout
 * (size / object_size / offset / align), the preferred slab page order
 * (oo, bounded by min and max), per-cpu fast-path state (cpu_slab) and
 * per-NUMA-node partial-slab lists (node[]).
 * NOTE(review): this is an excerpt of the kernel definition with several
 * fields (e.g. CONFIG_SLUB_CPU_PARTIAL, debug fields) elided.
 */
struct kmem_cache {
struct kmem_cache_cpu __percpu *cpu_slab; /* Per-cpu state: lockless freelist fast path */
/* Used for retrieving partial slabs etc */
slab_flags_t flags;
unsigned long min_partial; /* Minimum number of partial slabs kept per node */
unsigned int size; /* The size of an object including meta data */
unsigned int object_size;/* The size of an object without meta data */
unsigned int offset; /* Free pointer offset. */
struct kmem_cache_order_objects oo; /* Preferred order + objects-per-slab, packed */
/* Allocation and freeing of slabs */
struct kmem_cache_order_objects max; /* Largest order/objects seen (upper bound) */
struct kmem_cache_order_objects min; /* Minimum order that still fits one object */
gfp_t allocflags; /* gfp flags to use on each alloc */
int refcount; /* Refcount for slab cache destroy */
void (*ctor)(void *); /* Optional constructor run on each new object */
unsigned int inuse; /* Offset to metadata */
unsigned int align; /* Alignment */
unsigned int red_left_pad; /* Left redzone padding size */
const char *name; /* Name (only for display!) */
struct list_head list; /* List of slab caches */
struct kmem_cache_node *node[MAX_NUMNODES]; /* Per-node partial lists */
};
/*
 * Per-cpu slab state: the fast allocation path takes objects from
 * freelist without any lock; tid detects cross-cpu races (cmpxchg).
 */
struct kmem_cache_cpu {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
struct page *page; /* The slab from which we are allocating */
};
/*
 * The slab lists for all objects.
 *
 * One instance per NUMA node; tracks the node's partially-full slabs
 * so freed slabs can be reused before new pages are allocated.
 */
struct kmem_cache_node {
spinlock_t list_lock; /* Protects nr_partial and the partial list */
#ifdef CONFIG_SLUB
unsigned long nr_partial; /* Number of slabs on the partial list */
struct list_head partial; /* List of partially-allocated slabs */
#endif
};
看完了上面的结构体是不是感觉还是很晕,怎么办? 我们还是通过上节(SLUB的引入及举例说明)的例子来分析各个结构体是如何联系的。
slub_test = kmem_cache_create("slub_test", sizeof(struct student), 0, 0, NULL);
if(slub_test != NULL){
printk("slub_test create success!\n");
当调用kmem_cache_create的时候,代码流程是:
/*
 * Create a slab cache, optionally marking a [useroffset, usersize)
 * region as valid for usercopy hardening.
 *
 * NOTE(review): this is a trimmed excerpt — the real kernel function
 * also takes slab_mutex, handles the alias-hit early return and error
 * paths; here only the main flow is shown.
 */
struct kmem_cache *
kmem_cache_create_usercopy(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *))
{
struct kmem_cache *s = NULL;
const char *cache_name;
/* Fail closed on bad usersize of useroffset values. */
if (WARN_ON(!usersize && useroffset) ||
WARN_ON(size < usersize || size - usersize < useroffset))
usersize = useroffset = 0;
/* Without a usercopy region we may reuse (alias) an existing compatible cache */
if (!usersize)
s = __kmem_cache_alias(name, size, align, flags, ctor);
/* Duplicate the name: the cache outlives the caller's string */
cache_name = kstrdup_const(name, GFP_KERNEL);
s = create_cache(cache_name, size,
calculate_alignment(flags, align, size),
flags, useroffset, usersize, ctor, NULL, NULL);
return s;
}
/*
 * Allocate and initialize a new kmem_cache, then register it on the
 * global slab_caches list.
 *
 * NOTE(review): excerpt — the kernel version ends with `return s;` and
 * has `out:` / `out_free_cache:` error labels; they were elided by the
 * article, which is why the goto targets are not visible here.
 */
static struct kmem_cache *create_cache(const char *name, unsigned int object_size, unsigned int align, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *),
struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
struct kmem_cache *s;
int err;
/* The kmem_cache structs themselves come from a bootstrap slab cache */
s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
if (!s)
goto out;
s->name = name;
/* size will later be grown past object_size by metadata in calculate_sizes() */
s->size = s->object_size = object_size;
s->align = align;
s->ctor = ctor;
s->useroffset = useroffset;
s->usersize = usersize;
/* Allocator-specific setup: computes layout, per-cpu and per-node state */
err = __kmem_cache_create(s, flags);
if (err)
goto out_free_cache;
s->refcount = 1;
list_add(&s->list, &slab_caches);
memcg_link_cache(s);
}
/*
 * SLUB-specific part of cache creation: compute sizes/orders and set up
 * per-cpu/per-node structures (kmem_cache_open), then expose the cache
 * in sysfs once the slab subsystem is fully up.
 *
 * Returns 0 on success or a negative errno.
 */
int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
{
int err;
err = kmem_cache_open(s, flags);
if (err)
return err;
/* Mutex is not taken during early boot */
if (slab_state <= UP)
return 0;
memcg_propagate_slab_attrs(s);
/* Creates /sys/kernel/slab/<name>/ with the attribute files shown later */
err = sysfs_slab_add(s);
if (err)
__kmem_cache_release(s);
return err;
}
在继续分析之前,我们先看几个小函数
/*
 * Number of objects of @size that fit in a slab of 2^@order pages.
 * e.g. order = 0, size = 8, PAGE_SIZE = 4K -> 4096 / 8 = 512 objects.
 */
static inline unsigned int order_objects(unsigned int order, unsigned int size)
{
return ((unsigned int)PAGE_SIZE << order) / size;
}
根据传入的order和size,计算一个slab(即2^order个page)中可以容纳多少个object。比如order为0,size等于8,PAGE_SIZE=4K,则一个page中可容纳的object个数为4096/8=512个。
/*
 * Smallest page order whose span (2^order pages) is >= @size,
 * i.e. get_order(x) = ceil(log2(x / PAGE_SIZE)) for x > PAGE_SIZE,
 * and 0 for any size up to one page. e.g. get_order(128) == 0.
 */
static inline __attribute_const__ int get_order(unsigned long size)
{
/* Compile-time constant sizes are folded entirely at build time */
if (__builtin_constant_p(size)) {
if (!size)
/* size 0: returns an out-of-range order as a sentinel */
return BITS_PER_LONG - PAGE_SHIFT;
if (size < (1UL << PAGE_SHIFT))
return 0;
return ilog2((size) - 1) - PAGE_SHIFT + 1;
}
/* Runtime path: round up by finding the highest set bit of (size-1) >> PAGE_SHIFT */
size--;
size >>= PAGE_SHIFT;
#if BITS_PER_LONG == 32
return fls(size);
#else
return fls64(size);
#endif
}
根据传递的size,计算出需要的order是多少。比如我们传递进来的值是128,则order等于0。大家可以算算大于4K的时候,怎么算
/*
 * Find the lowest page order (within [slub_min_order, max_order]) that
 * can hold at least @min_objects objects of @size while wasting no more
 * than 1/@fract_leftover of the slab in leftover (tail fragmentation).
 *
 * Note: if no order in the range satisfies the waste limit, the loop
 * falls through and max_order + 1 is returned — the caller
 * (calculate_order) detects this by comparing against slub_max_order.
 */
static inline unsigned int slab_order(unsigned int size,
unsigned int min_objects, unsigned int max_order,
unsigned int fract_leftover)
{
unsigned int min_order = slub_min_order;
unsigned int order;
/* Cap: never pack more than MAX_OBJS_PER_PAGE objects into one slab */
if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
return get_order(size * MAX_OBJS_PER_PAGE) - 1;
/* Start at the smallest order that can hold min_objects objects */
for (order = max(min_order, (unsigned int)get_order(min_objects * size));
order <= max_order; order++) {
unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
unsigned int rem;
/* rem = bytes left over after packing whole objects */
rem = slab_size % size;
if (rem <= slab_size / fract_leftover)
break;
}
return order;
}
min_objects = 4 * (fls(nr_cpu_ids) + 1); //nr_cpu_ids代表CPU的个数
比如当前CPU的个数是4,fls用于获取最高置位bit的位置,则fls(4)等于3,于是min_objects = 4 * (3 + 1) = 16。
这个函数(slab_order)的算法是:从能容纳min_objects个object的最小order开始逐个尝试更大的order,直到某个order下的剩余碎片(slab_size % size)不超过slab大小的1/fract_leftover为止,返回该order。
这些小函数看完,我们再看一个调用了这些小函数的大函数
/*
 * Pick the best slab page order for objects of @size.
 * Strategy: start from an ideal object count and a strict 1/16 waste
 * limit, then relax first the waste limit (fraction 16 -> 4), then the
 * object count, and finally settle for a single object per slab.
 * Returns the chosen order, or -ENOSYS if even MAX_ORDER cannot fit one
 * object.
 */
static inline int calculate_order(unsigned int size)
{
unsigned int order;
unsigned int min_objects;
unsigned int max_objects;
/*
 * Attempt to find best configuration for a slab. This
 * works by first attempting to generate a layout with
 * the best configuration and backing off gradually.
 *
 * First we increase the acceptable waste in a slab. Then
 * we reduce the minimum objects required in a slab.
 */
min_objects = slub_min_objects;
if (!min_objects)
min_objects = 4 * (fls(nr_cpu_ids) + 1); /* e.g. nr_cpu_ids = 4 -> min_objects = 16 */
max_objects = order_objects(slub_max_order, size); /* objects that fit in the largest allowed slab (2^slub_max_order pages) */
min_objects = min(min_objects, max_objects);
while (min_objects > 1) {
unsigned int fraction;
/* Allow at most 1/fraction of the slab to be wasted; relax 16 -> 8 -> 4 */
fraction = 16;
while (fraction >= 4) {
order = slab_order(size, min_objects,
slub_max_order, fraction);
if (order <= slub_max_order)
return order;
fraction /= 2;
}
min_objects--;
}
/*
 * We were unable to place multiple objects in a slab. Now
 * lets see if we can place a single object there.
 */
order = slab_order(size, 1, slub_max_order, 1);
if (order <= slub_max_order)
return order;
/*
 * Doh this slab cannot be placed using slub_max_order.
 */
order = slab_order(size, 1, MAX_ORDER, 1);
if (order < MAX_ORDER)
return order;
return -ENOSYS;
}
再看一个函数,它会根据order的值计算kmem_cache中一些字段的大小
/*
 * Compute the final object layout (s->size) and the slab geometry
 * (s->oo / s->min / s->max) for cache @s. @forced_order overrides the
 * computed order when >= 0. Returns nonzero on success, 0 on failure.
 *
 * NOTE(review): trimmed excerpt — the kernel version also lays out
 * redzones, the free pointer, debug metadata etc. between the two
 * ALIGN() steps; those branches were elided by the article.
 */
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
slab_flags_t flags = s->flags;
unsigned int size = s->object_size;
unsigned int order;
/*
 * Round up object size to the next word boundary. We can only
 * place the free pointer at word boundaries and this determines
 * the possible location of the free pointer.
 */
size = ALIGN(size, sizeof(void *));
/*
 * SLUB stores one object immediately after another beginning from
 * offset 0. In order to align the objects we have to simply size
 * each object to conform to the alignment.
 */
size = ALIGN(size, s->align);
s->size = size;
if (forced_order >= 0)
order = forced_order;
else
order = calculate_order(size);
/* calculate_order() returns -ENOSYS when nothing fits */
if ((int)order < 0)
return 0;
/*
 * Determine the number of objects per slab
 */
s->oo = oo_make(order, size);
/* s->min: the smallest slab (fitting one object) we fall back to under memory pressure */
s->min = oo_make(get_order(size), size);
if (oo_objects(s->oo) > oo_objects(s->max))
s->max = s->oo;
/* Success iff at least one object fits in the chosen slab */
return !!oo_objects(s->oo);
}
/*
 * Pack (order, objects-per-slab) into one word: the order goes in the
 * high bits (above OO_SHIFT), the object count in the low bits. The
 * halves are later extracted by oo_order() / oo_objects().
 */
static inline struct kmem_cache_order_objects oo_make(unsigned int order,
unsigned int size)
{
struct kmem_cache_order_objects x = {
(order << OO_SHIFT) + order_objects(order, size)
};
return x;
}
/*
 * SLUB core initialization for a new cache: fix up flags, compute the
 * layout/orders, size the partial lists, and allocate the per-node and
 * per-cpu structures.
 *
 * NOTE(review): trimmed excerpt — the kernel version ends with an
 * `error:` label and a final return, which the article elided; that is
 * why the goto targets are not visible here.
 */
static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
{
s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
if (!calculate_sizes(s, -1))
goto error;
/*
 * The larger the object size is, the more pages we want on the partial
 * list to avoid pounding the page allocator excessively.
 */
set_min_partial(s, ilog2(s->size) / 2); /* min_partial: minimum partial slabs kept per node */
set_cpu_partial(s); /* upper bound on free objects cached in per-cpu partial slabs */
if (!init_kmem_cache_nodes(s))
goto error;
if (alloc_kmem_cache_cpus(s))
return 0;
}
创建完毕之后,系统中就多出来一个名为slub_test的slab缓存,此slab中object的个数是512,size是8,order等于0。此时只是建立好了slab缓冲区,里面还没有实际分配object,也就是freelist指向NULL。
创建完毕后,大家可以去/sys/kernel/slab/slub_test下看一些节点的信息:
/sys/kernel/slab/slub_test# ls
aliases destroy_by_rcu order slab_size
align free_calls partial slabs
alloc_calls hwcache_align poison slabs_cpu_partial
cache_dma min_partial reclaim_account store_user
cgroup object_size red_zone total_objects
cpu_partial objects reserved trace
cpu_slabs objects_partial sanity_checks validate
ctor objs_per_slab shrink
/sys/kernel/slab/slub_test #