
Linux memory management: slab allocator initialization


A toy operating system I write in my spare time needs stronger memory management. The buddy-system page allocator is finished, so the next step is finer-grained allocation, since the buddy system hands out memory only in whole pages. The reference is kernel linux 2.6.30; I have not studied newer sources, but the core ideas of the algorithm should be much the same.

    The slab algorithm is an efficient memory allocator. It organizes memory blocks of a frequently used size, such as 32 or 64 bytes, or of some commonly used struct type, into a kmem_cache structure. Recently freed objects are kept in an array_cache, so for memory that is allocated and released frequently, both allocation and reclamation cost O(1). The kmalloc function allocates from slab caches. The overall structure: a slab consists of one or more pages, and its management structure either lives on those pages or is allocated dynamically from another kmem_cache.
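To make the size-class idea concrete, here is a minimal userspace sketch of how a kmalloc-style front end picks a cache for a request. The class sizes and function names are illustrative assumptions of mine, not the kernel's actual table.

#include <stddef.h>
#include <stdio.h>

/* Hypothetical size classes in the spirit of the kernel's malloc_sizes[]
 * table; the real sizes depend on architecture and config. */
static const size_t size_classes[] = { 32, 64, 128, 256, 512, 1024 };

/* Return the smallest class that fits n, or 0 if none does; kmalloc does
 * an analogous walk over its size table to pick a kmem_cache. */
static size_t pick_class(size_t n)
{
    for (size_t i = 0; i < sizeof(size_classes) / sizeof(size_classes[0]); i++)
        if (n <= size_classes[i])
            return size_classes[i];
    return 0;
}

int main(void)
{
    printf("kmalloc(20)  -> %zu-byte cache\n", pick_class(20));  /* 32 */
    printf("kmalloc(100) -> %zu-byte cache\n", pick_class(100)); /* 128 */
    return 0;
}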

   Source analysis: to keep the core ideas in focus, the kernel's locking and debug-check code is skipped. We start with kmem_cache initialization; first, a few important structures.

/*
 * struct kmem_cache
 *
 * manages a cache.
 */
/* This structure is the core of the slab allocator. One is created per object
 * type: 32-byte buffers, 64-byte buffers, task_struct objects, and so on. */
struct kmem_cache {
/* 1) per-cpu data, touched during every alloc/free */
	/* per-cpu cache arrays; on a uniprocessor NR_CPUS is 1 */
	struct array_cache *array[NR_CPUS];
/* 2) Cache tunables. Protected by cache_chain_mutex */
	unsigned int batchcount;         //objects moved into/out of an array_cache at a time
	unsigned int limit;             //capacity of the array_cache
	unsigned int shared;            //entries shared between the cpus of a node

	unsigned int buffer_size;       //size of one object
	u32 reciprocal_buffer_size;     //reciprocal of buffer_size; replaces division with multiplication in obj_to_index()
/* 3) touched by every alloc & free from the backend */

	unsigned int flags;		/* constant flags */
	unsigned int num;		/* # of objs per slab */

/* 4) cache_grow/shrink */
	/* order of pgs per slab (2^n) */
	unsigned int gfporder;         //page order of each slab

	/* force GFP flags, e.g. GFP_DMA */
	gfp_t gfpflags;               //allocation flags

	size_t colour;			/* cache colouring range */ //number of colours per slab
	unsigned int colour_off;	/* colour offset */         //colouring step, equal to the cache line size
	struct kmem_cache *slabp_cache; //cache used for off-slab management structures
	unsigned int slab_size;                                    //size of the slab management structure
	unsigned int dflags;		/* dynamic flags */

	/* constructor func */
	void (*ctor)(void *obj);                              //object constructor callback, supplied by the user

/* 5) cache creation/removal */
	const char *name;                                  //name of this kmem_cache
	struct list_head next;                             //links all kmem_caches together

	/*
	 * We put nodelists[] at the end of kmem_cache, because we want to size
	 * this array to nr_node_ids slots instead of MAX_NUMNODES
	 * (see kmem_cache_init())
	 * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
	 * is statically defined, so we reserve the max number of nodes.
	 */
	struct kmem_list3 *nodelists[MAX_NUMNODES];       //the per-node "three lists"; MAX_NUMNODES is 1 on UMA
	/*
	 * Do not add fields after nodelists[]
	 */
};
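A note on reciprocal_buffer_size: storing a precomputed reciprocal lets obj_to_index() turn an object offset into a slab index with a multiply and a shift instead of a hardware divide. Below is a small userspace sketch of the trick in the spirit of the kernel's lib/reciprocal_div.c; treat the exact formula as my assumption rather than a verbatim copy.

#include <stdint.h>
#include <stdio.h>

/* Precompute ceil(2^32 / k); done once when the cache is created. */
static uint32_t reciprocal_value(uint32_t k)
{
    return (uint32_t)(((1ULL << 32) + k - 1) / k);
}

/* a / k becomes a multiply and a shift: (a * R) >> 32. */
static uint32_t reciprocal_divide(uint32_t a, uint32_t r)
{
    return (uint32_t)(((uint64_t)a * r) >> 32);
}

int main(void)
{
    uint32_t buffer_size = 192;                 /* hypothetical object size */
    uint32_t r = reciprocal_value(buffer_size);
    uint32_t offset = 5 * buffer_size;          /* offset of the 6th object in a slab */
    printf("index = %u\n", reciprocal_divide(offset, r)); /* prints 5 */
    return 0;
}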
/*
 * The slab lists for all objects.
 */
//links the three kinds of slab lists of a node
struct kmem_list3 {
	struct list_head slabs_partial;	/* partial list first, better asm code *///slabs with some objects in use
	struct list_head slabs_full;	//slabs with every object in use
	struct list_head slabs_free;    //slabs with no object in use
	unsigned long free_objects;     //number of free objects on this node
	unsigned int free_limit;        //upper bound on the number of free objects kept
	unsigned int colour_next;	/* Per-node cache coloring *///colour to use for the next slab
	spinlock_t list_lock;
	struct array_cache *shared;	/* shared per node */   //array_cache shared by the cpus of a node
	struct array_cache **alien;	/* on other nodes */
	unsigned long next_reap;	/* updated without locking */
	int free_touched;		/* updated without locking */
};
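When a per-cpu array runs dry, cache_alloc_refill() walks these lists in a fixed order: partial slabs first (so slabs fill up and stay dense), then free slabs, and only when both are empty does it grow the cache with fresh pages. A minimal sketch of that policy; the struct and function names below are mine, not the kernel's.

#include <stdio.h>

/* Simplified view of a node's lists: we only track whether each list is
 * empty, where the real kmem_list3 links struct slab entries. */
struct node_lists {
    int partial_nonempty;
    int free_nonempty;
};

enum source { FROM_PARTIAL, FROM_FREE, MUST_GROW };

/* Mirror of the refill order: partial first, then free, else grow. */
static enum source pick_source(const struct node_lists *n)
{
    if (n->partial_nonempty)
        return FROM_PARTIAL;
    if (n->free_nonempty)
        return FROM_FREE;
    return MUST_GROW;
}

int main(void)
{
    struct node_lists n = { .partial_nonempty = 0, .free_nonempty = 1 };
    printf("source = %d\n", pick_source(&n)); /* FROM_FREE */
    return 0;
}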
/*
 * bootstrap: The caches do not work without cpuarrays anymore, but the
 * cpuarrays are allocated from the generic caches...
 */
#define BOOT_CPUCACHE_ENTRIES	1
struct arraycache_init {
	struct array_cache cache;
	void *entries[BOOT_CPUCACHE_ENTRIES];
};
struct array_cache {
	unsigned int avail;        //number of object pointers currently in entry[]
	unsigned int limit;        //capacity of entry[]
	unsigned int batchcount;   //objects to refill or drain at a time
	unsigned int touched;      //set when recently used; consulted by the reaper
	spinlock_t lock;
	void *entry[];	/*
			 * Must have this definition in here for the proper
			 * alignment of array_cache. Also simplifies accessing
			 * the entries.
			 */
};
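entry[] behaves as a LIFO stack of object pointers, which is exactly what makes the hot path O(1): a free pushes at entry[avail++], an allocation pops at entry[--avail] and tends to return a cache-hot object. A minimal userspace sketch of that fast path; refill and flushing to the slab lists are only hinted at in comments.

#include <stddef.h>
#include <stdio.h>

#define LIMIT 8

/* Toy stand-in for struct array_cache's hot path. */
struct toy_cache {
    unsigned int avail;
    void *entry[LIMIT];
};

/* Fast-path free: push the object; O(1). */
static int toy_free(struct toy_cache *ac, void *obj)
{
    if (ac->avail >= LIMIT)
        return -1;   /* real code drains batchcount objects to the slab lists */
    ac->entry[ac->avail++] = obj;
    return 0;
}

/* Fast-path alloc: pop the most recently freed, cache-hot object; O(1). */
static void *toy_alloc(struct toy_cache *ac)
{
    if (ac->avail == 0)
        return NULL; /* real code refills batchcount objects from the slab lists */
    return ac->entry[--ac->avail];
}

int main(void)
{
    struct toy_cache ac = { 0 };
    int x;
    toy_free(&ac, &x);
    printf("got %p, expected %p\n", toy_alloc(&ac), (void *)&x);
    return 0;
}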
The analysis below focuses on the UMA case.
/*
 * Initialisation.  Called after the page allocator have been initialised and
 * before smp_init().
 * Bootstraps cache_cache (the cache of kmem_cache objects) and the general
 * kmalloc caches.
 */
void kmem_cache_init(void)
{
	size_t left_over;           //bytes left over per slab
	struct cache_sizes *sizes;  //size table for the initial kmalloc caches
	struct cache_names *names;  //name table for the initial kmalloc caches
	int i;
	int order;
	int node;
	//non-NUMA build
	/*if (num_possible_nodes() == 1) {
		use_alien_caches = 0;
		numa_platform = 0;
	}*/
	//NUM_INIT_LISTS is 3 * MAX_NUMNODES, i.e. 3 on UMA
	for (i = 0; i < NUM_INIT_LISTS; i++) {
		kmem_list3_init(&initkmem_list3[i]);
		if (i < MAX_NUMNODES)
			cache_cache.nodelists[i] = NULL;   //cache_cache is the slab cache that holds struct kmem_cache objects
	}
	set_up_list3s(&cache_cache, CACHE_CACHE);

	/*
	 * Fragmentation resistance on low memory - only use bigger
	 * page orders on machines with more than 32MB of memory.
	 */
	if (num_physpages > (32 << 20) >> PAGE_SHIFT)
		slab_break_gfp_order = BREAK_GFP_ORDER_HI;

	/* Bootstrap is tricky, because several objects are allocated
	 * from caches that do not exist yet:
	 * 1) initialize the cache_cache cache: it contains the struct
	 *    kmem_cache structures of all caches, except cache_cache itself:
	 *    cache_cache is statically allocated.
	 *    Initially an __init data area is used for the head array and the
	 *    kmem_list3 structures, it's replaced with a kmalloc allocated
	 *    array at the end of the bootstrap.
	 * 2) Create the first kmalloc cache.
	 *    The struct kmem_cache for the new cache is allocated normally.
	 *    An __init data area is used for the head array.
	 * 3) Create the remaining kmalloc caches, with minimally sized
	 *    head arrays.
	 * 4) Replace the __init data head arrays for cache_cache and the first
	 *    kmalloc cache with kmalloc allocated arrays.
	 * 5) Replace the __init data for kmem_list3 for cache_cache and
	 *    the other cache's with kmalloc allocated memory.
	 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
	 */

	node = numa_node_id();  //node = 0

	/* 1) create the cache_cache */
	INIT_LIST_HEAD(&cache_chain);   //initialize the head of the kmem_cache list
	list_add(&cache_cache.next, &cache_chain);//add cache_cache itself to the list
	cache_cache.colour_off = cache_line_size(); //colour offset of cache_cache: the cache line size
	//point cache_cache's per-cpu array at the statically allocated bootstrap array
	cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
	//point cache_cache's three lists at the statically allocated bootstrap lists
	cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];

	/*
	 * struct kmem_cache size depends on nr_node_ids, which
	 * can be less than MAX_NUMNODES.
	 */
	 //object size of cache_cache, i.e. the size of struct kmem_cache
	cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
				 1 * sizeof(struct kmem_list3 *);

	//round the object size up to a cache line boundary
	cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
					cache_line_size());
	//reciprocal of the kmem_cache size
	//cache_cache.reciprocal_buffer_size =
	//	reciprocal_value(cache_cache.buffer_size);
    //compute the page order per slab, objects per slab and leftover bytes
	for (order = 0; order < MAX_ORDER; order++) {
		cache_estimate(order, cache_cache.buffer_size,
			cache_line_size(), 0, &left_over, &cache_cache.num);
		if (cache_cache.num)
			break;
	}
	//BUG_ON(!cache_cache.num);
	cache_cache.gfporder = order;
	//the number of colours is the leftover bytes divided by the cache line
	//size: each slab's payload starts at a different cache-line-sized offset,
	//and the offsets can consume at most the leftover bytes
	cache_cache.colour = left_over / cache_cache.colour_off;
    //slab_size is the size of the management area: struct slab itself plus the
    //object index array; kmem_bufctl_t entries chain free objects by index, e.g. a[1] = 2
	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
				      sizeof(struct slab), cache_line_size());
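	/*
	 * Worked colouring example (hypothetical numbers, not computed from
	 * this kernel): if cache_estimate leaves left_over = 200 bytes and
	 * colour_off = 64 (the cache line size), then colour = 200 / 64 = 3.
	 * Successive slabs of this cache place their first object at offsets
	 * 0, 64 and 128, then colour_next wraps back to 0.  Staggering slab
	 * starts across cache lines keeps objects at the same slab-relative
	 * position from always competing for the same cache sets.
	 */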

	/* 2+3) create the kmalloc caches */
	sizes = malloc_sizes;
	names = cache_names;

	/*
	 * Initialize the caches that provide memory for the array cache and the
	 * kmem_list3 structures first.  Without this, further allocations will
	 * bug.
	 */
    //create the cache backing struct arraycache_init (32-byte objects here)
	sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
					sizes[INDEX_AC].cs_size,
					ARCH_KMALLOC_MINALIGN,
					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
					NULL);
	printf("sizes[INDEX_AC].cs_cachep:0x%p\n", sizes[INDEX_AC].cs_cachep);
    //create the cache backing struct kmem_list3 (64-byte objects here)
	if (INDEX_AC != INDEX_L3) {
		sizes[INDEX_L3].cs_cachep =
			kmem_cache_create(names[INDEX_L3].name,
				sizes[INDEX_L3].cs_size,
				ARCH_KMALLOC_MINALIGN,
				ARCH_KMALLOC_FLAGS|SLAB_PANIC,
				NULL);
	}

    //early slab bootstrap is finished
	slab_early_init = 0;
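	/*
	 * malloc_sizes[] is generated from kmalloc_sizes.h; it looks roughly
	 * like (exact sizes depend on the architecture and cache line size):
	 *   { 32, ... }, { 64, ... }, { 128, ... }, ..., { ULONG_MAX, ... }
	 * The ULONG_MAX entry is the sentinel that terminates the loop below.
	 */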

	while (sizes->cs_size != ULONG_MAX) {
		/*
		 * For performance, all the general caches are L1 aligned.
		 * This should be particularly beneficial on SMP boxes, as it
		 * eliminates "false sharing".
		 * Note for systems short on memory removing the alignment will
		 * allow tighter packing of the smaller caches.
		 */
		if (!sizes->cs_cachep) {
			sizes->cs_cachep = kmem_cache_create(names->name,
					sizes->cs_size,
					ARCH_KMALLOC_MINALIGN,
					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
					NULL);
		}
		sizes++;
		names++;
	}
	/* 4) Replace the bootstrap head arrays */
	//replace cache_cache's bootstrap array_cache: swap the static __init area for slab-managed memory
	{
		struct array_cache *ptr;

		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);

		//local_irq_disable();
		//BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
		memcpy(ptr, cpu_cache_get(&cache_cache),
		       sizeof(struct arraycache_init));
		/*
		 * Do not assume that spinlocks can be initialized via memcpy:
		 */
	//	spin_lock_init(&ptr->lock);

		cache_cache.array[smp_processor_id()] = ptr;
	//	local_irq_enable();

		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);

		//local_irq_disable();
	//	BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
		//       != &initarray_generic.cache);
		//replace the array_cache of the INDEX_AC cache, i.e. the 32-byte cache
		memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
		       sizeof(struct arraycache_init));
		/*
		 * Do not assume that spinlocks can be initialized via memcpy:
		 */
	//	spin_lock_init(&ptr->lock);

		malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
		    ptr;
		//local_irq_enable();
	}
	/* 5) Replace the bootstrap kmem_list3's */
	{
		int nid;

		for(nid = 0; nid < 1; nid++) {
			init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);

			init_list(malloc_sizes[INDEX_AC].cs_cachep,
				  &initkmem_list3[SIZE_AC + nid], nid);

			if (INDEX_AC != INDEX_L3) {
				init_list(malloc_sizes[INDEX_L3].cs_cachep,
					  &initkmem_list3[SIZE_L3 + nid], nid);
			}
		}
	}

	/* 6) resize the head arrays to their final sizes */
	{
		struct kmem_cache *cachep;
		//mutex_lock(&cache_chain_mutex);
		list_for_each_entry(cachep, &cache_chain, next) {
		    if (enable_cpucache(cachep))
				assert(0);
		}
		//mutex_unlock(&cache_chain_mutex);
	}

	/* Annotate slab for lockdep -- annotate the malloc caches */
//	init_lock_keys();


	/* Done! */
	g_cpucache_up = FULL;

	/*
	 * Register a cpu startup notifier callback that initializes
	 * cpu_cache_get for all new cpus
	 */
//	register_cpu_notifier(&cpucache_notifier);

	/*
	 * The reap timers are started later, with a module init call: That part
	 * of the kernel is not yet operational.
	 */
}
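Step 6 is what finally gives every cache a properly sized per-cpu array. In 2.6.30, enable_cpucache picks the array limit from the object size and sets batchcount to about half of it; the thresholds below are copied from my reading of mm/slab.c and should be treated as approximate. A userspace sketch:

#include <stdio.h>

#define PAGE_SIZE 4096

/* Approximation of the sizing heuristic in enable_cpucache(): big objects
 * get tiny per-cpu arrays, small objects get large ones. */
static void pick_tunables(unsigned int buffer_size,
                          unsigned int *limit, unsigned int *batchcount)
{
    if (buffer_size > 131072)
        *limit = 1;
    else if (buffer_size > PAGE_SIZE)
        *limit = 8;
    else if (buffer_size > 1024)
        *limit = 24;
    else if (buffer_size > 256)
        *limit = 54;
    else
        *limit = 120;
    *batchcount = (*limit + 1) / 2;   /* refill/drain half the array at a time */
}

int main(void)
{
    unsigned int limit, batch;
    pick_tunables(32, &limit, &batch);
    printf("32-byte cache: limit=%u batchcount=%u\n", limit, batch); /* 120, 60 */
    return 0;
}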