bio子系统学习一:
主要源码目录 :block/bio.c include/linux/bio.h
内核版本:4.19.1
这部分先总结下bio子系统的初始化部分,后续再总结bio的申请以及释放,还有其它api的使用等介绍。
bio涉及频繁的内存申请和释放,既包括bio结构体本身,也包括它比较重要的成员bi_io_vec所指向的bio_vec数组。所以初始化部分主要是相关内存池的初始化,用于后面bio和bio_vec的内存申请和释放。
其中bio结构体和bio_vec结构体定义如下:
/** main unit of I/O for the block layer and lower layers (ie drivers and* stacking drivers)*/
struct bio {struct bio *bi_next; /* request queue link */struct gendisk *bi_disk;unsigned int bi_opf; /* bottom bits req flags,* top bits REQ_OP. Use* accessors.*/unsigned short bi_flags; /* status, etc and bvec pool number */unsigned short bi_ioprio;unsigned short bi_write_hint;blk_status_t bi_status;u8 bi_partno;/* Number of segments in this BIO after* physical address coalescing is performed.*/unsigned int bi_phys_segments;/** To keep track of the max segment size, we account for the* sizes of the first and last mergeable segments in this bio.*/unsigned int bi_seg_front_size;unsigned int bi_seg_back_size;struct bvec_iter bi_iter; //用来遍历bvec,标记bio的处理进度atomic_t __bi_remaining;bio_end_io_t *bi_end_io;void *bi_private;
#ifdef CONFIG_BLK_CGROUP/** Optional ioc and css associated with this bio. Put on bio* release. Read comment on top of bio_associate_current().*/struct io_context *bi_ioc;struct cgroup_subsys_state *bi_css;struct blkcg_gq *bi_blkg;struct bio_issue bi_issue;
#endifunion {
#if defined(CONFIG_BLK_DEV_INTEGRITY)struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif};unsigned short bi_vcnt; /* how many bio_vec's *//** Everything starting with bi_max_vecs will be preserved by bio_reset()*/unsigned short bi_max_vecs; /* max bvl_vecs we can hold */atomic_t __bi_cnt; /* pin count */struct bio_vec *bi_io_vec; /* the actual vec list */struct bio_set *bi_pool;/** We can inline a number of vecs at the end of the bio, to avoid* double allocations for a small number of bio_vecs. This member* MUST obviously be kept at the very end of the bio.*/struct bio_vec bi_inline_vecs[0];
};
/*
 * One segment descriptor: a page pointer together with a length and an
 * offset.
 *
 * was unsigned short, but we might as well be ready for > 64kB I/O pages
 */
struct bio_vec {
	struct page	*bv_page;
	unsigned int	bv_len;
	unsigned int	bv_offset;
};
入口函数init_bio:
其中,bio_integrity_init函数和bioset_integrity_create函数先不看吧。
//这个是涉及到的结构体定义
/*
 * One entry of the global bio_slabs table: a slab cache used for struct bio
 * allocations of one particular object size.
 */
struct bio_slab {
	struct kmem_cache *slab;	/* cache descriptor describing the slab */
	unsigned int slab_ref;		/* reference count of this bio_slab */
	unsigned int slab_size;		/* size of one object in the cache */
	char name[8];			/* cache name, as listed in /proc/slabinfo */
};

/* Serializes lookups and updates of the bio_slabs table. */
static DEFINE_MUTEX(bio_slab_lock);
static struct bio_slab *bio_slabs;
/* bio_slab_nr: entries handed out so far; bio_slab_max: table capacity. */
static unsigned int bio_slab_nr, bio_slab_max;

/*
 * Further reading:
 * 1: https://zhuanlan.zhihu.com/p/596543999?utm_id=0
 * 2: https://blog.csdn.net/geshifei/article/details/119959905
 */
/*
 * Boot-time initialization of the bio subsystem: allocates the bio_slabs
 * table, sets up the bio_vec slab caches and initializes fs_bio_set.
 * Panics if any allocation fails; otherwise returns 0.
 */
static int __init init_bio(void)
{
	/* Global table; starts with room for two entries, all zeroed by kcalloc. */
	bio_slab_max = 2;
	bio_slab_nr = 0;
	bio_slabs = kcalloc(bio_slab_max, sizeof(struct bio_slab),
			    GFP_KERNEL);
	if (!bio_slabs)
		panic("bio: can't allocate bios\n");

	/* Integrity support is not covered in this note. */
	bio_integrity_init();

	/* Create the slab caches for bio_vec arrays, tracked in bvec_slabs[]. */
	biovec_init_slabs();

	/*
	 * Initialize fs_bio_set.
	 *
	 * arg 1: the bio_set to initialize.
	 * arg 2: minimum number of bios kept in the pool (BIO_POOL_SIZE, 2).
	 * arg 3: size of the extra data placed in front of struct bio;
	 *        front_pad + sizeof(struct bio) (plus the inline vecs) is the
	 *        object size of the cache. Bios from fs_bio_set carry no
	 *        front data, hence 0.
	 * arg 4: flags.
	 *        BIOSET_NEED_BVECS: also create a separate pool for
	 *        allocating iovecs; fs_bio_set needs this.
	 *        BIOSET_NEED_RESCUER: create a workqueue whose handler is
	 *        bio_alloc_rescue. When a bio cannot be obtained from the
	 *        pool, that work item submits the bios parked on a list by
	 *        stacking devices (e.g. raid) so that bios get recycled.
	 *        Non-stacking users do not need it.
	 */
	if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
		panic("bio: can't allocate bios\n");

	/* Integrity support is not covered in this note. */
	if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
		panic("bio: can't create integrity pool\n");

	return 0;
}
subsys_initcall(init_bio);
这里先分析下bio_slabs这个全局数组。从变量名不难看出,它用于后续申请bio结构体。初始化时该数组只有两个元素(bio_slab_max = 2),且各元素的成员都被kcalloc清零,还没有指向任何slab;到实际使用时成员才会被真正初始化,必要时数组还会扩容(容量翻倍),元素个数也就不再是2了(见bio_find_or_create_slab函数)。所以申请bio时,按照该算法的设计,有可能从数组中任意一个元素对应的slab中申请内存。
biovec_init_slabs函数分析
从函数名不难看出,这个函数为后续申请bio_vec相关内存做初始化:它遍历bvec_slabs数组,为其中较大的规格创建对应的slab缓存。
#define BIO_INLINE_VECS 4

/*
 * Notes:
 * 1: https://blog.csdn.net/sinat_32960911/article/details/132237781
 * 2: __read_mostly is a kernel annotation for data that is read often
 *    and rarely written; it is used here as a performance hint.
 *
 * For reference, the element type is:
 *
 *	struct biovec_slab {
 *		int nr_vecs;
 *		char *name;
 *		struct kmem_cache *slab;
 *	};
 */
#define BV(x, n) { .nr_vecs = (x), .name = "biovec-"#n }
static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
	BV(1, 1),
	BV(4, 4),
	BV(16, 16),
	BV(64, 64),
	BV(128, 128),
	BV(BIO_MAX_PAGES, max),	/* treat BIO_MAX_PAGES as 256 */
};
#undef BV //取消该宏定义static void __init biovec_init_slabs(void)
{int i;for (i = 0; i < BVEC_POOL_NR; i++) { //6int size;struct biovec_slab *bvs = bvec_slabs + i; //指向结构体数组的首地址if (bvs->nr_vecs <= BIO_INLINE_VECS) { //4, 前面两个成员bvs->slab = NULL; continue;}/*这个size比较关键*/size = bvs->nr_vecs * sizeof(struct bio_vec);bvs->slab = kmem_cache_create(bvs->name, size, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);}
}
bioset_init函数分析
这里比较重要的是调用的bio_find_or_create_slab函数,其余的就是对fs_bio_set这个结构体的一些成员变量进行初始化。
struct bio_set {struct kmem_cache *bio_slab;unsigned int front_pad;mempool_t bio_pool;mempool_t bvec_pool;
#if defined(CONFIG_BLK_DEV_INTEGRITY)mempool_t bio_integrity_pool;mempool_t bvec_integrity_pool;
#endif//Deadlock avoidance for stacking block drivers: see comments in bio_alloc_bioset() for detailsspinlock_t rescue_lock;struct bio_list rescue_list;struct work_struct rescue_work;struct workqueue_struct *rescue_workqueue;
};/** fs_bio_set is the bio_set containing bio and iovec memory pools used by* IO code that does not need private memory pools.*/
struct bio_set fs_bio_set;
EXPORT_SYMBOL(fs_bio_set);int bioset_init(struct bio_set *bs,unsigned int pool_size, unsigned int front_pad, int flags)
{//sizeof(bio)的多余部分unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec); //4 * sizeof(struct bio_vec)bs->front_pad = front_pad; //0spin_lock_init(&bs->rescue_lock); //自旋锁初始化bio_list_init(&bs->rescue_list); //bio_list 初始化INIT_WORK(&bs->rescue_work, bio_alloc_rescue); //工作队列初始化/*返回的这个值,也记录在这个bio_slabs全局数组里了看函数名称,bio当中寻找slab或者创建一个slab*/bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); //0+back_padif (!bs->bio_slab)return -ENOMEM;/*bio_pool的初始化,后面分配内存使用mempool_allocpool_size内存池至少保留两个元素mempool_init(pool, min_nr, mempool_alloc_slab, mempool_free_slab, (void *) kc);后续申请内存走的是这个路线 -> mempool_alloc_slab -> kmem_cache_alloc*/if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab))//2goto bad;//1 & (1 << 0)if ((flags & BIOSET_NEED_BVECS) && biovec_init_pool(&bs->bvec_pool, pool_size)) //2goto bad;if (!(flags & BIOSET_NEED_RESCUER)) // 1 & (1 << 1) -> 0001 & 0010return 0;bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); //申请一个工作队列if (!bs->rescue_workqueue)goto bad;return 0;
bad:bioset_exit(bs);return -ENOMEM;
}
EXPORT_SYMBOL(bioset_init);
bio_find_or_create_slab函数分析
static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
{unsigned int sz = sizeof(struct bio) + extra_size; struct kmem_cache *slab = NULL;struct bio_slab *bslab, *new_bio_slabs;unsigned int new_bio_slab_max;unsigned int i, entry = -1;mutex_lock(&bio_slab_lock);/*遍历bio_slabs数组,若某个bio_slab->slab_size等于sz,那么就选择这个bio_slab.*/i = 0;while (i < bio_slab_nr) { //这个一开始是0bslab = &bio_slabs[i];//若bio_slabs有空闲未用的bio_slab,选择该bio_slabif (!bslab->slab && entry == -1)entry = i;else if (bslab->slab_size == sz) {slab = bslab->slab;bslab->slab_ref++;break;}i++;}//找到bio_slab了if (slab)goto out_unlock;/*经过上面两步依然未找到可用的bio_slab,通过krealloc将bio_slabs数组长度扩大一倍,并选择其中一个空闲的bio_slab。*/if (bio_slab_nr == bio_slab_max && entry == -1) {new_bio_slab_max = bio_slab_max << 1; //相当于乘以2new_bio_slabs = krealloc(bio_slabs, new_bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);if (!new_bio_slabs)goto out_unlock;bio_slab_max = new_bio_slab_max;bio_slabs = new_bio_slabs;}if (entry == -1)entry = bio_slab_nr++;bslab = &bio_slabs[entry];snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN, SLAB_HWCACHE_ALIGN, NULL);if (!slab)goto out_unlock;bslab->slab = slab;bslab->slab_ref = 1;bslab->slab_size = sz;
out_unlock:mutex_unlock(&bio_slab_lock);return slab;
}
其初始化过程基本就是这样了,如果对内存池用的不多也不要紧,可以看看相关源码,也不多。
总结:初始化部分如果只单独看,会比较晦涩;等后面涉及到bio的申请和释放时,再回头来看初始化部分,就清晰许多了。