blk-mq编程,主要要调用两个函数进行初始化工作,blk_mq_init_queue这是第二个。该函数先是申请了struct request_queue结构,这个请求队列后面用于赋值给磁盘那个结构体的相应成员。
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
{struct request_queue *uninit_q, *q;//分配struct request_queue并初始化uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node, NULL);if (!uninit_q)return ERR_PTR(-ENOMEM);/*1:分配每个cpu专属的软件队列并初始化2:分配硬件队列,并初始化3:建立软件队列和硬件队列的联系*/q = blk_mq_init_allocated_queue(set, uninit_q);if (IS_ERR(q))blk_cleanup_queue(uninit_q);return q;
}
EXPORT_SYMBOL(blk_mq_init_queue);
blk_mq_init_allocated_queue函数分析
一眼望过去,确实有点复杂,不过,多看几遍就好了,
这里面,主要就是给struct request_queue *q结构体里面的成员变量赋值的,简单的变量赋值就不分析了,看看它调用的函数进行分析。
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q)
{/* mark the queue as mq asap */q->mq_ops = set->ops;q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn, blk_mq_poll_stats_bkt, BLK_MQ_POLL_STATS_BKTS, q);if (!q->poll_cb)goto err_exit;q->queue_ctx = alloc_percpu(struct blk_mq_ctx);//percpu变量 软件队列if (!q->queue_ctx)goto err_exit;/* init q->mq_kobj and sw queues' kobjects */blk_mq_sysfs_init(q); //主要是初始化kobject变量//二级指针 硬件队列q->queue_hw_ctx = kcalloc_node(nr_cpu_ids, sizeof(*(q->queue_hw_ctx)), GFP_KERNEL, set->numa_node);if (!q->queue_hw_ctx)goto err_percpu;//赋值q->mq_map,这个数组保存了每个CPU对应的硬件队列编号q->mq_map = set->mq_map;blk_mq_realloc_hw_ctxs(set, q);if (!q->nr_hw_queues)goto err_hctxs;/*定时器初始化,设置超时时间*/INIT_WORK(&q->timeout_work, blk_mq_timeout_work);blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);q->nr_queues = nr_cpu_ids;q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;if (!(set->flags & BLK_MQ_F_SG_MERGE))queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);q->sg_reserved_size = INT_MAX;INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);INIT_LIST_HEAD(&q->requeue_list);spin_lock_init(&q->requeue_lock);blk_queue_make_request(q, blk_mq_make_request);if (q->mq_ops->poll)q->poll_fn = blk_mq_poll;/** Do this after blk_queue_make_request() overrides it...*/q->nr_requests = set->queue_depth; //防止被覆盖/** Default to classic polling*/q->poll_nsec = -1;if (set->ops->complete)blk_queue_softirq_done(q, set->ops->complete);blk_mq_init_cpu_queues(q, set->nr_hw_queues);blk_mq_add_queue_tag_set(set, q);blk_mq_map_swqueue(q);if (!(set->flags & BLK_MQ_F_NO_SCHED)) {int ret;ret = elevator_init_mq(q);if (ret)return ERR_PTR(ret);}return q;
err_hctxs:kfree(q->queue_hw_ctx);
err_percpu:free_percpu(q->queue_ctx);
err_exit:q->mq_ops = NULL;return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(blk_mq_init_allocated_queue);
blk_stat_alloc_callback
这个函数一看也没什么分析的,主要也是给poll_cb进行赋值,采用了很多的默认函数进行赋值。
struct blk_stat_callback *
blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),int (*bucket_fn)(const struct request *), unsigned int buckets, void *data)
{struct blk_stat_callback *cb;cb = kmalloc(sizeof(*cb), GFP_KERNEL);if (!cb)return NULL;cb->stat = kmalloc_array(buckets, sizeof(struct blk_rq_stat), GFP_KERNEL);if (!cb->stat) {kfree(cb);return NULL;}cb->cpu_stat = __alloc_percpu(buckets * sizeof(struct blk_rq_stat), __alignof__(struct blk_rq_stat));if (!cb->cpu_stat) {kfree(cb->stat);kfree(cb);return NULL;}cb->timer_fn = timer_fn;cb->bucket_fn = bucket_fn;cb->data = data;cb->buckets = buckets;timer_setup(&cb->timer, blk_stat_timer_fn, 0);return cb;
}
EXPORT_SYMBOL_GPL(blk_stat_alloc_callback);
blk_mq_sysfs_init
接着到这个函数,也是变量的初始化工作,struct request_queue队列里面的kobject变量初始化,以及取出在每一个cpu上q->queue_ctx结构体,然后对齐成员kobject变量进行初始化。
void blk_mq_sysfs_init(struct request_queue *q)
{struct blk_mq_ctx *ctx;int cpu;kobject_init(&q->mq_kobj, &blk_mq_ktype);for_each_possible_cpu(cpu) {ctx = per_cpu_ptr(q->queue_ctx, cpu);//返回每个cpu上的q->queue_ctx变量的首地址kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);}
}
blk_mq_realloc_hw_ctxs
这个函数相对来说比较重要。后面再补充。
在这里插入代码片
blk_queue_make_request
这个函数也是给q的其成员赋值的,先大概熟悉一下,如果有实际的调试分析,需要了解某个参数的值,到时再回来看吧。
void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
{/** set defaults*/q->nr_requests = BLKDEV_MAX_RQ; //这个值后面会重新赋值进行覆盖q->make_request_fn = mfn;blk_queue_dma_alignment(q, 511);blk_queue_congestion_threshold(q);q->nr_batching = BLK_BATCH_REQ;blk_set_default_limits(&q->limits);
}
EXPORT_SYMBOL(blk_queue_make_request);
blk_queue_softirq_done
也是赋值,IO操作完成时,会调用这个回调函数。
void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
{q->softirq_done_fn = fn;
}
EXPORT_SYMBOL(blk_queue_softirq_done);
blk_mq_init_cpu_queues
static void blk_mq_init_cpu_queues(struct request_queue *q, unsigned int nr_hw_queues)
{unsigned int i;for_each_possible_cpu(i) { //硬件队列数/*返回这个变量在编号为i的cpu上的起始地址*/struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);struct blk_mq_hw_ctx *hctx;//其成员做一些赋值操作__ctx->cpu = i;spin_lock_init(&__ctx->lock);INIT_LIST_HEAD(&__ctx->rq_list);__ctx->queue = q;/** Set local node, IFF we have more than one hw queue. If* not, we remain on the home node of the device*/hctx = blk_mq_map_queue(q, i); //取出每个cpu上的硬件队列if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)hctx->numa_node = local_memory_node(cpu_to_node(i));}
}
blk_mq_add_queue_tag_set
blk_mq_map_swqueue