目录
- Linux设备驱动模型(以PCI/PCIe为例)
- 前期构建准备:setup_machine_fdt
- 设备树解析:unflatten_device_tree
- 总线模型:of_platform_default_populate_init
- 设备初始化:platform_driver_probe
- 总线初始化:pci_driver_init
- smmu对pci虚拟化的支持
Linux设备驱动模型(以PCI/PCIe为例)
本文以
PCI/PCIe
为切入点分析Linux
设备驱动模型,并且以SMMU
技术来分析PCI
虚拟化的应用。关于
UEFI/BIOS
和BootLoader
对PCI/PCIe
链路的初始化过程此处不做分析,可自行查看edk2和uboot源码
此处以qcom/sm8250.dtsi
为例
// linux-5.14.5/arch/arm64/boot/dts/qcom/sm8250.dtsipcie2: pci@1c10000 {compatible = "qcom,pcie-sm8250", "snps,dw-pcie";reg = <0 0x01c10000 0 0x3000>,<0 0x64000000 0 0xf1d>,<0 0x64000f20 0 0xa8>,<0 0x64001000 0 0x1000>,<0 0x64100000 0 0x100000>;reg-names = "parf", "dbi", "elbi", "atu", "config";device_type = "pci";linux,pci-domain = <2>;bus-range = <0x00 0xff>;num-lanes = <2>;#address-cells = <3>;#size-cells = <2>;ranges = <0x01000000 0x0 0x64200000 0x0 0x64200000 0x0 0x100000>,<0x02000000 0x0 0x64300000 0x0 0x64300000 0x0 0x3d00000>;interrupts = <GIC_SPI 236 IRQ_TYPE_EDGE_RISING>;interrupt-names = "msi";#interrupt-cells = <1>;interrupt-map-mask = <0 0 0 0x7>;interrupt-map = <0 0 0 1 &intc 0 290 IRQ_TYPE_LEVEL_HIGH>, /* int_a */<0 0 0 2 &intc 0 415 IRQ_TYPE_LEVEL_HIGH>, /* int_b */<0 0 0 3 &intc 0 416 IRQ_TYPE_LEVEL_HIGH>, /* int_c */<0 0 0 4 &intc 0 417 IRQ_TYPE_LEVEL_HIGH>; /* int_d */clocks = <&gcc GCC_PCIE_2_PIPE_CLK>,<&gcc GCC_PCIE_2_AUX_CLK>,<&gcc GCC_PCIE_2_CFG_AHB_CLK>,<&gcc GCC_PCIE_2_MSTR_AXI_CLK>,<&gcc GCC_PCIE_2_SLV_AXI_CLK>,<&gcc GCC_PCIE_2_SLV_Q2A_AXI_CLK>,<&gcc GCC_PCIE_MDM_CLKREF_EN>,<&gcc GCC_AGGRE_NOC_PCIE_TBU_CLK>,<&gcc GCC_DDRSS_PCIE_SF_TBU_CLK>;clock-names = "pipe","aux","cfg","bus_master","bus_slave","slave_q2a","ref","tbu","ddrss_sf_tbu";assigned-clocks = <&gcc GCC_PCIE_2_AUX_CLK>;assigned-clock-rates = <19200000>;iommus = <&apps_smmu 0x1d00 0x7f>;iommu-map = <0x0 &apps_smmu 0x1d00 0x1>,<0x100 &apps_smmu 0x1d01 0x1>;resets = <&gcc GCC_PCIE_2_BCR>;reset-names = "pci";power-domains = <&gcc PCIE_2_GDSC>;phys = <&pcie2_lane>;phy-names = "pciephy";perst-gpio = <&tlmm 85 GPIO_ACTIVE_LOW>;enable-gpio = <&tlmm 87 GPIO_ACTIVE_HIGH>;pinctrl-names = "default";pinctrl-0 = <&pcie2_default_state>;status = "disabled";};
前期构建准备:setup_machine_fdt
在start_kernel
过程中分析设备树文件,最终调用unflatten_device_tree
// linux-5.14.5/arch/arm64/kernel/setup.cvoid __init __no_sanitize_address setup_arch(char **cmdline_p)
{setup_machine_fdt(__fdt_pointer);// .../* Parse the ACPI tables for possible boot-time configuration */acpi_boot_table_init();if (acpi_disabled)unflatten_device_tree();
在setup_machine_fdt
中(注意:下面引用的是arch/arm 下的实现;arm64 的同名函数位于arch/arm64/kernel/setup.c ,经由early_init_dt_scan 完成同样的校验与节点扫描)
// linux-5.14.5/arch/arm/kernel/devtree.cconst struct machine_desc * __init setup_machine_fdt(void *dt_virt)
{const struct machine_desc *mdesc, *mdesc_best = NULL;if (!dt_virt || !early_init_dt_verify(dt_virt))return NULL;mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach);
在early_init_dt_verify
中会检查设备树的一些标识,并赋值全局变量initial_boot_params
等于设备树地址
// linux-5.14.5/drivers/of/fdt.cbool __init early_init_dt_verify(void *params)
{if (!params)return false;/* check device tree validity */if (fdt_check_header(params))return false;/* Setup flat device-tree pointer */initial_boot_params = params;of_fdt_crc32 = crc32_be(~0, initial_boot_params,fdt_totalsize(initial_boot_params));return true;
}
of_flat_dt_match_machine
用于读取compatible
属性,并对mdesc
变量(用于描述硬件单板信息)进行赋值
// linux-5.14.5/drivers/of/fdt.cconst void * __init of_flat_dt_match_machine(const void *default_match,const void * (*get_next_compat)(const char * const**))
{// ...dt_root = of_get_flat_dt_root();// ...pr_info("Machine model: %s\n", of_flat_dt_get_machine_name());return best_data;
}
在内核输出信息如下
[ 0.000000] Machine model: Raspberry Pi 3 Model B
early_init_dt_scan_nodes
用于扫描设备树的各节点,可见其主要分析三个节点
early_init_dt_scan_chosen
early_init_dt_scan_root
early_init_dt_scan_memory
// linux-5.14.5/drivers/of/fdt.cvoid __init early_init_dt_scan_nodes(void)
{int rc = 0;/* Retrieve various information from the /chosen node */rc = of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);if (!rc)pr_warn("No chosen node found, continuing without\n");/* Initialize {size,address}-cells info */of_scan_flat_dt(early_init_dt_scan_root, NULL);/* Setup memory, calling early_init_dt_add_memory_arch */of_scan_flat_dt(early_init_dt_scan_memory, NULL);
}
并将chosen
信息存入boot_command_line
,作为启动参数,关于这三个节点,设备树中如下
// linux-5.14.5/arch/arm64/boot/dts/qcom/sm8250.dtsi/chosen { };memory@80000000 {device_type = "memory";/* We expect the bootloader to fill in the size */reg = <0x0 0x80000000 0x0 0x0>;};
关于chosen
的产生,有两种情况
uboot
基本上可以不通过显式的bootargs=xxx
来传递给内核,而是在env
拿出,并存放进设备树中的chosen
节点中;Linux
也开始在设备树中的chosen
节点中获取出来
而对于memory
节点,则是将其地址范围加入memblock
中进行管理,调用过程如下
// linux-5.14.5/drivers/of/fdt.cof_scan_flat_dt(early_init_dt_scan_memory, NULL);early_init_dt_add_memory_arch(base, size);memblock_add(base, size);
并且在setup_machine_fdt
之后就是memblock
系统的初始化
// linux-5.14.5/arch/arm64/kernel/setup.cvoid __init __no_sanitize_address setup_arch(char **cmdline_p)
{setup_machine_fdt(__fdt_pointer);arm64_memblock_init();
设备树解析:unflatten_device_tree
unflatten_device_tree
// linux-5.14.5/arch/arm64/kernel/setup.cvoid __init __no_sanitize_address setup_arch(char **cmdline_p)
{/* Parse the ACPI tables for possible boot-time configuration */acpi_boot_table_init();if (acpi_disabled)unflatten_device_tree();
of_root
是一个全局struct device_node
根节点,链接了所有的struct device_node
// linux-5.14.5/drivers/of/base.cstruct device_node *of_root;
initial_boot_params
前面已经指向了设备树地址
// linux-5.14.5/drivers/of/fdt.cvoid __init unflatten_device_tree(void)
{__unflatten_device_tree(initial_boot_params, NULL, &of_root,early_init_dt_alloc_memory_arch, false);/* Get pointer to "/chosen" and "/aliases" nodes for use everywhere */of_alias_scan(early_init_dt_alloc_memory_arch);unittest_unflatten_overlay_base();
}
如下,可见会进行两次扫描
- 第一次扫描只计算所有struct device_node 需要的空间,然后系统通过dt_alloc 申请这块空间(所有的节点都会形成struct device_node 结构)
- 第二次扫描进行解析,重点在此
// linux-5.14.5/drivers/of/fdt.cvoid *__unflatten_device_tree(const void *blob,struct device_node *dad,struct device_node **mynodes,void *(*dt_alloc)(u64 size, u64 align),bool detached)
{/* First pass, scan for size */size = unflatten_dt_nodes(blob, NULL, dad, NULL);mem = dt_alloc(size + 4, __alignof__(struct device_node));/* Second pass, do actual unflattening */ret = unflatten_dt_nodes(blob, mem, dad, mynodes);// ...
第二次的参数会传入参数mem
和mynodes
,populate_node
会填充节点,用第一次扫描申请的内存,为节点分配内存
// linux-5.14.5/drivers/of/fdt.cstatic int unflatten_dt_nodes(const void *blob,void *mem,struct device_node *dad,struct device_node **nodepp)
{for (offset = 0;offset >= 0 && depth >= initial_depth;offset = fdt_next_node(blob, offset, &depth)) {if (WARN_ON_ONCE(depth >= FDT_MAX_DEPTH))continue;if (!IS_ENABLED(CONFIG_OF_KOBJ) &&!of_fdt_device_is_available(blob, offset))continue;ret = populate_node(blob, offset, &mem, nps[depth],&nps[depth+1], dryrun);if (ret < 0)return ret;if (!dryrun && nodepp && !*nodepp)*nodepp = nps[depth+1];if (!dryrun && !root)
总线模型:of_platform_default_populate_init
入口函数为of_platform_default_populate_init
start_kernelarch_call_rest_initrest_initkernel_thread(kernel_init, NULL, CLONE_FS);kernel_init_freeabledo_basic_setup();do_initcalls()do_initcall_level(3)
do_initcall_level
会依次调用初始化级别为3
的函数,如下
// linux-5.14.5/init/main.cextern initcall_entry_t __initcall_start[];
extern initcall_entry_t __initcall0_start[];
extern initcall_entry_t __initcall1_start[];
extern initcall_entry_t __initcall2_start[];
extern initcall_entry_t __initcall3_start[];
extern initcall_entry_t __initcall4_start[];
extern initcall_entry_t __initcall5_start[];
extern initcall_entry_t __initcall6_start[];
extern initcall_entry_t __initcall7_start[];
extern initcall_entry_t __initcall_end[];static initcall_entry_t *initcall_levels[] __initdata = {__initcall0_start,__initcall1_start,__initcall2_start,__initcall3_start,__initcall4_start,__initcall5_start,__initcall6_start,__initcall7_start,__initcall_end,
};/* Keep these in sync with initcalls in include/linux/init.h */
static const char *initcall_level_names[] __initdata = {"pure","core","postcore","arch","subsys","fs","device","late",
};
而of_platform_default_populate_init
被定义为级别为arch(3)
// linux-5.14.5/drivers/of/platform.carch_initcall_sync(of_platform_default_populate_init);
// linux-5.14.5/include/linux/init.h#define arch_initcall_sync(fn) __define_initcall(fn, 3s)
进入到函数主体中,注意到他会先处理reserved_mem_matches
,即/reserved-memory
节点
// linux-5.14.5/drivers/of/platform.cstatic int __init of_platform_default_populate_init(void)
{struct device_node *node;device_links_supplier_sync_state_pause();if (!of_have_populated_dt())return -ENODEV;/** Handle certain compatibles explicitly, since we don't want to create* platform_devices for every node in /reserved-memory with a* "compatible",*/for_each_matching_node(node, reserved_mem_matches)of_platform_device_create(node, NULL, NULL);node = of_find_node_by_path("/firmware");if (node) {of_platform_populate(node, NULL, NULL, NULL);of_node_put(node);}/* Populate everything else. */of_platform_default_populate(NULL, NULL, NULL);return 0;
}
查看reserved_mem_matches
结构
// linux-5.14.5/drivers/of/platform.cstatic const struct of_device_id reserved_mem_matches[] = {{ .compatible = "qcom,rmtfs-mem" },{ .compatible = "qcom,cmd-db" },{ .compatible = "ramoops" },{ .compatible = "nvmem-rmem" },{}
};
我的linux-5.14.5/arch/arm64/boot/dts/qcom/sm8250.dtsi
的/reserved-memory
结构如下
// linux-5.14.5/arch/arm64/boot/dts/qcom/sm8250.dtsi/ {reserved-memory {#address-cells = <2>;#size-cells = <2>;ranges;hyp_mem: memory@80000000 {reg = <0x0 0x80000000 0x0 0x600000>;no-map;};xbl_aop_mem: memory@80700000 {reg = <0x0 0x80700000 0x0 0x160000>;no-map;};cmd_db: memory@80860000 {compatible = "qcom,cmd-db";reg = <0x0 0x80860000 0x0 0x20000>;no-map;};smem_mem: memory@80900000 {reg = <0x0 0x80900000 0x0 0x200000>;no-map;};removed_mem: memory@80b00000 {reg = <0x0 0x80b00000 0x0 0x5300000>;no-map;};camera_mem: memory@86200000 {reg = <0x0 0x86200000 0x0 0x500000>;no-map;};wlan_mem: memory@86700000 {reg = <0x0 0x86700000 0x0 0x100000>;no-map;};ipa_fw_mem: memory@86800000 {reg = <0x0 0x86800000 0x0 0x10000>;no-map;};ipa_gsi_mem: memory@86810000 {reg = <0x0 0x86810000 0x0 0xa000>;no-map;};gpu_mem: memory@8681a000 {reg = <0x0 0x8681a000 0x0 0x2000>;no-map;};npu_mem: memory@86900000 {reg = <0x0 0x86900000 0x0 0x500000>;no-map;};video_mem: memory@86e00000 {reg = <0x0 0x86e00000 0x0 0x500000>;no-map;};cvp_mem: memory@87300000 {reg = <0x0 0x87300000 0x0 0x500000>;no-map;};cdsp_mem: memory@87800000 {reg = <0x0 0x87800000 0x0 0x1400000>;no-map;};slpi_mem: memory@88c00000 {reg = <0x0 0x88c00000 0x0 0x1500000>;no-map;};adsp_mem: memory@8a100000 {reg = <0x0 0x8a100000 0x0 0x1d00000>;no-map;};spss_mem: memory@8be00000 {reg = <0x0 0x8be00000 0x0 0x100000>;no-map;};cdsp_secure_heap: memory@8bf00000 {reg = <0x0 0x8bf00000 0x0 0x4600000>;no-map;};};}
可见其会对compatible = "qcom,cmd-db"
的节点创建struct platform_device
,该节点需要做特殊处理,同时对其他非reserved_mem_matches
不会创建platform_device
节点
下面的/firmware
同理,填充其下的所有子节点
// linux-5.14.5/arch/arm64/boot/dts/qcom/sm8250.dtsifirmware {scm: scm {compatible = "qcom,scm";#reset-cells = <1>;};};
接下来就是
// linux-5.14.5/drivers/of/platform.c/* Populate everything else. */of_platform_default_populate(NULL, NULL, NULL);
// linux-5.14.5/drivers/of/platform.cint of_platform_default_populate(struct device_node *root,const struct of_dev_auxdata *lookup,struct device *parent)
{return of_platform_populate(root, of_default_bus_match_table, lookup,parent);
}
此处有一个数组被传入:of_default_bus_match_table
// linux-5.14.5/drivers/of/platform.cconst struct of_device_id of_default_bus_match_table[] = {{ .compatible = "simple-bus", },{ .compatible = "simple-mfd", },{ .compatible = "isa", },
#ifdef CONFIG_ARM_AMBA{ .compatible = "arm,amba-bus", },
#endif /* CONFIG_ARM_AMBA */{} /* Empty terminated list */
};
// linux-5.14.5/drivers/of/platform.cint of_platform_populate(struct device_node *root,const struct of_device_id *matches,const struct of_dev_auxdata *lookup,struct device *parent)
{struct device_node *child;int rc = 0;root = root ? of_node_get(root) : of_find_node_by_path("/");if (!root)return -EINVAL;pr_debug("%s()\n", __func__);pr_debug(" starting at: %pOF\n", root);device_links_supplier_sync_state_pause();for_each_child_of_node(root, child) {rc = of_platform_bus_create(child, matches, lookup, parent, true);if (rc) {of_node_put(child);break;}}device_links_supplier_sync_state_resume();of_node_set_flag(root, OF_POPULATED_BUS);of_node_put(root);return rc;
}
// linux-5.14.5/drivers/of/platform.cstatic int of_platform_bus_create(struct device_node *bus,const struct of_device_id *matches,const struct of_dev_auxdata *lookup,struct device *parent, bool strict)
{// ...// device_node -> platform_nodedev = of_platform_device_create_pdata(bus, bus_id, platform_data, parent);// include lookup?if (!dev || !of_match_node(matches, bus))return 0;// for_each_child_of_node(bus, child) {pr_debug(" create child: %pOF\n", child);rc = of_platform_bus_create(child, matches, lookup, &dev->dev, strict);if (rc) {of_node_put(child);break;}}of_node_set_flag(bus, OF_POPULATED_BUS);return rc;
我们先来重点关注一下他们之间是怎么转化的:of_platform_device_create_pdata
// linux-5.14.5/drivers/of/platform.cstatic struct platform_device *of_platform_device_create_pdata(struct device_node *np,const char *bus_id,void *platform_data,struct device *parent)
{struct platform_device *dev;// device->of_node = device_nodedev = of_device_alloc(np, bus_id, parent);if (!dev)goto err_clear_flag;dev->dev.coherent_dma_mask = DMA_BIT_MASK(32);if (!dev->dev.dma_mask)dev->dev.dma_mask = &dev->dev.coherent_dma_mask;dev->dev.bus = &platform_bus_type;dev->dev.platform_data = platform_data;of_msi_configure(&dev->dev, dev->dev.of_node);// addif (of_device_add(dev) != 0) {platform_device_put(dev);goto err_clear_flag;
}
注意此处他将platform_device->dev.bus
指向platform_bus_type
,即把该设备挂到平台总线上
// linux-5.14.5/drivers/base/platform.cstruct bus_type platform_bus_type = {.name = "platform",.dev_groups = platform_dev_groups,.match = platform_match,.uevent = platform_uevent,.probe = platform_probe,.remove = platform_remove,.shutdown = platform_shutdown,.dma_configure = platform_dma_configure,.pm = &platform_dev_pm_ops,
};
EXPORT_SYMBOL_GPL(platform_bus_type);
of_device_alloc
主要是初始化platform_device->resource
,即irq
和reg
of_device_add
用于将当前struct device
添加到系统空间中,即sysfs
文件系统,用户可访问
设备驱动模型和平台总线模型是同步关系,不是互斥关系
其中在of_device_alloc
中,如果没有指定parent ,又会将device
的父设备默认设为platform_bus
// linux-5.14.5/drivers/of/platform.cdev->dev.parent = parent ? : &platform_bus;
// linux-5.14.5/drivers/base/platform.cstruct device platform_bus = {.init_name = "platform",
};
EXPORT_SYMBOL_GPL(platform_bus);
从本章可知,从device_node
转化为platform_device
只有几种节点
- 带compatible 属性的一级节点,会转化为platform_device ,其子节点默认不会处理
- 若节点的compatible 为simple-bus 、simple-mfd 、isa 、arm,amba-bus 之一,那么其子节点也会转化为platform_device
- /firmware 节点的子节点
- /reserved-memory 的子节点中匹配reserved_mem_matches 的节点
到这里平台总线模型就完成了
设备初始化:platform_driver_probe
在int device_add(struct device *dev)
中调用:bus_probe_device(dev)
// linux-5.14.5/drivers/base/bus.c/*** bus_probe_device - probe drivers for a new device* @dev: device to probe** - Automatically probe for a driver if the bus allows it.*/
void bus_probe_device(struct device *dev)
{struct bus_type *bus = dev->bus;struct subsys_interface *sif;if (!bus)return;if (bus->p->drivers_autoprobe)device_initial_probe(dev);mutex_lock(&bus->p->mutex);list_for_each_entry(sif, &bus->p->interfaces, node)if (sif->add_dev)sif->add_dev(dev, sif);mutex_unlock(&bus->p->mutex);
}
看device_initial_probe
函数
// linux-5.14.5/drivers/base/dd.cvoid device_initial_probe(struct device *dev)
{__device_attach(dev, true);
}
其中__device_attach
用于为一个platform_device
绑定一个platform_driver
// linux-5.14.5/drivers/base/dd.cstatic int __device_attach(struct device *dev, bool allow_async)
{// ...struct device_attach_data data = {.dev = dev,.check_async = allow_async,.want_async = false,};if (dev->parent)pm_runtime_get_sync(dev->parent);ret = bus_for_each_drv(dev->bus, NULL, &data,__device_attach_driver);if (!ret && allow_async && data.have_async) {/** If we could not find appropriate driver* synchronously and we are allowed to do* async probes and there are drivers that* want to probe asynchronously, we'll* try them.*/dev_dbg(dev, "scheduling asynchronous probe\n");get_device(dev);async_schedule_dev(__device_attach_async_helper, dev);} else {pm_request_idle(dev);}if (dev->parent)pm_runtime_put(dev->parent);
bus_for_each_drv
用于查找platform_bus_type
上klist_drivers
链表,匹配compatible
对应的驱动
// linux-5.14.5/drivers/base/bus.cint bus_for_each_drv(struct bus_type *bus, struct device_driver *start,void *data, int (*fn)(struct device_driver *, void *))
{struct klist_iter i;struct device_driver *drv;int error = 0;if (!bus)return -EINVAL;klist_iter_init_node(&bus->p->klist_drivers, &i,start ? &start->p->knode_bus : NULL);while ((drv = next_driver(&i)) && !error)error = fn(drv, data);klist_iter_exit(&i);return error;
}
EXPORT_SYMBOL_GPL(bus_for_each_drv);
driver_match_device
用于匹配对应的驱动
// linux-5.14.5/drivers/base/dd.cstatic int __device_attach_driver(struct device_driver *drv, void *_data)
{struct device_attach_data *data = _data;struct device *dev = data->dev;bool async_allowed;int ret;ret = driver_match_device(drv, dev);// .../** Ignore errors returned by ->probe so that the next driver can try* its luck.*/ret = driver_probe_device(drv, dev);if (ret < 0)return ret;return ret == 0;
}
匹配函数如下
// linux-5.14.5/drivers/base/base.hstatic inline int driver_match_device(struct device_driver *drv,struct device *dev)
{return drv->bus->match ? drv->bus->match(dev, drv) : 1;
}
其实就是调用platform_bus_type
的匹配函数
// linux-5.14.5/drivers/base/platform.cstatic int platform_match(struct device *dev, struct device_driver *drv)
{struct platform_device *pdev = to_platform_device(dev);struct platform_driver *pdrv = to_platform_driver(drv);/* When driver_override is set, only bind to the matching driver */if (pdev->driver_override)return !strcmp(pdev->driver_override, drv->name);/* Attempt an OF style match first */if (of_driver_match_device(dev, drv))return 1;/* Then try ACPI style match */if (acpi_driver_match_device(dev, drv))return 1;/* Then try to match against the id table */if (pdrv->id_table)return platform_match_id(pdrv->id_table, pdev) != NULL;/* fall-back to driver name match */return (strcmp(pdev->name, drv->name) == 0);
}
接下来就是probe
的初始化和调用了
// linux-5.14.5/drivers/base/dd.cstatic int driver_probe_device(struct device_driver *drv, struct device *dev)
{int trigger_count = atomic_read(&deferred_trigger_count);int ret;atomic_inc(&probe_count);ret = __driver_probe_device(drv, dev);if (ret == -EPROBE_DEFER || ret == EPROBE_DEFER) {driver_deferred_probe_add(dev);/** Did a trigger occur while probing? Need to re-trigger if yes*/if (trigger_count != atomic_read(&deferred_trigger_count) &&!defer_all_probes)driver_deferred_probe_trigger();}atomic_dec(&probe_count);wake_up_all(&probe_waitqueue);return ret;
}
// linux-5.14.5/drivers/base/dd.cstatic int __driver_probe_device(struct device_driver *drv, struct device *dev)
{int ret = 0;if (dev->p->dead || !device_is_registered(dev))return -ENODEV;if (dev->driver)return -EBUSY;dev->can_match = true;pr_debug("bus: '%s': %s: matched device %s with driver %s\n",drv->bus->name, __func__, dev_name(dev), drv->name);pm_runtime_get_suppliers(dev);if (dev->parent)pm_runtime_get_sync(dev->parent);pm_runtime_barrier(dev);if (initcall_debug)ret = really_probe_debug(dev, drv);elseret = really_probe(dev, drv);pm_request_idle(dev);if (dev->parent)pm_runtime_put(dev->parent);pm_runtime_put_suppliers(dev);return ret;
}
看really_probe
和really_probe_debug
两个函数
// linux-5.14.5/drivers/base/dd.cstatic int really_probe(struct device *dev, struct device_driver *drv)
{// ...
re_probe:dev->driver = drv;ret = call_driver_probe(dev, drv);driver_bound(dev);
driver_bound
用于将设备加入到驱动支持的设备链表中,一个设备需要一个驱动,一个驱动支持多个设备
// linux-5.14.5/drivers/base/dd.cstatic int call_driver_probe(struct device *dev, struct device_driver *drv)
{int ret = 0;if (dev->bus->probe)ret = dev->bus->probe(dev);else if (drv->probe)ret = drv->probe(dev);return ret;
}
可见其逻辑:如果bus
上定义probe
函数则调用,否则调用驱动上的probe
函数
我们在前面可知platform_bus_type
的结构如下
// linux-5.14.5/drivers/base/platform.cstruct bus_type platform_bus_type = {.name = "platform",.dev_groups = platform_dev_groups,.match = platform_match,.uevent = platform_uevent,.probe = platform_probe,.remove = platform_remove,.shutdown = platform_shutdown,.dma_configure = platform_dma_configure,.pm = &platform_dev_pm_ops,
};
EXPORT_SYMBOL_GPL(platform_bus_type);
那么调用其probe
函数
// linux-5.14.5/drivers/base/platform.cstatic int platform_probe(struct device *_dev)
{// ...if (drv->probe) {ret = drv->probe(dev);if (ret)dev_pm_domain_detach(_dev, true);}out:if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) {dev_warn(_dev, "probe deferral not supported\n");ret = -ENXIO;}return ret;
}
最重要的就是drv->probe(dev)
了,以linux-5.14.5/drivers/pci/controller/dwc/pcie-qcom.c
为例
// linux-5.14.5/drivers/pci/controller/dwc/pcie-qcom.cstatic struct platform_driver qcom_pcie_driver = {.probe = qcom_pcie_probe,.driver = {.name = "qcom-pcie",.suppress_bind_attrs = true,.of_match_table = qcom_pcie_match,},
};
builtin_platform_driver(qcom_pcie_driver);
这里是一个pci controller
,因此此处必然存在和pci_bus_type
进行绑定的过程,那我们就看看他的probe
函数
// linux-5.14.5/drivers/pci/controller/dwc/pcie-qcom.cstatic int qcom_pcie_probe(struct platform_device *pdev)
{// ...ret = dw_pcie_host_init(pp);if (ret) {dev_err(dev, "cannot initialize host\n");pm_runtime_disable(&pdev->dev);goto err_pm_runtime_put;}return 0;
重点:dw_pcie_host_init
// linux-5.14.5/drivers/pci/controller/dwc/pcie-designware-host.cint dw_pcie_host_init(struct pcie_port *pp)
{struct pci_host_bridge *bridge;// ....ret = pci_host_probe(bridge);
// linux-5.14.5/drivers/pci/probe.cint pci_host_probe(struct pci_host_bridge *bridge)
{struct pci_bus *bus, *child;int ret;ret = pci_scan_root_bus_bridge(bridge);pci_bus_add_devices(bus);
pci_scan_root_bus_bridge
的作用
- 注册
pci host bridge
- 将该
platform device
下的所有子结点与该pci controller
相关联
// linux-5.14.5/drivers/pci/probe.cint pci_scan_root_bus_bridge(struct pci_host_bridge *bridge)
{ret = pci_register_host_bridge(bridge);max = pci_scan_child_bus(b);
// linux-5.14.5/drivers/pci/probe.cunsigned int pci_scan_child_bus(struct pci_bus *bus)
{return pci_scan_child_bus_extend(bus, 0);
}
EXPORT_SYMBOL_GPL(pci_scan_child_bus);
在pci_scan_child_bus_extend
中
// linux-5.14.5/drivers/pci/probe.cstatic unsigned int pci_scan_child_bus_extend(struct pci_bus *bus,unsigned int available_buses)
{for (devfn = 0; devfn < 256; devfn += 8) {nr_devs = pci_scan_slot(bus, devfn);
pci_scan_single_device
用于将struct pci_dev
添加到当前bus->devices
以便后面进行初始化
// linux-5.14.5/drivers/pci/probe.cint pci_scan_slot(struct pci_bus *bus, int devfn)
{dev = pci_scan_single_device(bus, devfn); // call pci_device_add
// linux-5.14.5/drivers/pci/probe.cvoid pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
{// ....list_add_tail(&dev->bus_list, &bus->devices);
在此处插入到对应链表中
并在pci_bus_add_devices
中遍历该pci bus
上的每个struct pci_dev
,调用pci_bus_add_device 为其查找并绑定驱动
// linux-5.14.5/drivers/pci/bus.cvoid pci_bus_add_devices(const struct pci_bus *bus)
{struct pci_dev *dev;struct pci_bus *child;list_for_each_entry(dev, &bus->devices, bus_list) {/* Skip already-added devices */if (pci_dev_is_added(dev))continue;pci_bus_add_device(dev);}list_for_each_entry(dev, &bus->devices, bus_list) {/* Skip if device attach failed */if (!pci_dev_is_added(dev))continue;child = dev->subordinate;if (child)pci_bus_add_devices(child);}
}
EXPORT_SYMBOL(pci_bus_add_devices);
// linux-5.14.5/drivers/pci/bus.cvoid pci_bus_add_device(struct pci_dev *dev)
{int retval;/** Can not put in pci_device_add yet because resources* are not assigned yet for some devices.*/pcibios_bus_add_device(dev);pci_fixup_device(pci_fixup_final, dev);pci_create_sysfs_dev_files(dev);pci_proc_attach_device(dev);pci_bridge_d3_update(dev);dev->match_driver = true;retval = device_attach(&dev->dev);if (retval < 0 && retval != -EPROBE_DEFER)pci_warn(dev, "device attach failed (%d)\n", retval);pci_dev_assign_added(dev, true);
}
重点是device_attach(&dev->dev)
// linux-5.14.5/drivers/base/dd.cint device_attach(struct device *dev)
{return __device_attach(dev, false);
}
EXPORT_SYMBOL_GPL(device_attach);
用于查找对应的驱动程序并绑定,
与device_initial_probe
函数一样最终调用__device_attach ,不过前文的device_initial_probe 绑定的是platform_device
的驱动,这里绑定的是pci 设备的驱动
// linux-5.14.5/drivers/pci/probe.cstatic int pci_register_host_bridge(struct pci_host_bridge *bridge)
{// ...bus->dev.class = &pcibus_class;bus->dev.parent = bus->bridge;
这里是怎么让pci controller
管理pci device
的呢?pci_bus_type
定义了pci
总线的相关操作
pci_bus_type
用于管理所有的pci
总线
// linux-5.14.5/drivers/pci/pci-driver.cstruct bus_type pci_bus_type = {.name = "pci",.match = pci_bus_match,.uevent = pci_uevent,.probe = pci_device_probe,.remove = pci_device_remove,.shutdown = pci_device_shutdown,.dev_groups = pci_dev_groups,.bus_groups = pci_bus_groups,.drv_groups = pci_drv_groups,.pm = PCI_PM_OPS_PTR,.num_vf = pci_bus_num_vf,.dma_configure = pci_dma_configure,
};
而struct pci_bus
代表一个pci
总线,pci_bus_type
是用来描述这种总线类型的结构体,而pcibus_class
则是struct pci_bus 对应的设备所属的设备类(class)
// linux-5.14.5/drivers/pci/probe.cstatic struct class pcibus_class = {.name = "pci_bus",.dev_release = &release_pcibus_dev,.dev_groups = pcibus_groups,
};
当然,到这里还远远没有结束
总线初始化:pci_driver_init
我们在前面探讨过,initcall_level_names
数组的调用顺序
static const char *initcall_level_names[] __initdata = {"pure","core","postcore","arch","subsys","fs","device","late",
};
那么此处来看postcore
级别的部分(它先于arch 级别执行):pci_driver_init
// linux-5.14.5/drivers/pci/pci-driver.cstatic int __init pci_driver_init(void)
{int ret;ret = bus_register(&pci_bus_type);if (ret)return ret;#ifdef CONFIG_PCIEPORTBUSret = bus_register(&pcie_port_bus_type);if (ret)return ret;
#endifdma_debug_add_bus(&pci_bus_type);return 0;
}
postcore_initcall(pci_driver_init);
此处调用bus_register
会将pci_bus_type
加入设备驱动模型中
此时要分清pci_bus_type
和pci host bridge
的职责所在:
- 当pci_device 被注册进内核时,会与pci host bridge 关联起来
- 当pci_device 需要绑定驱动时,pci_bus_type 负责匹配相关的驱动
smmu对pci虚拟化的支持
device到host
从device
到host
主要是进行dma
操作
背景:在多虚拟机系统中,每个
os
都有自己的地址空间,都以0x0
为起始地址,当pci
设备要对某一个虚拟机进行dma
传输时,pci
此时只知道一个地址,在他看来只有一个os
,而不知道该地址是属于哪个虚拟机,此时就要smmu
发力了
使用smmu
,可以把pci dma
操作发来的相同的虚拟机地址,在host
上映射到不同虚拟机在存储域占用的不同物理地址
smmu
是一个硬件模块,应该在设备树中定义资源
smmu: iommu@2b400000 {compatible = "arm,smmu-v3";reg = <0x0 0x2b400000 0x0 0x100000>;interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,<GIC_SPI 79 IRQ_TYPE_EDGE_RISING>,<GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,<GIC_SPI 77 IRQ_TYPE_EDGE_RISING>;interrupt-names = "eventq", "gerror", "priq", "cmdq-sync";dma-coherent;#iommu-cells = <1>;msi-parent = <&its 0x10000>;};
在Linux
中,smmu
的功能通常通过iommu
子系统接口进行暴露:linux-5.14.5/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
常见接口如下:(具体实现依赖于平台)
- iommu_domain_alloc() :用于为设备创建iommu 域。设备在该域中进行地址转换
- iommu_map() :用于将物理内存映射到设备的虚拟地址空间
- iommu_unmap() :用于解除设备的地址映射
- iommu_attach_device() :将设备与指定的iommu 域关联
- iommu_detach_device() :解除设备与iommu 域的关联
- iommu_flush_iotlb_all() :刷新IOTLB ,以确保地址转换表的更新对设备可见
细节部分不在本文范畴,若感兴趣请自行梳理
host到device
启动sr-iov
:int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
// linux-5.14.5/drivers/pci/iov.cint pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
{might_sleep();if (!dev->is_physfn)return -ENOSYS;return sriov_enable(dev, nr_virtfn);
}
EXPORT_SYMBOL_GPL(pci_enable_sriov);
一般在pci_driver
的sriov_configure 回调中进行配置,如下
// linux-5.14.5/drivers/net/ethernet/emulex/benet/be_main.cstatic struct pci_driver be_driver = {.name = DRV_NAME,.id_table = be_dev_ids,.probe = be_probe,.remove = be_remove,.driver.pm = &be_pci_pm_ops,.shutdown = be_shutdown,.sriov_configure = be_pci_sriov_configure,.err_handler = &be_eeh_handlers
};
sriov_enable
用于配置硬件reg
和地址空间,主要由硬件支持