The Linux driver implementer’s API guide — The Linux Kernel documentation
一、igb_uio驱动
参考博客:https://zhuanlan.zhihu.com/p/543217445
UIO(Userspace I/O)是运行在用户空间的I/O技术
代码位置:dpdk----/kernel/linux/igb_uio目录
igb_uio 是 dpdk 内部实现的将网卡映射到用户态的内核模块,它是 uio 模块的一个实例。
igb_uio 是一种 pci 驱动,将网卡绑定到 igb_uio 隔离了网卡的内核驱动,同时 igb_uio 完成网卡中断内核态初始化并将中断信号映射到用户态。
igb_uio 与 uio 模块密切相关,uio是一种字符设备驱动,在此驱动中注册了单独的 file_operations 函数表,uio 设备可以看做是一种独立的设备类型。
1.数据结构
//dpdk定义的uio pci设备描述结构
struct rte_uio_pci_dev {struct uio_info info; //uio 通用结构struct pci_dev *pdev; //pci设备描述结构enum rte_intr_mode mode; //中断模式
};
struct uio_info {struct uio_device *uio_dev; //uio设备属于const char *name; //名称const char *version; //版本号struct uio_mem mem[MAX_UIO_MAPS];//可映射的内存区域列表,size == 0表示列表结束struct uio_port port[MAX_UIO_PORT_REGIONS]; //网口区域列表long irq; //UIO_IRQ_CUSTOM 中断号unsigned long irq_flags; //请求中断号的标志void *priv; //可选的私有数据irqreturn_t (*handler)(int irq, struct uio_info *dev_info); //中断信息处理int (*mmap)(struct uio_info *info, struct vm_area_struct *vma);//内存映射操作int (*open)(struct uio_info *info, struct inode *inode); //打开int (*release)(struct uio_info *info, struct inode *inode); //释放int (*irqcontrol)(struct uio_info *info, s32 irq_on); //中断控制操作 关闭/打开 当向/dev/uioX中写入值时
static struct pci_driver igbuio_pci_driver = {.name = "igb_uio", //名称//id_table 用来存储当前driver支持的所有设备的信息//模块初始化时是没有设备的,设置需要通过脚本或者程序添加绑定.id_table = NULL,.probe = igbuio_pci_probe, //探测回调函数.remove = igbuio_pci_remove, //删除回调函数
};
2. insmod igb_uio
/*
 * Module entry point for igb_uio.
 *
 * Refuses to load when the kernel is locked down, applies the interrupt mode
 * given at insmod time, then registers the PCI driver.
 *
 * Returns -EINVAL under kernel lockdown, the negative value from
 * igbuio_config_intr_mode() if that fails, otherwise the result of
 * pci_register_driver().
 */
static int __init
igbuio_pci_init_module(void)
{
	int ret;

	if (igbuio_kernel_is_locked_down()) {
		pr_err("Not able to use module, kernel lock down is enabled\n");
		return -EINVAL;
	}

	if (wc_activate != 0)
		pr_info("wc_activate is set\n");

	/* Apply the interrupt mode passed in when the module was inserted. */
	ret = igbuio_config_intr_mode(intr_mode);
	if (ret < 0)
		return ret;

	/*
	 * Register the PCI driver. Its id_table is NULL, so no device is
	 * claimed here; igbuio_pci_probe() runs later, when a device is bound
	 * to igb_uio.
	 */
	return pci_register_driver(&igbuio_pci_driver);
}

/* Runs at insmod time; the driver is not yet handling any device at that point. */
module_init(igbuio_pci_init_module);
igb_uio注册后并不会立即探测设备;此后每当有PCI设备被绑定到igb_uio(例如通过脚本或程序绑定)时,内核都会针对该设备调用一次igbuio_pci_probe。
3. igbuio_pci_probe
igbuio_pci_probe的主要作用是:在网卡绑定到igb_uio时完成该设备的初始化(使能PCI设备、读取BAR信息、设置DMA掩码、创建sysfs属性组),并将其注册为一个UIO设备。
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
static int __devinit
#else
static int
#endif
igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
{struct rte_uio_pci_dev *udev;dma_addr_t map_dma_addr;void *map_addr;int err;#ifdef HAVE_PCI_IS_BRIDGE_APIif (pci_is_bridge(dev)) {dev_warn(&dev->dev, "Ignoring PCI bridge device\n");return -ENODEV;}
#endif//在内核空间分配udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);if (!udev)return -ENOMEM;/** enable device: ask low-level code to enable I/O and* memory*///使能设备: 调用更底层的PCI代码使能设备的内存和I/O区域err = pci_enable_device(dev);if (err != 0) {dev_err(&dev->dev, "Cannot enable PCI device\n");goto fail_free;}/* 设备设置层DMA总线主模式 */pci_set_master(dev);/* remap IO memory *///该函数的功能是将当前设备的所有PCI BAR的全部信息读取到//struct uio_info结构体中,后续注册UIO设备时需要使用err = igbuio_setup_bars(dev, &udev->info);if (err != 0)goto fail_release_iomem;/* set 64-bit DMA mask *///设置DMA模式err = pci_set_dma_mask(dev, DMA_BIT_MASK(64));if (err != 0) {dev_err(&dev->dev, "Cannot set DMA mask\n");goto fail_release_iomem;}//内存范围一致性的处理err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64));if (err != 0) {dev_err(&dev->dev, "Cannot set consistent DMA mask\n");goto fail_release_iomem;}/* fill uio infos */udev->info.name = "igb_uio";udev->info.version = "0.1";udev->info.irqcontrol = igbuio_pci_irqcontrol;udev->info.open = igbuio_pci_open;udev->info.release = igbuio_pci_release;udev->info.priv = udev;udev->pdev = dev;atomic_set(&udev->refcnt, 0);err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);if (err != 0)goto fail_release_iomem;/* register uio driver *///函数将当前设备注册为UIO设备err = uio_register_device(&dev->dev, &udev->info);if (err != 0)goto fail_remove_group;pci_set_drvdata(dev, udev);/** Doing a harmless dma mapping for attaching the device to* the iommu identity mapping if kernel boots with iommu=pt.* Note this is not a problem if no IOMMU at all.*/map_addr = dma_alloc_coherent(&dev->dev, 1024, &map_dma_addr,GFP_KERNEL);if (map_addr)memset(map_addr, 0, 1024);if (!map_addr)dev_info(&dev->dev, "dma mapping failed\n");else {dev_info(&dev->dev, "mapping 1K dma=%#llx host=%p\n",(unsigned long long)map_dma_addr, map_addr);dma_free_coherent(&dev->dev, 1024, map_addr, map_dma_addr);dev_info(&dev->dev, "unmapping 1K dma=%#llx host=%p\n",(unsigned long long)map_dma_addr, map_addr);}return 
0;fail_remove_group:sysfs_remove_group(&dev->dev.kobj, &dev