基于MDEV的PCI设备虚拟化DEMO实现

利用周末时间做了一个MDEV虚拟化PCI设备的小试验,简单记录一下:

DEMO架构,此图参考了内核文档:Documentation/driver-api/vfio-mediated-device.rst

host kernel watchdog pci driver:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/uuid.h>
#include <linux/vfio.h>
#include <linux/iommu.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/mdev.h>
#include <linux/pci.h>#define IO_BAR0_SIZE 32
#define IO_CONF_SIZE 0x100
#define CZL_WDG_DEVICE_VENDOR_ID 0xbeef
#define CZL_WDG_DEVICE_DEVICE_ID 0x1001
#define API_DBG(fmt, ...) do { \printk("%s line %d, "fmt, __func__, __LINE__, ##__VA_ARGS__); \} while (0)struct czl_wdg_dev {dev_t         wdg_devt;struct class *wdg_class;struct cdev   wdg_cdev;struct device dev;
};struct mdev_region_info {u64 start;u64 phys_start;u32 size;u64 vfio_offset;
};struct wdg_mdev_state {u8 *config;u8 *iobase;struct mdev_device *mdev;struct mdev_region_info region_info[VFIO_PCI_NUM_REGIONS];u32 bar_mask[VFIO_PCI_NUM_REGIONS];struct list_head next;struct vfio_device_info dev_info;int index;struct mutex ops_lock;
};static const struct file_operations czl_wdg_fops = {.owner          = THIS_MODULE,
};static struct mutex wdg_mdev_list_lock;
static struct list_head wdg_mdev_devices_list;
/*
 * A VFIO file offset encodes the region index in the bits above
 * WDG_VFIO_PCI_OFFSET_SHIFT and the offset inside that region below it.
 */
#define WDG_VFIO_PCI_OFFSET_SHIFT   (40)
#define WDG_VFIO_PCI_OFFSET_TO_INDEX(off)   ((off) >> WDG_VFIO_PCI_OFFSET_SHIFT)
#define WDG_VFIO_PCI_INDEX_TO_OFFSET(index) \
	((u64)(index) << WDG_VFIO_PCI_OFFSET_SHIFT)
#define WDG_VFIO_PCI_OFFSET_MASK    \
	(((u64)(1) << WDG_VFIO_PCI_OFFSET_SHIFT) - 1)
/* Upper bound used when reporting available instances. */
#define MAX_WDGS                    (16)
/* The single parent device instance registered by this module. */
static struct czl_wdg_dev czl_wdg;

/*
 * sysfs "czl_wdg_dev" show handler: writes a fixed description string into
 * buf and returns the number of bytes written.
 */
static ssize_t czl_wdg_dev_show(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	ssize_t written;

	written = sprintf(buf, "mdev emulated pci watchdog device by caozilong.\n");

	return written;
}
static DEVICE_ATTR_RO(czl_wdg_dev);

/* Attributes published on the parent device under the "czl_wdg" group. */
static struct attribute *wdg_dev_attrs[] = {
	&dev_attr_czl_wdg_dev.attr,
	NULL,
};

static const struct attribute_group wdg_dev_group = {
	.name  = "czl_wdg",
	.attrs = wdg_dev_attrs,
};

static const struct attribute_group *wdg_dev_groups[] = {
	&wdg_dev_group,
	NULL,
};

/*
 * sysfs "mdev_dev" show handler: prints the device name when dev is a
 * mediated device, otherwise an empty line.
 */
static ssize_t mdev_dev_show(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	if (mdev_from_dev(dev))
		return sprintf(buf, "This is watchdog %s\n", dev_name(dev));

	return sprintf(buf, "\n");
}

static DEVICE_ATTR_RO(mdev_dev);

/* Attributes published on every mediated device under "caozilong". */
static struct attribute *mdev_dev_attrs[] = {
	&dev_attr_mdev_dev.attr,
	NULL,
};

static const struct attribute_group mdev_dev_group = {
	.name  = "caozilong",
	.attrs = mdev_dev_attrs,
};

static const struct attribute_group *mdev_dev_groups[] = {
	&mdev_dev_group,
	NULL,
};

/*
 * mdev type "name" attribute: maps the type kobject name
 * "<driver string>-<1..3>" to a human readable watchdog flavour.
 * Returns -EINVAL when the kobject name matches none of the three types.
 */
static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
{
	int i;
	char name[128];
	const char *name_str[3] = {
		"Soft Watchdog", "Hardware Watchdog", "Dummy Watchdog"
	};

	for (i = 0; i < 3; i++) {
		snprintf(name, sizeof(name), "%s-%d", dev_driver_string(dev), i + 1);
		if (!strcmp(kobj->name, name))
			return sprintf(buf, "%s\n", name_str[i]);
	}

	return -EINVAL;
}

/* mdev type "device_api" attribute: this device speaks the VFIO PCI API. */
static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
			       char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}

/*
 * mdev type "available_instances" attribute: MAX_WDGS minus the number of
 * instances currently on wdg_mdev_devices_list.
 */
static ssize_t available_instances_show(struct kobject *kobj,
					struct device *dev, char *buf)
{
	struct wdg_mdev_state *mds;
	int used = 0;

	/* create/remove modify the list under this lock, so count under it too. */
	mutex_lock(&wdg_mdev_list_lock);
	list_for_each_entry(mds, &wdg_mdev_devices_list, next)
		used++;
	mutex_unlock(&wdg_mdev_list_lock);

	return sprintf(buf, "%d\n", (MAX_WDGS - used));
}

static MDEV_TYPE_ATTR_RO(name);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(available_instances);static struct attribute *mdev_types_attrs[] = {&mdev_type_attr_name.attr,&mdev_type_attr_device_api.attr,&mdev_type_attr_available_instances.attr,NULL,
};static struct attribute_group mdev_type_group1 = {.name  = "1",.attrs = mdev_types_attrs,
};static struct attribute_group mdev_type_group2 = {.name  = "2",.attrs = mdev_types_attrs,
};static struct attribute_group mdev_type_group3 = {.name  = "3",.attrs = mdev_types_attrs,
};static struct attribute_group *mdev_type_groups[] = {&mdev_type_group1,&mdev_type_group2,&mdev_type_group3,NULL,
};static int czl_wdg_open(struct mdev_device *mdev)
{pr_info("%s line %d, wdg device opened.\n",__func__, __LINE__);return 0;
}static void czl_wdg_close(struct mdev_device *mdev)
{pr_info("%s line %d, wdg device close.\n",__func__, __LINE__);return;
}// fill pci config space meta data & capabilities.
int wdg_create_config_space(struct wdg_mdev_state *mstate)
{// vendor id, device id.*((unsigned int *)&mstate->config[0]) = CZL_WDG_DEVICE_VENDOR_ID |(CZL_WDG_DEVICE_DEVICE_ID << 16);*((unsigned short *)&mstate->config[4]) = 0x0001;*((unsigned short *)&mstate->config[6]) = 0x0200;mstate->config[0x8] =  0x10;mstate->config[0x9] =  0x02;mstate->config[0xa] =  0x00;mstate->config[0xb] =  0x07;*((unsigned int *)&mstate->config[0x10]) = 0x000001;mstate->bar_mask[0] = ~(IO_BAR0_SIZE) + 1;*((unsigned int *)&mstate->config[0x2c]) = 0x10011af4;// cap ptr.mstate->config[0x34] =  0x00;mstate->config[0x3d] =  0x01;mstate->config[0x40] =  0x23;mstate->config[0x43] =  0x80;mstate->config[0x44] =  0x23;mstate->config[0x48] =  0x23;mstate->config[0x4c] =  0x23;mstate->config[0x60] =  0x50;mstate->config[0x61] =  0x43;mstate->config[0x62] =  0x49;mstate->config[0x63] =  0x20;mstate->config[0x64] =  0x53;mstate->config[0x65] =  0x65;mstate->config[0x66] =  0x72;mstate->config[0x67] =  0x69;mstate->config[0x68] =  0x61;mstate->config[0x69] =  0x6c;mstate->config[0x6a] =  0x2f;mstate->config[0x6b] =  0x55;mstate->config[0x6c] =  0x41;mstate->config[0x6d] =  0x52;mstate->config[0x6e] =  0x54;return 0;
}static int czl_wdg_create(struct kobject *kobj, struct mdev_device *mdev)
{int i;struct wdg_mdev_state *mstate;char name[32];if (!mdev)return -EINVAL;for (i = 0; i < 3; i++) {snprintf(name, 32, "%s-%d", dev_driver_string(mdev_parent_dev(mdev)), i + 1);if (!strcmp(kobj->name, name)) {break;}}if (i >= 3) {return -EINVAL;}mstate = kzalloc(sizeof(struct wdg_mdev_state), GFP_KERNEL);if (mstate == NULL)return -ENOMEM;// group number in mdev_type.mstate->index = i + 1;mstate->config = kzalloc(IO_CONF_SIZE, GFP_KERNEL);if (mstate->config == NULL) {pr_err("%s line %d, alloc pci config buffer failure.\n",__func__, __LINE__);kfree(mstate);return -ENOMEM;}mstate->iobase = kzalloc(IO_BAR0_SIZE, GFP_KERNEL);if (mstate->iobase == NULL) {pr_err("%s line %d, alloc pci io buffer failure.\n",__func__, __LINE__);kfree(mstate->config);kfree(mstate);return -ENOMEM;}memset(mstate->config, 0x00, IO_CONF_SIZE);mutex_init(&mstate->ops_lock);mstate->mdev = mdev;mdev_set_drvdata(mdev, mstate);wdg_create_config_space(mstate);mutex_lock(&wdg_mdev_list_lock);list_add(&mstate->next, &wdg_mdev_devices_list);mutex_unlock(&wdg_mdev_list_lock);return 0;
}static int czl_wdg_remove(struct mdev_device *mdev)
{struct wdg_mdev_state *mds, *tmp_mds;struct wdg_mdev_state *mstate = mdev_get_drvdata(mdev);int ret = -EINVAL;mutex_lock(&wdg_mdev_list_lock);list_for_each_entry_safe(mds, tmp_mds, &wdg_mdev_devices_list, next) {if (mstate == mds) {list_del(&mstate->next);mdev_set_drvdata(mdev, NULL);kfree(mstate->config);kfree(mstate->iobase);kfree(mstate);ret = 0;break;}}mutex_unlock(&wdg_mdev_list_lock);return ret;
}static void handle_pci_cfg_space_write(struct wdg_mdev_state *mstate, u16 offset,u8 *buf, u32 count)
{u32 cfg_addr, bar_mask;switch (offset) {case 0x04: /* device control */case 0x06: /* device status */// do nothingbreak;case 0x3c:mstate->config[0x3c] = buf[0];break;case 0x3d:break;case 0x10:  /* BAR0 */cfg_addr = *(u32 *)buf;pr_info("BAR0 addr 0x%x\n", cfg_addr);if (cfg_addr == 0xffffffff) {bar_mask = mstate->bar_mask[0];cfg_addr = (cfg_addr & bar_mask);}cfg_addr |= (mstate->config[offset] & 0x3ul);*((unsigned int *)&mstate->config[offset]) = cfg_addr;break;case 0x14:  /* BAR1 */case 0x18:  /* BAR2 */case 0x20:  /* BAR4 */*((unsigned int *)&mstate->config[offset]) = 0;break;default:pr_info("PCI config write @0x%x of %d bytes not handled\n",offset, count);break;}return;
}static void handle_pci_cfg_space_read(struct wdg_mdev_state *mstate, u16 offset,u8 *buf, u32 count)
{memcpy(buf, (mstate->config + offset), count);return;
}static void mdev_read_base(struct wdg_mdev_state *mstate)
{int index, pos;u32 start_lo, start_hi;u32 mem_type;pos = PCI_BASE_ADDRESS_0;for (index = 0; index <= VFIO_PCI_BAR5_REGION_INDEX; index++)  {if (!mstate->region_info[index].size)continue;start_lo = (*(u32 *)(mstate->config + pos)) &PCI_BASE_ADDRESS_MEM_MASK;mem_type = (*(u32 *)(mstate->config + pos)) &PCI_BASE_ADDRESS_MEM_TYPE_MASK;switch (mem_type) {case PCI_BASE_ADDRESS_MEM_TYPE_64:start_hi = (*(u32 *)(mstate->config + pos + 4));pos += 4;break;case PCI_BASE_ADDRESS_MEM_TYPE_32:case PCI_BASE_ADDRESS_MEM_TYPE_1M:default:start_hi = 0;break;}pos += 4;mstate->region_info[index].start = ((u64)start_hi << 32) | start_lo;}return;
}static void handle_bar_write(unsigned int index, struct wdg_mdev_state *mstate,u16 offset, u8 *buf, u32 count)
{pr_info("%s line %d, bar %d, write offset 0x%x, count 0x%x, val 0x%x.\n",__func__, __LINE__, index, offset, count, *buf);memcpy(mstate->iobase + offset, buf, count);return;
}static void handle_bar_read(unsigned int index, struct wdg_mdev_state *mstate,u16 offset, u8 *buf, u32 count)
{pr_info("%s line %d, bar %d, read offset 0x%x, count 0x%x, val 0x%x.\n",__func__, __LINE__, index, offset, count, *buf);memcpy(buf, mstate->iobase + offset, count);return;
}static ssize_t mdev_access(struct mdev_device *mdev, u8 *buf, size_t count,loff_t pos, bool is_write)
{int ret = 0;unsigned int index;loff_t offset;struct wdg_mdev_state *mstate;if (!mdev || !buf)return -EINVAL;mstate = mdev_get_drvdata(mdev);if (!mstate) {pr_err("%s line %d. get mstate failure.\n", __func__, __LINE__);return -EINVAL;}mutex_lock(&mstate->ops_lock);index = WDG_VFIO_PCI_OFFSET_TO_INDEX(pos);offset = pos & WDG_VFIO_PCI_OFFSET_MASK;switch (index) {case VFIO_PCI_CONFIG_REGION_INDEX:pr_info("%s: PCI config space %s at offset 0x%llx\n",__func__, is_write ? "write" : "read", offset);if (is_write) {handle_pci_cfg_space_write(mstate, offset, buf, count);} else {handle_pci_cfg_space_read(mstate, offset, buf, count);}break;case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:if (!mstate->region_info[index].start)mdev_read_base(mstate);if (is_write) {pr_info("%s: write bar%d offset 0x%llx, val 0x%x.\n",__func__, index, offset, *buf);handle_bar_write(index, mstate, offset, buf, count);} else {pr_info("%s: read bar%d offset 0x%llx, val 0x%x.\n",__func__, index, offset, *buf);handle_bar_read(index, mstate, offset, buf, count);}break;default:ret = -1;goto failed;}ret = count;failed:mutex_unlock(&mstate->ops_lock);return ret;
}static ssize_t czl_wdg_read(struct mdev_device *mdev, char __user *buf,size_t count, loff_t *ppos)
{unsigned int done = 0;int ret;pr_info("%s line %d, read count 0x%lx, pos 0x%llx.\n", __func__, __LINE__, count, *ppos);while (count) {size_t filled;if (count >= 4 && !(*ppos % 4)) {u32 val;ret =  mdev_access(mdev, (u8 *)&val, sizeof(val),*ppos, false);if (ret <= 0)goto read_err;if (copy_to_user(buf, &val, sizeof(val)))goto read_err;filled = 4;} else if (count >= 2 && !(*ppos % 2)) {u16 val;ret = mdev_access(mdev, (u8 *)&val, sizeof(val),*ppos, false);if (ret <= 0)goto read_err;if (copy_to_user(buf, &val, sizeof(val)))goto read_err;filled = 2;} else {u8 val;ret = mdev_access(mdev, (u8 *)&val, sizeof(val),*ppos, false);if (ret <= 0)goto read_err;if (copy_to_user(buf, &val, sizeof(val)))goto read_err;filled = 1;}count -= filled;done += filled;*ppos += filled;buf += filled;}pr_info("%s line %d, read count 0x%x.\n", __func__, __LINE__, done);return done;read_err:pr_err("%s line %d, read err happend.\n", __func__, __LINE__);return -EFAULT;
}static ssize_t czl_wdg_write(struct mdev_device *mdev, const char __user *buf,size_t count, loff_t *ppos)
{unsigned int done = 0;int ret;pr_info("%s line %d, write count 0x%lx, pos 0x%llx.\n", __func__, __LINE__, count, *ppos);while (count) {size_t filled;if (count >= 4 && !(*ppos % 4)) {u32 val;if (copy_from_user(&val, buf, sizeof(val)))goto write_err;ret = mdev_access(mdev, (u8 *)&val, sizeof(val),*ppos, true);if (ret <= 0)goto write_err;filled = 4;}  else if (count >= 2 && !(*ppos % 2)) {u16 val;if (copy_from_user(&val, buf, sizeof(val)))goto write_err;ret = mdev_access(mdev, (u8 *)&val, sizeof(val),*ppos, true);if (ret <= 0)goto write_err;filled = 2;} else {u8 val;if (copy_from_user(&val, buf, sizeof(val)))goto write_err;ret = mdev_access(mdev, (u8 *)&val, sizeof(val),*ppos, true);if (ret <= 0)goto write_err;filled = 1;}count -= filled;done += filled;*ppos += filled;buf += filled;}pr_info("%s line %d, write count 0x%x.\n", __func__, __LINE__, done);return done;write_err:pr_err("%s line %d, write failure.\n", __func__, __LINE__);return -EFAULT;
}static int wdg_get_device_info(struct mdev_device *mdev, struct vfio_device_info *dev_info)
{dev_info->flags = VFIO_DEVICE_FLAGS_PCI;dev_info->num_regions = VFIO_PCI_NUM_REGIONS;dev_info->num_irqs = VFIO_PCI_NUM_IRQS;return 0;
}static int wdg_get_region_info(struct mdev_device *mdev, struct vfio_region_info *region_info)
{unsigned int size = 0;struct wdg_mdev_state *mstate;u32 bar_index;if (!mdev) {pr_err("%s line %d,mdev is null.\n", __func__, __LINE__);return -EINVAL;}mstate = mdev_get_drvdata(mdev);if (!mstate) {pr_err("%s line %d,mstat is null.\n", __func__, __LINE__);return -EINVAL;}bar_index = region_info->index;if (bar_index >= VFIO_PCI_NUM_REGIONS) {pr_err("%s line %d,bar index %d exceeds.\n", __func__, __LINE__, bar_index);return -EINVAL;}mutex_lock(&mstate->ops_lock);switch (bar_index) {case VFIO_PCI_CONFIG_REGION_INDEX:size = IO_CONF_SIZE;break;case VFIO_PCI_BAR0_REGION_INDEX:size = IO_BAR0_SIZE;break;default:size = 0;break;}mstate->region_info[bar_index].size = size;mstate->region_info[bar_index].vfio_offset =WDG_VFIO_PCI_INDEX_TO_OFFSET(bar_index);region_info->size = size;region_info->offset = WDG_VFIO_PCI_INDEX_TO_OFFSET(bar_index);region_info->flags = VFIO_REGION_INFO_FLAG_READ |VFIO_REGION_INFO_FLAG_WRITE;mutex_unlock(&mstate->ops_lock);return 0;
}static int wdg_get_irq_info(struct mdev_device *mdev, struct vfio_irq_info *irq_info)
{switch (irq_info->index) {case VFIO_PCI_INTX_IRQ_INDEX:case VFIO_PCI_MSI_IRQ_INDEX:case VFIO_PCI_REQ_IRQ_INDEX:break;default:pr_err("%s line %d, irq idx %d is invalid.\n",__func__, __LINE__, irq_info->index);return -EINVAL;}irq_info->flags = VFIO_IRQ_INFO_EVENTFD;irq_info->count = 1;if (irq_info->index == VFIO_PCI_INTX_IRQ_INDEX)irq_info->flags |= (VFIO_IRQ_INFO_MASKABLE |VFIO_IRQ_INFO_AUTOMASKED);elseirq_info->flags |= VFIO_IRQ_INFO_NORESIZE;return 0;
}static long czl_wdg_ioctl(struct mdev_device *mdev, unsigned int cmd,unsigned long arg)
{int ret = 0;unsigned long minsz;struct wdg_mdev_state *mstate;pr_info("czl wdg ioctl enter.\n");if (!mdev) {pr_err("%s line %d, mdev is null.\n", __func__, __LINE__);return -EINVAL;}mstate = mdev_get_drvdata(mdev);if (!mstate) {pr_err("%s line %d, cant find mstate data.\n", __func__, __LINE__);return -ENODEV;}switch (cmd) {case VFIO_DEVICE_GET_INFO: {struct vfio_device_info info;minsz = offsetofend(struct vfio_device_info, num_irqs);if (copy_from_user(&info, (void __user *)arg, minsz))return -EFAULT;if (info.argsz < minsz) {pr_err("%s line %d, info.argsz %d < minsz %ld.\n",__func__, __LINE__, info.argsz, minsz);return -EINVAL;}ret = wdg_get_device_info(mdev, &info);if (ret) {pr_err("%s line %d, get device info failure.\n", __func__, __LINE__);return ret;}memcpy(&mstate->dev_info, &info, sizeof(info));if (copy_to_user((void __user *)arg, &info, minsz))return -EFAULT;return 0;}case VFIO_DEVICE_GET_REGION_INFO: {struct vfio_region_info info;minsz = offsetofend(struct vfio_region_info, offset);if (copy_from_user(&info, (void __user *)arg, minsz))return -EFAULT;if (info.argsz < minsz) {pr_err("%s line %d, info.argsz %d < minsz %ld.\n",__func__, __LINE__, info.argsz, minsz);return -EINVAL;}ret = wdg_get_region_info(mdev, &info);if (ret) {pr_err("%s line %d, get region info failure.\n", __func__, __LINE__);return ret;}if (copy_to_user((void __user *)arg, &info, minsz))return -EFAULT;return 0;}case VFIO_DEVICE_GET_IRQ_INFO: {struct vfio_irq_info info;minsz = offsetofend(struct vfio_irq_info, count);if (copy_from_user(&info, (void __user *)arg, minsz))return -EFAULT;if ((info.argsz < minsz) ||(info.index >= mstate->dev_info.num_irqs))return -EINVAL;ret = wdg_get_irq_info(mdev, &info);if (ret)return ret;if (copy_to_user((void __user *)arg, &info, minsz))return -EFAULT;return 0;}case VFIO_DEVICE_SET_IRQS: {pr_info("%s line %d, set irqs.\n", __func__, __LINE__);return 0;}case VFIO_DEVICE_RESET:pr_info("%s line %d, reset.\n", __func__, __LINE__);return 0;}return -EINVAL;
}static const struct mdev_parent_ops wdg_mdev_fops = {.owner                  = THIS_MODULE,.dev_attr_groups        = wdg_dev_groups,.mdev_attr_groups       = mdev_dev_groups,.supported_type_groups  = mdev_type_groups,.create                 = czl_wdg_create,.remove                 = czl_wdg_remove,.open                   = czl_wdg_open,.release                = czl_wdg_close,.read                   = czl_wdg_read,.write                  = czl_wdg_write,.ioctl                  = czl_wdg_ioctl,
};static void wdg_device_release(struct device *dev)
{pr_info("czl wdg devide release.\n");
}static int mdev_wdg_init(void)
{int ret = 0;pr_info("czl wdg init.\n");memset(&czl_wdg, 0x00, sizeof(czl_wdg));ret = alloc_chrdev_region(&czl_wdg.wdg_devt, 0, MINORMASK + 1, "czl_wdg");if (ret < 0) {pr_err("error: failed to register czl wdg device, err:%d\n", ret);return -1;}cdev_init(&czl_wdg.wdg_cdev, &czl_wdg_fops);cdev_add(&czl_wdg.wdg_cdev, czl_wdg.wdg_devt, MINORMASK + 1);pr_info("major_number:%d\n", MAJOR(czl_wdg.wdg_devt));czl_wdg.wdg_class = class_create(THIS_MODULE, "czl_wdg");if (IS_ERR(czl_wdg.wdg_class)) {pr_err("error: failed to create wdg class.\n");ret = -1;goto failed1;}czl_wdg.dev.class = czl_wdg.wdg_class;czl_wdg.dev.release = wdg_device_release;dev_set_name(&czl_wdg.dev, "%s", "czl_wdg");ret = device_register(&czl_wdg.dev);if (ret) {pr_err("%s line %d, register wdg device failure.\n", __func__, __LINE__);ret = -1;goto  failed2;}ret = mdev_register_device(&czl_wdg.dev, &wdg_mdev_fops);if (ret) {pr_err("%s line %d, register wdg mdev device failure.\n", __func__, __LINE__);ret = -1;goto  failed3;}mutex_init(&wdg_mdev_list_lock);INIT_LIST_HEAD(&wdg_mdev_devices_list);pr_info("czl wdg init success.\n");goto done;
failed3:device_unregister(&czl_wdg.dev);
failed2:class_destroy(czl_wdg.wdg_class);
failed1:cdev_del(&czl_wdg.wdg_cdev);unregister_chrdev_region(czl_wdg.wdg_devt, MINORMASK + 1);
done:return ret;
}static void mdev_wdg_exit(void)
{czl_wdg.dev.bus = NULL;mdev_unregister_device(&czl_wdg.dev);device_unregister(&czl_wdg.dev);cdev_del(&czl_wdg.wdg_cdev);unregister_chrdev_region(czl_wdg.wdg_devt, MINORMASK + 1);class_destroy(czl_wdg.wdg_class);czl_wdg.wdg_class = NULL;pr_info("czl_wdg_unload.\n");return;
}module_init(mdev_wdg_init)
module_exit(mdev_wdg_exit)
MODULE_LICENSE("GPL v2");

virtual machine (guest kernel) watchdog PCI driver:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/uuid.h>
#include <linux/vfio.h>
#include <linux/iommu.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/mdev.h>
#include <linux/pci.h>
#include <linux/idr.h>

/* Char device major number returned by register_chrdev(). */
static int devno;
/* Maps a minor number to its struct wdg_pci_state. */
static DEFINE_IDR(wdg_minors);
/* Protects wdg_minors. */
static DEFINE_MUTEX(wdg_minors_lock);

/* Upper bound (exclusive) for minors handed out from wdg_minors. */
#define WDG_MINORS_COUNT 256

/* Per PCI device state of the guest watchdog driver. */
struct wdg_pci_state {
	struct pci_dev *pdev;
	struct device *dev;	/* device node created for this minor */
	int iobase;		/* start of BAR0 (port number) */
	int iolen;		/* length of BAR0 in bytes */
	int major;
	int minor;
};

static struct class *wdg_class;
static const struct pci_device_id czl_pci_table[] = {{       PCI_DEVICE(0xbeef, 0x1001),       },{ 0,                                      }
};static int czl_wdg_open(struct inode *inode, struct file *file)
{int rc = 0;int major, minor;major = imajor(inode);minor = iminor(inode);mutex_lock(&wdg_minors_lock);file->private_data = idr_find(&wdg_minors, minor);mutex_unlock(&wdg_minors_lock);if (!file->private_data) {pr_err("%s line %d, cant find wdg structure.\n",__func__, __LINE__);rc = -1;}return rc;
}static int czl_wdg_release(struct inode *inode, struct file *file)
{return 0;
}ssize_t czl_wdg_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
{int i;struct wdg_pci_state *wdgdev = NULL;unsigned char *kbuf = NULL;int actuallen = 0;wdgdev = file->private_data;if (!wdgdev) {pr_err("%s line %d, read failure.\n", __func__, __LINE__);return -1;}if (*ppos > wdgdev->iolen) {pr_err("%s line %d, read pos %lld exceed max io len %d.\n",__func__, __LINE__, *ppos, wdgdev->iolen);return -1;}kbuf = kzalloc(GFP_KERNEL, size);if (kbuf == NULL) {pr_err("%s line %d, alloc kbuf failure.\n",__func__, __LINE__);return -1;}for (i = 0; (i < size) && ((*ppos + i) <  wdgdev->iolen); i++) {kbuf[i] = inb(wdgdev->iobase + *ppos + i);actuallen ++;}copy_to_user(buf, kbuf, actuallen);kfree(kbuf);return actuallen;
}static ssize_t czl_wdg_write(struct file *file, const char __user *buf,size_t count, loff_t *ppos)
{int i;struct wdg_pci_state *wdgdev = NULL;unsigned char *kbuf = NULL;int actuallen = 0;wdgdev = file->private_data;if (!wdgdev) {pr_err("%s line %d, read failure.\n", __func__, __LINE__);return -1;}if (*ppos > wdgdev->iolen) {pr_err("%s line %d, read pos %lld exceed max io len %d.\n",__func__, __LINE__, *ppos, wdgdev->iolen);return -1;}kbuf = kzalloc(GFP_KERNEL, count);if (kbuf == NULL) {pr_err("%s line %d, alloc kbuf failure.\n",__func__, __LINE__);return -1;}copy_from_user(kbuf, buf, count);for (i = 0; (i < count) && ((*ppos + i) <  wdgdev->iolen); i++) {outb((u8)kbuf[i], wdgdev->iobase + *ppos + i);actuallen ++;}kfree(kbuf);return actuallen;
}static const struct file_operations czl_wdg_fops = {.owner          = THIS_MODULE,.open           = czl_wdg_open,.release        = czl_wdg_release,.read           = czl_wdg_read,.write          = czl_wdg_write,
};static char *wdg_devnode(struct device *dev, umode_t *mode)
{if (mode)*mode = 06666;return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
}static int wdg_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{struct wdg_pci_state *wdgdev = NULL;pr_info("%s line %d, wdg pci device & driver binding.\n", __func__, __LINE__);wdgdev = kzalloc(GFP_KERNEL, sizeof(*wdgdev));if (!wdgdev) {pr_err("%s line %d, fail to alloc buffer.\n",__func__, __LINE__);goto err0;}wdgdev->major = devno;wdgdev->pdev = pci_dev_get(pdev);wdgdev->iobase = pci_resource_start(pdev, 0);wdgdev->iolen = pci_resource_len(pdev, 0);mutex_lock(&wdg_minors_lock);wdgdev->minor = idr_alloc(&wdg_minors, wdgdev, 0, WDG_MINORS_COUNT, GFP_KERNEL);mutex_unlock(&wdg_minors_lock);if (wdgdev->minor < 0) {pr_err("%s line %d, get minor failure from idr.\n", __func__, __LINE__);goto err1;}pr_info("%s line %d, major %d, minor %d, iobase 0x%x.\n", __func__, __LINE__,devno, wdgdev->minor, wdgdev->iobase);wdgdev->dev = device_create(wdg_class, NULL, MKDEV(devno, wdgdev->minor),NULL, "czl-wdg-%d", wdgdev->minor);if (!wdgdev->dev || IS_ERR(wdgdev->dev)) {pr_err("%s line %d, create wdg device failure.\n",__func__, __LINE__);goto err2;}pci_set_drvdata(pdev, wdgdev);return 0;
err2:idr_remove(&wdg_minors, wdgdev->minor);
err1:if (wdgdev) {kfree(wdgdev);}
err0:return -1;
}static void wdg_pci_remove(struct pci_dev *pdev)
{struct wdg_pci_state *wdgdev;pr_info("%s line %d, wdg pci device & driver removing.\n", __func__, __LINE__);wdgdev = pci_get_drvdata(pdev);pci_set_drvdata(pdev, NULL);pci_dev_put(pdev);wdgdev->pdev = NULL;device_destroy(wdg_class, MKDEV(devno, wdgdev->minor));idr_remove(&wdg_minors, wdgdev->minor);kfree(wdgdev);return;
}static struct pci_driver czl_wdg_driver = {.name           = "czl-mdev-wdg",.id_table       = czl_pci_table,.probe          = wdg_pci_probe,.remove         = wdg_pci_remove,
};
static int czl_wdg_init(void)
{int ret;wdg_class = class_create(THIS_MODULE, "czl-wdg");if (!wdg_class) {pr_err("%s line %d, create watchdog class failure.\n",__func__, __LINE__);return -1;}wdg_class->devnode = wdg_devnode;devno = register_chrdev(0, "czl-wdg", &czl_wdg_fops);if (devno < 0) {pr_err("%s line %d, register wdg device chrno failure.\n",__func__, __LINE__);class_destroy(wdg_class);return -1;}ret = pci_register_driver(&czl_wdg_driver);return ret;
}static void czl_wdg_exit(void)
{pci_unregister_driver(&czl_wdg_driver);unregister_chrdev(devno, "czl-wdg");class_destroy(wdg_class);idr_destroy(&wdg_minors);return;
}module_init(czl_wdg_init)
module_exit(czl_wdg_exit)
MODULE_LICENSE("GPL v2");

virtual machine user space test case:

#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdarg.h>

/* Print len bytes of buf in hex, 16 per row, each row prefixed by its offset. */
void dump_buf(unsigned char *buf, int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (i % 16 == 0)
			printf("\n0x%04x: ", i);
		printf("0x%02x ", buf[i]);
	}
	printf("\n");
}

/*
 * Once per second: read the 32-byte BAR0 window through /dev/czl-wdg-0,
 * dump what was read, then overwrite the window with 0x5a.
 * Returns -1 when the device cannot be opened or an access fails.
 */
int main(void)
{
	int wdgfd;
	int status;
	unsigned char buf[32];

	wdgfd = open("/dev/czl-wdg-0", O_RDWR);
	if (wdgfd < 0) {
		printf("%s line %d, open failure.\n", __func__, __LINE__);
		return -1;
	}

	while (1) {
		memset(buf, 0x00, sizeof(buf));
		status = read(wdgfd, buf, sizeof(buf));
		if (status < 0) {
			printf("%s line %d, read failure.\n", __func__, __LINE__);
			close(wdgfd);
			return -1;
		}
		printf("%s line %d, read %d.\n", __func__, __LINE__, status);
		/* Dump only the bytes the driver actually returned. */
		dump_buf(buf, status);

		memset(buf, 0x5a, sizeof(buf));
		/*
		 * Result intentionally ignored.
		 * NOTE(review): the guest driver shown with this test does not
		 * advertise a seek handler - confirm whether this call is needed.
		 */
		(void)lseek(wdgfd, 0, SEEK_SET);
		status = write(wdgfd, buf, sizeof(buf));
		if (status < 0) {
			printf("%s line %d, write failure.\n", __func__, __LINE__);
			close(wdgfd);
			return -1;
		}
		printf("%s line %d, write %d.\n", __func__, __LINE__, status);

		sleep(1);
	}

	close(wdgfd);
	return 0;
}

测试过程:

1.安装WDG MDEV驱动:

sudo insmod czl-mdev-wdg.ko

2.创建mdev设备

创建两个mdev设备

echo "f422fd86-35c0-11ef-8e50-9342c1138a56" > /sys/devices/virtual/czl_wdg/czl_wdg/mdev_supported_types/czl_wdg-1/create
echo "c04de378-35d8-11ef-95c3-339660dfc874" > /sys/devices/virtual/czl_wdg/czl_wdg/mdev_supported_types/czl_wdg-2/create

3.将第二步创建的mdev设备透传给QEMU虚拟机启动:

qemu-system-x86_64 -m 4096 -smp 4 --enable-kvm -drive file=/home/zlcao/Workspace/iso/ps.img -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/f422fd86-35c0-11ef-8e50-9342c1138a56 -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/c04de378-35d8-11ef-95c3-339660dfc874

系统启动后,可以看到虚拟机环境下出现了透传的MDEV PCI设备,设备vendor/device id为0xbeef1001,符合代码设定。

4.虚拟机内安装WDG PCI设备驱动:

上图中可以看到,两个透传的MDEV设备已经和一个名为"serial"的PCI设备驱动绑定,这并不符合预期,需要将默认的"serial"驱动和MDEV设备解绑,在QEMU虚拟机控制台中输入如下命令解绑驱动:

echo -n 0000:00:04.0 > /sys/bus/pci/drivers/serial/unbind
echo -n 0000:00:05.0 > /sys/bus/pci/drivers/serial/unbind

之后就可以安装我们的WDG PCI驱动了:

sudo insmod czl-mdev-drv.ko

安装成功后,虚拟机设备目录下出现了WDG PCI的设备节点:

此时,两个MDEV PCI设备也显示绑定到了正确的驱动:

5.运行测试用例,读写WDG PCI设备的BAR0地址空间:

此时可以看到,虚拟机中对WDG设备BAR0空间的读写调用被“透传”到了HOST机的MDEV PCI设备驱动上,可以基于对BAR0空间的回调实现我们的业务逻辑。


结束

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mzph.cn/pingmian/37653.shtml

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

【网络架构】keepalive

目录 一、keepalive基础 1.1 作用 1.2 原理 1.3 功能 二、keepalive安装 2.1 yum安装 2.2 编译安装 三、配置文件 3.1 keepalived相关文件 3.2 主配置的组成 3.2.1 全局配置 3.2.2 配置虚拟路由器 四、实际操作 4.1 lvskeepalived高可用群集 4.2 keepalivedngi…

AI视界引擎 | ​基于 YOLOv8 和计算机视觉 CV 的实时识别系统!

本文来源公众号“AI视界引擎”，仅用于学术分享，侵权删，干货满满。 原文链接：基于 YOLOv8 和计算机视觉 CV 的实时识别系统！ 技术进步和创新正在尽可能地推进作者的日常生活，但仍有很大一部分社会群体因为…

自研Eclipse插件的生成及安装和使用

说明： 本处是使用个人自研的Eclipse插件为例，创建了一个菜单式的插件组，插件组下，有一个生成右击Jakarta EE服务端点类后，生成端点对应的Restful客户端。有什么问题，欢迎大家交流！！…

cython 笔记

数据类型 # bool 类型 // bool_type_ptactice.pyx cdef bint a 123 # 非0 为 真 &#xff0c; 0 为假 cdef bint b -123 cdef bint c 0 py_a a # cdef 定义的内容没法直接在python中直接引用 py_b b py_c c// main.py import pyximport pyximport.install(language_le…

教您设置打开IDM下载浮动条的快捷键 全网最强下载神器idm怎么使用教程 idm浮动条不显示怎么办

很多人都知道Internet Download Manager(以下简称IDM)是一款非常优秀的下载提速软件。它功能强大，几乎能下载网页中的所有数据（包括视频、音频、图片等），且适用于现在市面上几乎所有的浏览器，非常受大家欢迎。 在使用I…

面向对象和面向过程编程的区别

引言 小伙伴们&#xff0c;当你们看到这章的时候&#xff0c;显然你们已经跨过了来自指针给你们带来的麻烦&#xff0c;唔~真棒呢&#xff0c;但是我们只学会一些基础的C语法并不能帮我们解决问题&#xff0c;甚至是稍微难一些的题目我们都没办法解决&#xff0c;那怎么办呢&am…

多机调度问题

#include<iostream> #include<string> using namespace std; struct work {int time;int number; }; int setwork0(int m,int n,int a[],struct work w[]) {int maxtime0;for(int i1; i<m; i){cout<<i<<"号设备处理作业"<<w[i].num…

python系列30:各种爬虫技术总结

1. 使用requests获取网页内容 以巴鲁夫产品为例&#xff0c;可以用get请求获取内容&#xff1a; https://www.balluff.com.cn/zh-cn/products/BES02YF 对应的网页为&#xff1a; 使用简单方法进行解析即可 import requests r BES02YF res requests.get("https://www.…

YOLOv8改进 | 卷积模块 | 分布移位卷积DSConv替换Conv

秋招面试专栏推荐 &#xff1a;深度学习算法工程师面试问题总结【百面算法工程师】——点击即可跳转 &#x1f4a1;&#x1f4a1;&#x1f4a1;本专栏所有程序均经过测试&#xff0c;可成功执行&#x1f4a1;&#x1f4a1;&#x1f4a1; 专栏目录&#xff1a;《YOLOv8改进有效…

2024百度之星第一场-110串

补题链接&#xff1a; 码蹄集 三个状态转移的计数dp 先确定状态 n个数至多修改k次&#xff0c;保证不出现字串“110” 常规想法先把状态确定为dp[n][k][0/1]&#xff0c;前n个数&#xff0c;修改k次后&#xff0c;末尾数为0/1&#xff0c;不能转移再换思路。 初始状态设定如…

存储请求地址但是使用时请求的是端口

baseURL默认全局加载一次&#xff0c;后续直接读取缓存 解决方案&#xff1a;

类和对象(封装、继承、多态、友元)

c面相对象的三大特性为&#xff1a;封装、继承、多态 c 认为万事万物都皆为对象&#xff0c;对象上有其属性和行为 一、类和对象&#xff08;封装&#xff09; &#xff08;一&#xff09;封装的意义 封装是c面相对象的三大特性之一 封装的意义&#xff1a; 将属性和行为…

网页如何快速被收录?

其实就是要要吸引搜索引擎爬虫更快地抓取你的网页&#xff0c;想让爬虫爬取网页&#xff0c;首要做的自然是创建并提交站点地图。站点地图是搜索引擎了解你网站结构的重要工具。它可以帮助爬虫更快地发现和抓取你网站上的所有重要页面。通过Google Search Console提交站点地图&…

6. 较全的Open3D点云数据处理(python)

注意&#xff1a;以下内容来自博客爆肝5万字❤️Open3D 点云数据处理基础&#xff08;Python版&#xff09;_python 点云 焊缝-CSDN博客&#xff0c;这篇博客写的全且详细&#xff0c;在这里是为了记笔记方便查看&#xff0c;并非抄袭。 1.点云的读写 代码如下&#xff1a; …

ARM功耗管理软件之软件栈及示例

安全之安全(security)博客目录导读 思考:功耗管理软件栈及示例?WFI&WFE?时钟&电源树?DVFS&AVS?

ubuntu22.04速装中文输入法

附送ubuntu安装chrome wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb sudo dpkg -i google-chrome-stable_current_amd64.deb

python中pip换源

目录 1. 背景2. Python 的 pip 换源2.1 临时换源&#xff08;命令行中使用参数&#xff09;2.2 永久换源&#xff08;修改配置文件&#xff09;2.2.1 Windows系统2.2.2 Linux/macOS系统 2.3 使用 pip-config 命令换源&#xff08;Linux/macOS 特定&#xff09; 3. 常用的 PyPI …

深入分析 Android BroadcastReceiver (七)

文章目录 深入分析 Android BroadcastReceiver (七)1. 高级应用场景1.1 示例&#xff1a;动态权限请求1.2 示例&#xff1a;应用内通知更新 2. 安全性与性能优化2.1 示例&#xff1a;设置权限防止广播攻击2.2 示例&#xff1a;使用 LocalBroadcastManager2.3 示例&#xff1a;在…

三分钟给AI Agent应用对话增加人类情感!

点击下方“JavaEdge”&#xff0c;选择“设为星标” 第一时间关注技术干货&#xff01; 免责声明~ 任何文章不要过度深思&#xff01; 万事万物都经不起审视&#xff0c;因为世上没有同样的成长环境&#xff0c;也没有同样的认知水平&#xff0c;更「没有适用于所有人的解决方案…

[算法]——堆排序(C语言实现)

简单的介绍一下用堆排序的算法对整形数据的数据进行排序。 一、堆的概念 堆是具有下列性质的完全二叉树&#xff1a;每个结点的值都大于或等于其左右孩子节点的值&#xff0c;称为大顶堆&#xff1b;或者每个结点的值都小于或等于其左右孩子结点的值&#xff0c;称为小顶堆。 …