基于MDEV的PCI设备虚拟化DEMO实现

利用周末时间做了一个MDEV虚拟化PCI设备的小试验,简单记录一下:

DEMO架构,此图参考了内核文档:Documentation/driver-api/vfio-mediated-device.rst

host kernel watchdog pci driver:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/uuid.h>
#include <linux/vfio.h>
#include <linux/iommu.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/mdev.h>
#include <linux/pci.h>

/* Size in bytes of the buffer that backs the emulated I/O BAR0. */
#define IO_BAR0_SIZE 32
/* Size in bytes of the buffer that backs the emulated PCI config space. */
#define IO_CONF_SIZE 0x100
/* Vendor/device IDs written into the emulated PCI configuration header. */
#define CZL_WDG_DEVICE_VENDOR_ID 0xbeef
#define CZL_WDG_DEVICE_DEVICE_ID 0x1001
#define API_DBG(fmt, ...) do { \printk("%s line %d, "fmt, __func__, __LINE__, ##__VA_ARGS__); \} while (0)struct czl_wdg_dev {dev_t         wdg_devt;struct class *wdg_class;struct cdev   wdg_cdev;struct device dev;
};struct mdev_region_info {u64 start;u64 phys_start;u32 size;u64 vfio_offset;
};struct wdg_mdev_state {u8 *config;u8 *iobase;struct mdev_device *mdev;struct mdev_region_info region_info[VFIO_PCI_NUM_REGIONS];u32 bar_mask[VFIO_PCI_NUM_REGIONS];struct list_head next;struct vfio_device_info dev_info;int index;struct mutex ops_lock;
};static const struct file_operations czl_wdg_fops = {.owner          = THIS_MODULE,
};static struct mutex wdg_mdev_list_lock;
static struct list_head wdg_mdev_devices_list;
/*
 * A VFIO file offset carries the region index in the bits at and above
 * WDG_VFIO_PCI_OFFSET_SHIFT and the offset inside that region below it.
 */
#define WDG_VFIO_PCI_OFFSET_SHIFT   (40)
/* Extract the region index from a file offset. */
#define WDG_VFIO_PCI_OFFSET_TO_INDEX(off)   ((off) >> WDG_VFIO_PCI_OFFSET_SHIFT)
/* Build the file offset at which region @index starts. */
#define WDG_VFIO_PCI_INDEX_TO_OFFSET(index) \
	((u64)(index) << WDG_VFIO_PCI_OFFSET_SHIFT)
/* Mask selecting the offset inside a region. */
#define WDG_VFIO_PCI_OFFSET_MASK    \
	(((u64)(1) << WDG_VFIO_PCI_OFFSET_SHIFT) - 1)
/* Number of mediated devices advertised through available_instances. */
#define MAX_WDGS                    (16)
/* The single parent device instance owned by this module. */
static struct czl_wdg_dev czl_wdg;

/*
 * sysfs read handler for the "czl_wdg_dev" attribute: writes a fixed
 * description string into @buf and returns its length.
 */
static ssize_t czl_wdg_dev_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	static const char banner[] =
		"mdev emulated pci watchdog device by caozilong.\n";

	return sprintf(buf, "%s", banner);
}
static DEVICE_ATTR_RO(czl_wdg_dev);

/* Attributes published on the parent device, in the "czl_wdg" group. */
static struct attribute *wdg_dev_attrs[] = {
	&dev_attr_czl_wdg_dev.attr,
	NULL,
};

static const struct attribute_group wdg_dev_group = {
	.name  = "czl_wdg",
	.attrs = wdg_dev_attrs,
};

static const struct attribute_group *wdg_dev_groups[] = {
	&wdg_dev_group,
	NULL,
};

/*
 * sysfs read handler for the per-mdev "mdev_dev" attribute. Prints the
 * device name when @dev is a mediated device, otherwise an empty line.
 */
static ssize_t
mdev_dev_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	if (mdev_from_dev(dev))
		return sprintf(buf, "This is watchdog %s\n", dev_name(dev));

	return sprintf(buf, "\n");
}

static DEVICE_ATTR_RO(mdev_dev);

/* Attributes published on every mediated device, in the "caozilong" group. */
static struct attribute *mdev_dev_attrs[] = {
	&dev_attr_mdev_dev.attr,
	NULL,
};

static const struct attribute_group mdev_dev_group = {
	.name  = "caozilong",
	.attrs = mdev_dev_attrs,
};

static const struct attribute_group *mdev_dev_groups[] = {
	&mdev_dev_group,
	NULL,
};

/*
 * mdev type "name" attribute. The type directory is named
 * "<parent driver string>-<n>" with n in 1..3; the matching human readable
 * name is printed. Returns -EINVAL when the directory name matches no type.
 */
static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
{
	int i;
	char name[128];
	const char *name_str[3] = {
		"Soft Watchdog", "Hardware Watchdog", "Dummy Watchdog"
	};

	for (i = 0; i < 3; i++) {
		snprintf(name, sizeof(name), "%s-%d",
			 dev_driver_string(dev), i + 1);
		if (!strcmp(kobj->name, name))
			return sprintf(buf, "%s\n", name_str[i]);
	}

	return -EINVAL;
}

/* mdev type "device_api" attribute: this device speaks the vfio-pci API. */
static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
			       char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}

/*
 * mdev type "available_instances" attribute: MAX_WDGS minus the number of
 * mediated devices currently on wdg_mdev_devices_list.
 */
static ssize_t
available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
{
	struct wdg_mdev_state *mds;
	int used = 0;

	/*
	 * create/remove modify the list under wdg_mdev_list_lock, so the
	 * walk must hold it too or it can follow a freed entry.
	 */
	mutex_lock(&wdg_mdev_list_lock);
	list_for_each_entry(mds, &wdg_mdev_devices_list, next)
		used++;
	mutex_unlock(&wdg_mdev_list_lock);

	return sprintf(buf, "%d\n", (MAX_WDGS - used));
}

static MDEV_TYPE_ATTR_RO(name);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(available_instances);static struct attribute *mdev_types_attrs[] = {&mdev_type_attr_name.attr,&mdev_type_attr_device_api.attr,&mdev_type_attr_available_instances.attr,NULL,
};static struct attribute_group mdev_type_group1 = {.name  = "1",.attrs = mdev_types_attrs,
};static struct attribute_group mdev_type_group2 = {.name  = "2",.attrs = mdev_types_attrs,
};static struct attribute_group mdev_type_group3 = {.name  = "3",.attrs = mdev_types_attrs,
};static struct attribute_group *mdev_type_groups[] = {&mdev_type_group1,&mdev_type_group2,&mdev_type_group3,NULL,
};static int czl_wdg_open(struct mdev_device *mdev)
{pr_info("%s line %d, wdg device opened.\n",__func__, __LINE__);return 0;
}static void czl_wdg_close(struct mdev_device *mdev)
{pr_info("%s line %d, wdg device close.\n",__func__, __LINE__);return;
}// fill pci config space meta data & capabilities.
int wdg_create_config_space(struct wdg_mdev_state *mstate)
{// vendor id, device id.*((unsigned int *)&mstate->config[0]) = CZL_WDG_DEVICE_VENDOR_ID |(CZL_WDG_DEVICE_DEVICE_ID << 16);*((unsigned short *)&mstate->config[4]) = 0x0001;*((unsigned short *)&mstate->config[6]) = 0x0200;mstate->config[0x8] =  0x10;mstate->config[0x9] =  0x02;mstate->config[0xa] =  0x00;mstate->config[0xb] =  0x07;*((unsigned int *)&mstate->config[0x10]) = 0x000001;mstate->bar_mask[0] = ~(IO_BAR0_SIZE) + 1;*((unsigned int *)&mstate->config[0x2c]) = 0x10011af4;// cap ptr.mstate->config[0x34] =  0x00;mstate->config[0x3d] =  0x01;mstate->config[0x40] =  0x23;mstate->config[0x43] =  0x80;mstate->config[0x44] =  0x23;mstate->config[0x48] =  0x23;mstate->config[0x4c] =  0x23;mstate->config[0x60] =  0x50;mstate->config[0x61] =  0x43;mstate->config[0x62] =  0x49;mstate->config[0x63] =  0x20;mstate->config[0x64] =  0x53;mstate->config[0x65] =  0x65;mstate->config[0x66] =  0x72;mstate->config[0x67] =  0x69;mstate->config[0x68] =  0x61;mstate->config[0x69] =  0x6c;mstate->config[0x6a] =  0x2f;mstate->config[0x6b] =  0x55;mstate->config[0x6c] =  0x41;mstate->config[0x6d] =  0x52;mstate->config[0x6e] =  0x54;return 0;
}static int czl_wdg_create(struct kobject *kobj, struct mdev_device *mdev)
{int i;struct wdg_mdev_state *mstate;char name[32];if (!mdev)return -EINVAL;for (i = 0; i < 3; i++) {snprintf(name, 32, "%s-%d", dev_driver_string(mdev_parent_dev(mdev)), i + 1);if (!strcmp(kobj->name, name)) {break;}}if (i >= 3) {return -EINVAL;}mstate = kzalloc(sizeof(struct wdg_mdev_state), GFP_KERNEL);if (mstate == NULL)return -ENOMEM;// group number in mdev_type.mstate->index = i + 1;mstate->config = kzalloc(IO_CONF_SIZE, GFP_KERNEL);if (mstate->config == NULL) {pr_err("%s line %d, alloc pci config buffer failure.\n",__func__, __LINE__);kfree(mstate);return -ENOMEM;}mstate->iobase = kzalloc(IO_BAR0_SIZE, GFP_KERNEL);if (mstate->iobase == NULL) {pr_err("%s line %d, alloc pci io buffer failure.\n",__func__, __LINE__);kfree(mstate->config);kfree(mstate);return -ENOMEM;}memset(mstate->config, 0x00, IO_CONF_SIZE);mutex_init(&mstate->ops_lock);mstate->mdev = mdev;mdev_set_drvdata(mdev, mstate);wdg_create_config_space(mstate);mutex_lock(&wdg_mdev_list_lock);list_add(&mstate->next, &wdg_mdev_devices_list);mutex_unlock(&wdg_mdev_list_lock);return 0;
}static int czl_wdg_remove(struct mdev_device *mdev)
{struct wdg_mdev_state *mds, *tmp_mds;struct wdg_mdev_state *mstate = mdev_get_drvdata(mdev);int ret = -EINVAL;mutex_lock(&wdg_mdev_list_lock);list_for_each_entry_safe(mds, tmp_mds, &wdg_mdev_devices_list, next) {if (mstate == mds) {list_del(&mstate->next);mdev_set_drvdata(mdev, NULL);kfree(mstate->config);kfree(mstate->iobase);kfree(mstate);ret = 0;break;}}mutex_unlock(&wdg_mdev_list_lock);return ret;
}static void handle_pci_cfg_space_write(struct wdg_mdev_state *mstate, u16 offset,u8 *buf, u32 count)
{u32 cfg_addr, bar_mask;switch (offset) {case 0x04: /* device control */case 0x06: /* device status */// do nothingbreak;case 0x3c:mstate->config[0x3c] = buf[0];break;case 0x3d:break;case 0x10:  /* BAR0 */cfg_addr = *(u32 *)buf;pr_info("BAR0 addr 0x%x\n", cfg_addr);if (cfg_addr == 0xffffffff) {bar_mask = mstate->bar_mask[0];cfg_addr = (cfg_addr & bar_mask);}cfg_addr |= (mstate->config[offset] & 0x3ul);*((unsigned int *)&mstate->config[offset]) = cfg_addr;break;case 0x14:  /* BAR1 */case 0x18:  /* BAR2 */case 0x20:  /* BAR4 */*((unsigned int *)&mstate->config[offset]) = 0;break;default:pr_info("PCI config write @0x%x of %d bytes not handled\n",offset, count);break;}return;
}static void handle_pci_cfg_space_read(struct wdg_mdev_state *mstate, u16 offset,u8 *buf, u32 count)
{memcpy(buf, (mstate->config + offset), count);return;
}static void mdev_read_base(struct wdg_mdev_state *mstate)
{int index, pos;u32 start_lo, start_hi;u32 mem_type;pos = PCI_BASE_ADDRESS_0;for (index = 0; index <= VFIO_PCI_BAR5_REGION_INDEX; index++)  {if (!mstate->region_info[index].size)continue;start_lo = (*(u32 *)(mstate->config + pos)) &PCI_BASE_ADDRESS_MEM_MASK;mem_type = (*(u32 *)(mstate->config + pos)) &PCI_BASE_ADDRESS_MEM_TYPE_MASK;switch (mem_type) {case PCI_BASE_ADDRESS_MEM_TYPE_64:start_hi = (*(u32 *)(mstate->config + pos + 4));pos += 4;break;case PCI_BASE_ADDRESS_MEM_TYPE_32:case PCI_BASE_ADDRESS_MEM_TYPE_1M:default:start_hi = 0;break;}pos += 4;mstate->region_info[index].start = ((u64)start_hi << 32) | start_lo;}return;
}static void handle_bar_write(unsigned int index, struct wdg_mdev_state *mstate,u16 offset, u8 *buf, u32 count)
{pr_info("%s line %d, bar %d, write offset 0x%x, count 0x%x, val 0x%x.\n",__func__, __LINE__, index, offset, count, *buf);memcpy(mstate->iobase + offset, buf, count);return;
}static void handle_bar_read(unsigned int index, struct wdg_mdev_state *mstate,u16 offset, u8 *buf, u32 count)
{pr_info("%s line %d, bar %d, read offset 0x%x, count 0x%x, val 0x%x.\n",__func__, __LINE__, index, offset, count, *buf);memcpy(buf, mstate->iobase + offset, count);return;
}static ssize_t mdev_access(struct mdev_device *mdev, u8 *buf, size_t count,loff_t pos, bool is_write)
{int ret = 0;unsigned int index;loff_t offset;struct wdg_mdev_state *mstate;if (!mdev || !buf)return -EINVAL;mstate = mdev_get_drvdata(mdev);if (!mstate) {pr_err("%s line %d. get mstate failure.\n", __func__, __LINE__);return -EINVAL;}mutex_lock(&mstate->ops_lock);index = WDG_VFIO_PCI_OFFSET_TO_INDEX(pos);offset = pos & WDG_VFIO_PCI_OFFSET_MASK;switch (index) {case VFIO_PCI_CONFIG_REGION_INDEX:pr_info("%s: PCI config space %s at offset 0x%llx\n",__func__, is_write ? "write" : "read", offset);if (is_write) {handle_pci_cfg_space_write(mstate, offset, buf, count);} else {handle_pci_cfg_space_read(mstate, offset, buf, count);}break;case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:if (!mstate->region_info[index].start)mdev_read_base(mstate);if (is_write) {pr_info("%s: write bar%d offset 0x%llx, val 0x%x.\n",__func__, index, offset, *buf);handle_bar_write(index, mstate, offset, buf, count);} else {pr_info("%s: read bar%d offset 0x%llx, val 0x%x.\n",__func__, index, offset, *buf);handle_bar_read(index, mstate, offset, buf, count);}break;default:ret = -1;goto failed;}ret = count;failed:mutex_unlock(&mstate->ops_lock);return ret;
}static ssize_t czl_wdg_read(struct mdev_device *mdev, char __user *buf,size_t count, loff_t *ppos)
{unsigned int done = 0;int ret;pr_info("%s line %d, read count 0x%lx, pos 0x%llx.\n", __func__, __LINE__, count, *ppos);while (count) {size_t filled;if (count >= 4 && !(*ppos % 4)) {u32 val;ret =  mdev_access(mdev, (u8 *)&val, sizeof(val),*ppos, false);if (ret <= 0)goto read_err;if (copy_to_user(buf, &val, sizeof(val)))goto read_err;filled = 4;} else if (count >= 2 && !(*ppos % 2)) {u16 val;ret = mdev_access(mdev, (u8 *)&val, sizeof(val),*ppos, false);if (ret <= 0)goto read_err;if (copy_to_user(buf, &val, sizeof(val)))goto read_err;filled = 2;} else {u8 val;ret = mdev_access(mdev, (u8 *)&val, sizeof(val),*ppos, false);if (ret <= 0)goto read_err;if (copy_to_user(buf, &val, sizeof(val)))goto read_err;filled = 1;}count -= filled;done += filled;*ppos += filled;buf += filled;}pr_info("%s line %d, read count 0x%x.\n", __func__, __LINE__, done);return done;read_err:pr_err("%s line %d, read err happend.\n", __func__, __LINE__);return -EFAULT;
}static ssize_t czl_wdg_write(struct mdev_device *mdev, const char __user *buf,size_t count, loff_t *ppos)
{unsigned int done = 0;int ret;pr_info("%s line %d, write count 0x%lx, pos 0x%llx.\n", __func__, __LINE__, count, *ppos);while (count) {size_t filled;if (count >= 4 && !(*ppos % 4)) {u32 val;if (copy_from_user(&val, buf, sizeof(val)))goto write_err;ret = mdev_access(mdev, (u8 *)&val, sizeof(val),*ppos, true);if (ret <= 0)goto write_err;filled = 4;}  else if (count >= 2 && !(*ppos % 2)) {u16 val;if (copy_from_user(&val, buf, sizeof(val)))goto write_err;ret = mdev_access(mdev, (u8 *)&val, sizeof(val),*ppos, true);if (ret <= 0)goto write_err;filled = 2;} else {u8 val;if (copy_from_user(&val, buf, sizeof(val)))goto write_err;ret = mdev_access(mdev, (u8 *)&val, sizeof(val),*ppos, true);if (ret <= 0)goto write_err;filled = 1;}count -= filled;done += filled;*ppos += filled;buf += filled;}pr_info("%s line %d, write count 0x%x.\n", __func__, __LINE__, done);return done;write_err:pr_err("%s line %d, write failure.\n", __func__, __LINE__);return -EFAULT;
}static int wdg_get_device_info(struct mdev_device *mdev, struct vfio_device_info *dev_info)
{dev_info->flags = VFIO_DEVICE_FLAGS_PCI;dev_info->num_regions = VFIO_PCI_NUM_REGIONS;dev_info->num_irqs = VFIO_PCI_NUM_IRQS;return 0;
}static int wdg_get_region_info(struct mdev_device *mdev, struct vfio_region_info *region_info)
{unsigned int size = 0;struct wdg_mdev_state *mstate;u32 bar_index;if (!mdev) {pr_err("%s line %d,mdev is null.\n", __func__, __LINE__);return -EINVAL;}mstate = mdev_get_drvdata(mdev);if (!mstate) {pr_err("%s line %d,mstat is null.\n", __func__, __LINE__);return -EINVAL;}bar_index = region_info->index;if (bar_index >= VFIO_PCI_NUM_REGIONS) {pr_err("%s line %d,bar index %d exceeds.\n", __func__, __LINE__, bar_index);return -EINVAL;}mutex_lock(&mstate->ops_lock);switch (bar_index) {case VFIO_PCI_CONFIG_REGION_INDEX:size = IO_CONF_SIZE;break;case VFIO_PCI_BAR0_REGION_INDEX:size = IO_BAR0_SIZE;break;default:size = 0;break;}mstate->region_info[bar_index].size = size;mstate->region_info[bar_index].vfio_offset =WDG_VFIO_PCI_INDEX_TO_OFFSET(bar_index);region_info->size = size;region_info->offset = WDG_VFIO_PCI_INDEX_TO_OFFSET(bar_index);region_info->flags = VFIO_REGION_INFO_FLAG_READ |VFIO_REGION_INFO_FLAG_WRITE;mutex_unlock(&mstate->ops_lock);return 0;
}static int wdg_get_irq_info(struct mdev_device *mdev, struct vfio_irq_info *irq_info)
{switch (irq_info->index) {case VFIO_PCI_INTX_IRQ_INDEX:case VFIO_PCI_MSI_IRQ_INDEX:case VFIO_PCI_REQ_IRQ_INDEX:break;default:pr_err("%s line %d, irq idx %d is invalid.\n",__func__, __LINE__, irq_info->index);return -EINVAL;}irq_info->flags = VFIO_IRQ_INFO_EVENTFD;irq_info->count = 1;if (irq_info->index == VFIO_PCI_INTX_IRQ_INDEX)irq_info->flags |= (VFIO_IRQ_INFO_MASKABLE |VFIO_IRQ_INFO_AUTOMASKED);elseirq_info->flags |= VFIO_IRQ_INFO_NORESIZE;return 0;
}static long czl_wdg_ioctl(struct mdev_device *mdev, unsigned int cmd,unsigned long arg)
{int ret = 0;unsigned long minsz;struct wdg_mdev_state *mstate;pr_info("czl wdg ioctl enter.\n");if (!mdev) {pr_err("%s line %d, mdev is null.\n", __func__, __LINE__);return -EINVAL;}mstate = mdev_get_drvdata(mdev);if (!mstate) {pr_err("%s line %d, cant find mstate data.\n", __func__, __LINE__);return -ENODEV;}switch (cmd) {case VFIO_DEVICE_GET_INFO: {struct vfio_device_info info;minsz = offsetofend(struct vfio_device_info, num_irqs);if (copy_from_user(&info, (void __user *)arg, minsz))return -EFAULT;if (info.argsz < minsz) {pr_err("%s line %d, info.argsz %d < minsz %ld.\n",__func__, __LINE__, info.argsz, minsz);return -EINVAL;}ret = wdg_get_device_info(mdev, &info);if (ret) {pr_err("%s line %d, get device info failure.\n", __func__, __LINE__);return ret;}memcpy(&mstate->dev_info, &info, sizeof(info));if (copy_to_user((void __user *)arg, &info, minsz))return -EFAULT;return 0;}case VFIO_DEVICE_GET_REGION_INFO: {struct vfio_region_info info;minsz = offsetofend(struct vfio_region_info, offset);if (copy_from_user(&info, (void __user *)arg, minsz))return -EFAULT;if (info.argsz < minsz) {pr_err("%s line %d, info.argsz %d < minsz %ld.\n",__func__, __LINE__, info.argsz, minsz);return -EINVAL;}ret = wdg_get_region_info(mdev, &info);if (ret) {pr_err("%s line %d, get region info failure.\n", __func__, __LINE__);return ret;}if (copy_to_user((void __user *)arg, &info, minsz))return -EFAULT;return 0;}case VFIO_DEVICE_GET_IRQ_INFO: {struct vfio_irq_info info;minsz = offsetofend(struct vfio_irq_info, count);if (copy_from_user(&info, (void __user *)arg, minsz))return -EFAULT;if ((info.argsz < minsz) ||(info.index >= mstate->dev_info.num_irqs))return -EINVAL;ret = wdg_get_irq_info(mdev, &info);if (ret)return ret;if (copy_to_user((void __user *)arg, &info, minsz))return -EFAULT;return 0;}case VFIO_DEVICE_SET_IRQS: {pr_info("%s line %d, set irqs.\n", __func__, __LINE__);return 0;}case VFIO_DEVICE_RESET:pr_info("%s line %d, reset.\n", __func__, __LINE__);return 0;}return -EINVAL;
}static const struct mdev_parent_ops wdg_mdev_fops = {.owner                  = THIS_MODULE,.dev_attr_groups        = wdg_dev_groups,.mdev_attr_groups       = mdev_dev_groups,.supported_type_groups  = mdev_type_groups,.create                 = czl_wdg_create,.remove                 = czl_wdg_remove,.open                   = czl_wdg_open,.release                = czl_wdg_close,.read                   = czl_wdg_read,.write                  = czl_wdg_write,.ioctl                  = czl_wdg_ioctl,
};static void wdg_device_release(struct device *dev)
{pr_info("czl wdg devide release.\n");
}static int mdev_wdg_init(void)
{int ret = 0;pr_info("czl wdg init.\n");memset(&czl_wdg, 0x00, sizeof(czl_wdg));ret = alloc_chrdev_region(&czl_wdg.wdg_devt, 0, MINORMASK + 1, "czl_wdg");if (ret < 0) {pr_err("error: failed to register czl wdg device, err:%d\n", ret);return -1;}cdev_init(&czl_wdg.wdg_cdev, &czl_wdg_fops);cdev_add(&czl_wdg.wdg_cdev, czl_wdg.wdg_devt, MINORMASK + 1);pr_info("major_number:%d\n", MAJOR(czl_wdg.wdg_devt));czl_wdg.wdg_class = class_create(THIS_MODULE, "czl_wdg");if (IS_ERR(czl_wdg.wdg_class)) {pr_err("error: failed to create wdg class.\n");ret = -1;goto failed1;}czl_wdg.dev.class = czl_wdg.wdg_class;czl_wdg.dev.release = wdg_device_release;dev_set_name(&czl_wdg.dev, "%s", "czl_wdg");ret = device_register(&czl_wdg.dev);if (ret) {pr_err("%s line %d, register wdg device failure.\n", __func__, __LINE__);ret = -1;goto  failed2;}ret = mdev_register_device(&czl_wdg.dev, &wdg_mdev_fops);if (ret) {pr_err("%s line %d, register wdg mdev device failure.\n", __func__, __LINE__);ret = -1;goto  failed3;}mutex_init(&wdg_mdev_list_lock);INIT_LIST_HEAD(&wdg_mdev_devices_list);pr_info("czl wdg init success.\n");goto done;
failed3:device_unregister(&czl_wdg.dev);
failed2:class_destroy(czl_wdg.wdg_class);
failed1:cdev_del(&czl_wdg.wdg_cdev);unregister_chrdev_region(czl_wdg.wdg_devt, MINORMASK + 1);
done:return ret;
}static void mdev_wdg_exit(void)
{czl_wdg.dev.bus = NULL;mdev_unregister_device(&czl_wdg.dev);device_unregister(&czl_wdg.dev);cdev_del(&czl_wdg.wdg_cdev);unregister_chrdev_region(czl_wdg.wdg_devt, MINORMASK + 1);class_destroy(czl_wdg.wdg_class);czl_wdg.wdg_class = NULL;pr_info("czl_wdg_unload.\n");return;
}module_init(mdev_wdg_init)
module_exit(mdev_wdg_exit)
MODULE_LICENSE("GPL v2");

virtual machine watchdog pci driver:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/uuid.h>
#include <linux/vfio.h>
#include <linux/iommu.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/mdev.h>
#include <linux/pci.h>
#include <linux/idr.h>

/* Char device major number obtained from register_chrdev(). */
static int devno;
/* Maps a char device minor number to its struct wdg_pci_state. */
static DEFINE_IDR(wdg_minors);
/* Protects wdg_minors. */
static DEFINE_MUTEX(wdg_minors_lock);
/* Upper bound (exclusive) for the minor numbers handed out by the IDR. */
#define WDG_MINORS_COUNT 256

/* Per PCI device state of the guest watchdog driver. */
struct wdg_pci_state {
	struct pci_dev *pdev;   /* PCI device, reference held while bound */
	struct device *dev;     /* char device node created for this device */
	int iobase;             /* start of the BAR0 I/O port range */
	int iolen;              /* length of the BAR0 I/O port range */
	int major;              /* char device major number */
	int minor;              /* char device minor number from wdg_minors */
};

/* Device class under which the char device nodes are created. */
static struct class *wdg_class;
static const struct pci_device_id czl_pci_table[] = {{       PCI_DEVICE(0xbeef, 0x1001),       },{ 0,                                      }
};static int czl_wdg_open(struct inode *inode, struct file *file)
{int rc = 0;int major, minor;major = imajor(inode);minor = iminor(inode);mutex_lock(&wdg_minors_lock);file->private_data = idr_find(&wdg_minors, minor);mutex_unlock(&wdg_minors_lock);if (!file->private_data) {pr_err("%s line %d, cant find wdg structure.\n",__func__, __LINE__);rc = -1;}return rc;
}static int czl_wdg_release(struct inode *inode, struct file *file)
{return 0;
}ssize_t czl_wdg_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
{int i;struct wdg_pci_state *wdgdev = NULL;unsigned char *kbuf = NULL;int actuallen = 0;wdgdev = file->private_data;if (!wdgdev) {pr_err("%s line %d, read failure.\n", __func__, __LINE__);return -1;}if (*ppos > wdgdev->iolen) {pr_err("%s line %d, read pos %lld exceed max io len %d.\n",__func__, __LINE__, *ppos, wdgdev->iolen);return -1;}kbuf = kzalloc(GFP_KERNEL, size);if (kbuf == NULL) {pr_err("%s line %d, alloc kbuf failure.\n",__func__, __LINE__);return -1;}for (i = 0; (i < size) && ((*ppos + i) <  wdgdev->iolen); i++) {kbuf[i] = inb(wdgdev->iobase + *ppos + i);actuallen ++;}copy_to_user(buf, kbuf, actuallen);kfree(kbuf);return actuallen;
}static ssize_t czl_wdg_write(struct file *file, const char __user *buf,size_t count, loff_t *ppos)
{int i;struct wdg_pci_state *wdgdev = NULL;unsigned char *kbuf = NULL;int actuallen = 0;wdgdev = file->private_data;if (!wdgdev) {pr_err("%s line %d, read failure.\n", __func__, __LINE__);return -1;}if (*ppos > wdgdev->iolen) {pr_err("%s line %d, read pos %lld exceed max io len %d.\n",__func__, __LINE__, *ppos, wdgdev->iolen);return -1;}kbuf = kzalloc(GFP_KERNEL, count);if (kbuf == NULL) {pr_err("%s line %d, alloc kbuf failure.\n",__func__, __LINE__);return -1;}copy_from_user(kbuf, buf, count);for (i = 0; (i < count) && ((*ppos + i) <  wdgdev->iolen); i++) {outb((u8)kbuf[i], wdgdev->iobase + *ppos + i);actuallen ++;}kfree(kbuf);return actuallen;
}static const struct file_operations czl_wdg_fops = {.owner          = THIS_MODULE,.open           = czl_wdg_open,.release        = czl_wdg_release,.read           = czl_wdg_read,.write          = czl_wdg_write,
};static char *wdg_devnode(struct device *dev, umode_t *mode)
{if (mode)*mode = 06666;return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
}static int wdg_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{struct wdg_pci_state *wdgdev = NULL;pr_info("%s line %d, wdg pci device & driver binding.\n", __func__, __LINE__);wdgdev = kzalloc(GFP_KERNEL, sizeof(*wdgdev));if (!wdgdev) {pr_err("%s line %d, fail to alloc buffer.\n",__func__, __LINE__);goto err0;}wdgdev->major = devno;wdgdev->pdev = pci_dev_get(pdev);wdgdev->iobase = pci_resource_start(pdev, 0);wdgdev->iolen = pci_resource_len(pdev, 0);mutex_lock(&wdg_minors_lock);wdgdev->minor = idr_alloc(&wdg_minors, wdgdev, 0, WDG_MINORS_COUNT, GFP_KERNEL);mutex_unlock(&wdg_minors_lock);if (wdgdev->minor < 0) {pr_err("%s line %d, get minor failure from idr.\n", __func__, __LINE__);goto err1;}pr_info("%s line %d, major %d, minor %d, iobase 0x%x.\n", __func__, __LINE__,devno, wdgdev->minor, wdgdev->iobase);wdgdev->dev = device_create(wdg_class, NULL, MKDEV(devno, wdgdev->minor),NULL, "czl-wdg-%d", wdgdev->minor);if (!wdgdev->dev || IS_ERR(wdgdev->dev)) {pr_err("%s line %d, create wdg device failure.\n",__func__, __LINE__);goto err2;}pci_set_drvdata(pdev, wdgdev);return 0;
err2:idr_remove(&wdg_minors, wdgdev->minor);
err1:if (wdgdev) {kfree(wdgdev);}
err0:return -1;
}static void wdg_pci_remove(struct pci_dev *pdev)
{struct wdg_pci_state *wdgdev;pr_info("%s line %d, wdg pci device & driver removing.\n", __func__, __LINE__);wdgdev = pci_get_drvdata(pdev);pci_set_drvdata(pdev, NULL);pci_dev_put(pdev);wdgdev->pdev = NULL;device_destroy(wdg_class, MKDEV(devno, wdgdev->minor));idr_remove(&wdg_minors, wdgdev->minor);kfree(wdgdev);return;
}static struct pci_driver czl_wdg_driver = {.name           = "czl-mdev-wdg",.id_table       = czl_pci_table,.probe          = wdg_pci_probe,.remove         = wdg_pci_remove,
};
static int czl_wdg_init(void)
{int ret;wdg_class = class_create(THIS_MODULE, "czl-wdg");if (!wdg_class) {pr_err("%s line %d, create watchdog class failure.\n",__func__, __LINE__);return -1;}wdg_class->devnode = wdg_devnode;devno = register_chrdev(0, "czl-wdg", &czl_wdg_fops);if (devno < 0) {pr_err("%s line %d, register wdg device chrno failure.\n",__func__, __LINE__);class_destroy(wdg_class);return -1;}ret = pci_register_driver(&czl_wdg_driver);return ret;
}static void czl_wdg_exit(void)
{pci_unregister_driver(&czl_wdg_driver);unregister_chrdev(devno, "czl-wdg");class_destroy(wdg_class);idr_destroy(&wdg_minors);return;
}module_init(czl_wdg_init)
module_exit(czl_wdg_exit)
MODULE_LICENSE("GPL v2");

virtual machine user space test case:

#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdarg.h>

/*
 * Print @len bytes of @buf as hex, 16 bytes per row, each row prefixed with
 * its starting offset.
 */
void dump_buf(unsigned char *buf, int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (i % 16 == 0)
			printf("\n0x%04x: ", i);
		printf("0x%02x ", buf[i]);
	}
	printf("\n");
}

/*
 * Endless read/write exerciser for /dev/czl-wdg-0: once per second it reads
 * 32 bytes, dumps them, then writes 32 bytes of 0x5a. The loop only ends
 * when an I/O call fails, so the program always exits with -1.
 */
int main(void)
{
	int wdgfd;
	int status;
	unsigned char buf[32];

	wdgfd = open("/dev/czl-wdg-0", O_RDWR);
	if (wdgfd < 0) {
		printf("%s line %d, open failure.\n", __func__, __LINE__);
		return -1;
	}

	while (1) {
		memset(buf, 0x00, sizeof(buf));
		status = read(wdgfd, buf, sizeof(buf));
		if (status < 0) {
			printf("%s line %d, read failure.\n",
			       __func__, __LINE__);
			break;
		}
		printf("%s line %d, read %d.\n", __func__, __LINE__, status);
		dump_buf(buf, (int)sizeof(buf));

		memset(buf, 0x5a, sizeof(buf));
		/* Best effort rewind; the result is deliberately not checked. */
		(void)lseek(wdgfd, 0, SEEK_SET);
		status = write(wdgfd, buf, sizeof(buf));
		if (status < 0) {
			printf("%s line %d, write failure.\n",
			       __func__, __LINE__);
			break;
		}
		printf("%s line %d, write %d.\n", __func__, __LINE__, status);

		sleep(1);
	}

	close(wdgfd);
	return -1;
}

测试过程:

1.安装WDG MDEV驱动:

sudo insmod czl-mdev-wdg.ko

2.创建mdev设备

创建两个mdev设备

echo "f422fd86-35c0-11ef-8e50-9342c1138a56" > /sys/devices/virtual/czl_wdg/czl_wdg/mdev_supported_types/czl_wdg-1/create
echo "c04de378-35d8-11ef-95c3-339660dfc874" > /sys/devices/virtual/czl_wdg/czl_wdg/mdev_supported_types/czl_wdg-2/create

3.将第二步创建的mdev设备透传给QEMU虚拟机启动:

qemu-system-x86_64 -m 4096 -smp 4 --enable-kvm -drive file=/home/zlcao/Workspace/iso/ps.img -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/f422fd86-35c0-11ef-8e50-9342c1138a56 -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/c04de378-35d8-11ef-95c3-339660dfc874

系统启动后,可以看到虚拟机环境下出现了透传的MDEV PCI设备,设备vendor/device id为0xbeef1001,符合代码设定。

4.虚拟机内安装WDG PCI设备驱动:

上图中可以看到,两个透传的MDEV设备已经和一个名为"serial"的PCI设备驱动绑定,这并不符合预期,需要将默认的"serial"驱动和MDEV设备解绑,在QEMU虚拟机控制台中输入如下命令解绑驱动:

echo -n 0000:00:04.0 > /sys/bus/pci/drivers/serial/unbind
echo -n 0000:00:05.0 > /sys/bus/pci/drivers/serial/unbind

之后就可以安装我们的WDG PCI驱动了:

sudo insmod czl-mdev-drv.ko

安装成功后,虚拟机设备目录下出现了WDG PCI的设备节点:

此时,两个MDEV PCI设备也显示绑定到了正确的驱动:

5.运行测试用例,读写WDG PCI设备的BAR0地址空间:

此时可以看到,虚拟机中对WDG设备BAR0空间的读写调用被“透传”到了HOST机的MDEV PCI设备驱动上,可以基于对BAR0空间的回调实现我们的业务逻辑。


结束

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mzph.cn/pingmian/37653.shtml

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

【网络架构】keepalive

目录 一、keepalive基础 1.1 作用 1.2 原理 1.3 功能 二、keepalive安装 2.1 yum安装 2.2 编译安装 三、配置文件 3.1 keepalived相关文件 3.2 主配置的组成 3.2.1 全局配置 3.2.2 配置虚拟路由器 四、实际操作 4.1 lvskeepalived高可用群集 4.2 keepalivedngi…

Transformer 模型的详细配置---归一化、位置、激活函数和注意力机制

文章目录 归一化方法激活函数位置编码注意力机制归一化方法 大语言模型的预训练过程中经常会出现不稳定的问题。为了应对这一问题,深度学习方法通常会采用特定的归一化策略来加强神经网络训练过程的稳定性。原始的 Transformer 模型主要使用了层归一化方法(Layer Normalizati…

AI视界引擎 | ​基于 YOLOv8 和计算机视觉 CV 的实时识别系统!

本文来源公众号“AI视界引擎”&#xff0c;仅用于学术分享&#xff0c;侵权删&#xff0c;干货满满。 原文链接&#xff1a;​基于 YOLOv8 和计算机视觉 CV 的实时识别系统&#xff01; 技术进步和创新正在尽可能地推进作者的日常生活&#xff0c;但仍有很大一部分社会群体因为…

自研Eclipse插件的生成及安装和使用

说明&#xff1a; 本处是使用个人自研的Eclipse插件为例&#xff0c;创建了一个菜单式的插件组&#xff0c;插件组下&#xff0c;有一个生成右击Jakarta EE服务端点类后&#xff0c;生成端点对应的Restful客户端。有什么问题&#xff0c;欢迎大家交流&#xff01;&#xff01;…

cython 笔记

数据类型 # bool 类型 // bool_type_ptactice.pyx cdef bint a 123 # 非0 为 真 &#xff0c; 0 为假 cdef bint b -123 cdef bint c 0 py_a a # cdef 定义的内容没法直接在python中直接引用 py_b b py_c c// main.py import pyximport pyximport.install(language_le…

深度解析服务发布策略之A/B测试

A/B测试&#xff0c;作为一种科学决策方法&#xff0c;被广泛应用于产品迭代、营销策略优化、用户体验改进等多个领域&#xff0c;其核心在于通过对比实验&#xff0c;定量分析不同方案的效果差异。这种测试方法通常是将用户随机分配到两个或多个不同的版本中&#xff0c;然后收…

教您设置打开IDM下载浮动条的快捷键 全网最强下载神器idm怎么使用教程 idm浮动条不显示怎么办

很多人都知道Internet Download Manager(以下简称IDM)是一款非常优秀的下载提速软件。它功能强大&#xff0c;几乎能下载网页中的所有数据&#xff08;包括视频、音频、图片等&#xff09;&#xff0c;且适用于现在市面上几乎所有的浏览器&#xff0c;非常受大家欢迎。 在使用I…

面向对象和面向过程编程的区别

引言 小伙伴们&#xff0c;当你们看到这章的时候&#xff0c;显然你们已经跨过了来自指针给你们带来的麻烦&#xff0c;唔~真棒呢&#xff0c;但是我们只学会一些基础的C语法并不能帮我们解决问题&#xff0c;甚至是稍微难一些的题目我们都没办法解决&#xff0c;那怎么办呢&am…

多机调度问题

#include<iostream> #include<string> using namespace std; struct work {int time;int number; }; int setwork0(int m,int n,int a[],struct work w[]) {int maxtime0;for(int i1; i<m; i){cout<<i<<"号设备处理作业"<<w[i].num…

python系列30:各种爬虫技术总结

1. 使用requests获取网页内容 以巴鲁夫产品为例&#xff0c;可以用get请求获取内容&#xff1a; https://www.balluff.com.cn/zh-cn/products/BES02YF 对应的网页为&#xff1a; 使用简单方法进行解析即可 import requests r BES02YF res requests.get("https://www.…

YOLOv8改进 | 卷积模块 | 分布移位卷积DSConv替换Conv

秋招面试专栏推荐 &#xff1a;深度学习算法工程师面试问题总结【百面算法工程师】——点击即可跳转 &#x1f4a1;&#x1f4a1;&#x1f4a1;本专栏所有程序均经过测试&#xff0c;可成功执行&#x1f4a1;&#x1f4a1;&#x1f4a1; 专栏目录&#xff1a;《YOLOv8改进有效…

spring mvc实现一个自定义Converter转换器

介绍 自定义转换器输入Spring MVC框架范畴&#xff0c;总体上输入Spring生态的一个特性&#xff0c;对Web开发起作用。 使用场景 在Spring Boot应用中&#xff0c;自定义转换器主要用于处理HTTP请求参数到Java对象的自动转换&#xff0c;或者Java对象到HTTP响应的序列化过程…

使用Apache Kafka 构建实时数据处理应用

简介 Apache Kafka的基本概念 Apache Kafka是一种高吞吐量的分布式发布订阅消息系统,它可以处理消费者和生产者的所有实时消息。以下是一些Apache Kafka的核心概念: Producer:生产者,消息和数据的发布者。生产者负责将数据发送到Kafka集群。 Consumer:消费者,消息和数…

2024百度之星第一场-110串

补题链接&#xff1a; 码蹄集 三个状态转移的计数dp 先确定状态 n个数至多修改k次&#xff0c;保证不出现字串“110” 常规想法先把状态确定为dp[n][k][0/1]&#xff0c;前n个数&#xff0c;修改k次后&#xff0c;末尾数为0/1&#xff0c;不能转移再换思路。 初始状态设定如…

使用ECharts创建动态数据可视化图表

使用ECharts创建动态数据可视化图表 大家好&#xff0c;我是免费搭建查券返利机器人省钱赚佣金就用微赚淘客系统3.0的小编&#xff0c;也是冬天不穿秋裤&#xff0c;天冷也要风度的程序猿&#xff01; 在现代Web应用开发中&#xff0c;数据可视化是至关重要的一环。ECharts作…

左耳听风_100_99_高效学习如何学习和阅读代码

你好&#xff0c;我是陈浩网名&#xff0c;做我个house.这节课呢我想来谈一谈如何学习和阅读代码。 杰夫阿特伍德啊说过这么一句话&#xff0c;code tell you how comments tell you why.那我把它扩展一下呢&#xff0c;就是代码会告诉你what how和details.而文档和书呢会告诉…

rk3568 rockit编译测试

前言 环境介绍&#xff1a; 1.编译环境 Ubuntu 20.04.6 LTS 2.SDK版本 rk3568_linux_5.10 3.单板 迅为itop-3568开发板 一、编译rockit组件包 rockit组件包在4.10版本需要手动编译&#xff0c;奈何我的版本怎么都编译不了&#xff0c;后来改用5.10版本才编译通过。 4.1…

存储请求地址但是使用时请求的是端口

baseURL默认全局加载一次&#xff0c;后续直接读取缓存 解决方案&#xff1a;

类和对象(封装、继承、多态、友元)

c面相对象的三大特性为&#xff1a;封装、继承、多态 c 认为万事万物都皆为对象&#xff0c;对象上有其属性和行为 一、类和对象&#xff08;封装&#xff09; &#xff08;一&#xff09;封装的意义 封装是c面相对象的三大特性之一 封装的意义&#xff1a; 将属性和行为…

实现List接口的ArrayList和LinkedList

package study;import java.util.*;public class day01_list {public static void main(String[] args) {// <Integer> 这个尖括号表示的是 Java 的泛型&#xff08;Generics&#xff09;// 泛型是 Java 5 引入的一项特性&#xff0c;它允许你在 类、接口和方法 中使用类…