From patchwork Wed May 31 05:37:39 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Li, Miao" X-Patchwork-Id: 127739 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 6A3BA42BEB; Wed, 31 May 2023 07:38:02 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 00DC942B8B; Wed, 31 May 2023 07:38:00 +0200 (CEST) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by mails.dpdk.org (Postfix) with ESMTP id 48B33427E9 for ; Wed, 31 May 2023 07:37:57 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1685511477; x=1717047477; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=b+hf+ebLi3z9+3jbj2JZqCysOXVE1xIzJ23YHdSCfN4=; b=ILVZirklyWFp5+ICDmf8ItT7cWu7nvMdFG3oP0/se/ZquDckUxKmZR90 OUB/y4hgXKYtD6w0LV9FF7jRWuZqCO9orOzIqw/TG0ZtV5G3A5x+ppu/h wSpDKna9zIlSHCTFtOSfH76H607jl+Yj8MQdqugfSC4cs+VQ8yGgn4eha w41pMTTMauQjAansLftkHhh2sREnal7dhe0Jz9oX2tEm6ZdYrFIxfYniJ URGLQaDe2aZmFTkkiZlSfgporgz9Q/KA3TUvNoHhQC678q8yFZJdlmeaJ mY5SP1kgMirY8nhDSejZ+Hnpgf6dP9lJDumPl5SYkVJVtol3JzAR8heH0 A==; X-IronPort-AV: E=McAfee;i="6600,9927,10726"; a="335489273" X-IronPort-AV: E=Sophos;i="6.00,205,1681196400"; d="scan'208";a="335489273" Received: from orsmga006.jf.intel.com ([10.7.209.51]) by fmsmga107.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 May 2023 22:37:56 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10726"; a="684273518" X-IronPort-AV: E=Sophos;i="6.00,205,1681196400"; d="scan'208";a="684273518" Received: from dpdk-limiao-icelake.sh.intel.com ([10.67.111.26]) by orsmga006.jf.intel.com with ESMTP; 30 May 2023 22:37:54 -0700 From: Miao 
Li To: dev@dpdk.org Cc: skori@marvell.com, thomas@monjalon.net, david.marchand@redhat.com, ferruh.yigit@amd.com, chenbo.xia@intel.com, yahui.cao@intel.com Subject: [PATCH v4 1/4] bus/pci: introduce an internal representation of PCI device Date: Wed, 31 May 2023 05:37:39 +0000 Message-Id: <20230531053743.129442-2-miao.li@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20230531053743.129442-1-miao.li@intel.com> References: <20230525163116.682000-1-miao.li@intel.com> <20230531053743.129442-1-miao.li@intel.com> MIME-Version: 1.0 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org From: Chenbo Xia This patch introduces an internal representation of the PCI device which will be used to store the internal information that doesn't have to be exposed to drivers, e.g., the VFIO region sizes/offsets. In this patch, the internal structure is simply a wrapper of the rte_pci_device structure. More fields will be added. 
Signed-off-by: Chenbo Xia Acked-by: Sunil Kumar Kori Acked-by: Yahui Cao --- drivers/bus/pci/bsd/pci.c | 13 ++++++++----- drivers/bus/pci/linux/pci.c | 28 ++++++++++++++++------------ drivers/bus/pci/pci_common.c | 12 ++++++------ drivers/bus/pci/private.h | 14 +++++++++++++- drivers/bus/pci/windows/pci.c | 14 +++++++++----- 5 files changed, 52 insertions(+), 29 deletions(-) diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c index 7459d15c7e..a747eca58c 100644 --- a/drivers/bus/pci/bsd/pci.c +++ b/drivers/bus/pci/bsd/pci.c @@ -208,16 +208,19 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, static int pci_scan_one(int dev_pci_fd, struct pci_conf *conf) { + struct rte_pci_device_internal *pdev; struct rte_pci_device *dev; struct pci_bar_io bar; unsigned i, max; - dev = malloc(sizeof(*dev)); - if (dev == NULL) { + pdev = malloc(sizeof(*pdev)); + if (pdev == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for internal pci device\n"); return -1; } - memset(dev, 0, sizeof(*dev)); + memset(pdev, 0, sizeof(*pdev)); + dev = &pdev->device; dev->device.bus = &rte_pci_bus.bus; dev->addr.domain = conf->pc_sel.pc_domain; @@ -303,7 +306,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) memmove(dev2->mem_resource, dev->mem_resource, sizeof(dev->mem_resource)); - pci_free(dev); + pci_free(pdev); } return 0; } @@ -313,7 +316,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) return 0; skipdev: - pci_free(dev); + pci_free(pdev); return 0; } diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index ebd1395502..4c2c5ba382 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -211,22 +211,26 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) { char filename[PATH_MAX]; unsigned long tmp; + struct rte_pci_device_internal *pdev; struct rte_pci_device *dev; char driver[PATH_MAX]; int ret; - dev = malloc(sizeof(*dev)); - if (dev == NULL) + pdev = malloc(sizeof(*pdev)); + if 
(pdev == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for internal pci device\n"); return -1; + } - memset(dev, 0, sizeof(*dev)); + memset(pdev, 0, sizeof(*pdev)); + dev = &pdev->device; dev->device.bus = &rte_pci_bus.bus; dev->addr = *addr; /* get vendor id */ snprintf(filename, sizeof(filename), "%s/vendor", dirname); if (eal_parse_sysfs_value(filename, &tmp) < 0) { - pci_free(dev); + pci_free(pdev); return -1; } dev->id.vendor_id = (uint16_t)tmp; @@ -234,7 +238,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) /* get device id */ snprintf(filename, sizeof(filename), "%s/device", dirname); if (eal_parse_sysfs_value(filename, &tmp) < 0) { - pci_free(dev); + pci_free(pdev); return -1; } dev->id.device_id = (uint16_t)tmp; @@ -243,7 +247,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) snprintf(filename, sizeof(filename), "%s/subsystem_vendor", dirname); if (eal_parse_sysfs_value(filename, &tmp) < 0) { - pci_free(dev); + pci_free(pdev); return -1; } dev->id.subsystem_vendor_id = (uint16_t)tmp; @@ -252,7 +256,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) snprintf(filename, sizeof(filename), "%s/subsystem_device", dirname); if (eal_parse_sysfs_value(filename, &tmp) < 0) { - pci_free(dev); + pci_free(pdev); return -1; } dev->id.subsystem_device_id = (uint16_t)tmp; @@ -261,7 +265,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) snprintf(filename, sizeof(filename), "%s/class", dirname); if (eal_parse_sysfs_value(filename, &tmp) < 0) { - pci_free(dev); + pci_free(pdev); return -1; } /* the least 24 bits are valid: class, subclass, program interface */ @@ -297,7 +301,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) snprintf(filename, sizeof(filename), "%s/resource", dirname); if (pci_parse_sysfs_resource(filename, dev) < 0) { RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__); - pci_free(dev); + pci_free(pdev); return -1; } @@ -306,7 +310,7 
@@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) ret = pci_get_kernel_driver_by_path(filename, driver, sizeof(driver)); if (ret < 0) { RTE_LOG(ERR, EAL, "Fail to get kernel driver\n"); - pci_free(dev); + pci_free(pdev); return -1; } @@ -320,7 +324,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) else dev->kdrv = RTE_PCI_KDRV_UNKNOWN; } else { - pci_free(dev); + pci_free(pdev); return 0; } /* device is valid, add in list (sorted) */ @@ -375,7 +379,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) pci_common_set(dev2); } } - pci_free(dev); + pci_free(pdev); } return 0; } diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c index e32a9d517a..52404ab0fe 100644 --- a/drivers/bus/pci/pci_common.c +++ b/drivers/bus/pci/pci_common.c @@ -121,12 +121,12 @@ pci_common_set(struct rte_pci_device *dev) } void -pci_free(struct rte_pci_device *dev) +pci_free(struct rte_pci_device_internal *pdev) { - if (dev == NULL) + if (pdev == NULL) return; - free(dev->bus_info); - free(dev); + free(pdev->device.bus_info); + free(pdev); } /* map a particular resource from a file */ @@ -465,7 +465,7 @@ pci_cleanup(void) rte_intr_instance_free(dev->vfio_req_intr_handle); dev->vfio_req_intr_handle = NULL; - pci_free(dev); + pci_free(RTE_PCI_DEVICE_INTERNAL(dev)); } return error; @@ -681,7 +681,7 @@ pci_unplug(struct rte_device *dev) if (ret == 0) { rte_pci_remove_device(pdev); rte_devargs_remove(dev->devargs); - pci_free(pdev); + pci_free(RTE_PCI_DEVICE_INTERNAL(pdev)); } return ret; } diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h index c8161a1074..b564646e03 100644 --- a/drivers/bus/pci/private.h +++ b/drivers/bus/pci/private.h @@ -13,6 +13,14 @@ #include #include +/* + * Convert struct rte_pci_device to struct rte_pci_device_internal + */ +#define RTE_PCI_DEVICE_INTERNAL(ptr) \ + container_of(ptr, struct rte_pci_device_internal, device) +#define RTE_PCI_DEVICE_INTERNAL_CONST(ptr) \ + 
container_of(ptr, const struct rte_pci_device_internal, device) + /** * Structure describing the PCI bus */ @@ -34,6 +42,10 @@ extern struct rte_pci_bus rte_pci_bus; struct rte_pci_driver; struct rte_pci_device; +struct rte_pci_device_internal { + struct rte_pci_device device; +}; + /** * Scan the content of the PCI bus, and the devices in the devices * list @@ -53,7 +65,7 @@ pci_common_set(struct rte_pci_device *dev); * Free a PCI device. */ void -pci_free(struct rte_pci_device *dev); +pci_free(struct rte_pci_device_internal *pdev); /** * Validate whether a device with given PCI address should be ignored or not. diff --git a/drivers/bus/pci/windows/pci.c b/drivers/bus/pci/windows/pci.c index 5cf05ce1a0..df5221d913 100644 --- a/drivers/bus/pci/windows/pci.c +++ b/drivers/bus/pci/windows/pci.c @@ -336,6 +336,7 @@ set_kernel_driver_type(PSP_DEVINFO_DATA device_info_data, static int pci_scan_one(HDEVINFO dev_info, PSP_DEVINFO_DATA device_info_data) { + struct rte_pci_device_internal *pdev = NULL; struct rte_pci_device *dev = NULL; int ret = -1; char pci_device_info[REGSTR_VAL_MAX_HCID_LEN]; @@ -370,11 +371,14 @@ pci_scan_one(HDEVINFO dev_info, PSP_DEVINFO_DATA device_info_data) goto end; } - dev = malloc(sizeof(*dev)); - if (dev == NULL) + pdev = malloc(sizeof(*pdev)); + if (pdev == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for internal pci device\n"); goto end; + } - memset(dev, 0, sizeof(*dev)); + memset(pdev, 0, sizeof(*pdev)); + dev = &pdev->device; dev->device.bus = &rte_pci_bus.bus; dev->addr = addr; @@ -409,7 +413,7 @@ pci_scan_one(HDEVINFO dev_info, PSP_DEVINFO_DATA device_info_data) dev2->max_vfs = dev->max_vfs; memmove(dev2->mem_resource, dev->mem_resource, sizeof(dev->mem_resource)); - pci_free(dev); + pci_free(pdev); } return 0; } @@ -418,7 +422,7 @@ pci_scan_one(HDEVINFO dev_info, PSP_DEVINFO_DATA device_info_data) return 0; end: - pci_free(dev); + pci_free(pdev); return ret; } From patchwork Wed May 31 05:37:40 2023 Content-Type: text/plain; 
charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Li, Miao" X-Patchwork-Id: 127740 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id AF11A42BEB; Wed, 31 May 2023 07:38:08 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 115F242D0C; Wed, 31 May 2023 07:38:04 +0200 (CEST) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by mails.dpdk.org (Postfix) with ESMTP id BCBCB42C76 for ; Wed, 31 May 2023 07:38:00 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1685511481; x=1717047481; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=soP2lvVEpw7JwuhdzdmSlMafIu7EmDfVhdSaXQIaRNE=; b=irupvEebRzIJ0Xv9cbU4Wt1DevbnIwI/KMS5zh8Ha6y3dwX+grYuu3+e SIzK+naZtGF6Hzec3B30nFskUDV7v6XLmo+Gdp+6uzr/twU5qALQjDWHE 6f4dDOBlJqLQ5A4130m3rIe26VHDhWfX9ttLgnw2deWebubIpMBYlNqjk ajWSBl3tHskMYqx2gqrWNQ8XRhz6xOWsSaQFNqPTfEuoOBPIEO9RH4LJI FHakXngNJ7F8+s9AMupoSyb6kdNKQ9OnkLQvzwJpMTGrXtSNLtb+exBiE NaVOxP93IsihF6cclHItd77rWHjBSuIHgF+j4leYRKsu60B2NNdTbK3fS w==; X-IronPort-AV: E=McAfee;i="6600,9927,10726"; a="335489286" X-IronPort-AV: E=Sophos;i="6.00,205,1681196400"; d="scan'208";a="335489286" Received: from orsmga006.jf.intel.com ([10.7.209.51]) by fmsmga107.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 May 2023 22:38:00 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10726"; a="684273524" X-IronPort-AV: E=Sophos;i="6.00,205,1681196400"; d="scan'208";a="684273524" Received: from dpdk-limiao-icelake.sh.intel.com ([10.67.111.26]) by orsmga006.jf.intel.com with ESMTP; 30 May 2023 22:37:57 -0700 From: Miao Li To: dev@dpdk.org Cc: skori@marvell.com, thomas@monjalon.net, 
david.marchand@redhat.com, ferruh.yigit@amd.com, chenbo.xia@intel.com, yahui.cao@intel.com, Anatoly Burakov Subject: [PATCH v4 2/4] bus/pci: avoid depending on private value in kernel source Date: Wed, 31 May 2023 05:37:40 +0000 Message-Id: <20230531053743.129442-3-miao.li@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20230531053743.129442-1-miao.li@intel.com> References: <20230525163116.682000-1-miao.li@intel.com> <20230531053743.129442-1-miao.li@intel.com> MIME-Version: 1.0 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org From: Chenbo Xia The value 40 used in VFIO_GET_REGION_ADDR() is a private value (VFIO_PCI_OFFSET_SHIFT) defined in Linux kernel source [1]. It is not part of VFIO API, and we should not depend on it. [1] https://github.com/torvalds/linux/blob/v6.2/include/linux/vfio_pci_core.h Signed-off-by: Chenbo Xia Acked-by: Sunil Kumar Kori Acked-by: Yahui Cao --- drivers/bus/pci/linux/pci.c | 4 +- drivers/bus/pci/linux/pci_init.h | 4 +- drivers/bus/pci/linux/pci_vfio.c | 197 +++++++++++++++++++++++-------- drivers/bus/pci/private.h | 9 ++ lib/eal/include/rte_vfio.h | 1 - 5 files changed, 159 insertions(+), 56 deletions(-) diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index 4c2c5ba382..04e21ae20f 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -645,7 +645,7 @@ int rte_pci_read_config(const struct rte_pci_device *device, return pci_uio_read_config(intr_handle, buf, len, offset); #ifdef VFIO_PRESENT case RTE_PCI_KDRV_VFIO: - return pci_vfio_read_config(intr_handle, buf, len, offset); + return pci_vfio_read_config(device, buf, len, offset); #endif default: rte_pci_device_name(&device->addr, devname, @@ -669,7 +669,7 @@ int rte_pci_write_config(const struct rte_pci_device *device, return pci_uio_write_config(intr_handle, buf, len, 
offset); #ifdef VFIO_PRESENT case RTE_PCI_KDRV_VFIO: - return pci_vfio_write_config(intr_handle, buf, len, offset); + return pci_vfio_write_config(device, buf, len, offset); #endif default: rte_pci_device_name(&device->addr, devname, diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h index dcea726186..9f6659ba6e 100644 --- a/drivers/bus/pci/linux/pci_init.h +++ b/drivers/bus/pci/linux/pci_init.h @@ -66,9 +66,9 @@ int pci_uio_ioport_unmap(struct rte_pci_ioport *p); #endif /* access config space */ -int pci_vfio_read_config(const struct rte_intr_handle *intr_handle, +int pci_vfio_read_config(const struct rte_pci_device *dev, void *buf, size_t len, off_t offs); -int pci_vfio_write_config(const struct rte_intr_handle *intr_handle, +int pci_vfio_write_config(const struct rte_pci_device *dev, const void *buf, size_t len, off_t offs); int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index fab3483d9f..5aef84b7d0 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -43,45 +43,82 @@ static struct rte_tailq_elem rte_vfio_tailq = { }; EAL_REGISTER_TAILQ(rte_vfio_tailq) +static int +pci_vfio_get_region(const struct rte_pci_device *dev, int index, + uint64_t *size, uint64_t *offset) +{ + const struct rte_pci_device_internal *pdev = + RTE_PCI_DEVICE_INTERNAL_CONST(dev); + + if (index >= VFIO_PCI_NUM_REGIONS || index >= RTE_MAX_PCI_REGIONS) + return -1; + + if (pdev->region[index].size == 0 && pdev->region[index].offset == 0) + return -1; + + *size = pdev->region[index].size; + *offset = pdev->region[index].offset; + + return 0; +} + int -pci_vfio_read_config(const struct rte_intr_handle *intr_handle, +pci_vfio_read_config(const struct rte_pci_device *dev, void *buf, size_t len, off_t offs) { - int vfio_dev_fd = rte_intr_dev_fd_get(intr_handle); + uint64_t size, offset; + int fd; - if (vfio_dev_fd < 0) + fd = 
rte_intr_dev_fd_get(dev->intr_handle); + + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) + return -1; + + if ((uint64_t)len + offs > size) return -1; - return pread64(vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); + return pread64(fd, buf, len, offset + offs); } int -pci_vfio_write_config(const struct rte_intr_handle *intr_handle, +pci_vfio_write_config(const struct rte_pci_device *dev, const void *buf, size_t len, off_t offs) { - int vfio_dev_fd = rte_intr_dev_fd_get(intr_handle); + uint64_t size, offset; + int fd; - if (vfio_dev_fd < 0) + fd = rte_intr_dev_fd_get(dev->intr_handle); + + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) return -1; - return pwrite64(vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); + if ((uint64_t)len + offs > size) + return -1; + + return pwrite64(fd, buf, len, offset + offs); } /* get PCI BAR number where MSI-X interrupts are */ static int -pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) +pci_vfio_get_msix_bar(const struct rte_pci_device *dev, int fd, + struct pci_msix_table *msix_table) { int ret; uint32_t reg; uint16_t flags; uint8_t cap_id, cap_offset; + uint64_t size, offset; + + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } /* read PCI capability pointer from config space */ - ret = pread64(fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_CAPABILITY_LIST); + ret = pread64(fd, &reg, sizeof(reg), offset + PCI_CAPABILITY_LIST); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI config space!\n"); @@ -94,9 +131,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) while (cap_offset) { /* read PCI capability ID */ - ret = pread64(fd, &reg, sizeof(reg), - 
VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); + ret = pread64(fd, &reg, sizeof(reg), offset + cap_offset); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI config space!\n"); @@ -108,9 +143,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) /* if we haven't reached MSI-X, check next capability */ if (cap_id != PCI_CAP_ID_MSIX) { - ret = pread64(fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); + ret = pread64(fd, &reg, sizeof(reg), offset + cap_offset); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI config space!\n"); @@ -125,18 +158,14 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) /* else, read table offset */ else { /* table offset resides in the next 4 bytes */ - ret = pread64(fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 4); + ret = pread64(fd, &reg, sizeof(reg), offset + cap_offset + 4); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config space!\n"); return -1; } - ret = pread64(fd, &flags, sizeof(flags), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 2); + ret = pread64(fd, &flags, sizeof(flags), offset + cap_offset + 2); if (ret != sizeof(flags)) { RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config space!\n"); @@ -156,14 +185,19 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) /* enable PCI bus memory space */ static int -pci_vfio_enable_bus_memory(int dev_fd) +pci_vfio_enable_bus_memory(struct rte_pci_device *dev, int dev_fd) { + uint64_t size, offset; uint16_t cmd; int ret; - ret = pread64(dev_fd, &cmd, sizeof(cmd), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } + + ret = 
pread64(dev_fd, &cmd, sizeof(cmd), offset + PCI_COMMAND); if (ret != sizeof(cmd)) { RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); @@ -174,9 +208,7 @@ pci_vfio_enable_bus_memory(int dev_fd) return 0; cmd |= PCI_COMMAND_MEMORY; - ret = pwrite64(dev_fd, &cmd, sizeof(cmd), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); + ret = pwrite64(dev_fd, &cmd, sizeof(cmd), offset + PCI_COMMAND); if (ret != sizeof(cmd)) { RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); @@ -188,14 +220,19 @@ pci_vfio_enable_bus_memory(int dev_fd) /* set PCI bus mastering */ static int -pci_vfio_set_bus_master(int dev_fd, bool op) +pci_vfio_set_bus_master(const struct rte_pci_device *dev, int dev_fd, bool op) { + uint64_t size, offset; uint16_t reg; int ret; - ret = pread64(dev_fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } + + ret = pread64(dev_fd, &reg, sizeof(reg), offset + PCI_COMMAND); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); return -1; @@ -207,9 +244,7 @@ pci_vfio_set_bus_master(int dev_fd, bool op) else reg &= ~(PCI_COMMAND_MASTER); - ret = pwrite64(dev_fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); + ret = pwrite64(dev_fd, &reg, sizeof(reg), offset + PCI_COMMAND); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); @@ -458,14 +493,21 @@ pci_vfio_disable_notifier(struct rte_pci_device *dev) #endif static int -pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index) +pci_vfio_is_ioport_bar(const struct rte_pci_device *dev, int vfio_dev_fd, + int bar_index) { + uint64_t size, offset; uint32_t ioport_bar; int ret; + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + 
RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } + ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) - + PCI_BASE_ADDRESS_0 + bar_index*4); + offset + PCI_BASE_ADDRESS_0 + bar_index * 4); if (ret != sizeof(ioport_bar)) { RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n", PCI_BASE_ADDRESS_0 + bar_index*4); @@ -483,13 +525,13 @@ pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd) return -1; } - if (pci_vfio_enable_bus_memory(vfio_dev_fd)) { + if (pci_vfio_enable_bus_memory(dev, vfio_dev_fd)) { RTE_LOG(ERR, EAL, "Cannot enable bus memory!\n"); return -1; } /* set bus mastering for the device */ - if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { + if (pci_vfio_set_bus_master(dev, vfio_dev_fd, true)) { RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); return -1; } @@ -704,7 +746,7 @@ pci_vfio_info_cap(struct vfio_region_info *info, int cap) static int pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region) { - struct vfio_region_info *info; + struct vfio_region_info *info = NULL; int ret; ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region); @@ -719,11 +761,40 @@ pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region) return ret; } +static int +pci_vfio_fill_regions(struct rte_pci_device *dev, int vfio_dev_fd, + struct vfio_device_info *device_info) +{ + struct rte_pci_device_internal *pdev = RTE_PCI_DEVICE_INTERNAL(dev); + struct vfio_region_info *reg = NULL; + int nb_maps, i, ret; + + nb_maps = RTE_MIN((int)device_info->num_regions, + VFIO_PCI_CONFIG_REGION_INDEX + 1); + + for (i = 0; i < nb_maps; i++) { + ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i); + if (ret < 0) { + RTE_LOG(DEBUG, EAL, "%s cannot get device region info error %i (%s)\n", + dev->name, errno, strerror(errno)); + return -1; + } + + pdev->region[i].size = reg->size; + pdev->region[i].offset = reg->offset; + + free(reg); + } + + return 0; +} static int 
pci_vfio_map_resource_primary(struct rte_pci_device *dev) { + struct rte_pci_device_internal *pdev = RTE_PCI_DEVICE_INTERNAL(dev); struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + struct vfio_region_info *reg = NULL; char pci_addr[PATH_MAX] = {0}; int vfio_dev_fd; struct rte_pci_addr *loc = &dev->addr; @@ -767,11 +838,22 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) /* map BARs */ maps = vfio_res->maps; + ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, + VFIO_PCI_CONFIG_REGION_INDEX); + if (ret < 0) { + RTE_LOG(ERR, EAL, "%s cannot get device region info error %i (%s)\n", + dev->name, errno, strerror(errno)); + goto err_vfio_res; + } + pdev->region[VFIO_PCI_CONFIG_REGION_INDEX].size = reg->size; + pdev->region[VFIO_PCI_CONFIG_REGION_INDEX].offset = reg->offset; + free(reg); + vfio_res->msix_table.bar_index = -1; /* get MSI-X BAR, if any (we have to know where it is because we can't * easily mmap it when using VFIO) */ - ret = pci_vfio_get_msix_bar(vfio_dev_fd, &vfio_res->msix_table); + ret = pci_vfio_get_msix_bar(dev, vfio_dev_fd, &vfio_res->msix_table); if (ret < 0) { RTE_LOG(ERR, EAL, "%s cannot get MSI-X BAR number!\n", pci_addr); @@ -792,7 +874,6 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) } for (i = 0; i < vfio_res->nb_maps; i++) { - struct vfio_region_info *reg = NULL; void *bar_addr; ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i); @@ -803,8 +884,11 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) goto err_vfio_res; } + pdev->region[i].size = reg->size; + pdev->region[i].offset = reg->offset; + /* chk for io port region */ - ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i); + ret = pci_vfio_is_ioport_bar(dev, vfio_dev_fd, i); if (ret < 0) { free(reg); goto err_vfio_res; @@ -916,6 +1000,10 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) if (ret) return ret; + ret = pci_vfio_fill_regions(dev, vfio_dev_fd, &device_info); + if (ret) + return ret; + /* map BARs */ maps = 
vfio_res->maps; @@ -1031,7 +1119,7 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) if (vfio_dev_fd < 0) return -1; - if (pci_vfio_set_bus_master(vfio_dev_fd, false)) { + if (pci_vfio_set_bus_master(dev, vfio_dev_fd, false)) { RTE_LOG(ERR, EAL, "%s cannot unset bus mastering for PCI device!\n", pci_addr); return -1; @@ -1111,14 +1199,21 @@ int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p) { + uint64_t size, offset; + if (bar < VFIO_PCI_BAR0_REGION_INDEX || bar > VFIO_PCI_BAR5_REGION_INDEX) { RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar); return -1; } + if (pci_vfio_get_region(dev, bar, &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of region %d.\n", bar); + return -1; + } + p->dev = dev; - p->base = VFIO_GET_REGION_ADDR(bar); + p->base = offset; return 0; } diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h index b564646e03..2d6991ccb7 100644 --- a/drivers/bus/pci/private.h +++ b/drivers/bus/pci/private.h @@ -13,6 +13,8 @@ #include #include +#define RTE_MAX_PCI_REGIONS 9 + /* * Convert struct rte_pci_device to struct rte_pci_device_internal */ @@ -42,8 +44,15 @@ extern struct rte_pci_bus rte_pci_bus; struct rte_pci_driver; struct rte_pci_device; +struct rte_pci_region { + uint64_t size; + uint64_t offset; +}; + struct rte_pci_device_internal { struct rte_pci_device device; + /* PCI regions provided by e.g. VFIO. 
*/ + struct rte_pci_region region[RTE_MAX_PCI_REGIONS]; }; /** diff --git a/lib/eal/include/rte_vfio.h b/lib/eal/include/rte_vfio.h index 7bdb8932b2..3487c4f2a2 100644 --- a/lib/eal/include/rte_vfio.h +++ b/lib/eal/include/rte_vfio.h @@ -38,7 +38,6 @@ extern "C" { #define VFIO_CONTAINER_PATH "/dev/vfio/vfio" #define VFIO_GROUP_FMT "/dev/vfio/%u" #define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" -#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) #define VFIO_GET_REGION_IDX(x) (x >> 40) #define VFIO_NOIOMMU_MODE \ "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode" From patchwork Wed May 31 05:37:41 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Li, Miao" X-Patchwork-Id: 127741 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 3041E42BEB; Wed, 31 May 2023 07:38:18 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id B266442D17; Wed, 31 May 2023 07:38:08 +0200 (CEST) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by mails.dpdk.org (Postfix) with ESMTP id 3092340A87 for ; Wed, 31 May 2023 07:38:04 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1685511484; x=1717047484; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=HPST99BHqbXfp59PqBYl4GiL1jA6VL54BUQL7V0cVjg=; b=PT6rq30yjuhEUSIj/KnxGfkYhqkvdjpw1cuV1X5cTPpcxbOJSLde58iK fTB6yPyalrdukv2JXw/tfXDU9ZwAAS4DDLDiB7jYg5V0lfYAQ0BwoHfa9 +bvNmqdX7A6zQIIXJRPTig52z1Y+Be7CtIoik4N82TgM5cndeM3i+oJ0Q e2/B/MvdrE4Rx4k9QwldKAMcvJVVINZmECk1AFn7fiInwkZcCyzbtzVBJ JK1fmI7zEiiE7x47FCQg8d3mgM3lcaXjuK4Io6iMBSi1Wn4/ZKbOlmp5B 
FYAf6eh3h0TT+ZnCHty5/2ar6F6LtjvB2FExAlLwLW2t0qqgBWykvIFNt A==; X-IronPort-AV: E=McAfee;i="6600,9927,10726"; a="335489295" X-IronPort-AV: E=Sophos;i="6.00,205,1681196400"; d="scan'208";a="335489295" Received: from orsmga006.jf.intel.com ([10.7.209.51]) by fmsmga107.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 May 2023 22:38:03 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10726"; a="684273531" X-IronPort-AV: E=Sophos;i="6.00,205,1681196400"; d="scan'208";a="684273531" Received: from dpdk-limiao-icelake.sh.intel.com ([10.67.111.26]) by orsmga006.jf.intel.com with ESMTP; 30 May 2023 22:38:01 -0700 From: Miao Li To: dev@dpdk.org Cc: skori@marvell.com, thomas@monjalon.net, david.marchand@redhat.com, ferruh.yigit@amd.com, chenbo.xia@intel.com, yahui.cao@intel.com, Anatoly Burakov Subject: [PATCH v4 3/4] bus/pci: introduce helper for MMIO read and write Date: Wed, 31 May 2023 05:37:41 +0000 Message-Id: <20230531053743.129442-4-miao.li@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20230531053743.129442-1-miao.li@intel.com> References: <20230525163116.682000-1-miao.li@intel.com> <20230531053743.129442-1-miao.li@intel.com> MIME-Version: 1.0 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org From: Chenbo Xia The MMIO regions may not be mmap-able for VFIO-PCI devices. In this case, the driver should explicitly do read and write to access these regions. 
Signed-off-by: Chenbo Xia Acked-by: Sunil Kumar Kori Acked-by: Yahui Cao --- doc/guides/rel_notes/release_23_07.rst | 5 +++ drivers/bus/pci/bsd/pci.c | 22 ++++++++++++ drivers/bus/pci/linux/pci.c | 46 ++++++++++++++++++++++++ drivers/bus/pci/linux/pci_init.h | 10 ++++++ drivers/bus/pci/linux/pci_uio.c | 22 ++++++++++++ drivers/bus/pci/linux/pci_vfio.c | 36 +++++++++++++++++++ drivers/bus/pci/rte_bus_pci.h | 48 ++++++++++++++++++++++++++ drivers/bus/pci/version.map | 3 ++ 8 files changed, 192 insertions(+) diff --git a/doc/guides/rel_notes/release_23_07.rst b/doc/guides/rel_notes/release_23_07.rst index a9b1293689..dba39134f1 100644 --- a/doc/guides/rel_notes/release_23_07.rst +++ b/doc/guides/rel_notes/release_23_07.rst @@ -55,6 +55,11 @@ New Features Also, make sure to start the actual text at the margin. ======================================================= +* **Added MMIO read and write APIs to PCI bus.** + + Introduced ``rte_pci_mmio_read()`` and ``rte_pci_mmio_write()`` APIs to PCI + bus so that PCI drivers can access PCI memory resources when they are not + mapped to process address space. Removed Items ------------- diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c index a747eca58c..27f12590d4 100644 --- a/drivers/bus/pci/bsd/pci.c +++ b/drivers/bus/pci/bsd/pci.c @@ -489,6 +489,28 @@ int rte_pci_write_config(const struct rte_pci_device *dev, return -1; } +/* Read PCI MMIO space. */ +int rte_pci_mmio_read(const struct rte_pci_device *dev, int bar, + void *buf, size_t len, off_t offset) +{ + if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL || + (uint64_t)offset + len > dev->mem_resource[bar].len) + return -1; + memcpy(buf, (uint8_t *)dev->mem_resource[bar].addr + offset, len); + return len; +} + +/* Write PCI MMIO space. 
*/ +int rte_pci_mmio_write(const struct rte_pci_device *dev, int bar, + const void *buf, size_t len, off_t offset) +{ + if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL || + (uint64_t)offset + len > dev->mem_resource[bar].len) + return -1; + memcpy((uint8_t *)dev->mem_resource[bar].addr + offset, buf, len); + return len; +} + int rte_pci_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p) diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index 04e21ae20f..3d237398d9 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -680,6 +680,52 @@ int rte_pci_write_config(const struct rte_pci_device *device, } } +/* Read PCI MMIO space. */ +int rte_pci_mmio_read(const struct rte_pci_device *device, int bar, + void *buf, size_t len, off_t offset) +{ + char devname[RTE_DEV_NAME_MAX_LEN] = ""; + + switch (device->kdrv) { + case RTE_PCI_KDRV_IGB_UIO: + case RTE_PCI_KDRV_UIO_GENERIC: + return pci_uio_mmio_read(device, bar, buf, len, offset); +#ifdef VFIO_PRESENT + case RTE_PCI_KDRV_VFIO: + return pci_vfio_mmio_read(device, bar, buf, len, offset); +#endif + default: + rte_pci_device_name(&device->addr, devname, + RTE_DEV_NAME_MAX_LEN); + RTE_LOG(ERR, EAL, + "Unknown driver type for %s\n", devname); + return -1; + } +} + +/* Write PCI MMIO space. 
*/ +int rte_pci_mmio_write(const struct rte_pci_device *device, int bar, + const void *buf, size_t len, off_t offset) +{ + char devname[RTE_DEV_NAME_MAX_LEN] = ""; + + switch (device->kdrv) { + case RTE_PCI_KDRV_IGB_UIO: + case RTE_PCI_KDRV_UIO_GENERIC: + return pci_uio_mmio_write(device, bar, buf, len, offset); +#ifdef VFIO_PRESENT + case RTE_PCI_KDRV_VFIO: + return pci_vfio_mmio_write(device, bar, buf, len, offset); +#endif + default: + rte_pci_device_name(&device->addr, devname, + RTE_DEV_NAME_MAX_LEN); + RTE_LOG(ERR, EAL, + "Unknown driver type for %s\n", devname); + return -1; + } +} + int rte_pci_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p) diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h index 9f6659ba6e..d842809ccd 100644 --- a/drivers/bus/pci/linux/pci_init.h +++ b/drivers/bus/pci/linux/pci_init.h @@ -37,6 +37,11 @@ int pci_uio_read_config(const struct rte_intr_handle *intr_handle, int pci_uio_write_config(const struct rte_intr_handle *intr_handle, const void *buf, size_t len, off_t offs); +int pci_uio_mmio_read(const struct rte_pci_device *dev, int bar, + void *buf, size_t len, off_t offset); +int pci_uio_mmio_write(const struct rte_pci_device *dev, int bar, + const void *buf, size_t len, off_t offset); + int pci_uio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p); void pci_uio_ioport_read(struct rte_pci_ioport *p, @@ -71,6 +76,11 @@ int pci_vfio_read_config(const struct rte_pci_device *dev, int pci_vfio_write_config(const struct rte_pci_device *dev, const void *buf, size_t len, off_t offs); +int pci_vfio_mmio_read(const struct rte_pci_device *dev, int bar, + void *buf, size_t len, off_t offset); +int pci_vfio_mmio_write(const struct rte_pci_device *dev, int bar, + const void *buf, size_t len, off_t offset); + int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p); void pci_vfio_ioport_read(struct rte_pci_ioport *p, diff --git 
a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c index d52125e49b..2bf16e9369 100644 --- a/drivers/bus/pci/linux/pci_uio.c +++ b/drivers/bus/pci/linux/pci_uio.c @@ -55,6 +55,28 @@ pci_uio_write_config(const struct rte_intr_handle *intr_handle, return pwrite(uio_cfg_fd, buf, len, offset); } +int +pci_uio_mmio_read(const struct rte_pci_device *dev, int bar, + void *buf, size_t len, off_t offset) +{ + if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL || + (uint64_t)offset + len > dev->mem_resource[bar].len) + return -1; + memcpy(buf, (uint8_t *)dev->mem_resource[bar].addr + offset, len); + return len; +} + +int +pci_uio_mmio_write(const struct rte_pci_device *dev, int bar, + const void *buf, size_t len, off_t offset) +{ + if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL || + (uint64_t)offset + len > dev->mem_resource[bar].len) + return -1; + memcpy((uint8_t *)dev->mem_resource[bar].addr + offset, buf, len); + return len; +} + static int pci_uio_set_bus_master(int dev_fd) { diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index 5aef84b7d0..24b0795fbd 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -1258,6 +1258,42 @@ pci_vfio_ioport_unmap(struct rte_pci_ioport *p) return -1; } +int +pci_vfio_mmio_read(const struct rte_pci_device *dev, int bar, + void *buf, size_t len, off_t offs) +{ + uint64_t size, offset; + int fd; + + fd = rte_intr_dev_fd_get(dev->intr_handle); + + if (pci_vfio_get_region(dev, bar, &size, &offset) != 0) + return -1; + + if ((uint64_t)len + offs > size) + return -1; + + return pread64(fd, buf, len, offset + offs); +} + +int +pci_vfio_mmio_write(const struct rte_pci_device *dev, int bar, + const void *buf, size_t len, off_t offs) +{ + uint64_t size, offset; + int fd; + + fd = rte_intr_dev_fd_get(dev->intr_handle); + + if (pci_vfio_get_region(dev, bar, &size, &offset) != 0) + return -1; + + if ((uint64_t)len + offs > size) + 
return -1; + + return pwrite64(fd, buf, len, offset + offs); +} + int pci_vfio_is_enabled(void) { diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h index b193114fe5..82da087f24 100644 --- a/drivers/bus/pci/rte_bus_pci.h +++ b/drivers/bus/pci/rte_bus_pci.h @@ -135,6 +135,54 @@ int rte_pci_read_config(const struct rte_pci_device *device, int rte_pci_write_config(const struct rte_pci_device *device, const void *buf, size_t len, off_t offset); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Read from a MMIO pci resource. + * + * @param device + * A pointer to a rte_pci_device structure describing the device + * to use + * @param bar + * Index of the io pci resource we want to access. + * @param buf + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into MMIO space described by @bar + * @return + * Number of bytes read on success, negative on error. + */ +__rte_experimental +int rte_pci_mmio_read(const struct rte_pci_device *device, int bar, + void *buf, size_t len, off_t offset); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Write to a MMIO pci resource. + * + * @param device + * A pointer to a rte_pci_device structure describing the device + * to use + * @param bar + * Index of the io pci resource we want to access. + * @param buf + * A data buffer containing the bytes should be written + * @param len + * The length of the data buffer. + * @param offset + * The offset into MMIO space described by @bar + * @return + * Number of bytes written on success, negative on error. + */ +__rte_experimental +int rte_pci_mmio_write(const struct rte_pci_device *device, int bar, + const void *buf, size_t len, off_t offset); + /** * Initialize a rte_pci_ioport object for a pci device io resource. 
* diff --git a/drivers/bus/pci/version.map b/drivers/bus/pci/version.map index 161ab86d3b..00fde139ca 100644 --- a/drivers/bus/pci/version.map +++ b/drivers/bus/pci/version.map @@ -21,6 +21,9 @@ EXPERIMENTAL { # added in 21.08 rte_pci_set_bus_master; + # added in 23.07 + rte_pci_mmio_read; + rte_pci_mmio_write; }; INTERNAL { From patchwork Wed May 31 05:37:42 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Li, Miao" X-Patchwork-Id: 127742 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 0148A42BEB; Wed, 31 May 2023 07:38:24 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id C44FC42D39; Wed, 31 May 2023 07:38:09 +0200 (CEST) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by mails.dpdk.org (Postfix) with ESMTP id 5644142C76 for ; Wed, 31 May 2023 07:38:07 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1685511487; x=1717047487; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=75qQgyEffNjGvP82AdA75FyhNPOdFo6o/PO2aB4YVxk=; b=jFsFS5HXH2cGk4uwSZjheDvzYEWmmHQ45S9no2ysfosIg0YXpF50dAYW nbDAYjiESE+iHCV/tEgsiJnXEDAlEDh35jOaLNdvmElUCkt45Y5wYQC1y ET5jzQKcQN724+n1AlluW9o7O3dVQJCpJ/yCiT6nHNU9eLvALbhglchZP wxRQmE9YMDQ1ZCxXOa6XOB1DtpdVd/l0y/hvfF7KHvF7ULX+P/6yiOS7v vzCAJ9xARhO1zjHL7KjxetjrM3Vt5w25tVyUhhm3VRW9wSpzmtd/3Frjo +5BfkeR+oTG872JKNCBg3pdnVacFhdspvgw77/YoPRgfEyS/79XGCW4ym g==; X-IronPort-AV: E=McAfee;i="6600,9927,10726"; a="335489303" X-IronPort-AV: E=Sophos;i="6.00,205,1681196400"; d="scan'208";a="335489303" Received: from orsmga006.jf.intel.com ([10.7.209.51]) by fmsmga107.fm.intel.com with 
ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 May 2023 22:38:06 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10726"; a="684273536" X-IronPort-AV: E=Sophos;i="6.00,205,1681196400"; d="scan'208";a="684273536" Received: from dpdk-limiao-icelake.sh.intel.com ([10.67.111.26]) by orsmga006.jf.intel.com with ESMTP; 30 May 2023 22:38:04 -0700 From: Miao Li To: dev@dpdk.org Cc: skori@marvell.com, thomas@monjalon.net, david.marchand@redhat.com, ferruh.yigit@amd.com, chenbo.xia@intel.com, yahui.cao@intel.com, Anatoly Burakov Subject: [PATCH v4 4/4] bus/pci: add VFIO sparse mmap support Date: Wed, 31 May 2023 05:37:42 +0000 Message-Id: <20230531053743.129442-5-miao.li@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20230531053743.129442-1-miao.li@intel.com> References: <20230525163116.682000-1-miao.li@intel.com> <20230531053743.129442-1-miao.li@intel.com> MIME-Version: 1.0 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org This patch adds sparse mmap support to the PCI bus. Sparse mmap is a capability defined in VFIO which allows multiple mmap areas in one VFIO region. In this patch, the sparse mmap regions are mapped to one contiguous virtual address region that follows the device-specific BAR layout. So, the driver can still access all mapped sparse mmap regions by using 'bar_base_address + bar_offset'. 
Signed-off-by: Miao Li Signed-off-by: Chenbo Xia Acked-by: Sunil Kumar Kori Acked-by: Yahui Cao --- drivers/bus/pci/linux/pci_vfio.c | 138 +++++++++++++++++++++++++++---- drivers/bus/pci/private.h | 2 + 2 files changed, 122 insertions(+), 18 deletions(-) diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index 24b0795fbd..e6db30d36a 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -673,6 +673,54 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res, return 0; } +static int +pci_vfio_sparse_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res, + int bar_index, int additional_flags) +{ + struct pci_map *bar = &vfio_res->maps[bar_index]; + struct vfio_region_sparse_mmap_area *sparse; + void *bar_addr; + uint32_t i; + + if (bar->size == 0) { + RTE_LOG(DEBUG, EAL, "Bar size is 0, skip BAR%d\n", bar_index); + return 0; + } + + /* reserve the address using an inaccessible mapping */ + bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE | + MAP_ANONYMOUS | additional_flags, -1, 0); + if (bar_addr != MAP_FAILED) { + void *map_addr = NULL; + for (i = 0; i < bar->nr_areas; i++) { + sparse = &bar->areas[i]; + if (sparse->size) { + void *addr = RTE_PTR_ADD(bar_addr, (uintptr_t)sparse->offset); + map_addr = pci_map_resource(addr, vfio_dev_fd, + bar->offset + sparse->offset, sparse->size, + RTE_MAP_FORCE_ADDRESS); + if (map_addr == NULL) { + munmap(bar_addr, bar->size); + RTE_LOG(ERR, EAL, "Failed to map pci BAR%d\n", + bar_index); + goto err_map; + } + } + } + } else { + RTE_LOG(ERR, EAL, "Failed to create inaccessible mapping for BAR%d\n", + bar_index); + goto err_map; + } + + bar->addr = bar_addr; + return 0; + +err_map: + bar->nr_areas = 0; + return -1; +} + /* * region info may contain capability headers, so we need to keep reallocating * the memory until we match allocated memory size with argsz. 
@@ -798,7 +846,7 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) char pci_addr[PATH_MAX] = {0}; int vfio_dev_fd; struct rte_pci_addr *loc = &dev->addr; - int i, ret; + int i, j, ret; struct mapped_pci_resource *vfio_res = NULL; struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); @@ -875,13 +923,15 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) for (i = 0; i < vfio_res->nb_maps; i++) { void *bar_addr; + struct vfio_info_cap_header *hdr; + struct vfio_region_info_cap_sparse_mmap *sparse; ret = pci_vfio_get_region_info(vfio_dev_fd, ®, i); if (ret < 0) { RTE_LOG(ERR, EAL, "%s cannot get device region info error " "%i (%s)\n", pci_addr, errno, strerror(errno)); - goto err_vfio_res; + goto err_map; } pdev->region[i].size = reg->size; @@ -891,7 +941,7 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) ret = pci_vfio_is_ioport_bar(dev, vfio_dev_fd, i); if (ret < 0) { free(reg); - goto err_vfio_res; + goto err_map; } else if (ret) { RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n", i); @@ -920,12 +970,41 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) maps[i].size = reg->size; maps[i].path = NULL; /* vfio doesn't have per-resource paths */ - ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0); - if (ret < 0) { - RTE_LOG(ERR, EAL, "%s mapping BAR%i failed: %s\n", - pci_addr, i, strerror(errno)); - free(reg); - goto err_vfio_res; + hdr = pci_vfio_info_cap(reg, VFIO_REGION_INFO_CAP_SPARSE_MMAP); + + if (hdr != NULL) { + sparse = container_of(hdr, + struct vfio_region_info_cap_sparse_mmap, header); + if (sparse->nr_areas > 0) { + maps[i].nr_areas = sparse->nr_areas; + maps[i].areas = rte_zmalloc(NULL, + sizeof(*maps[i].areas) * maps[i].nr_areas, 0); + if (maps[i].areas == NULL) { + RTE_LOG(ERR, EAL, + "Cannot alloc memory for sparse map areas\n"); + goto err_map; + } + memcpy(maps[i].areas, sparse->areas, + sizeof(*maps[i].areas) * maps[i].nr_areas); + } + } + + if 
(maps[i].nr_areas > 0) { + ret = pci_vfio_sparse_mmap_bar(vfio_dev_fd, vfio_res, i, 0); + if (ret < 0) { + RTE_LOG(ERR, EAL, "%s sparse mapping BAR%i failed: %s\n", + pci_addr, i, strerror(errno)); + free(reg); + goto err_map; + } + } else { + ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0); + if (ret < 0) { + RTE_LOG(ERR, EAL, "%s mapping BAR%i failed: %s\n", + pci_addr, i, strerror(errno)); + free(reg); + goto err_map; + } } dev->mem_resource[i].addr = maps[i].addr; @@ -935,19 +1014,26 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) { RTE_LOG(ERR, EAL, "%s setup device failed\n", pci_addr); - goto err_vfio_res; + goto err_map; } #ifdef HAVE_VFIO_DEV_REQ_INTERFACE if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) { RTE_LOG(ERR, EAL, "Error setting up notifier!\n"); - goto err_vfio_res; + goto err_map; } #endif TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); return 0; +err_map: + for (j = 0; j < i; j++) { + if (maps[j].addr) + pci_unmap_resource(maps[j].addr, maps[j].size); + if (maps[j].nr_areas > 0) + rte_free(maps[j].areas); + } err_vfio_res: rte_free(vfio_res); err_vfio_dev_fd: @@ -963,7 +1049,7 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) char pci_addr[PATH_MAX] = {0}; int vfio_dev_fd; struct rte_pci_addr *loc = &dev->addr; - int i, ret; + int i, j, ret; struct mapped_pci_resource *vfio_res = NULL; struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); @@ -1008,11 +1094,20 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) maps = vfio_res->maps; for (i = 0; i < vfio_res->nb_maps; i++) { - ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED); - if (ret < 0) { - RTE_LOG(ERR, EAL, "%s mapping BAR%i failed: %s\n", - pci_addr, i, strerror(errno)); - goto err_vfio_dev_fd; + if (maps[i].nr_areas > 0) { + ret = pci_vfio_sparse_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED); + if (ret < 0) { + RTE_LOG(ERR, EAL, 
"%s sparse mapping BAR%i failed: %s\n", + pci_addr, i, strerror(errno)); + goto err_vfio_dev_fd; + } + } else { + ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED); + if (ret < 0) { + RTE_LOG(ERR, EAL, "%s mapping BAR%i failed: %s\n", + pci_addr, i, strerror(errno)); + goto err_vfio_dev_fd; + } } dev->mem_resource[i].addr = maps[i].addr; @@ -1028,6 +1123,10 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) return 0; err_vfio_dev_fd: + for (j = 0; j < i; j++) { + if (maps[j].addr) + pci_unmap_resource(maps[j].addr, maps[j].size); + } rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr, vfio_dev_fd); return -1; @@ -1062,7 +1161,7 @@ find_and_unmap_vfio_resource(struct mapped_pci_res_list *vfio_res_list, break; } - if (vfio_res == NULL) + if (vfio_res == NULL) return vfio_res; RTE_LOG(INFO, EAL, "Releasing PCI mapped resource for %s\n", @@ -1080,6 +1179,9 @@ find_and_unmap_vfio_resource(struct mapped_pci_res_list *vfio_res_list, pci_addr, maps[i].addr); pci_unmap_resource(maps[i].addr, maps[i].size); } + + if (maps[i].nr_areas > 0) + rte_free(maps[i].areas); } return vfio_res; diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h index 2d6991ccb7..8b0ce73533 100644 --- a/drivers/bus/pci/private.h +++ b/drivers/bus/pci/private.h @@ -121,6 +121,8 @@ struct pci_map { uint64_t offset; uint64_t size; uint64_t phaddr; + uint32_t nr_areas; + struct vfio_region_sparse_mmap_area *areas; }; struct pci_msix_table {