[v7,1/2] bus/pci: support PASID control

Message ID 20231106170521.3064038-2-abdullah.sevincer@intel.com (mailing list archive)
State Accepted, archived
Delegated to: Jerin Jacob
Headers
Series *** Disable PASID for DLB Device *** |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Abdullah Sevincer Nov. 6, 2023, 5:05 p.m. UTC
Add an internal API to control PASID for a given PCIe device.

On kernels where PASID is enabled by default, DLB functionality breaks;
hence disabling PASID is required for DLB to function properly.

The PASID capability is not exposed to user space, hence its offset
cannot be retrieved with the rte_pci_find_ext_capability() API.
Therefore, the API implemented in this commit accepts an offset for
PASID along with an enable flag, which is used to enable/disable PASID.

Signed-off-by: Abdullah Sevincer <abdullah.sevincer@intel.com>
---
 drivers/bus/pci/pci_common.c  |  7 +++++++
 drivers/bus/pci/rte_bus_pci.h | 13 +++++++++++++
 drivers/bus/pci/version.map   |  1 +
 lib/pci/rte_pci.h             |  4 ++++
 4 files changed, 25 insertions(+)
  

Comments

David Marchand Nov. 6, 2023, 6:30 p.m. UTC | #1
On Mon, Nov 6, 2023 at 6:05 PM Abdullah Sevincer
<abdullah.sevincer@intel.com> wrote:
>
> Add an internal API to control PASID for a given PCIe device.
>
> For kernels when PASID enabled by default it breaks DLB functionality,
> hence disabling PASID is required for DLB to function properly.
>
> PASID capability is not exposed to users hence offset can not be
> retrieved by rte_pci_find_ext_capability() api. Therefore, api
> implemented in this commit accepts an offset for PASID with an enable
> flag which is used to enable/disable PASID.
>
> Signed-off-by: Abdullah Sevincer <abdullah.sevincer@intel.com>
> ---
>  drivers/bus/pci/pci_common.c  |  7 +++++++
>  drivers/bus/pci/rte_bus_pci.h | 13 +++++++++++++
>  drivers/bus/pci/version.map   |  1 +
>  lib/pci/rte_pci.h             |  4 ++++
>  4 files changed, 25 insertions(+)
>
> diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
> index 921d957bf6..ecf080c5d7 100644
> --- a/drivers/bus/pci/pci_common.c
> +++ b/drivers/bus/pci/pci_common.c
> @@ -938,6 +938,13 @@ rte_pci_set_bus_master(const struct rte_pci_device *dev, bool enable)
>         return 0;
>  }
>
> +int
> +rte_pci_pasid_set_state(const struct rte_pci_device *dev, off_t offset, bool enable)
> +{
> +       uint16_t pasid = enable;
> +       return rte_pci_write_config(dev, &pasid, sizeof(pasid), offset) < 0 ? -1 : 0;
> +}

I don't see much point in providing a wrapper that does nothing more
than call rte_pci_write_config() and let the driver pass the right
offsets.

If anything, can't this wrapper find out about the pasid offset itself?
There is an extended capability for this, so I would expect it can be used.

Something like (only compile tested):

diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index ba5e280d33..2ca28bd4d4 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -939,13 +939,18 @@ rte_pci_set_bus_master(const struct
rte_pci_device *dev, bool enable)
 }

 int
-rte_pci_pasid_set_state(const struct rte_pci_device *dev,
-               off_t offset, bool enable)
+rte_pci_pasid_set_state(const struct rte_pci_device *dev, bool enable)
 {
-       uint16_t pasid = enable;
-       return rte_pci_write_config(dev, &pasid, sizeof(pasid), offset) < 0
-               ? -1
-               : 0;
+       uint16_t state = enable;
+       off_t pasid_offset;
+       int ret = -1;
+
+       pasid_offset = rte_pci_find_ext_capability(dev,
RTE_PCI_EXT_CAP_ID_PASID);
+       if (pasid_offset >= 0 && rte_pci_write_config(dev, &state,
sizeof(state),
+                       pasid_offset + RTE_PCI_PASID_CTRL) == sizeof(state))
+               ret = 0;
+
+       return ret;
 }

 struct rte_pci_bus rte_pci_bus = {
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index f07bf9b588..6d5dbc1d50 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -160,14 +160,14 @@ int rte_pci_set_bus_master(const struct
rte_pci_device *dev, bool enable);
  *
  * @param dev
  *   A pointer to a rte_pci_device structure.
- * @param offset
- *   Offset of the PASID external capability.
  * @param enable
  *   Flag to enable or disable PASID.
+ *
+ *  @return
+ *  0 on success, -1 on error in PCI config space read/write.
  */
 __rte_internal
-int rte_pci_pasid_set_state(const struct rte_pci_device *dev,
-               off_t offset, bool enable);
+int rte_pci_pasid_set_state(const struct rte_pci_device *dev, bool enable);

 /**
  * Read PCI config space.
diff --git a/drivers/event/dlb2/pf/dlb2_main.c
b/drivers/event/dlb2/pf/dlb2_main.c
index 61a7b39eef..bd1ee4af27 100644
--- a/drivers/event/dlb2/pf/dlb2_main.c
+++ b/drivers/event/dlb2/pf/dlb2_main.c
@@ -26,7 +26,6 @@
 #define PF_ID_ZERO 0   /* PF ONLY! */
 #define NO_OWNER_VF 0  /* PF ONLY! */
 #define NOT_VF_REQ false /* PF ONLY! */
-#define DLB2_PCI_PASID_CAP_OFFSET        0x148   /* PASID capability offset */

 static int
 dlb2_pf_init_driver_state(struct dlb2_dev *dlb2_dev)
@@ -518,8 +517,7 @@ dlb2_pf_reset(struct dlb2_dev *dlb2_dev)
        /* Disable PASID if it is enabled by default, which
         * breaks the DLB if enabled.
         */
-       off = DLB2_PCI_PASID_CAP_OFFSET + RTE_PCI_PASID_CTRL;
-       if (rte_pci_pasid_set_state(pdev, off, false)) {
+       if (rte_pci_pasid_set_state(pdev, false)) {
                DLB2_LOG_ERR("[%s()] failed to write the pcie config
space at offset %d\n",
                                __func__, (int)off);
                return -1;
  
Abdullah Sevincer Nov. 6, 2023, 6:50 p.m. UTC | #2
>+I don't see much point in providing a wrapper that does nothing more than call rte_pci_write_config() and let the driver pass the right offsets.

>+If anything, can't this wrapper find out about the pasid offset itself?
>+There is a extended capability for this, so I would expect it can be used.

>+Something like (only compile tested):

>+diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c index ba5e280d33..2ca28bd4d4 100644
>+--- a/drivers/bus/pci/pci_common.c
>++++ b/drivers/bus/pci/pci_common.c
>+@@ -939,13 +939,18 @@ rte_pci_set_bus_master(const struct rte_pci_device *dev, bool enable)  }

>+ int
>+-rte_pci_pasid_set_state(const struct rte_pci_device *dev,
>+-               off_t offset, bool enable)
>++rte_pci_pasid_set_state(const struct rte_pci_device *dev, bool enable)
>+ {
>+-       uint16_t pasid = enable;
>+-       return rte_pci_write_config(dev, &pasid, sizeof(pasid), offset) < 0
>+-               ? -1
>+-               : 0;
>++       uint16_t state = enable;
>++       off_t pasid_offset;
>++       int ret = -1;
>++
>++       pasid_offset = rte_pci_find_ext_capability(dev,
>+RTE_PCI_EXT_CAP_ID_PASID);
>++       if (pasid_offset >= 0 && rte_pci_write_config(dev, &state,
>+sizeof(state),
>++                       pasid_offset + RTE_PCI_PASID_CTRL) == sizeof(state))
>++               ret = 0;
>++
>++       return ret;
 >+}

 >+struct rte_pci_bus rte_pci_bus = {
>+diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h index f07bf9b588..6d5dbc1d50 100644
>+--- a/drivers/bus/pci/rte_bus_pci.h
>++++ b/drivers/bus/pci/rte_bus_pci.h
>+@@ -160,14 +160,14 @@ int rte_pci_set_bus_master(const struct rte_pci_device *dev, bool enable);
 >+ *
  >+* @param dev
  >+*   A pointer to a rte_pci_device structure.
>+- * @param offset
>+- *   Offset of the PASID external capability.
>+  * @param enable
>+  *   Flag to enable or disable PASID.
>++ *
>++ *  @return
>++ *  0 on success, -1 on error in PCI config space read/write.
>+  */
>+ __rte_internal
>+-int rte_pci_pasid_set_state(const struct rte_pci_device *dev,
>+-               off_t offset, bool enable);
>++int rte_pci_pasid_set_state(const struct rte_pci_device *dev, bool 
>++enable);

 >+/**
  >+* Read PCI config space.
>+diff --git a/drivers/event/dlb2/pf/dlb2_main.c
>+b/drivers/event/dlb2/pf/dlb2_main.c
>+index 61a7b39eef..bd1ee4af27 100644
>+--- a/drivers/event/dlb2/pf/dlb2_main.c
>++++ b/drivers/event/dlb2/pf/dlb2_main.c
>+@@ -26,7 +26,6 @@
>+ #define PF_ID_ZERO 0   /* PF ONLY! */
 >+#define NO_OWNER_VF 0  /* PF ONLY! */
>+ #define NOT_VF_REQ false /* PF ONLY! */
>+-#define DLB2_PCI_PASID_CAP_OFFSET        0x148   /* PASID capability offset */

 >+static int
 >+dlb2_pf_init_driver_state(struct dlb2_dev *dlb2_dev) @@ -518,8 +517,7 @@ dlb2_pf_reset(struct dlb2_dev *dlb2_dev)
  >+      /* Disable PASID if it is enabled by default, which
   >+      * breaks the DLB if enabled.
  >+       */
>+-       off = DLB2_PCI_PASID_CAP_OFFSET + RTE_PCI_PASID_CTRL;
>+-       if (rte_pci_pasid_set_state(pdev, off, false)) {
>++       if (rte_pci_pasid_set_state(pdev, false)) {
 >+               DLB2_LOG_ERR("[%s()] failed to write the pcie config space at offset %d\n",
 >+                               __func__, (int)off);
 >+               return -1;

Hi David,
>++       pasid_offset = rte_pci_find_ext_capability(dev,
>+RTE_PCI_EXT_CAP_ID_PASID);

The rte_pci_find_ext_capability() API does not work for PASID, since the PASID capability is not exposed to user space by the kernel.
So, we cannot retrieve the offset. Instead, we came up with a solution that passes an offset to a function that disables PASID, and made that function internal so that we can change it later.
When the Linux limitation is lifted, we can rewrite the function to use the rte_pci_find_ext_capability() API to retrieve the offset, and your
solution above can be adopted.
  
David Marchand Nov. 10, 2023, 8:03 a.m. UTC | #3
Hello,

On Mon, Nov 6, 2023 at 7:50 PM Sevincer, Abdullah
<abdullah.sevincer@intel.com> wrote:
> Hi David,
> >++       pasid_offset = rte_pci_find_ext_capability(dev,
> >+RTE_PCI_EXT_CAP_ID_PASID);
>
> That  rte_pci_find_ext_capability() api does not work for PASID since PASID is not exposed to user from kernel.
> So, we can not retrieve offset. Instead we came up with a solution that passes an offset to an internal function to disable PASID and make the function internal so we can change it later.
> When the linux limitation is lifted we can re-write the functions and use rte_pci_find_ext_capability api to retrieve offset and your
> solution above can be done.

Adding PCI bus maintainers, Chenbo and Nipun.

Ok, that is indeed an issue.
I found some patches exposing this capability with vfio-pci but I am
not sure what is the latest work on the topic.
Do you have pointers to the latest kernel patches?


In any case, even if, in the future, the kernel exposes this
capability, we need to live with the current behavior (and probably
for a long time).
As discovering the pasid offset is not possible, the merit of a common
API is low, but at least it shows what is being done by the driver.

Can we make a change so that this new API takes only the offset to the
pasid *structure* and not to the exact register controlling the
feature?
It should be something like:

$ git diff
diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index ba5e280d33..c66cefcd63 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -943,9 +943,9 @@ rte_pci_pasid_set_state(const struct rte_pci_device *dev,
                off_t offset, bool enable)
 {
        uint16_t pasid = enable;
-       return rte_pci_write_config(dev, &pasid, sizeof(pasid), offset) < 0
-               ? -1
-               : 0;
+
+       return rte_pci_write_config(dev, &pasid, sizeof(pasid),
+               offset + RTE_PCI_PASID_CTRL) < 0 ? -1 : 0;
 }

 struct rte_pci_bus rte_pci_bus = {
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index f07bf9b588..b1d17996cb 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -161,9 +161,12 @@ int rte_pci_set_bus_master(const struct
rte_pci_device *dev, bool enable);
  * @param dev
  *   A pointer to a rte_pci_device structure.
  * @param offset
- *   Offset of the PASID external capability.
+ *   Offset of the PASID external capability structure.
  * @param enable
  *   Flag to enable or disable PASID.
+ *
+ *  @return
+ *  0 on success, -1 on error in PCI config space read/write.
  */
 __rte_internal
 int rte_pci_pasid_set_state(const struct rte_pci_device *dev,
diff --git a/drivers/event/dlb2/pf/dlb2_main.c
b/drivers/event/dlb2/pf/dlb2_main.c
index 61a7b39eef..a95d3227a4 100644
--- a/drivers/event/dlb2/pf/dlb2_main.c
+++ b/drivers/event/dlb2/pf/dlb2_main.c
@@ -518,8 +518,8 @@ dlb2_pf_reset(struct dlb2_dev *dlb2_dev)
        /* Disable PASID if it is enabled by default, which
         * breaks the DLB if enabled.
         */
-       off = DLB2_PCI_PASID_CAP_OFFSET + RTE_PCI_PASID_CTRL;
-       if (rte_pci_pasid_set_state(pdev, off, false)) {
+       off = DLB2_PCI_PASID_CAP_OFFSET;
+       if (rte_pci_pasid_set_state(pdev, off, false) < 0) {
                DLB2_LOG_ERR("[%s()] failed to write the pcie config
space at offset %d\n",
                                __func__, (int)off);
                return -1;
diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h
index 0d2d8d8fed..94219792de 100644
--- a/lib/pci/rte_pci.h
+++ b/lib/pci/rte_pci.h
@@ -101,7 +101,7 @@ extern "C" {
 #define RTE_PCI_EXT_CAP_ID_ACS         0x0d    /* Access Control Services */
 #define RTE_PCI_EXT_CAP_ID_SRIOV       0x10    /* SR-IOV */
 #define RTE_PCI_EXT_CAP_ID_PRI         0x13    /* Page Request Interface */
-#define RTE_PCI_EXT_CAP_ID_PASID       0x1B    /* Process Address Space ID */
+#define RTE_PCI_EXT_CAP_ID_PASID       0x1b    /* Process Address Space ID */

 /* Advanced Error Reporting (RTE_PCI_EXT_CAP_ID_ERR) */
 #define RTE_PCI_ERR_UNCOR_STATUS       0x04    /* Uncorrectable Error Status */
  
Abdullah Sevincer Nov. 13, 2023, 3:51 p.m. UTC | #4
>+Ok, that is indeed an issue.
>+I found some patches exposing this capability with vfio-pci but I am not sure what is the latest work on the topic.

That's right, some kernels may expose the capability and some may not. We realized it when Bruce reported that my earlier patch, which used the rte_pci_find_ext_capability() API, did not
work for him although it was working for me. We had different versions/flavors of the kernel.

>+Do you have pointers to the latest kernel patches?
I don’t have any pointers to the latest kernel patches. Through an internet search I have come across some patches that were submitted to expose the capability, but I am not sure what the latest status of those patches is.
We can ask people involved there and get feedback.
For example below:
https://lists.linuxfoundation.org/pipermail/iommu/2020-June/045531.html

>+In any case, even if, in the future, the kernel exposes this capability, we need to live with the current behavior (and probably for a long time).
>+As the discovery of pasid offset is not possible, the common API merit is low, but at least it shows what is being done by the driver.

>+Can we make a change so that this new API takes only the offset to the pasid *structure* and not to the exact register controlling the feature?
>+It should be something like:

We can change it the way you described. The patch is already merged though 😊.
  
Abdullah Sevincer Nov. 13, 2023, 5:36 p.m. UTC | #5
I have pushed another patch addressing David's comments.

https://patches.dpdk.org/project/dpdk/patch/20231113172759.3529518-1-abdullah.sevincer@intel.com/
  

Patch

diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index 921d957bf6..ecf080c5d7 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -938,6 +938,13 @@  rte_pci_set_bus_master(const struct rte_pci_device *dev, bool enable)
 	return 0;
 }
 
+int
+rte_pci_pasid_set_state(const struct rte_pci_device *dev, off_t offset, bool enable)
+{
+	uint16_t pasid = enable;
+	return rte_pci_write_config(dev, &pasid, sizeof(pasid), offset) < 0 ? -1 : 0;
+}
+
 struct rte_pci_bus rte_pci_bus = {
 	.bus = {
 		.scan = rte_pci_scan,
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index 21e234abf0..6d836e771a 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -295,6 +295,19 @@  void rte_pci_ioport_read(struct rte_pci_ioport *p,
 void rte_pci_ioport_write(struct rte_pci_ioport *p,
 		const void *data, size_t len, off_t offset);
 
+/**
+ * Enable/Disable PASID.
+ *
+ * @param dev
+ *   A pointer to a rte_pci_device structure.
+ * @param offset
+ *   Offset of the PASID external capability.
+ * @param enable
+ *   Flag to enable or disable PASID.
+ */
+__rte_internal
+int rte_pci_pasid_set_state(const struct rte_pci_device *dev, off_t offset, bool enable);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/drivers/bus/pci/version.map b/drivers/bus/pci/version.map
index 74c5b075d5..9fad086bdf 100644
--- a/drivers/bus/pci/version.map
+++ b/drivers/bus/pci/version.map
@@ -37,5 +37,6 @@  INTERNAL {
 
 	rte_pci_get_sysfs_path;
 	rte_pci_register;
+	rte_pci_pasid_set_state;
 	rte_pci_unregister;
 };
diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h
index 69e932d910..d195f01950 100644
--- a/lib/pci/rte_pci.h
+++ b/lib/pci/rte_pci.h
@@ -101,6 +101,10 @@  extern "C" {
 #define RTE_PCI_EXT_CAP_ID_ACS		0x0d	/* Access Control Services */
 #define RTE_PCI_EXT_CAP_ID_SRIOV	0x10	/* SR-IOV */
 #define RTE_PCI_EXT_CAP_ID_PRI		0x13	/* Page Request Interface */
+#define RTE_PCI_EXT_CAP_ID_PASID        0x1B    /* Process Address Space ID */
+
+/* Process Address Space ID */
+#define RTE_PCI_PASID_CTRL		0x06    /* PASID control register */
 
 /* Advanced Error Reporting (RTE_PCI_EXT_CAP_ID_ERR) */
 #define RTE_PCI_ERR_UNCOR_STATUS	0x04	/* Uncorrectable Error Status */