[v7,1/2] bus/pci: support PASID control
Checks
Commit Message
Add an internal API to control PASID for a given PCIe device.
On kernels where PASID is enabled by default, DLB functionality breaks,
hence disabling PASID is required for DLB to function properly.
The PASID capability is not exposed to users, hence its offset cannot be
retrieved by the rte_pci_find_ext_capability() API. Therefore, the API
implemented in this commit accepts an offset for PASID along with an enable
flag, which is used to enable/disable PASID.
Signed-off-by: Abdullah Sevincer <abdullah.sevincer@intel.com>
---
drivers/bus/pci/pci_common.c | 7 +++++++
drivers/bus/pci/rte_bus_pci.h | 13 +++++++++++++
drivers/bus/pci/version.map | 1 +
lib/pci/rte_pci.h | 4 ++++
4 files changed, 25 insertions(+)
Comments
On Mon, Nov 6, 2023 at 6:05 PM Abdullah Sevincer
<abdullah.sevincer@intel.com> wrote:
>
> Add an internal API to control PASID for a given PCIe device.
>
> For kernels when PASID enabled by default it breaks DLB functionality,
> hence disabling PASID is required for DLB to function properly.
>
> PASID capability is not exposed to users hence offset can not be
> retrieved by rte_pci_find_ext_capability() api. Therefore, api
> implemented in this commit accepts an offset for PASID with an enable
> flag which is used to enable/disable PASID.
>
> Signed-off-by: Abdullah Sevincer <abdullah.sevincer@intel.com>
> ---
> drivers/bus/pci/pci_common.c | 7 +++++++
> drivers/bus/pci/rte_bus_pci.h | 13 +++++++++++++
> drivers/bus/pci/version.map | 1 +
> lib/pci/rte_pci.h | 4 ++++
> 4 files changed, 25 insertions(+)
>
> diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
> index 921d957bf6..ecf080c5d7 100644
> --- a/drivers/bus/pci/pci_common.c
> +++ b/drivers/bus/pci/pci_common.c
> @@ -938,6 +938,13 @@ rte_pci_set_bus_master(const struct rte_pci_device *dev, bool enable)
> return 0;
> }
>
> +int
> +rte_pci_pasid_set_state(const struct rte_pci_device *dev, off_t offset, bool enable)
> +{
> + uint16_t pasid = enable;
> + return rte_pci_write_config(dev, &pasid, sizeof(pasid), offset) < 0 ? -1 : 0;
> +}
I don't see much point in providing a wrapper that does nothing more
than call rte_pci_write_config() and let the driver pass the right
offsets.
If anything, can't this wrapper find out about the pasid offset itself?
There is an extended capability for this, so I would expect it can be used.
Something like (only compile tested):
diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index ba5e280d33..2ca28bd4d4 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -939,13 +939,18 @@ rte_pci_set_bus_master(const struct
rte_pci_device *dev, bool enable)
}
int
-rte_pci_pasid_set_state(const struct rte_pci_device *dev,
- off_t offset, bool enable)
+rte_pci_pasid_set_state(const struct rte_pci_device *dev, bool enable)
{
- uint16_t pasid = enable;
- return rte_pci_write_config(dev, &pasid, sizeof(pasid), offset) < 0
- ? -1
- : 0;
+ uint16_t state = enable;
+ off_t pasid_offset;
+ int ret = -1;
+
+ pasid_offset = rte_pci_find_ext_capability(dev,
RTE_PCI_EXT_CAP_ID_PASID);
+ if (pasid_offset >= 0 && rte_pci_write_config(dev, &state,
sizeof(state),
+ pasid_offset + RTE_PCI_PASID_CTRL) == sizeof(state))
+ ret = 0;
+
+ return ret;
}
struct rte_pci_bus rte_pci_bus = {
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index f07bf9b588..6d5dbc1d50 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -160,14 +160,14 @@ int rte_pci_set_bus_master(const struct
rte_pci_device *dev, bool enable);
*
* @param dev
* A pointer to a rte_pci_device structure.
- * @param offset
- * Offset of the PASID external capability.
* @param enable
* Flag to enable or disable PASID.
+ *
+ * @return
+ * 0 on success, -1 on error in PCI config space read/write.
*/
__rte_internal
-int rte_pci_pasid_set_state(const struct rte_pci_device *dev,
- off_t offset, bool enable);
+int rte_pci_pasid_set_state(const struct rte_pci_device *dev, bool enable);
/**
* Read PCI config space.
diff --git a/drivers/event/dlb2/pf/dlb2_main.c
b/drivers/event/dlb2/pf/dlb2_main.c
index 61a7b39eef..bd1ee4af27 100644
--- a/drivers/event/dlb2/pf/dlb2_main.c
+++ b/drivers/event/dlb2/pf/dlb2_main.c
@@ -26,7 +26,6 @@
#define PF_ID_ZERO 0 /* PF ONLY! */
#define NO_OWNER_VF 0 /* PF ONLY! */
#define NOT_VF_REQ false /* PF ONLY! */
-#define DLB2_PCI_PASID_CAP_OFFSET 0x148 /* PASID capability offset */
static int
dlb2_pf_init_driver_state(struct dlb2_dev *dlb2_dev)
@@ -518,8 +517,7 @@ dlb2_pf_reset(struct dlb2_dev *dlb2_dev)
/* Disable PASID if it is enabled by default, which
* breaks the DLB if enabled.
*/
- off = DLB2_PCI_PASID_CAP_OFFSET + RTE_PCI_PASID_CTRL;
- if (rte_pci_pasid_set_state(pdev, off, false)) {
+ if (rte_pci_pasid_set_state(pdev, false)) {
DLB2_LOG_ERR("[%s()] failed to write the pcie config
space at offset %d\n",
__func__, (int)off);
return -1;
>+I don't see much point in providing a wrapper that does nothing more than call rte_pci_write_config() and let the driver pass the right offsets.
>+If anything, can't this wrapper find out about the pasid offset itself?
>+There is a extended capability for this, so I would expect it can be used.
>+Something like (only compile tested):
>+diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c index ba5e280d33..2ca28bd4d4 100644
>+--- a/drivers/bus/pci/pci_common.c
>++++ b/drivers/bus/pci/pci_common.c
>+@@ -939,13 +939,18 @@ rte_pci_set_bus_master(const struct rte_pci_device *dev, bool enable) }
>+ int
>+-rte_pci_pasid_set_state(const struct rte_pci_device *dev,
>+- off_t offset, bool enable)
>++rte_pci_pasid_set_state(const struct rte_pci_device *dev, bool enable)
>+ {
>+- uint16_t pasid = enable;
>+- return rte_pci_write_config(dev, &pasid, sizeof(pasid), offset) < 0
>+- ? -1
>+- : 0;
>++ uint16_t state = enable;
>++ off_t pasid_offset;
>++ int ret = -1;
>++
>++ pasid_offset = rte_pci_find_ext_capability(dev,
>+RTE_PCI_EXT_CAP_ID_PASID);
>++ if (pasid_offset >= 0 && rte_pci_write_config(dev, &state,
>+sizeof(state),
>++ pasid_offset + RTE_PCI_PASID_CTRL) == sizeof(state))
>++ ret = 0;
>++
>++ return ret;
>+}
>+struct rte_pci_bus rte_pci_bus = {
>+diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h index f07bf9b588..6d5dbc1d50 100644
>+--- a/drivers/bus/pci/rte_bus_pci.h
>++++ b/drivers/bus/pci/rte_bus_pci.h
>+@@ -160,14 +160,14 @@ int rte_pci_set_bus_master(const struct rte_pci_device *dev, bool enable);
>+ *
>+* @param dev
>+* A pointer to a rte_pci_device structure.
>+- * @param offset
>+- * Offset of the PASID external capability.
>+ * @param enable
>+ * Flag to enable or disable PASID.
>++ *
>++ * @return
>++ * 0 on success, -1 on error in PCI config space read/write.
>+ */
>+ __rte_internal
>+-int rte_pci_pasid_set_state(const struct rte_pci_device *dev,
>+- off_t offset, bool enable);
>++int rte_pci_pasid_set_state(const struct rte_pci_device *dev, bool
>++enable);
>+/**
>+* Read PCI config space.
>+diff --git a/drivers/event/dlb2/pf/dlb2_main.c
>+b/drivers/event/dlb2/pf/dlb2_main.c
>+index 61a7b39eef..bd1ee4af27 100644
>+--- a/drivers/event/dlb2/pf/dlb2_main.c
>++++ b/drivers/event/dlb2/pf/dlb2_main.c
>+@@ -26,7 +26,6 @@
>+ #define PF_ID_ZERO 0 /* PF ONLY! */
>+#define NO_OWNER_VF 0 /* PF ONLY! */
>+ #define NOT_VF_REQ false /* PF ONLY! */
>+-#define DLB2_PCI_PASID_CAP_OFFSET 0x148 /* PASID capability offset */
>+static int
>+dlb2_pf_init_driver_state(struct dlb2_dev *dlb2_dev) @@ -518,8 +517,7 @@ dlb2_pf_reset(struct dlb2_dev *dlb2_dev)
>+ /* Disable PASID if it is enabled by default, which
>+ * breaks the DLB if enabled.
>+ */
>+- off = DLB2_PCI_PASID_CAP_OFFSET + RTE_PCI_PASID_CTRL;
>+- if (rte_pci_pasid_set_state(pdev, off, false)) {
>++ if (rte_pci_pasid_set_state(pdev, false)) {
>+ DLB2_LOG_ERR("[%s()] failed to write the pcie config space at offset %d\n",
>+ __func__, (int)off);
>+ return -1;
Hi David,
>++ pasid_offset = rte_pci_find_ext_capability(dev,
>+RTE_PCI_EXT_CAP_ID_PASID);
The rte_pci_find_ext_capability() API does not work for PASID, since the PASID capability is not exposed to userspace by the kernel.
So we cannot retrieve the offset. Instead, we came up with a solution that passes an offset to a function that disables PASID, and we made the function internal so that we can change it later.
When the Linux limitation is lifted, we can rewrite the function to use the rte_pci_find_ext_capability() API to retrieve the offset, and your
solution above can be adopted.
Hello,
On Mon, Nov 6, 2023 at 7:50 PM Sevincer, Abdullah
<abdullah.sevincer@intel.com> wrote:
> Hi David,
> >++ pasid_offset = rte_pci_find_ext_capability(dev,
> >+RTE_PCI_EXT_CAP_ID_PASID);
>
> That rte_pci_find_ext_capability() api does not work for PASID since PASID is not exposed to user from kernel.
> So, we can not retrieve offset. Instead we came up with a solution that passes an offset to an internal function to disable PASID and make the function internal so we can change it later.
> When the linux limitation is lifted we can re-write the functions and use rte_pci_find_ext_capability api to retrieve offset and your
> solution above can be done.
Adding PCI bus maintainers, Chenbo and Nipun.
Ok, that is indeed an issue.
I found some patches exposing this capability with vfio-pci but I am
not sure what is the latest work on the topic.
Do you have pointers to the latest kernel patches?
In any case, even if, in the future, the kernel exposes this
capability, we need to live with the current behavior (and probably
for a long time).
As the discovery of the pasid offset is not possible, the merit of a common API
is low, but at least it shows what is being done by the driver.
Can we make a change so that this new API takes only the offset to the
pasid *structure* and not to the exact register controlling the
feature?
It should be something like:
$ git diff
diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index ba5e280d33..c66cefcd63 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -943,9 +943,9 @@ rte_pci_pasid_set_state(const struct rte_pci_device *dev,
off_t offset, bool enable)
{
uint16_t pasid = enable;
- return rte_pci_write_config(dev, &pasid, sizeof(pasid), offset) < 0
- ? -1
- : 0;
+
+ return rte_pci_write_config(dev, &pasid, sizeof(pasid),
+ offset + RTE_PCI_PASID_CTRL) < 0 ? -1 : 0;
}
struct rte_pci_bus rte_pci_bus = {
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index f07bf9b588..b1d17996cb 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -161,9 +161,12 @@ int rte_pci_set_bus_master(const struct
rte_pci_device *dev, bool enable);
* @param dev
* A pointer to a rte_pci_device structure.
* @param offset
- * Offset of the PASID external capability.
+ * Offset of the PASID external capability structure.
* @param enable
* Flag to enable or disable PASID.
+ *
+ * @return
+ * 0 on success, -1 on error in PCI config space read/write.
*/
__rte_internal
int rte_pci_pasid_set_state(const struct rte_pci_device *dev,
diff --git a/drivers/event/dlb2/pf/dlb2_main.c
b/drivers/event/dlb2/pf/dlb2_main.c
index 61a7b39eef..a95d3227a4 100644
--- a/drivers/event/dlb2/pf/dlb2_main.c
+++ b/drivers/event/dlb2/pf/dlb2_main.c
@@ -518,8 +518,8 @@ dlb2_pf_reset(struct dlb2_dev *dlb2_dev)
/* Disable PASID if it is enabled by default, which
* breaks the DLB if enabled.
*/
- off = DLB2_PCI_PASID_CAP_OFFSET + RTE_PCI_PASID_CTRL;
- if (rte_pci_pasid_set_state(pdev, off, false)) {
+ off = DLB2_PCI_PASID_CAP_OFFSET;
+ if (rte_pci_pasid_set_state(pdev, off, false) < 0) {
DLB2_LOG_ERR("[%s()] failed to write the pcie config
space at offset %d\n",
__func__, (int)off);
return -1;
diff --git a/lib/pci/rte_pci.h b/lib/pci/rte_pci.h
index 0d2d8d8fed..94219792de 100644
--- a/lib/pci/rte_pci.h
+++ b/lib/pci/rte_pci.h
@@ -101,7 +101,7 @@ extern "C" {
#define RTE_PCI_EXT_CAP_ID_ACS 0x0d /* Access Control Services */
#define RTE_PCI_EXT_CAP_ID_SRIOV 0x10 /* SR-IOV */
#define RTE_PCI_EXT_CAP_ID_PRI 0x13 /* Page Request Interface */
-#define RTE_PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */
+#define RTE_PCI_EXT_CAP_ID_PASID 0x1b /* Process Address Space ID */
/* Advanced Error Reporting (RTE_PCI_EXT_CAP_ID_ERR) */
#define RTE_PCI_ERR_UNCOR_STATUS 0x04 /* Uncorrectable Error Status */
>+Ok, that is indeed an issue.
>+I found some patches exposing this capability with vfio-pci but I am not sure what is the latest work on the topic.
That's right, some kernels may expose the capability and some may not. We realized this when Bruce reported that my earlier patch using the rte_pci_find_ext_capability() API did not
work for him, although it was working for me. We had different versions/flavors of the kernel.
>+Do you have pointers to the latest kernel patches?
I don’t have any pointers to the latest kernel patches. Through internet searches I have come across some patches that were submitted to expose the capability, but I am not sure what the latest status of those patches is.
We can ask people involved there and get feedback.
For example below:
https://lists.linuxfoundation.org/pipermail/iommu/2020-June/045531.html
>+In any case, even if, in the future, the kernel exposes this capability, we need to live with the current behavior (and probably for a long time).
>+As the discovery of pasid offset is not possible, the common API merit is low, but at least it shows what is being done by the driver.
>+Can we make a change so that this new API takes only the offset to the pasid *structure* and not to the exact register controlling the feature?
>+It should be something like:
We can change it the way you described. The patch is already merged though 😊.
I have pushed another patch addressing David's comments.
https://patches.dpdk.org/project/dpdk/patch/20231113172759.3529518-1-abdullah.sevincer@intel.com/
@@ -938,6 +938,13 @@ rte_pci_set_bus_master(const struct rte_pci_device *dev, bool enable)
return 0;
}
+int
+rte_pci_pasid_set_state(const struct rte_pci_device *dev, off_t offset, bool enable)
+{
+ uint16_t pasid = enable;
+ return rte_pci_write_config(dev, &pasid, sizeof(pasid), offset) < 0 ? -1 : 0;
+}
+
struct rte_pci_bus rte_pci_bus = {
.bus = {
.scan = rte_pci_scan,
@@ -295,6 +295,19 @@ void rte_pci_ioport_read(struct rte_pci_ioport *p,
void rte_pci_ioport_write(struct rte_pci_ioport *p,
const void *data, size_t len, off_t offset);
+/**
+ * Enable/Disable PASID.
+ *
+ * @param dev
+ * A pointer to a rte_pci_device structure.
+ * @param offset
+ * Offset of the PASID external capability.
+ * @param enable
+ * Flag to enable or disable PASID.
+ */
+__rte_internal
+int rte_pci_pasid_set_state(const struct rte_pci_device *dev, off_t offset, bool enable);
+
#ifdef __cplusplus
}
#endif
@@ -37,5 +37,6 @@ INTERNAL {
rte_pci_get_sysfs_path;
rte_pci_register;
+ rte_pci_pasid_set_state;
rte_pci_unregister;
};
@@ -101,6 +101,10 @@ extern "C" {
#define RTE_PCI_EXT_CAP_ID_ACS 0x0d /* Access Control Services */
#define RTE_PCI_EXT_CAP_ID_SRIOV 0x10 /* SR-IOV */
#define RTE_PCI_EXT_CAP_ID_PRI 0x13 /* Page Request Interface */
+#define RTE_PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */
+
+/* Process Address Space ID */
+#define RTE_PCI_PASID_CTRL 0x06 /* PASID control register */
/* Advanced Error Reporting (RTE_PCI_EXT_CAP_ID_ERR) */
#define RTE_PCI_ERR_UNCOR_STATUS 0x04 /* Uncorrectable Error Status */