[v8,08/10] common/mlx5: introduce layer to support multiple class drivers

Message ID 20200723200910.376581-9-parav@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Raslan Darawsheh
Headers
Series Improve mlx5 PMD driver framework for multiple classes |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation fail apply issues

Commit Message

Parav Pandit July 23, 2020, 8:09 p.m. UTC
  From: Thomas Monjalon <thomas@monjalon.net>

Add generic mlx5 PCI PMD layer as part of existing common_mlx5
module. This enables multiple classes (net, regex, vdpa) PMDs
to be supported at same time.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/Makefile                              |  10 +-
 drivers/common/Makefile                       |   4 -
 drivers/common/meson.build                    |   2 +-
 drivers/common/mlx5/Makefile                  |   1 +
 drivers/common/mlx5/meson.build               |   9 +-
 drivers/common/mlx5/mlx5_common.c             |   2 +
 drivers/common/mlx5/mlx5_common_pci.c         | 541 ++++++++++++++++++
 drivers/common/mlx5/mlx5_common_pci.h         |  77 +++
 .../common/mlx5/rte_common_mlx5_version.map   |   2 +
 drivers/meson.build                           |   1 +
 10 files changed, 638 insertions(+), 11 deletions(-)
 create mode 100644 drivers/common/mlx5/mlx5_common_pci.c
 create mode 100644 drivers/common/mlx5/mlx5_common_pci.h
  

Comments

David Marchand July 24, 2020, 2:40 p.m. UTC | #1
This is not a thorough review, just caught one issue for makefile and some nits.


On Thu, Jul 23, 2020 at 10:11 PM Parav Pandit <parav@mellanox.com> wrote:
>
> From: Thomas Monjalon <thomas@monjalon.net>
>
> Add generic mlx5 PCI PMD layer as part of existing common_mlx5
> module. This enables multiple classes (net, regex, vdpa) PMDs
> to be supported at same time.
>
> Signed-off-by: Parav Pandit <parav@mellanox.com>
> Acked-by: Matan Azrad <matan@mellanox.com>
> ---
>  drivers/Makefile                              |  10 +-
>  drivers/common/Makefile                       |   4 -
>  drivers/common/meson.build                    |   2 +-
>  drivers/common/mlx5/Makefile                  |   1 +
>  drivers/common/mlx5/meson.build               |   9 +-
>  drivers/common/mlx5/mlx5_common.c             |   2 +
>  drivers/common/mlx5/mlx5_common_pci.c         | 541 ++++++++++++++++++
>  drivers/common/mlx5/mlx5_common_pci.h         |  77 +++
>  .../common/mlx5/rte_common_mlx5_version.map   |   2 +
>  drivers/meson.build                           |   1 +
>  10 files changed, 638 insertions(+), 11 deletions(-)
>  create mode 100644 drivers/common/mlx5/mlx5_common_pci.c
>  create mode 100644 drivers/common/mlx5/mlx5_common_pci.h
>
> diff --git a/drivers/Makefile b/drivers/Makefile
> index 1551272ef..7f06162dc 100644
> --- a/drivers/Makefile
> +++ b/drivers/Makefile
> @@ -8,8 +8,12 @@ DIRS-y += bus
>  DEPDIRS-bus := common
>  DIRS-y += mempool
>  DEPDIRS-mempool := common bus
> +ifeq ($(findstring y,$(CONFIG_RTE_LIBRTE_MLX5_PMD)$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD)$(CONFIG_RTE_LIBRTE_MLX5_REGEX_PMD)),y)
> +DIRS-y += common/mlx5
> +DEPDIRS-common/mlx5 := bus
> +endif
>  DIRS-y += net
> -DEPDIRS-net := common bus mempool
> +DEPDIRS-net := common bus mempool common/mlx5

Will it still work for people not building with mlx5?

I would have seen something like:
DEPDIRS-net/mlx5 := xx xx xx common/mlx5
Or maybe
DEPDIRS-net/mlx5 += common/mlx5


>  DIRS-$(CONFIG_RTE_LIBRTE_BBDEV) += baseband
>  DEPDIRS-baseband := common bus mempool
>  DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += crypto
> @@ -19,9 +23,9 @@ DEPDIRS-common/qat := bus mempool
>  DIRS-$(CONFIG_RTE_LIBRTE_COMPRESSDEV) += compress
>  DEPDIRS-compress := bus mempool
>  DIRS-$(CONFIG_RTE_LIBRTE_REGEXDEV) += regex
> -DEPDIRS-regex := common bus
> +DEPDIRS-regex := common bus common/mlx5

DEPDIRS-regex/mlx5 := xx xx xx common/mlx5

>  DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vdpa
> -DEPDIRS-vdpa := common bus mempool
> +DEPDIRS-vdpa := common bus mempool common/mlx5

Idem.


>  DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += event
>  DEPDIRS-event := common bus mempool net crypto
>  DIRS-$(CONFIG_RTE_LIBRTE_RAWDEV) += raw
> diff --git a/drivers/common/Makefile b/drivers/common/Makefile
> index cbc71077c..cfb6b4dc8 100644
> --- a/drivers/common/Makefile
> +++ b/drivers/common/Makefile
> @@ -36,8 +36,4 @@ ifneq (,$(findstring y,$(IAVF-y)))
>  DIRS-y += iavf
>  endif
>
> -ifeq ($(findstring y,$(CONFIG_RTE_LIBRTE_MLX5_PMD)$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD)$(CONFIG_RTE_LIBRTE_MLX5_REGEX_PMD)),y)
> -DIRS-y += mlx5
> -endif
> -
>  include $(RTE_SDK)/mk/rte.subdir.mk
> diff --git a/drivers/common/meson.build b/drivers/common/meson.build
> index 5db7e29b1..9ed4c04ba 100644
> --- a/drivers/common/meson.build
> +++ b/drivers/common/meson.build
> @@ -6,6 +6,6 @@ if is_windows
>  endif
>
>  std_deps = ['eal']
> -drivers = ['cpt', 'dpaax', 'iavf', 'mlx5', 'mvep', 'octeontx', 'octeontx2', 'qat']
> +drivers = ['cpt', 'dpaax', 'iavf', 'mvep', 'octeontx', 'octeontx2', 'qat']
>  config_flag_fmt = 'RTE_LIBRTE_@0@_COMMON'
>  driver_name_fmt = 'rte_common_@0@'
> diff --git a/drivers/common/mlx5/Makefile b/drivers/common/mlx5/Makefile
> index 6b89a6c85..eaabb8c10 100644
> --- a/drivers/common/mlx5/Makefile
> +++ b/drivers/common/mlx5/Makefile
> @@ -22,6 +22,7 @@ SRCS-y += linux/mlx5_common_verbs.c
>  SRCS-y += mlx5_common_mp.c
>  SRCS-y += mlx5_common_mr.c
>  SRCS-y += mlx5_malloc.c
> +SRCS-y += mlx5_common_pci.c

Alphabetical order?


>  ifeq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y)
>  INSTALL-y-lib += $(LIB_GLUE)
>  endif
> diff --git a/drivers/common/mlx5/meson.build b/drivers/common/mlx5/meson.build
> index 70e2c1c32..8e5608703 100644
> --- a/drivers/common/mlx5/meson.build
> +++ b/drivers/common/mlx5/meson.build
> @@ -1,19 +1,22 @@
>  # SPDX-License-Identifier: BSD-3-Clause
>  # Copyright 2019 Mellanox Technologies, Ltd
>
> -if not (is_linux or is_windows)
> +if not is_linux
>         build = false
> -       reason = 'only supported on Linux and Windows'
> +       reason = 'only supported on Linux'
>         subdir_done()
>  endif
>
> -deps += ['hash', 'pci', 'net', 'eal', 'kvargs']
> +config_flag_fmt = 'RTE_LIBRTE_@0@_COMMON'
> +driver_name_fmt = 'rte_common_@0@'
> +deps += ['hash', 'pci', 'bus_pci', 'net', 'eal', 'kvargs']
>  sources += files(
>         'mlx5_devx_cmds.c',
>         'mlx5_common.c',
>         'mlx5_common_mp.c',
>         'mlx5_common_mr.c',
>         'mlx5_malloc.c',
> +       'mlx5_common_pci.c',

Idem order.


>  )
>
>  cflags_options = [
> diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c
> index 2b336bb2d..fd818ef24 100644
> --- a/drivers/common/mlx5/mlx5_common.c
> +++ b/drivers/common/mlx5/mlx5_common.c
> @@ -14,6 +14,7 @@
>  #include "mlx5_common_os.h"
>  #include "mlx5_common_utils.h"
>  #include "mlx5_malloc.h"
> +#include "mlx5_common_pci.h"

Idem order.


>
>  int mlx5_common_logtype;
>
> @@ -100,6 +101,7 @@ mlx5_common_init(void)
>                 return;
>
>         mlx5_glue_constructor();
> +       mlx5_common_pci_init();
>         mlx5_common_initialized = true;
>  }
>
> diff --git a/drivers/common/mlx5/mlx5_common_pci.c b/drivers/common/mlx5/mlx5_common_pci.c
> new file mode 100644
> index 000000000..b42f7469f
> --- /dev/null
> +++ b/drivers/common/mlx5/mlx5_common_pci.c
> @@ -0,0 +1,541 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies Ltd
> + */
> +
> +#include <stdlib.h>

Nit: headers usually come in separate blocks, "system headers", "dpdk
headers", "drivers headers".
So missing an empty line here.

> +#include <rte_malloc.h>


> +#include "mlx5_common_utils.h"
> +#include "mlx5_common_pci.h"
> +
> +struct mlx5_pci_device {
> +      struct rte_pci_device *pci_dev;
> +      TAILQ_ENTRY(mlx5_pci_device) next;
> +      uint32_t classes_loaded;
> +};
> +
> +/* Head of list of drivers. */
> +static TAILQ_HEAD(mlx5_pci_bus_drv_head, mlx5_pci_driver) drv_list =
> +                               TAILQ_HEAD_INITIALIZER(drv_list);
> +
> +/* Head of mlx5 pci devices. */
> +static TAILQ_HEAD(mlx5_pci_devices_head, mlx5_pci_device) devices_list =
> +                               TAILQ_HEAD_INITIALIZER(devices_list);
> +
> +static const struct {
> +       const char *name;
> +       unsigned int driver_class;
> +} mlx5_classes[] = {
> +       { .name = "vdpa", .driver_class = MLX5_CLASS_VDPA },
> +       { .name = "net", .driver_class = MLX5_CLASS_NET },
> +       { .name = "regex", .driver_class = MLX5_CLASS_REGEX },
> +};
> +
> +static const unsigned int mlx5_class_combinations[] = {
> +       MLX5_CLASS_NET,
> +       MLX5_CLASS_VDPA,
> +       MLX5_CLASS_REGEX,
> +       /* New class combination should be added here.
> +        * For example a new multi class device combination
> +        * can be MLX5_CLASS_FOO | MLX5_CLASS_BAR.
> +        */
> +};
> +
> +static int
> +class_name_to_value(const char *class_name)
> +{
> +       unsigned int i;
> +
> +       for (i = 0; i < RTE_DIM(mlx5_classes); i++) {
> +               if (strcmp(class_name, mlx5_classes[i].name) == 0)
> +                       return mlx5_classes[i].driver_class;
> +       }
> +       return -EINVAL;
> +}
> +
> +static struct mlx5_pci_driver *
> +driver_get(uint32_t class)
> +{
> +       struct mlx5_pci_driver *driver;
> +
> +       TAILQ_FOREACH(driver, &drv_list, next) {
> +               if (driver->driver_class == class)
> +                       return driver;
> +       }
> +       return NULL;
> +}
> +
> +static int
> +bus_cmdline_options_handler(__rte_unused const char *key,
> +                           const char *class_names, void *opaque)
> +{
> +       int *ret = opaque;
> +       char *nstr_org;
> +       int class_val;
> +       char *found;
> +       char *nstr;
> +
> +       *ret = 0;
> +       nstr = strdup(class_names);
> +       if (!nstr) {
> +               *ret = -ENOMEM;
> +               return *ret;
> +       }
> +       nstr_org = nstr;
> +       while (nstr) {
> +               /* Extract each individual class name. Multiple
> +                * class key,value is supplied as class=net:vdpa:foo:bar.
> +                */
> +               found = strsep(&nstr, ":");
> +               if (!found)
> +                       continue;
> +               /* Check if its a valid class. */
> +               class_val = class_name_to_value(found);
> +               if (class_val < 0) {
> +                       *ret = -EINVAL;
> +                       goto err;
> +               }
> +               *ret |= class_val;
> +       }
> +err:
> +       free(nstr_org);
> +       if (*ret < 0)
> +               DRV_LOG(ERR, "Invalid mlx5 class options %s."
> +                       " Maybe typo in device class argument setting?",
> +                       class_names);
> +       return *ret;
> +}
> +
> +static int
> +parse_class_options(const struct rte_devargs *devargs)
> +{
> +       const char *key = MLX5_CLASS_ARG_NAME;
> +       struct rte_kvargs *kvlist;
> +       int ret = 0;
> +
> +       if (devargs == NULL)
> +               return 0;
> +       kvlist = rte_kvargs_parse(devargs->args, NULL);
> +       if (kvlist == NULL)
> +               return 0;
> +       if (rte_kvargs_count(kvlist, key))
> +               rte_kvargs_process(kvlist, key, bus_cmdline_options_handler,
> +                                  &ret);
> +       rte_kvargs_free(kvlist);
> +       return ret;
> +}
> +
> +static bool
> +mlx5_bus_match(const struct mlx5_pci_driver *drv,
> +              const struct rte_pci_device *pci_dev)
> +{
> +       const struct rte_pci_id *id_table;
> +
> +       for (id_table = drv->pci_driver.id_table; id_table->vendor_id != 0;
> +            id_table++) {
> +               /* Check if device's ids match the class driver's ids. */
> +               if (id_table->vendor_id != pci_dev->id.vendor_id &&
> +                   id_table->vendor_id != PCI_ANY_ID)
> +                       continue;
> +               if (id_table->device_id != pci_dev->id.device_id &&
> +                   id_table->device_id != PCI_ANY_ID)
> +                       continue;
> +               if (id_table->subsystem_vendor_id !=
> +                   pci_dev->id.subsystem_vendor_id &&
> +                   id_table->subsystem_vendor_id != PCI_ANY_ID)
> +                       continue;
> +               if (id_table->subsystem_device_id !=
> +                   pci_dev->id.subsystem_device_id &&
> +                   id_table->subsystem_device_id != PCI_ANY_ID)
> +                       continue;
> +               if (id_table->class_id != pci_dev->id.class_id &&
> +                   id_table->class_id != RTE_CLASS_ANY_ID)
> +                       continue;
> +               return true;
> +       }
> +       return false;
> +}
> +
> +static int
> +is_valid_class_combination(uint32_t user_classes)
> +{
> +       unsigned int i;
> +
> +       /* Verify if user specified valid supported combination. */

Are there invalid supported combinations? :-)

/* Verify if the user specified a supported combination. */


> +       for (i = 0; i < RTE_DIM(mlx5_class_combinations); i++) {
> +               if (mlx5_class_combinations[i] == user_classes)
> +                       return 0;
> +       }
> +       /* Not found any valid class combination. */
> +       return -EINVAL;
> +}
> +
> +static struct mlx5_pci_device *
> +pci_to_mlx5_device(const struct rte_pci_device *pci_dev)
> +{
> +       struct mlx5_pci_device *dev;
> +
> +       TAILQ_FOREACH(dev, &devices_list, next) {
> +               if (dev->pci_dev == pci_dev)
> +                       return dev;
> +       }
> +       return NULL;
> +}
> +
> +static bool
> +device_class_enabled(const struct mlx5_pci_device *device, uint32_t class)
> +{
> +       return (device->classes_loaded & class) ? true : false;
> +}
> +
> +static void
> +dev_release(struct mlx5_pci_device *dev)
> +{
> +       TAILQ_REMOVE(&devices_list, dev, next);
> +       rte_free(dev);
> +}
> +
> +static int
> +drivers_remove(struct mlx5_pci_device *dev, uint32_t enabled_classes)
> +{
> +       struct mlx5_pci_driver *driver;
> +       int local_ret = -ENODEV;
> +       unsigned int i = 0;
> +       int ret = 0;
> +
> +       enabled_classes &= dev->classes_loaded;
> +       while (enabled_classes) {
> +               driver = driver_get(RTE_BIT64(i));
> +               if (driver) {
> +                       local_ret = driver->pci_driver.remove(dev->pci_dev);
> +                       if (!local_ret)
> +                               dev->classes_loaded &= ~RTE_BIT64(i);
> +                       else if (ret == 0)
> +                               ret = local_ret;
> +               }
> +               enabled_classes &= ~RTE_BIT64(i);
> +               i++;
> +       }
> +       if (local_ret)
> +               ret = local_ret;
> +       return ret;
> +}
> +
> +static int
> +drivers_probe(struct mlx5_pci_device *dev, struct rte_pci_driver *pci_drv,
> +             struct rte_pci_device *pci_dev, uint32_t user_classes)
> +{
> +       struct mlx5_pci_driver *driver;
> +       uint32_t enabled_classes = 0;
> +       bool already_loaded;
> +       int ret;
> +
> +       TAILQ_FOREACH(driver, &drv_list, next) {
> +               if ((driver->driver_class & user_classes) == 0)
> +                       continue;
> +               if (!mlx5_bus_match(driver, pci_dev))
> +                       continue;
> +               already_loaded = dev->classes_loaded & driver->driver_class;
> +               if (already_loaded &&
> +                   !(driver->pci_driver.drv_flags & RTE_PCI_DRV_PROBE_AGAIN)) {
> +                       DRV_LOG(ERR, "Device %s is already probed\n",
> +                               pci_dev->device.name);
> +                       ret = -EEXIST;
> +                       goto probe_err;
> +               }
> +               ret = driver->pci_driver.probe(pci_drv, pci_dev);
> +               if (ret < 0) {
> +                       DRV_LOG(ERR, "Failed to load driver = %s.\n",
> +                               driver->pci_driver.driver.name);
> +                       goto probe_err;
> +               }
> +               enabled_classes |= driver->driver_class;
> +       }
> +       dev->classes_loaded |= enabled_classes;
> +       return 0;
> +probe_err:
> +       /* Only unload drivers which are enabled which were enabled
> +        * in this probe instance.
> +        */
> +       drivers_remove(dev, enabled_classes);
> +       return ret;
> +}
> +
> +/**
> + * DPDK callback to register to probe multiple drivers for a PCI device.
> + *
> + * @param[in] pci_drv
> + *   PCI driver structure.
> + * @param[in] dev
> + *   PCI device information.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +mlx5_common_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
> +                     struct rte_pci_device *pci_dev)
> +{
> +       struct mlx5_pci_device *dev;
> +       uint32_t user_classes = 0;
> +       bool new_device = false;
> +       int ret;
> +
> +       ret = parse_class_options(pci_dev->device.devargs);
> +       if (ret < 0)
> +               return ret;
> +       user_classes = ret;
> +       if (user_classes) {
> +               /* Validate combination here. */
> +               ret = is_valid_class_combination(user_classes);
> +               if (ret) {
> +                       DRV_LOG(ERR, "Unsupported mlx5 classes supplied.");
> +                       return ret;
> +               }
> +       } else {
> +               /* Default to net class. */
> +               user_classes = MLX5_CLASS_NET;
> +       }
> +       dev = pci_to_mlx5_device(pci_dev);
> +       if (!dev) {
> +               dev = rte_zmalloc("mlx5_pci_device", sizeof(*dev), 0);
> +               if (!dev)
> +                       return -ENOMEM;
> +               dev->pci_dev = pci_dev;
> +               TAILQ_INSERT_HEAD(&devices_list, dev, next);
> +               new_device = true;
> +       }
> +       ret = drivers_probe(dev, pci_drv, pci_dev, user_classes);
> +       if (ret)
> +               goto class_err;
> +       return 0;
> +class_err:
> +       if (new_device)
> +               dev_release(dev);
> +       return ret;
> +}
> +
> +/**
> + * DPDK callback to remove one or more drivers for a PCI device.
> + *
> + * This function removes all drivers probed for a given PCI device.
> + *
> + * @param[in] pci_dev
> + *   Pointer to the PCI device.
> + *
> + * @return
> + *   0 on success, the function cannot fail.
> + */
> +static int
> +mlx5_common_pci_remove(struct rte_pci_device *pci_dev)
> +{
> +       struct mlx5_pci_device *dev;
> +       int ret;
> +
> +       dev = pci_to_mlx5_device(pci_dev);
> +       if (!dev)
> +               return -ENODEV;
> +       /* Matching device found, cleanup and unload drivers. */
> +       ret = drivers_remove(dev, dev->classes_loaded);
> +       if (!ret)
> +               dev_release(dev);
> +       return ret;
> +}
> +
> +static int
> +mlx5_common_pci_dma_map(struct rte_pci_device *pci_dev, void *addr,
> +                       uint64_t iova, size_t len)
> +{
> +       struct mlx5_pci_driver *driver = NULL;
> +       struct mlx5_pci_driver *temp;
> +       struct mlx5_pci_device *dev;
> +       int ret = -EINVAL;
> +
> +       dev = pci_to_mlx5_device(pci_dev);
> +       if (!dev)
> +               return -ENODEV;
> +       TAILQ_FOREACH(driver, &drv_list, next) {
> +               if (device_class_enabled(dev, driver->driver_class) &&
> +                   driver->pci_driver.dma_map) {
> +                       ret = driver->pci_driver.dma_map(pci_dev, addr,
> +                                                        iova, len);
> +                       if (ret)
> +                               goto map_err;
> +               }
> +       }
> +       return ret;
> +map_err:
> +       TAILQ_FOREACH(temp, &drv_list, next) {
> +               if (temp == driver)
> +                       break;
> +               if (device_class_enabled(dev, temp->driver_class) &&
> +                   temp->pci_driver.dma_map && temp->pci_driver.dma_unmap)
> +                       temp->pci_driver.dma_unmap(pci_dev, addr, iova, len);
> +       }
> +       return ret;
> +}
> +
> +static int
> +mlx5_common_pci_dma_unmap(struct rte_pci_device *pci_dev, void *addr,
> +                         uint64_t iova, size_t len)
> +{
> +       struct mlx5_pci_driver *driver;
> +       struct mlx5_pci_device *dev;
> +       int local_ret = -EINVAL;
> +       int ret;
> +
> +       dev = pci_to_mlx5_device(pci_dev);
> +       if (!dev)
> +               return -ENODEV;
> +       ret = 0;
> +       /* There is no unmap error recovery in current implementation. */
> +       TAILQ_FOREACH_REVERSE(driver, &drv_list, mlx5_pci_bus_drv_head, next) {
> +               if (device_class_enabled(dev, driver->driver_class) &&
> +                   driver->pci_driver.dma_unmap) {
> +                       local_ret = driver->pci_driver.dma_unmap(pci_dev, addr,
> +                                                                iova, len);
> +                       if (local_ret && (ret == 0))
> +                               ret = local_ret;
> +               }
> +       }
> +       if (local_ret)
> +               ret = local_ret;
> +       return ret;
> +}
> +
> +/* PCI ID table is build dynamically based on registered mlx5 drivers. */

built

> +static struct rte_pci_id *mlx5_pci_id_table;
> +
> +static struct rte_pci_driver mlx5_pci_driver = {
> +       .driver = {
> +               .name = "mlx5_pci",
> +       },
> +       .probe = mlx5_common_pci_probe,
> +       .remove = mlx5_common_pci_remove,
> +       .dma_map = mlx5_common_pci_dma_map,
> +       .dma_unmap = mlx5_common_pci_dma_unmap,
> +};
> +
> +static int
> +pci_id_table_size_get(const struct rte_pci_id *id_table)
> +{
> +       int table_size = 0;
> +
> +       for (; id_table->vendor_id != 0; id_table++)
> +               table_size++;
> +       return table_size;
> +}
> +
> +static bool
> +pci_id_exists(const struct rte_pci_id *id, const struct rte_pci_id *table,
> +             int next_idx)
> +{
> +       int current_size = next_idx - 1;
> +       int i;
> +
> +       for (i = 0; i < current_size; i++) {
> +               if (id->device_id == table[i].device_id &&
> +                   id->vendor_id == table[i].vendor_id &&
> +                   id->subsystem_vendor_id == table[i].subsystem_vendor_id &&
> +                   id->subsystem_device_id == table[i].subsystem_device_id)
> +                       return true;
> +       }
> +       return false;
> +}
> +
> +static void
> +pci_id_insert(struct rte_pci_id *new_table, int *next_idx,
> +             const struct rte_pci_id *id_table)
> +{
> +       /* Traverse the id_table, check if entry exists in new_table;
> +        * Add non duplicate entries to new table.
> +        */
> +       for (; id_table->vendor_id != 0; id_table++) {
> +               if (!pci_id_exists(id_table, new_table, *next_idx)) {
> +                       /* New entry; add to the table. */
> +                       new_table[*next_idx] = *id_table;
> +                       (*next_idx)++;
> +               }
> +       }
> +}
> +
> +static int
> +pci_ids_table_update(const struct rte_pci_id *driver_id_table)
> +{
> +       const struct rte_pci_id *id_iter;
> +       struct rte_pci_id *updated_table;
> +       struct rte_pci_id *old_table;
> +       int num_ids = 0;
> +       int i = 0;
> +
> +       old_table = mlx5_pci_id_table;
> +       if (old_table)
> +               num_ids = pci_id_table_size_get(old_table);
> +       num_ids += pci_id_table_size_get(driver_id_table);
> +       /* Increase size by one for the termination entry of vendor_id = 0. */
> +       num_ids += 1;
> +       updated_table = calloc(num_ids, sizeof(*updated_table));
> +       if (!updated_table)
> +               return -ENOMEM;
> +       if (TAILQ_EMPTY(&drv_list)) {
> +               /* Copy the first driver's ID table. */
> +               for (id_iter = driver_id_table; id_iter->vendor_id != 0;
> +                    id_iter++, i++)
> +                       updated_table[i] = *id_iter;
> +       } else {
> +               /* First copy existing table entries. */
> +               for (id_iter = old_table; id_iter->vendor_id != 0;
> +                    id_iter++, i++)
> +                       updated_table[i] = *id_iter;
> +               /* New id to be added at the end of current ID table. */
> +               pci_id_insert(updated_table, &i, driver_id_table);
> +       }
> +       /* Terminate table with empty entry. */
> +       updated_table[i].vendor_id = 0;
> +       mlx5_pci_driver.id_table = updated_table;
> +       mlx5_pci_id_table = updated_table;
> +       if (old_table)
> +               free(old_table);
> +       return 0;
> +}
> +
> +void
> +mlx5_pci_driver_register(struct mlx5_pci_driver *driver)
> +{
> +       int ret;
> +
> +       ret = pci_ids_table_update(driver->pci_driver.id_table);
> +       if (ret)
> +               return;
> +       mlx5_pci_driver.drv_flags |= driver->pci_driver.drv_flags;
> +       TAILQ_INSERT_TAIL(&drv_list, driver, next);
> +}
> +
> +void mlx5_common_pci_init(void)
> +{
> +       const struct rte_pci_id empty_table[] = {
> +               {
> +                       .vendor_id = 0
> +               },
> +       };
> +
> +       /* All mlx5 PMDs constructor runs at same priority. So any of the PMD

/* All mlx5 PMDs constructors run at the same priority. So any of the PMD


> +        * including this one can register the PCI table first. If any other
> +        * PMD(s) have registered the PCI ID table, No need to register an empty
> +        * default one.
> +        */
> +       if (mlx5_pci_id_table == NULL && pci_ids_table_update(empty_table))
> +                       return;
> +       rte_pci_register(&mlx5_pci_driver);
> +}
> +
> +RTE_FINI(mlx5_common_pci_finish)
> +{
> +       if (mlx5_pci_id_table != NULL) {
> +               /* Constructor doesn't register with PCI bus if it failed
> +                * to build the table.
> +                */
> +               rte_pci_unregister(&mlx5_pci_driver);
> +               free(mlx5_pci_id_table);
> +       }
> +}

I don't like the asymmetry between multiple constructors calling a
common init / and a single destructor in the common code.
But this is not really an issue.


> +RTE_PMD_EXPORT_NAME(mlx5_common_pci, __COUNTER__);

The pci driver name is mlx5_pci iirc.

But if there is no info about this driver to export, you can remove
RTE_PMD_EXPORT_NAME.


> diff --git a/drivers/common/mlx5/mlx5_common_pci.h b/drivers/common/mlx5/mlx5_common_pci.h
> new file mode 100644
> index 000000000..41b73e17a
> --- /dev/null
> +++ b/drivers/common/mlx5/mlx5_common_pci.h
> @@ -0,0 +1,77 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies, Ltd
> + */
> +
> +#ifndef _MLX5_COMMON_PCI_H_
> +#define _MLX5_COMMON_PCI_H_
> +
> +/**
> + * @file
> + *
> + * RTE Mellanox PCI Driver Interface
> + * Mellanox ConnectX PCI device supports multiple class (net/vdpa/regex)
> + * devices. This layer enables creating such multiple class of devices on a
> + * single PCI device by allowing to bind multiple class specific device
> + * driver to attach to mlx5_pci driver.
> + *
> + * -----------    ------------    -------------
> + * |   mlx5  |    |   mlx5   |    |   mlx5    |
> + * | net pmd |    | vdpa pmd |    | regex pmd |
> + * -----------    ------------    -------------
> + *      \              |                 /
> + *       \             |                /
> + *        \       --------------       /
> + *         \______|   mlx5     |_____ /
> + *                | pci common |
> + *                --------------
> + *                     |
> + *                 -----------
> + *                 |   mlx5  |
> + *                 | pci dev |
> + *                 -----------
> + *
> + * - mlx5 pci driver binds to mlx5 PCI devices defined by PCI
> + *   ID table of all related mlx5 PCI devices.
> + * - mlx5 class driver such as net, vdpa, regex PMD defines its
> + *   specific PCI ID table and mlx5 bus driver probes matching
> + *   class drivers.
> + * - mlx5 pci bus driver is cental place that validates supported

central


> + *   class combinations.
> + */
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif /* __cplusplus */
> +
> +#include <rte_pci.h>
> +#include <rte_bus_pci.h>
> +
> +#include <mlx5_common.h>
> +
> +void mlx5_common_pci_init(void);
> +
> +/**
> + * A structure describing a mlx5 pci driver.
> + */
> +struct mlx5_pci_driver {
> +       struct rte_pci_driver pci_driver;       /**< Inherit core pci driver. */
> +       uint32_t driver_class;  /**< Class of this driver, enum mlx5_class */
> +       TAILQ_ENTRY(mlx5_pci_driver) next;
> +};
> +
> +/**
> + * Register a mlx5_pci device driver.
> + *
> + * @param driver
> + *   A pointer to a mlx5_pci_driver structure describing the driver
> + *   to be registered.
> + */
> +__rte_internal
> +void
> +mlx5_pci_driver_register(struct mlx5_pci_driver *driver);
> +
> +#ifdef __cplusplus
> +}
> +#endif /* __cplusplus */
> +
> +#endif /* _MLX5_COMMON_PCI_H_ */
> diff --git a/drivers/common/mlx5/rte_common_mlx5_version.map b/drivers/common/mlx5/rte_common_mlx5_version.map
> index 65f25252a..73cf72548 100644
> --- a/drivers/common/mlx5/rte_common_mlx5_version.map
> +++ b/drivers/common/mlx5/rte_common_mlx5_version.map
> @@ -91,5 +91,7 @@ INTERNAL {
>         mlx5_malloc;
>         mlx5_realloc;
>         mlx5_free;
> +
> +       mlx5_pci_driver_register;
>  };
>
> diff --git a/drivers/meson.build b/drivers/meson.build
> index e6d0409aa..95df7ef1d 100644
> --- a/drivers/meson.build
> +++ b/drivers/meson.build
> @@ -5,6 +5,7 @@
>  subdirs = [
>         'common',
>         'bus',
> +       'common/mlx5', # depends on bus.
>         'mempool', # depends on common and bus.
>         'net',     # depends on common, bus, mempool
>         'raw',     # depends on common, bus and net.
> --
> 2.25.4
>

Reviewed-by: David Marchand <david.marchand@redhat.com>
  

Patch

diff --git a/drivers/Makefile b/drivers/Makefile
index 1551272ef..7f06162dc 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -8,8 +8,12 @@  DIRS-y += bus
 DEPDIRS-bus := common
 DIRS-y += mempool
 DEPDIRS-mempool := common bus
+ifeq ($(findstring y,$(CONFIG_RTE_LIBRTE_MLX5_PMD)$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD)$(CONFIG_RTE_LIBRTE_MLX5_REGEX_PMD)),y)
+DIRS-y += common/mlx5
+DEPDIRS-common/mlx5 := bus
+endif
 DIRS-y += net
-DEPDIRS-net := common bus mempool
+DEPDIRS-net := common bus mempool common/mlx5
 DIRS-$(CONFIG_RTE_LIBRTE_BBDEV) += baseband
 DEPDIRS-baseband := common bus mempool
 DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += crypto
@@ -19,9 +23,9 @@  DEPDIRS-common/qat := bus mempool
 DIRS-$(CONFIG_RTE_LIBRTE_COMPRESSDEV) += compress
 DEPDIRS-compress := bus mempool
 DIRS-$(CONFIG_RTE_LIBRTE_REGEXDEV) += regex
-DEPDIRS-regex := common bus
+DEPDIRS-regex := common bus common/mlx5
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vdpa
-DEPDIRS-vdpa := common bus mempool
+DEPDIRS-vdpa := common bus mempool common/mlx5
 DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += event
 DEPDIRS-event := common bus mempool net crypto
 DIRS-$(CONFIG_RTE_LIBRTE_RAWDEV) += raw
diff --git a/drivers/common/Makefile b/drivers/common/Makefile
index cbc71077c..cfb6b4dc8 100644
--- a/drivers/common/Makefile
+++ b/drivers/common/Makefile
@@ -36,8 +36,4 @@  ifneq (,$(findstring y,$(IAVF-y)))
 DIRS-y += iavf
 endif
 
-ifeq ($(findstring y,$(CONFIG_RTE_LIBRTE_MLX5_PMD)$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD)$(CONFIG_RTE_LIBRTE_MLX5_REGEX_PMD)),y)
-DIRS-y += mlx5
-endif
-
 include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/drivers/common/meson.build b/drivers/common/meson.build
index 5db7e29b1..9ed4c04ba 100644
--- a/drivers/common/meson.build
+++ b/drivers/common/meson.build
@@ -6,6 +6,6 @@  if is_windows
 endif
 
 std_deps = ['eal']
-drivers = ['cpt', 'dpaax', 'iavf', 'mlx5', 'mvep', 'octeontx', 'octeontx2', 'qat']
+drivers = ['cpt', 'dpaax', 'iavf', 'mvep', 'octeontx', 'octeontx2', 'qat']
 config_flag_fmt = 'RTE_LIBRTE_@0@_COMMON'
 driver_name_fmt = 'rte_common_@0@'
diff --git a/drivers/common/mlx5/Makefile b/drivers/common/mlx5/Makefile
index 6b89a6c85..eaabb8c10 100644
--- a/drivers/common/mlx5/Makefile
+++ b/drivers/common/mlx5/Makefile
@@ -22,6 +22,7 @@  SRCS-y += linux/mlx5_common_verbs.c
 SRCS-y += mlx5_common_mp.c
 SRCS-y += mlx5_common_mr.c
 SRCS-y += mlx5_malloc.c
+SRCS-y += mlx5_common_pci.c
 ifeq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y)
 INSTALL-y-lib += $(LIB_GLUE)
 endif
diff --git a/drivers/common/mlx5/meson.build b/drivers/common/mlx5/meson.build
index 70e2c1c32..8e5608703 100644
--- a/drivers/common/mlx5/meson.build
+++ b/drivers/common/mlx5/meson.build
@@ -1,19 +1,22 @@ 
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright 2019 Mellanox Technologies, Ltd
 
-if not (is_linux or is_windows)
+if not is_linux
 	build = false
-	reason = 'only supported on Linux and Windows'
+	reason = 'only supported on Linux'
 	subdir_done()
 endif
 
-deps += ['hash', 'pci', 'net', 'eal', 'kvargs']
+config_flag_fmt = 'RTE_LIBRTE_@0@_COMMON'
+driver_name_fmt = 'rte_common_@0@'
+deps += ['hash', 'pci', 'bus_pci', 'net', 'eal', 'kvargs']
 sources += files(
 	'mlx5_devx_cmds.c',
 	'mlx5_common.c',
 	'mlx5_common_mp.c',
 	'mlx5_common_mr.c',
 	'mlx5_malloc.c',
+	'mlx5_common_pci.c',
 )
 
 cflags_options = [
diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c
index 2b336bb2d..fd818ef24 100644
--- a/drivers/common/mlx5/mlx5_common.c
+++ b/drivers/common/mlx5/mlx5_common.c
@@ -14,6 +14,7 @@ 
 #include "mlx5_common_os.h"
 #include "mlx5_common_utils.h"
 #include "mlx5_malloc.h"
+#include "mlx5_common_pci.h"
 
 int mlx5_common_logtype;
 
@@ -100,6 +101,7 @@  mlx5_common_init(void)
 		return;
 
 	mlx5_glue_constructor();
+	mlx5_common_pci_init();
 	mlx5_common_initialized = true;
 }
 
diff --git a/drivers/common/mlx5/mlx5_common_pci.c b/drivers/common/mlx5/mlx5_common_pci.c
new file mode 100644
index 000000000..b42f7469f
--- /dev/null
+++ b/drivers/common/mlx5/mlx5_common_pci.c
@@ -0,0 +1,541 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies Ltd
+ */
+
+#include <stdlib.h>
+#include <rte_malloc.h>
+#include "mlx5_common_utils.h"
+#include "mlx5_common_pci.h"
+
+struct mlx5_pci_device {
+      struct rte_pci_device *pci_dev;
+      TAILQ_ENTRY(mlx5_pci_device) next;
+      uint32_t classes_loaded;
+};
+
+/* Head of list of drivers. */
+static TAILQ_HEAD(mlx5_pci_bus_drv_head, mlx5_pci_driver) drv_list =
+				TAILQ_HEAD_INITIALIZER(drv_list);
+
+/* Head of mlx5 pci devices. */
+static TAILQ_HEAD(mlx5_pci_devices_head, mlx5_pci_device) devices_list =
+				TAILQ_HEAD_INITIALIZER(devices_list);
+
+static const struct {
+	const char *name;
+	unsigned int driver_class;
+} mlx5_classes[] = {
+	{ .name = "vdpa", .driver_class = MLX5_CLASS_VDPA },
+	{ .name = "net", .driver_class = MLX5_CLASS_NET },
+	{ .name = "regex", .driver_class = MLX5_CLASS_REGEX },
+};
+
+static const unsigned int mlx5_class_combinations[] = {
+	MLX5_CLASS_NET,
+	MLX5_CLASS_VDPA,
+	MLX5_CLASS_REGEX,
+	/* New class combination should be added here.
+	 * For example a new multi class device combination
+	 * can be MLX5_CLASS_FOO | MLX5_CLASS_BAR.
+	 */
+};
+
+static int
+class_name_to_value(const char *class_name)
+{
+	unsigned int i;
+
+	for (i = 0; i < RTE_DIM(mlx5_classes); i++) {
+		if (strcmp(class_name, mlx5_classes[i].name) == 0)
+			return mlx5_classes[i].driver_class;
+	}
+	return -EINVAL;
+}
+
+static struct mlx5_pci_driver *
+driver_get(uint32_t class)
+{
+	struct mlx5_pci_driver *driver;
+
+	TAILQ_FOREACH(driver, &drv_list, next) {
+		if (driver->driver_class == class)
+			return driver;
+	}
+	return NULL;
+}
+
+static int
+bus_cmdline_options_handler(__rte_unused const char *key,
+			    const char *class_names, void *opaque)
+{
+	int *ret = opaque;
+	char *nstr_org;
+	int class_val;
+	char *found;
+	char *nstr;
+
+	*ret = 0;
+	nstr = strdup(class_names);
+	if (!nstr) {
+		*ret = -ENOMEM;
+		return *ret;
+	}
+	nstr_org = nstr;
+	while (nstr) {
+		/* Extract each individual class name. Multiple
+		 * class key,value is supplied as class=net:vdpa:foo:bar.
+		 */
+		found = strsep(&nstr, ":");
+		if (!found)
+			continue;
+		/* Check if its a valid class. */
+		class_val = class_name_to_value(found);
+		if (class_val < 0) {
+			*ret = -EINVAL;
+			goto err;
+		}
+		*ret |= class_val;
+	}
+err:
+	free(nstr_org);
+	if (*ret < 0)
+		DRV_LOG(ERR, "Invalid mlx5 class options %s."
+			" Maybe typo in device class argument setting?",
+			class_names);
+	return *ret;
+}
+
+static int
+parse_class_options(const struct rte_devargs *devargs)
+{
+	const char *key = MLX5_CLASS_ARG_NAME;
+	struct rte_kvargs *kvlist;
+	int ret = 0;
+
+	if (devargs == NULL)
+		return 0;
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (kvlist == NULL)
+		return 0;
+	if (rte_kvargs_count(kvlist, key))
+		rte_kvargs_process(kvlist, key, bus_cmdline_options_handler,
+				   &ret);
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
+static bool
+mlx5_bus_match(const struct mlx5_pci_driver *drv,
+	       const struct rte_pci_device *pci_dev)
+{
+	const struct rte_pci_id *id_table;
+
+	for (id_table = drv->pci_driver.id_table; id_table->vendor_id != 0;
+	     id_table++) {
+		/* Check if device's ids match the class driver's ids. */
+		if (id_table->vendor_id != pci_dev->id.vendor_id &&
+		    id_table->vendor_id != PCI_ANY_ID)
+			continue;
+		if (id_table->device_id != pci_dev->id.device_id &&
+		    id_table->device_id != PCI_ANY_ID)
+			continue;
+		if (id_table->subsystem_vendor_id !=
+		    pci_dev->id.subsystem_vendor_id &&
+		    id_table->subsystem_vendor_id != PCI_ANY_ID)
+			continue;
+		if (id_table->subsystem_device_id !=
+		    pci_dev->id.subsystem_device_id &&
+		    id_table->subsystem_device_id != PCI_ANY_ID)
+			continue;
+		if (id_table->class_id != pci_dev->id.class_id &&
+		    id_table->class_id != RTE_CLASS_ANY_ID)
+			continue;
+		return true;
+	}
+	return false;
+}
+
+static int
+is_valid_class_combination(uint32_t user_classes)
+{
+	unsigned int i;
+
+	/* Verify if user specified valid supported combination. */
+	for (i = 0; i < RTE_DIM(mlx5_class_combinations); i++) {
+		if (mlx5_class_combinations[i] == user_classes)
+			return 0;
+	}
+	/* Not found any valid class combination. */
+	return -EINVAL;
+}
+
+static struct mlx5_pci_device *
+pci_to_mlx5_device(const struct rte_pci_device *pci_dev)
+{
+	struct mlx5_pci_device *dev;
+
+	TAILQ_FOREACH(dev, &devices_list, next) {
+		if (dev->pci_dev == pci_dev)
+			return dev;
+	}
+	return NULL;
+}
+
+static bool
+device_class_enabled(const struct mlx5_pci_device *device, uint32_t class)
+{
+	return (device->classes_loaded & class) ? true : false;
+}
+
+static void
+dev_release(struct mlx5_pci_device *dev)
+{
+	TAILQ_REMOVE(&devices_list, dev, next);
+	rte_free(dev);
+}
+
+static int
+drivers_remove(struct mlx5_pci_device *dev, uint32_t enabled_classes)
+{
+	struct mlx5_pci_driver *driver;
+	int local_ret = -ENODEV;
+	unsigned int i = 0;
+	int ret = 0;
+
+	enabled_classes &= dev->classes_loaded;
+	while (enabled_classes) {
+		driver = driver_get(RTE_BIT64(i));
+		if (driver) {
+			local_ret = driver->pci_driver.remove(dev->pci_dev);
+			if (!local_ret)
+				dev->classes_loaded &= ~RTE_BIT64(i);
+			else if (ret == 0)
+				ret = local_ret;
+		}
+		enabled_classes &= ~RTE_BIT64(i);
+		i++;
+	}
+	if (local_ret)
+		ret = local_ret;
+	return ret;
+}
+
+static int
+drivers_probe(struct mlx5_pci_device *dev, struct rte_pci_driver *pci_drv,
+	      struct rte_pci_device *pci_dev, uint32_t user_classes)
+{
+	struct mlx5_pci_driver *driver;
+	uint32_t enabled_classes = 0;
+	bool already_loaded;
+	int ret;
+
+	TAILQ_FOREACH(driver, &drv_list, next) {
+		if ((driver->driver_class & user_classes) == 0)
+			continue;
+		if (!mlx5_bus_match(driver, pci_dev))
+			continue;
+		already_loaded = dev->classes_loaded & driver->driver_class;
+		if (already_loaded &&
+		    !(driver->pci_driver.drv_flags & RTE_PCI_DRV_PROBE_AGAIN)) {
+			DRV_LOG(ERR, "Device %s is already probed\n",
+				pci_dev->device.name);
+			ret = -EEXIST;
+			goto probe_err;
+		}
+		ret = driver->pci_driver.probe(pci_drv, pci_dev);
+		if (ret < 0) {
+			DRV_LOG(ERR, "Failed to load driver = %s.\n",
+				driver->pci_driver.driver.name);
+			goto probe_err;
+		}
+		enabled_classes |= driver->driver_class;
+	}
+	dev->classes_loaded |= enabled_classes;
+	return 0;
+probe_err:
+	/* Only unload drivers which are enabled which were enabled
+	 * in this probe instance.
+	 */
+	drivers_remove(dev, enabled_classes);
+	return ret;
+}
+
+/**
+ * DPDK callback to register to probe multiple drivers for a PCI device.
+ *
+ * @param[in] pci_drv
+ *   PCI driver structure.
+ * @param[in] dev
+ *   PCI device information.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_common_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+		      struct rte_pci_device *pci_dev)
+{
+	struct mlx5_pci_device *dev;
+	uint32_t user_classes = 0;
+	bool new_device = false;
+	int ret;
+
+	ret = parse_class_options(pci_dev->device.devargs);
+	if (ret < 0)
+		return ret;
+	user_classes = ret;
+	if (user_classes) {
+		/* Validate combination here. */
+		ret = is_valid_class_combination(user_classes);
+		if (ret) {
+			DRV_LOG(ERR, "Unsupported mlx5 classes supplied.");
+			return ret;
+		}
+	} else {
+		/* Default to net class. */
+		user_classes = MLX5_CLASS_NET;
+	}
+	dev = pci_to_mlx5_device(pci_dev);
+	if (!dev) {
+		dev = rte_zmalloc("mlx5_pci_device", sizeof(*dev), 0);
+		if (!dev)
+			return -ENOMEM;
+		dev->pci_dev = pci_dev;
+		TAILQ_INSERT_HEAD(&devices_list, dev, next);
+		new_device = true;
+	}
+	ret = drivers_probe(dev, pci_drv, pci_dev, user_classes);
+	if (ret)
+		goto class_err;
+	return 0;
+class_err:
+	if (new_device)
+		dev_release(dev);
+	return ret;
+}
+
+/**
+ * DPDK callback to remove one or more drivers for a PCI device.
+ *
+ * This function removes all drivers probed for a given PCI device.
+ *
+ * @param[in] pci_dev
+ *   Pointer to the PCI device.
+ *
+ * @return
+ *   0 on success, the function cannot fail.
+ */
+static int
+mlx5_common_pci_remove(struct rte_pci_device *pci_dev)
+{
+	struct mlx5_pci_device *dev;
+	int ret;
+
+	dev = pci_to_mlx5_device(pci_dev);
+	if (!dev)
+		return -ENODEV;
+	/* Matching device found, cleanup and unload drivers. */
+	ret = drivers_remove(dev, dev->classes_loaded);
+	if (!ret)
+		dev_release(dev);
+	return ret;
+}
+
+static int
+mlx5_common_pci_dma_map(struct rte_pci_device *pci_dev, void *addr,
+			uint64_t iova, size_t len)
+{
+	struct mlx5_pci_driver *driver = NULL;
+	struct mlx5_pci_driver *temp;
+	struct mlx5_pci_device *dev;
+	int ret = -EINVAL;
+
+	dev = pci_to_mlx5_device(pci_dev);
+	if (!dev)
+		return -ENODEV;
+	TAILQ_FOREACH(driver, &drv_list, next) {
+		if (device_class_enabled(dev, driver->driver_class) &&
+		    driver->pci_driver.dma_map) {
+			ret = driver->pci_driver.dma_map(pci_dev, addr,
+							 iova, len);
+			if (ret)
+				goto map_err;
+		}
+	}
+	return ret;
+map_err:
+	TAILQ_FOREACH(temp, &drv_list, next) {
+		if (temp == driver)
+			break;
+		if (device_class_enabled(dev, temp->driver_class) &&
+		    temp->pci_driver.dma_map && temp->pci_driver.dma_unmap)
+			temp->pci_driver.dma_unmap(pci_dev, addr, iova, len);
+	}
+	return ret;
+}
+
+static int
+mlx5_common_pci_dma_unmap(struct rte_pci_device *pci_dev, void *addr,
+			  uint64_t iova, size_t len)
+{
+	struct mlx5_pci_driver *driver;
+	struct mlx5_pci_device *dev;
+	int local_ret = -EINVAL;
+	int ret;
+
+	dev = pci_to_mlx5_device(pci_dev);
+	if (!dev)
+		return -ENODEV;
+	ret = 0;
+	/* There is no unmap error recovery in current implementation. */
+	TAILQ_FOREACH_REVERSE(driver, &drv_list, mlx5_pci_bus_drv_head, next) {
+		if (device_class_enabled(dev, driver->driver_class) &&
+		    driver->pci_driver.dma_unmap) {
+			local_ret = driver->pci_driver.dma_unmap(pci_dev, addr,
+								 iova, len);
+			if (local_ret && (ret == 0))
+				ret = local_ret;
+		}
+	}
+	if (local_ret)
+		ret = local_ret;
+	return ret;
+}
+
+/* PCI ID table is build dynamically based on registered mlx5 drivers. */
+static struct rte_pci_id *mlx5_pci_id_table;
+
+static struct rte_pci_driver mlx5_pci_driver = {
+	.driver = {
+		.name = "mlx5_pci",
+	},
+	.probe = mlx5_common_pci_probe,
+	.remove = mlx5_common_pci_remove,
+	.dma_map = mlx5_common_pci_dma_map,
+	.dma_unmap = mlx5_common_pci_dma_unmap,
+};
+
+static int
+pci_id_table_size_get(const struct rte_pci_id *id_table)
+{
+	int table_size = 0;
+
+	for (; id_table->vendor_id != 0; id_table++)
+		table_size++;
+	return table_size;
+}
+
+static bool
+pci_id_exists(const struct rte_pci_id *id, const struct rte_pci_id *table,
+	      int next_idx)
+{
+	int current_size = next_idx - 1;
+	int i;
+
+	for (i = 0; i < current_size; i++) {
+		if (id->device_id == table[i].device_id &&
+		    id->vendor_id == table[i].vendor_id &&
+		    id->subsystem_vendor_id == table[i].subsystem_vendor_id &&
+		    id->subsystem_device_id == table[i].subsystem_device_id)
+			return true;
+	}
+	return false;
+}
+
+static void
+pci_id_insert(struct rte_pci_id *new_table, int *next_idx,
+	      const struct rte_pci_id *id_table)
+{
+	/* Traverse the id_table, check if entry exists in new_table;
+	 * Add non duplicate entries to new table.
+	 */
+	for (; id_table->vendor_id != 0; id_table++) {
+		if (!pci_id_exists(id_table, new_table, *next_idx)) {
+			/* New entry; add to the table. */
+			new_table[*next_idx] = *id_table;
+			(*next_idx)++;
+		}
+	}
+}
+
+static int
+pci_ids_table_update(const struct rte_pci_id *driver_id_table)
+{
+	const struct rte_pci_id *id_iter;
+	struct rte_pci_id *updated_table;
+	struct rte_pci_id *old_table;
+	int num_ids = 0;
+	int i = 0;
+
+	old_table = mlx5_pci_id_table;
+	if (old_table)
+		num_ids = pci_id_table_size_get(old_table);
+	num_ids += pci_id_table_size_get(driver_id_table);
+	/* Increase size by one for the termination entry of vendor_id = 0. */
+	num_ids += 1;
+	updated_table = calloc(num_ids, sizeof(*updated_table));
+	if (!updated_table)
+		return -ENOMEM;
+	if (TAILQ_EMPTY(&drv_list)) {
+		/* Copy the first driver's ID table. */
+		for (id_iter = driver_id_table; id_iter->vendor_id != 0;
+		     id_iter++, i++)
+			updated_table[i] = *id_iter;
+	} else {
+		/* First copy existing table entries. */
+		for (id_iter = old_table; id_iter->vendor_id != 0;
+		     id_iter++, i++)
+			updated_table[i] = *id_iter;
+		/* New id to be added at the end of current ID table. */
+		pci_id_insert(updated_table, &i, driver_id_table);
+	}
+	/* Terminate table with empty entry. */
+	updated_table[i].vendor_id = 0;
+	mlx5_pci_driver.id_table = updated_table;
+	mlx5_pci_id_table = updated_table;
+	if (old_table)
+		free(old_table);
+	return 0;
+}
+
+void
+mlx5_pci_driver_register(struct mlx5_pci_driver *driver)
+{
+	int ret;
+
+	ret = pci_ids_table_update(driver->pci_driver.id_table);
+	if (ret)
+		return;
+	mlx5_pci_driver.drv_flags |= driver->pci_driver.drv_flags;
+	TAILQ_INSERT_TAIL(&drv_list, driver, next);
+}
+
+void mlx5_common_pci_init(void)
+{
+	const struct rte_pci_id empty_table[] = {
+		{
+			.vendor_id = 0
+		},
+	};
+
+	/* All mlx5 PMDs constructor runs at same priority. So any of the PMD
+	 * including this one can register the PCI table first. If any other
+	 * PMD(s) have registered the PCI ID table, No need to register an empty
+	 * default one.
+	 */
+	if (mlx5_pci_id_table == NULL && pci_ids_table_update(empty_table))
+			return;
+	rte_pci_register(&mlx5_pci_driver);
+}
+
+RTE_FINI(mlx5_common_pci_finish)
+{
+	if (mlx5_pci_id_table != NULL) {
+		/* Constructor doesn't register with PCI bus if it failed
+		 * to build the table.
+		 */
+		rte_pci_unregister(&mlx5_pci_driver);
+		free(mlx5_pci_id_table);
+	}
+}
+RTE_PMD_EXPORT_NAME(mlx5_common_pci, __COUNTER__);
diff --git a/drivers/common/mlx5/mlx5_common_pci.h b/drivers/common/mlx5/mlx5_common_pci.h
new file mode 100644
index 000000000..41b73e17a
--- /dev/null
+++ b/drivers/common/mlx5/mlx5_common_pci.h
@@ -0,0 +1,77 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#ifndef _MLX5_COMMON_PCI_H_
+#define _MLX5_COMMON_PCI_H_
+
+/**
+ * @file
+ *
+ * RTE Mellanox PCI Driver Interface
+ * Mellanox ConnectX PCI device supports multiple class (net/vdpa/regex)
+ * devices. This layer enables creating such multiple class of devices on a
+ * single PCI device by allowing to bind multiple class specific device
+ * driver to attach to mlx5_pci driver.
+ *
+ * -----------    ------------    -------------
+ * |   mlx5  |    |   mlx5   |    |   mlx5    |
+ * | net pmd |    | vdpa pmd |    | regex pmd |
+ * -----------    ------------    -------------
+ *      \              |                 /
+ *       \             |                /
+ *        \       --------------       /
+ *         \______|   mlx5     |_____ /
+ *                | pci common |
+ *                --------------
+ *                     |
+ *                 -----------
+ *                 |   mlx5  |
+ *                 | pci dev |
+ *                 -----------
+ *
+ * - mlx5 pci driver binds to mlx5 PCI devices defined by PCI
+ *   ID table of all related mlx5 PCI devices.
+ * - mlx5 class driver such as net, vdpa, regex PMD defines its
+ *   specific PCI ID table and mlx5 bus driver probes matching
+ *   class drivers.
+ * - mlx5 pci bus driver is cental place that validates supported
+ *   class combinations.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include <rte_pci.h>
+#include <rte_bus_pci.h>
+
+#include <mlx5_common.h>
+
+void mlx5_common_pci_init(void);
+
+/**
+ * A structure describing a mlx5 pci driver.
+ */
+struct mlx5_pci_driver {
+	struct rte_pci_driver pci_driver;	/**< Inherit core pci driver. */
+	uint32_t driver_class;	/**< Class of this driver, enum mlx5_class */
+	TAILQ_ENTRY(mlx5_pci_driver) next;
+};
+
+/**
+ * Register a mlx5_pci device driver.
+ *
+ * @param driver
+ *   A pointer to a mlx5_pci_driver structure describing the driver
+ *   to be registered.
+ */
+__rte_internal
+void
+mlx5_pci_driver_register(struct mlx5_pci_driver *driver);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _MLX5_COMMON_PCI_H_ */
diff --git a/drivers/common/mlx5/rte_common_mlx5_version.map b/drivers/common/mlx5/rte_common_mlx5_version.map
index 65f25252a..73cf72548 100644
--- a/drivers/common/mlx5/rte_common_mlx5_version.map
+++ b/drivers/common/mlx5/rte_common_mlx5_version.map
@@ -91,5 +91,7 @@  INTERNAL {
 	mlx5_malloc;
 	mlx5_realloc;
 	mlx5_free;
+
+	mlx5_pci_driver_register;
 };
 
diff --git a/drivers/meson.build b/drivers/meson.build
index e6d0409aa..95df7ef1d 100644
--- a/drivers/meson.build
+++ b/drivers/meson.build
@@ -5,6 +5,7 @@ 
 subdirs = [
 	'common',
 	'bus',
+	'common/mlx5', # depends on bus.
 	'mempool', # depends on common and bus.
 	'net',     # depends on common, bus, mempool
 	'raw',     # depends on common, bus and net.