On 9/24/2022 3:45 AM, longli@linuxonhyperv.com wrote:
>
> From: Long Li <longli@microsoft.com>
>
> MANA is a PCI device. It uses IB verbs to access hardware through the
> kernel RDMA layer. This patch introduces build environment and basic
> device probe functions.
>
> Signed-off-by: Long Li <longli@microsoft.com>
<...>
> +++ b/doc/guides/nics/mana.rst
> @@ -0,0 +1,69 @@
> +.. SPDX-License-Identifier: BSD-3-Clause
> + Copyright 2022 Microsoft Corporation
> +
> +MANA poll mode driver library
> +=============================
> +
> +The MANA poll mode driver library (**librte_net_mana**) implements support
> +for Microsoft Azure Network Adapter VF in SR-IOV context.
> +
> +Features
> +--------
> +
> +Features of the MANA Ethdev PMD are:
> +
> +Prerequisites
> +-------------
> +
> +This driver relies on external libraries and kernel drivers for resources
> +allocations and initialization. The following dependencies are not part of
> +DPDK and must be installed separately:
> +
> +- **libibverbs** (provided by rdma-core package)
> +
Does it make sense to provide a link to the rdma-core git repo?
<...>
> +
> +static const char * const mana_init_args[] = {
> + "mac",
It is better to define a macro for the devarg string to be able to reuse
it in 'RTE_PMD_REGISTER_PARAM_STRING' (please see below).
#define ETH_MANA_MAC_ARG "mac"
static const char * const mana_init_args[] = {
ETH_MANA_MAC_ARG,
NULL,
};
<...>
> +
> +/*
> + * Goes through the IB device list to look for the IB port matching the
> + * mac_addr. If found, create a rte_eth_dev for it.
> + */
> +static int
> +mana_pci_probe_mac(struct rte_pci_device *pci_dev,
> + struct rte_ether_addr *mac_addr)
> +{
> + struct ibv_device **ibv_list;
> + int ibv_idx;
> + struct ibv_context *ctx;
> + struct ibv_device_attr_ex dev_attr;
> + int num_devices;
> + int ret = 0;
> + uint8_t port;
> + struct mana_priv *priv = NULL;
> + struct rte_eth_dev *eth_dev = NULL;
> + bool found_port;
> +
> + ibv_list = ibv_get_device_list(&num_devices);
> + for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) {
> + struct ibv_device *ibdev = ibv_list[ibv_idx];
> + struct rte_pci_addr pci_addr;
> +
> + DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s",
> + ibdev->name, ibdev->dev_name, ibdev->ibdev_path);
> +
> + if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr))
> + continue;
> +
> + /* Ignore if this IB device is not this PCI device */
> + if (pci_dev->addr.domain != pci_addr.domain ||
> + pci_dev->addr.bus != pci_addr.bus ||
> + pci_dev->addr.devid != pci_addr.devid ||
> + pci_dev->addr.function != pci_addr.function)
> + continue;
> +
> + ctx = ibv_open_device(ibdev);
> + if (!ctx) {
> + DRV_LOG(ERR, "Failed to open IB device %s",
> + ibdev->name);
> + continue;
> + }
> +
> + ret = ibv_query_device_ex(ctx, NULL, &dev_attr);
> + DRV_LOG(INFO, "dev_attr.orig_attr.phys_port_cnt %u",
> + dev_attr.orig_attr.phys_port_cnt);
> + found_port = false;
> +
> + for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt;
> + port++) {
> + struct ibv_parent_domain_init_attr attr = {0};
> + struct rte_ether_addr addr;
> + char address[64];
> + char name[RTE_ETH_NAME_MAX_LEN];
> +
> + ret = get_port_mac(ibdev, port, &addr);
> + if (ret)
> + continue;
> +
> + if (mac_addr && !rte_is_same_ether_addr(&addr, mac_addr))
> + continue;
> +
> + rte_ether_format_addr(address, sizeof(address), &addr);
> + DRV_LOG(INFO, "device located port %u address %s",
> + port, address);
> + found_port = true;
> +
> + priv = rte_zmalloc_socket(NULL, sizeof(*priv),
> + RTE_CACHE_LINE_SIZE,
> + SOCKET_ID_ANY);
> + if (!priv) {
> + ret = -ENOMEM;
> + goto failed;
> + }
> +
> + snprintf(name, sizeof(name), "%s_port%d",
> + pci_dev->device.name, port);
> +
> + if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> + int fd;
> +
> + eth_dev = rte_eth_dev_attach_secondary(name);
> + if (!eth_dev) {
> + DRV_LOG(ERR, "Can't attach to dev %s",
> + name);
> + ret = -ENOMEM;
> + goto failed;
> + }
> +
> + eth_dev->device = &pci_dev->device;
> + eth_dev->dev_ops = &mana_dev_secondary_ops;
> + ret = mana_proc_priv_init(eth_dev);
> + if (ret)
> + goto failed;
> + priv->process_priv = eth_dev->process_private;
> +
> + /* Get the IB FD from the primary process */
> + fd = mana_mp_req_verbs_cmd_fd(eth_dev);
> + if (fd < 0) {
> + DRV_LOG(ERR, "Failed to get FD %d", fd);
> + ret = -ENODEV;
> + goto failed;
> + }
> +
> + ret = mana_map_doorbell_secondary(eth_dev, fd);
> + if (ret) {
> + DRV_LOG(ERR, "Failed secondary map %d",
> + fd);
The indentation level (and length) of this function hints that some
part of it can be separated into its own function; for example, probing
one 'ibv_device' can be a function of its own.
Can you refactor the function to increase readability? It is control
path, so there is no restriction on having function calls.
> + goto failed;
> + }
> +
> + /* fd is no not used after mapping doorbell */
> + close(fd);
> +
> + rte_spinlock_lock(&mana_shared_data->lock);
> + mana_shared_data->secondary_cnt++;
> + mana_local_data.secondary_cnt++;
> + rte_spinlock_unlock(&mana_shared_data->lock);
> +
> + rte_eth_copy_pci_info(eth_dev, pci_dev);
> + rte_eth_dev_probing_finish(eth_dev);
> +
> + /* Impossible to have more than one port
> + * matching a MAC address
> + */
> + continue;
> + }
> +
> + eth_dev = rte_eth_dev_allocate(name);
> + if (!eth_dev) {
> + ret = -ENOMEM;
> + goto failed;
> + }
> +
> + eth_dev->data->mac_addrs =
> + rte_calloc("mana_mac", 1,
> + sizeof(struct rte_ether_addr), 0);
> + if (!eth_dev->data->mac_addrs) {
> + ret = -ENOMEM;
> + goto failed;
> + }
> +
> + rte_ether_addr_copy(&addr, eth_dev->data->mac_addrs);
> +
> + priv->ib_pd = ibv_alloc_pd(ctx);
> + if (!priv->ib_pd) {
> + DRV_LOG(ERR, "ibv_alloc_pd failed port %d", port);
> + ret = -ENOMEM;
> + goto failed;
> + }
> +
> + /* Create a parent domain with the port number */
> + attr.pd = priv->ib_pd;
> + attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT;
> + attr.pd_context = (void *)(uint64_t)port;
> + priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, &attr);
> + if (!priv->ib_parent_pd) {
> + DRV_LOG(ERR,
> + "ibv_alloc_parent_domain failed port %d",
> + port);
> + ret = -ENOMEM;
> + goto failed;
> + }
> +
> + priv->ib_ctx = ctx;
> + priv->port_id = eth_dev->data->port_id;
> + priv->dev_port = port;
> + eth_dev->data->dev_private = priv;
> + priv->dev_data = eth_dev->data;
> +
> + priv->max_rx_queues = dev_attr.orig_attr.max_qp;
> + priv->max_tx_queues = dev_attr.orig_attr.max_qp;
> +
> + priv->max_rx_desc =
> + RTE_MIN(dev_attr.orig_attr.max_qp_wr,
> + dev_attr.orig_attr.max_cqe);
> + priv->max_tx_desc =
> + RTE_MIN(dev_attr.orig_attr.max_qp_wr,
> + dev_attr.orig_attr.max_cqe);
> +
> + priv->max_send_sge = dev_attr.orig_attr.max_sge;
> + priv->max_recv_sge = dev_attr.orig_attr.max_sge;
> +
> + priv->max_mr = dev_attr.orig_attr.max_mr;
> + priv->max_mr_size = dev_attr.orig_attr.max_mr_size;
> +
> + DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d",
> + name, priv->max_rx_queues, priv->max_rx_desc,
> + priv->max_send_sge);
> +
> + rte_spinlock_lock(&mana_shared_data->lock);
> + mana_shared_data->primary_cnt++;
> + rte_spinlock_unlock(&mana_shared_data->lock);
> +
> + eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV;
> +
This assignment is already done by 'rte_eth_copy_pci_info()' when the
'RTE_PCI_DRV_INTR_RMV' driver flag is set, which this PMD does, so the
assignment is redundant.
<...>
> +
> +RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver);
> +RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map);
> +RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib");
> +RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE);
> +RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE);
Can you please add 'RTE_PMD_REGISTER_PARAM_STRING' macro for 'mac' devarg?
> diff --git a/drivers/net/mana/mana.h b/drivers/net/mana/mana.h
> new file mode 100644
> index 0000000000..a2021ceb4a
> --- /dev/null
> +++ b/drivers/net/mana/mana.h
> @@ -0,0 +1,102 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2022 Microsoft Corporation
> + */
> +
> +#ifndef __MANA_H__
> +#define __MANA_H__
> +
> +enum {
> + PCI_VENDOR_ID_MICROSOFT = 0x1414,
> +};
> +
> +enum {
> + PCI_DEVICE_ID_MICROSOFT_MANA = 0x00ba,
> +};
There is a common guidance to prefer enums over defines, BUT
I tend to use enums for related cases, or when the underlying numerical
value doesn't matter.
For PCI IDs I would use #define. Although both work the same, what do you
think about updating them to defines?
@@ -837,6 +837,12 @@ F: buildtools/options-ibverbs-static.sh
F: doc/guides/nics/mlx5.rst
F: doc/guides/nics/features/mlx5.ini
+Microsoft mana
+M: Long Li <longli@microsoft.com>
+F: drivers/net/mana
+F: doc/guides/nics/mana.rst
+F: doc/guides/nics/features/mana.ini
+
Microsoft vdev_netvsc - EXPERIMENTAL
M: Matan Azrad <matan@nvidia.com>
F: drivers/net/vdev_netvsc/
new file mode 100644
@@ -0,0 +1,10 @@
+;
+; Supported features of the 'mana' network poll mode driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Linux = Y
+Multiprocess aware = Y
+Usage doc = Y
+x86-64 = Y
@@ -41,6 +41,7 @@ Network Interface Controller Drivers
intel_vf
kni
liquidio
+ mana
memif
mlx4
mlx5
new file mode 100644
@@ -0,0 +1,69 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+ Copyright 2022 Microsoft Corporation
+
+MANA poll mode driver library
+=============================
+
+The MANA poll mode driver library (**librte_net_mana**) implements support
+for Microsoft Azure Network Adapter VF in SR-IOV context.
+
+Features
+--------
+
+Features of the MANA Ethdev PMD are:
+
+Prerequisites
+-------------
+
+This driver relies on external libraries and kernel drivers for resources
+allocations and initialization. The following dependencies are not part of
+DPDK and must be installed separately:
+
+- **libibverbs** (provided by rdma-core package)
+
+ User space verbs framework used by librte_net_mana. This library provides
+ a generic interface between the kernel and low-level user space drivers
+ such as libmana.
+
+ It allows slow and privileged operations (context initialization, hardware
+ resources allocations) to be managed by the kernel and fast operations to
+ never leave user space. The minimum required rdma-core version is v43.
+
+- **libmana** (provided by rdma-core package)
+
+ Low-level user space driver library for Microsoft Azure Network Adapter
+ devices, it is automatically loaded by libibverbs. The minimum required
+ version of rdma-core with libmana is v43.
+
+- **Kernel modules**
+
+ They provide the kernel-side verbs API and low level device drivers that
+ manage actual hardware initialization and resources sharing with user
+ space processes. The minimum required Linux kernel version is 6.1.
+
+ Unlike most other PMDs, these modules must remain loaded and bound to
+ their devices:
+
+ - mana: Ethernet device driver that provides kernel network interfaces.
+ - mana_ib: InfiniBand device driver.
+ - ib_uverbs: user space driver for verbs (entry point for libibverbs).
+
+Driver compilation and testing
+------------------------------
+
+Refer to the document :ref:`compiling and testing a PMD for a NIC <pmd_build_and_test>`
+for details.
+
+MANA PMD arguments
+--------------------
+
+The user can specify the argument below in devargs.
+
+#. ``mac``:
+
+ Specify the MAC address for this device. If it is set, the driver
+ probes and loads the NIC with a matching mac address. If it is not
+ set, the driver probes on all the NICs on the PCI device. The default
+ value is not set, meaning all the NICs will be probed and loaded.
+ User can specify multiple mac=xx:xx:xx:xx:xx:xx arguments for up to
+ 8 NICs.
new file mode 100644
@@ -0,0 +1,725 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Microsoft Corporation
+ */
+
+#include <unistd.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include <ethdev_driver.h>
+#include <ethdev_pci.h>
+#include <rte_kvargs.h>
+#include <rte_eal_paging.h>
+
+#include <infiniband/verbs.h>
+#include <infiniband/manadv.h>
+
+#include <assert.h>
+
+#include "mana.h"
+
+/* Shared memory between primary/secondary processes, per driver */
+/* Data to track primary/secondary usage */
+struct mana_shared_data *mana_shared_data;
+static struct mana_shared_data mana_local_data;
+
+/* The memory region for the above data */
+static const struct rte_memzone *mana_shared_mz;
+static const char *MZ_MANA_SHARED_DATA = "mana_shared_data";
+
+/* Spinlock for mana_shared_data */
+static rte_spinlock_t mana_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
+
+/*
+ * Allocate a buffer on the stack and fill it with a printf-style formatted
+ * string.
+ *
+ * Expands to two declarations (an int named mkstr_size_<name> and a
+ * variable-length char array called <name>) followed by two statements, so
+ * it must be used where declarations are allowed and cannot serve as the
+ * unbraced body of an if/for/while. The format arguments are evaluated
+ * twice: once to measure the output and once to write it.
+ */
+#define MANA_MKSTR(name, ...) \
+ int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \
+ char name[mkstr_size_##name + 1]; \
+ \
+ memset(name, 0, mkstr_size_##name + 1); \
+ snprintf(name, sizeof(name), "" __VA_ARGS__)
+
+int mana_logtype_driver;
+int mana_logtype_init;
+
+static const struct eth_dev_ops mana_dev_ops = { /* installed on ports created by the primary process; no callbacks set yet */
+};
+
+static const struct eth_dev_ops mana_dev_secondary_ops = { /* installed on ports attached by a secondary process; no callbacks set yet */
+};
+
+uint16_t
+mana_rx_burst_removed(void *dpdk_rxq __rte_unused,
+ struct rte_mbuf **pkts __rte_unused,
+ uint16_t pkts_n __rte_unused)
+{
+ rte_mb(); /* barrier only; no queue or packet state is touched */
+ return 0; /* never reports any received packet */
+}
+
+uint16_t
+mana_tx_burst_removed(void *dpdk_rxq __rte_unused, /* NOTE(review): Tx stub, yet the queue argument is named dpdk_rxq */
+ struct rte_mbuf **pkts __rte_unused,
+ uint16_t pkts_n __rte_unused)
+{
+ rte_mb(); /* barrier only; no queue or packet state is touched */
+ return 0; /* never reports any transmitted packet */
+}
+
+static const char * const mana_init_args[] = {
+ "mac",
+ NULL,
+};
+
+/* Supports parsing up to 8 MAC addresses from the EAL command line. */
+#define MAX_NUM_ADDRESS 8
+struct mana_conf {
+ struct rte_ether_addr mac_array[MAX_NUM_ADDRESS]; /* parsed addresses, filled in order by mana_arg_parse_callback() */
+ unsigned int index; /* number of entries filled so far, i.e. the next free slot */
+};
+
+/*
+ * rte_kvargs_process() handler for the "mac" devarg.
+ *
+ * Parses val as an Ethernet address and appends it to the struct mana_conf
+ * passed through private.
+ *
+ * Returns 0 on success and a negative value on failure.
+ * rte_kvargs_process() only aborts when the handler result is negative, so
+ * every error path here must return < 0 (the previous "return 1" on overflow
+ * was silently treated as success).
+ */
+static int
+mana_arg_parse_callback(const char *key, const char *val, void *private)
+{
+	struct mana_conf *conf = private;
+	int ret;
+
+	/* A key given without "=value" reaches the handler with a NULL value */
+	if (val == NULL) {
+		DRV_LOG(ERR, "Missing value for key %s", key);
+		return -EINVAL;
+	}
+
+	DRV_LOG(INFO, "key=%s value=%s index=%u", key, val, conf->index);
+
+	if (conf->index >= MAX_NUM_ADDRESS) {
+		DRV_LOG(ERR, "Exceeding max MAC address");
+		return -EINVAL;
+	}
+
+	ret = rte_ether_unformat_addr(val, &conf->mac_array[conf->index]);
+	if (ret) {
+		DRV_LOG(ERR, "Invalid MAC address %s", val);
+		return ret;
+	}
+
+	conf->index++;
+
+	return 0;
+}
+
+/*
+ * Parse the driver-specific devargs string into conf.
+ *
+ * Only the first key of mana_init_args is processed; each occurrence is
+ * handed to mana_arg_parse_callback(). More than MAX_NUM_ADDRESS occurrences
+ * are rejected before any of them is parsed.
+ *
+ * Returns 0 on success, -EINVAL when the string cannot be parsed or holds
+ * too many addresses, or the rte_kvargs_process() result on handler failure.
+ */
+static int
+mana_parse_args(struct rte_devargs *devargs, struct mana_conf *conf)
+{
+	const char *mac_key = mana_init_args[0];
+	struct rte_kvargs *kvs;
+	int rc;
+
+	kvs = rte_kvargs_parse(devargs->drv_str, mana_init_args);
+	if (kvs == NULL) {
+		DRV_LOG(ERR, "failed to parse kvargs args=%s", devargs->drv_str);
+		return -EINVAL;
+	}
+
+	if (rte_kvargs_count(kvs, mac_key) > MAX_NUM_ADDRESS) {
+		rc = -EINVAL;
+	} else {
+		rc = rte_kvargs_process(kvs, mac_key,
+					mana_arg_parse_callback, conf);
+		if (rc != 0)
+			DRV_LOG(ERR, "error parsing args");
+	}
+
+	/* The key/value list is released on every path past the parse */
+	rte_kvargs_free(kvs);
+	return rc;
+}
+
+/*
+ * Look up the MAC address of the kernel net device backing an IB port.
+ *
+ * Walks <ibdev_path>/device/net/, reads each entry's "dev_port" file and,
+ * for the entry whose value equals port - 1, parses its "address" file into
+ * addr.
+ *
+ * device: IB device whose ibdev_path is the base of the lookup.
+ * port:   IB port number (IB ports are numbered from 1).
+ * addr:   filled in only when the function returns 0.
+ *
+ * Returns 0 on success and non-zero on failure: -ENOENT when the directory
+ * cannot be opened or no entry matches, -EIO when the address file is empty,
+ * or the rte_ether_unformat_addr() result when the text is not a MAC.
+ */
+static int
+get_port_mac(struct ibv_device *device, unsigned int port,
+	     struct rte_ether_addr *addr)
+{
+	FILE *file;
+	/* Stays -ENOENT unless a matching entry is found and parsed */
+	int ret = -ENOENT;
+	DIR *dir;
+	struct dirent *dent;
+	unsigned int dev_port;
+	char mac[20];
+
+	MANA_MKSTR(path, "%s/device/net", device->ibdev_path);
+
+	dir = opendir(path);
+	if (!dir)
+		return -ENOENT;
+
+	while ((dent = readdir(dir))) {
+		char *name = dent->d_name;
+		int nread;
+
+		MANA_MKSTR(port_path, "%s/%s/dev_port", path, name);
+
+		/* Ignore . and .. */
+		if ((name[0] == '.') &&
+		    ((name[1] == '\0') ||
+		     ((name[1] == '.') && (name[2] == '\0'))))
+			continue;
+
+		file = fopen(port_path, "r");
+		if (!file)
+			continue;
+
+		nread = fscanf(file, "%u", &dev_port);
+		fclose(file);
+
+		if (nread != 1)
+			continue;
+
+		/* Ethernet ports start at 0, IB port start at 1 */
+		if (dev_port == port - 1) {
+			MANA_MKSTR(address_path, "%s/%s/address", path, name);
+
+			file = fopen(address_path, "r");
+			if (!file)
+				continue;
+
+			/* Field width is sizeof(mac) - 1 so mac cannot overflow */
+			nread = fscanf(file, "%19s", mac);
+			fclose(file);
+
+			if (nread != 1) {
+				ret = -EIO;
+				break;
+			}
+
+			ret = rte_ether_unformat_addr(mac, addr);
+			if (ret)
+				DRV_LOG(ERR, "unrecognized mac addr %s", mac);
+			break;
+		}
+	}
+
+	closedir(dir);
+	return ret;
+}
+
+/*
+ * Resolve the PCI address of an IB device.
+ *
+ * Reads <ibdev_path>/device/uevent line by line and parses the first
+ * "PCI_SLOT_NAME=dddd:bb:dd.f" entry.
+ *
+ * device:   IB device whose ibdev_path is the base of the lookup.
+ * pci_addr: written only when the function returns 0.
+ *
+ * Returns 0 on success, -errno when the uevent file cannot be opened, and
+ * -ENOENT when the file holds no parsable PCI_SLOT_NAME line (previously
+ * this case returned 0 and left *pci_addr unset or partially written).
+ */
+static int
+mana_ibv_device_to_pci_addr(const struct ibv_device *device,
+			    struct rte_pci_addr *pci_addr)
+{
+	FILE *file;
+	char *line = NULL;
+	size_t len = 0;
+	struct rte_pci_addr parsed;
+	int ret = -ENOENT;
+
+	MANA_MKSTR(path, "%s/device/uevent", device->ibdev_path);
+
+	file = fopen(path, "r");
+	if (!file)
+		return -errno;
+
+	while (getline(&line, &len, file) != -1) {
+		/* Extract information. */
+		if (sscanf(line,
+			   "PCI_SLOT_NAME="
+			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
+			   &parsed.domain,
+			   &parsed.bus,
+			   &parsed.devid,
+			   &parsed.function) == 4) {
+			/* Publish only a fully parsed address */
+			*pci_addr = parsed;
+			ret = 0;
+			break;
+		}
+	}
+
+	free(line);
+	fclose(file);
+	return ret;
+}
+
+/*
+ * Allocate the zero-filled per-process private area for dev and publish it
+ * as dev->process_private. The memory is requested on the device's NUMA
+ * node, cache-line aligned.
+ *
+ * Returns 0 on success and -ENOMEM when the allocation fails.
+ */
+static int
+mana_proc_priv_init(struct rte_eth_dev *dev)
+{
+	struct mana_process_priv *proc_priv =
+		rte_zmalloc_socket("mana_proc_priv",
+				   sizeof(struct mana_process_priv),
+				   RTE_CACHE_LINE_SIZE,
+				   dev->device->numa_node);
+
+	if (proc_priv == NULL)
+		return -ENOMEM;
+
+	dev->process_private = proc_priv;
+	return 0;
+}
+
+/*
+ * Map the doorbell page for the secondary process through IB device handle.
+ *
+ * One page at offset 0 of fd is mapped shared and write-only, and the
+ * resulting address is stored in the port's process-private data.
+ *
+ * Returns 0 on success and -ENOMEM when mmap() fails.
+ */
+static int
+mana_map_doorbell_secondary(struct rte_eth_dev *eth_dev, int fd)
+{
+	struct mana_process_priv *proc_priv = eth_dev->process_private;
+	void *db;
+
+	db = mmap(NULL, rte_mem_page_size(), PROT_WRITE, MAP_SHARED, fd, 0);
+	if (db != MAP_FAILED) {
+		DRV_LOG(INFO, "Secondary doorbell mapped to %p", db);
+		proc_priv->db_page = db;
+		return 0;
+	}
+
+	DRV_LOG(ERR, "Failed to map secondary doorbell port %u",
+		eth_dev->data->port_id);
+	return -ENOMEM;
+}
+
+/*
+ * Initialize shared data for the driver (all devices).
+ *
+ * Runs under mana_shared_data_lock and does nothing when mana_shared_data is
+ * already set. The primary process reserves the memzone, zeroes it and
+ * initializes the embedded spinlock; any other process looks the memzone up
+ * by name and zeroes its own mana_local_data.
+ *
+ * Returns 0 on success and -rte_errno when the memzone cannot be reserved
+ * or found.
+ */
+static int
+mana_init_shared_data(void)
+{
+ int ret = 0;
+ const struct rte_memzone *secondary_mz;
+
+ rte_spinlock_lock(&mana_shared_data_lock);
+
+ /* Skip if shared data is already initialized */
+ if (mana_shared_data)
+ goto exit;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ mana_shared_mz = rte_memzone_reserve(MZ_MANA_SHARED_DATA,
+ sizeof(*mana_shared_data),
+ SOCKET_ID_ANY, 0);
+ if (!mana_shared_mz) {
+ DRV_LOG(ERR, "Cannot allocate mana shared data");
+ ret = -rte_errno;
+ goto exit;
+ }
+
+ mana_shared_data = mana_shared_mz->addr;
+ memset(mana_shared_data, 0, sizeof(*mana_shared_data));
+ rte_spinlock_init(&mana_shared_data->lock);
+ } else {
+ secondary_mz = rte_memzone_lookup(MZ_MANA_SHARED_DATA);
+ if (!secondary_mz) {
+ DRV_LOG(ERR, "Cannot attach mana shared data");
+ ret = -rte_errno;
+ goto exit;
+ }
+
+ mana_shared_data = secondary_mz->addr;
+ memset(&mana_local_data, 0, sizeof(mana_local_data)); /* per-process counters start from zero */
+ }
+
+exit:
+ rte_spinlock_unlock(&mana_shared_data_lock); /* single unlock point for every path */
+
+ return ret;
+}
+
+/*
+ * Init the data structures for use in primary and secondary processes.
+ *
+ * Makes sure the shared data exists, then, holding the shared-data spinlock,
+ * performs the multi-process channel setup once: the primary records
+ * completion in the shared area, a secondary records it in its local copy.
+ *
+ * Returns 0 on success (including when the setup was already done), the
+ * error from mana_init_shared_data() or the mp init helper, or -EPROTO for
+ * an unexpected process type.
+ */
+static int
+mana_init_once(void)
+{
+	int ret;
+
+	ret = mana_init_shared_data();
+	if (ret)
+		return ret;
+
+	rte_spinlock_lock(&mana_shared_data->lock);
+
+	switch (rte_eal_process_type()) {
+	case RTE_PROC_PRIMARY:
+		if (mana_shared_data->init_done)
+			break;
+
+		ret = mana_mp_init_primary();
+		if (ret)
+			break;
+		/* Success-path trace: not an error, so log it at DEBUG */
+		DRV_LOG(DEBUG, "MP INIT PRIMARY");
+
+		mana_shared_data->init_done = 1;
+		break;
+
+	case RTE_PROC_SECONDARY:
+
+		if (mana_local_data.init_done)
+			break;
+
+		ret = mana_mp_init_secondary();
+		if (ret)
+			break;
+
+		/* Success-path trace: not an error, so log it at DEBUG */
+		DRV_LOG(DEBUG, "MP INIT SECONDARY");
+
+		mana_local_data.init_done = 1;
+		break;
+
+	default:
+		/* Impossible, internal error */
+		ret = -EPROTO;
+		break;
+	}
+
+	rte_spinlock_unlock(&mana_shared_data->lock);
+
+	return ret;
+}
+
+/*
+ * Goes through the IB device list to look for the IB port matching the
+ * mac_addr. If found, create a rte_eth_dev for it.
+ *
+ * pci_dev:  PCI device being probed; IB devices with a different PCI
+ *           address are skipped.
+ * mac_addr: MAC address to match, or NULL to accept every port.
+ *
+ * Returns 0 when the scan completes (even if nothing matched), -ENODEV when
+ * the IB device list cannot be obtained, and a negative errno value when
+ * setting up a matching port fails.
+ *
+ * NOTE(review): on failure the IB context is closed even if an earlier port
+ * of the same device was set up successfully on it; splitting per-device and
+ * per-port probing into helpers would make that ownership explicit.
+ */
+static int
+mana_pci_probe_mac(struct rte_pci_device *pci_dev,
+		   struct rte_ether_addr *mac_addr)
+{
+	struct ibv_device **ibv_list;
+	int ibv_idx;
+	struct ibv_context *ctx;
+	struct ibv_device_attr_ex dev_attr;
+	int num_devices;
+	int ret = 0;
+	uint8_t port;
+	struct mana_priv *priv = NULL;
+	struct rte_eth_dev *eth_dev = NULL;
+	bool found_port;
+
+	ibv_list = ibv_get_device_list(&num_devices);
+	if (!ibv_list) {
+		/* Nothing to iterate and nothing that may be freed */
+		DRV_LOG(ERR, "Failed to get IB device list");
+		return -ENODEV;
+	}
+
+	for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) {
+		struct ibv_device *ibdev = ibv_list[ibv_idx];
+		struct rte_pci_addr pci_addr;
+
+		DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s",
+			ibdev->name, ibdev->dev_name, ibdev->ibdev_path);
+
+		if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr))
+			continue;
+
+		/* Ignore if this IB device is not this PCI device */
+		if (pci_dev->addr.domain != pci_addr.domain ||
+		    pci_dev->addr.bus != pci_addr.bus ||
+		    pci_dev->addr.devid != pci_addr.devid ||
+		    pci_dev->addr.function != pci_addr.function)
+			continue;
+
+		ctx = ibv_open_device(ibdev);
+		if (!ctx) {
+			DRV_LOG(ERR, "Failed to open IB device %s",
+				ibdev->name);
+			continue;
+		}
+
+		ret = ibv_query_device_ex(ctx, NULL, &dev_attr);
+		if (ret) {
+			/* dev_attr is not valid; do not derive limits from it */
+			DRV_LOG(ERR, "Failed to query IB device %s",
+				ibdev->name);
+			ibv_close_device(ctx);
+			continue;
+		}
+
+		DRV_LOG(INFO, "dev_attr.orig_attr.phys_port_cnt %u",
+			dev_attr.orig_attr.phys_port_cnt);
+		found_port = false;
+
+		for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt;
+		     port++) {
+			struct ibv_parent_domain_init_attr attr = {0};
+			struct rte_ether_addr addr;
+			char address[64];
+			char name[RTE_ETH_NAME_MAX_LEN];
+
+			/*
+			 * Forget the previous port so the error path only
+			 * unwinds what this port acquired.
+			 */
+			priv = NULL;
+			eth_dev = NULL;
+
+			ret = get_port_mac(ibdev, port, &addr);
+			if (ret)
+				continue;
+
+			if (mac_addr && !rte_is_same_ether_addr(&addr, mac_addr))
+				continue;
+
+			rte_ether_format_addr(address, sizeof(address), &addr);
+			DRV_LOG(INFO, "device located port %u address %s",
+				port, address);
+			found_port = true;
+
+			snprintf(name, sizeof(name), "%s_port%d",
+				 pci_dev->device.name, port);
+
+			if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+				int fd;
+
+				/*
+				 * No mana_priv is allocated here: the
+				 * secondary never attaches one to the port.
+				 */
+				eth_dev = rte_eth_dev_attach_secondary(name);
+				if (!eth_dev) {
+					DRV_LOG(ERR, "Can't attach to dev %s",
+						name);
+					ret = -ENOMEM;
+					goto failed;
+				}
+
+				eth_dev->device = &pci_dev->device;
+				eth_dev->dev_ops = &mana_dev_secondary_ops;
+				ret = mana_proc_priv_init(eth_dev);
+				if (ret)
+					goto failed;
+
+				/* Get the IB FD from the primary process */
+				fd = mana_mp_req_verbs_cmd_fd(eth_dev);
+				if (fd < 0) {
+					DRV_LOG(ERR, "Failed to get FD %d", fd);
+					ret = -ENODEV;
+					goto failed;
+				}
+
+				ret = mana_map_doorbell_secondary(eth_dev, fd);
+				if (ret) {
+					DRV_LOG(ERR, "Failed secondary map %d",
+						fd);
+					close(fd);
+					goto failed;
+				}
+
+				/* fd is not used after mapping doorbell */
+				close(fd);
+
+				rte_spinlock_lock(&mana_shared_data->lock);
+				mana_shared_data->secondary_cnt++;
+				mana_local_data.secondary_cnt++;
+				rte_spinlock_unlock(&mana_shared_data->lock);
+
+				rte_eth_copy_pci_info(eth_dev, pci_dev);
+				rte_eth_dev_probing_finish(eth_dev);
+
+				/* Impossible to have more than one port
+				 * matching a MAC address
+				 */
+				continue;
+			}
+
+			priv = rte_zmalloc_socket(NULL, sizeof(*priv),
+						  RTE_CACHE_LINE_SIZE,
+						  SOCKET_ID_ANY);
+			if (!priv) {
+				ret = -ENOMEM;
+				goto failed;
+			}
+
+			eth_dev = rte_eth_dev_allocate(name);
+			if (!eth_dev) {
+				ret = -ENOMEM;
+				goto failed;
+			}
+
+			eth_dev->data->mac_addrs =
+				rte_calloc("mana_mac", 1,
+					   sizeof(struct rte_ether_addr), 0);
+			if (!eth_dev->data->mac_addrs) {
+				ret = -ENOMEM;
+				goto failed;
+			}
+
+			rte_ether_addr_copy(&addr, eth_dev->data->mac_addrs);
+
+			priv->ib_pd = ibv_alloc_pd(ctx);
+			if (!priv->ib_pd) {
+				DRV_LOG(ERR, "ibv_alloc_pd failed port %d", port);
+				ret = -ENOMEM;
+				goto failed;
+			}
+
+			/* Create a parent domain with the port number */
+			attr.pd = priv->ib_pd;
+			attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT;
+			attr.pd_context = (void *)(uint64_t)port;
+			priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, &attr);
+			if (!priv->ib_parent_pd) {
+				DRV_LOG(ERR,
+					"ibv_alloc_parent_domain failed port %d",
+					port);
+				ret = -ENOMEM;
+				goto failed;
+			}
+
+			priv->ib_ctx = ctx;
+			priv->port_id = eth_dev->data->port_id;
+			priv->dev_port = port;
+			eth_dev->data->dev_private = priv;
+			priv->dev_data = eth_dev->data;
+
+			priv->max_rx_queues = dev_attr.orig_attr.max_qp;
+			priv->max_tx_queues = dev_attr.orig_attr.max_qp;
+
+			priv->max_rx_desc =
+				RTE_MIN(dev_attr.orig_attr.max_qp_wr,
+					dev_attr.orig_attr.max_cqe);
+			priv->max_tx_desc =
+				RTE_MIN(dev_attr.orig_attr.max_qp_wr,
+					dev_attr.orig_attr.max_cqe);
+
+			priv->max_send_sge = dev_attr.orig_attr.max_sge;
+			priv->max_recv_sge = dev_attr.orig_attr.max_sge;
+
+			priv->max_mr = dev_attr.orig_attr.max_mr;
+			priv->max_mr_size = dev_attr.orig_attr.max_mr_size;
+
+			DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d",
+				name, priv->max_rx_queues, priv->max_rx_desc,
+				priv->max_send_sge);
+
+			rte_spinlock_lock(&mana_shared_data->lock);
+			mana_shared_data->primary_cnt++;
+			rte_spinlock_unlock(&mana_shared_data->lock);
+
+			eth_dev->device = &pci_dev->device;
+
+			DRV_LOG(INFO, "device %s at port %u",
+				name, eth_dev->data->port_id);
+
+			eth_dev->rx_pkt_burst = mana_rx_burst_removed;
+			eth_dev->tx_pkt_burst = mana_tx_burst_removed;
+			eth_dev->dev_ops = &mana_dev_ops;
+
+			/*
+			 * RTE_ETH_DEV_INTR_RMV is not set by hand:
+			 * rte_eth_copy_pci_info() derives dev_flags from the
+			 * PCI driver's drv_flags.
+			 */
+			rte_eth_copy_pci_info(eth_dev, pci_dev);
+			rte_eth_dev_probing_finish(eth_dev);
+		}
+
+		/* Secondary process doesn't need an ibv_ctx. It maps the
+		 * doorbell pages using the IB cmd_fd passed from the primary
+		 * process and send messages to primary process for memory
+		 * registrations.
+		 */
+		if (!found_port || rte_eal_process_type() == RTE_PROC_SECONDARY)
+			ibv_close_device(ctx);
+	}
+
+	ibv_free_device_list(ibv_list);
+	return 0;
+
+failed:
+	/* Free the resource for the port failed */
+	if (priv) {
+		if (priv->ib_parent_pd)
+			ibv_dealloc_pd(priv->ib_parent_pd);
+
+		if (priv->ib_pd)
+			ibv_dealloc_pd(priv->ib_pd);
+	}
+
+	if (eth_dev)
+		rte_eth_dev_release_port(eth_dev);
+
+	rte_free(priv);
+
+	ibv_close_device(ctx);
+	ibv_free_device_list(ibv_list);
+
+	return ret;
+}
+
+/*
+ * Main callback function from PCI bus to probe a device.
+ *
+ * Parses the optional driver devargs, performs the one-time PMD setup and
+ * then probes either every port (no MAC given) or one port per MAC address
+ * supplied. Stops at, and returns, the first non-zero result.
+ */
+static int
+mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+	       struct rte_pci_device *pci_dev)
+{
+	struct rte_devargs *devargs = pci_dev->device.devargs;
+	struct mana_conf conf = {0};
+	unsigned int idx = 0;
+	int rc;
+
+	if (devargs != NULL && devargs->drv_str != NULL) {
+		rc = mana_parse_args(devargs, &conf);
+		if (rc != 0) {
+			DRV_LOG(ERR, "failed to parse parameters args = %s",
+				devargs->drv_str);
+			return rc;
+		}
+	}
+
+	rc = mana_init_once();
+	if (rc != 0) {
+		DRV_LOG(ERR, "Failed to init PMD global data %d", rc);
+		return rc;
+	}
+
+	/* If there are no driver parameters, probe on all ports */
+	if (conf.index == 0)
+		return mana_pci_probe_mac(pci_dev, NULL);
+
+	/* Otherwise probe once per requested MAC, in command-line order */
+	while (idx < conf.index) {
+		rc = mana_pci_probe_mac(pci_dev, &conf.mac_array[idx]);
+		if (rc != 0)
+			return rc;
+		idx++;
+	}
+
+	return 0;
+}
+
+static int
+mana_dev_uninit(struct rte_eth_dev *dev)
+{
+ RTE_SET_USED(dev); /* placeholder: no per-port teardown is performed here */
+ return 0; /* always reports success to rte_eth_dev_pci_generic_remove() */
+}
+
+/*
+ * Callback from PCI to remove this device.
+ *
+ * Drops this process's usage count. In the primary, the last removal tears
+ * down the multi-process channel and frees the shared memzone; in a
+ * secondary, the last local removal tears down its side of the channel.
+ * The generic ethdev PCI removal then runs with mana_dev_uninit().
+ *
+ * Returns the result of rte_eth_dev_pci_generic_remove().
+ */
+static int
+mana_pci_remove(struct rte_pci_device *pci_dev)
+{
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		bool last;
+
+		rte_spinlock_lock(&mana_shared_data_lock);
+
+		rte_spinlock_lock(&mana_shared_data->lock);
+
+		RTE_VERIFY(mana_shared_data->primary_cnt > 0);
+		mana_shared_data->primary_cnt--;
+		/* Decide while the counter is still protected by the lock */
+		last = (mana_shared_data->primary_cnt == 0);
+		if (last) {
+			DRV_LOG(DEBUG, "mp uninit primary");
+			mana_mp_uninit_primary();
+		}
+
+		rte_spinlock_unlock(&mana_shared_data->lock);
+
+		/* Also free the shared memory if this is the last */
+		if (last) {
+			DRV_LOG(DEBUG, "free shared memezone data");
+			rte_memzone_free(mana_shared_mz);
+			/*
+			 * Drop the stale pointers so that a later probe
+			 * re-creates the shared data instead of reusing
+			 * freed memory.
+			 */
+			mana_shared_mz = NULL;
+			mana_shared_data = NULL;
+		}
+
+		rte_spinlock_unlock(&mana_shared_data_lock);
+	} else {
+		rte_spinlock_lock(&mana_shared_data_lock);
+
+		rte_spinlock_lock(&mana_shared_data->lock);
+		RTE_VERIFY(mana_shared_data->secondary_cnt > 0);
+		mana_shared_data->secondary_cnt--;
+		rte_spinlock_unlock(&mana_shared_data->lock);
+
+		RTE_VERIFY(mana_local_data.secondary_cnt > 0);
+		mana_local_data.secondary_cnt--;
+		if (!mana_local_data.secondary_cnt) {
+			DRV_LOG(DEBUG, "mp uninit secondary");
+			mana_mp_uninit_secondary();
+		}
+
+		rte_spinlock_unlock(&mana_shared_data_lock);
+	}
+
+	return rte_eth_dev_pci_generic_remove(pci_dev, mana_dev_uninit);
+}
+
+static const struct rte_pci_id mana_pci_id_map[] = { /* PCI IDs this driver binds to */
+ {
+ RTE_PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT,
+ PCI_DEVICE_ID_MICROSOFT_MANA)
+ },
+ {
+ .vendor_id = 0 /* presumably the terminating sentinel entry; confirm against the PCI bus convention */
+ },
+};
+
+static struct rte_pci_driver mana_pci_driver = {
+ .id_table = mana_pci_id_map,
+ .probe = mana_pci_probe,
+ .remove = mana_pci_remove,
+ .drv_flags = RTE_PCI_DRV_INTR_RMV, /* driver-level flag requesting device-removal interrupt support */
+};
+
+/* Register the PMD, its PCI ID table, kernel module needs and devargs. */
+RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver);
+RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib");
+/* Advertise the "mac" devarg accepted by mana_parse_args() */
+RTE_PMD_REGISTER_PARAM_STRING(net_mana, "mac=<mac_addr>");
+RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE);
+RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE);
new file mode 100644
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Microsoft Corporation
+ */
+
+#ifndef __MANA_H__
+#define __MANA_H__
+
+enum {
+ PCI_VENDOR_ID_MICROSOFT = 0x1414, /* vendor ID used in mana_pci_id_map */
+};
+
+enum {
+ PCI_DEVICE_ID_MICROSOFT_MANA = 0x00ba, /* device ID used in mana_pci_id_map */
+};
+
+/*
+ * Shared data between primary/secondary processes.
+ *
+ * The driver also keeps one per-process instance of this struct
+ * (mana_local_data) for a secondary's own init_done and secondary_cnt.
+ */
+struct mana_shared_data {
+ rte_spinlock_t lock; /* taken around updates of the fields below */
+ int init_done; /* set to 1 once the multi-process channel setup succeeded */
+ unsigned int primary_cnt; /* incremented per port created by the primary, decremented in mana_pci_remove() */
+ unsigned int secondary_cnt; /* incremented per port attached by a secondary, decremented in mana_pci_remove() */
+};
+
+struct mana_process_priv { /* per-process data stored in rte_eth_dev.process_private */
+ void *db_page; /* doorbell page address; set by mana_map_doorbell_secondary() */
+};
+
+struct mana_priv { /* per-port private data, stored in rte_eth_dev_data.dev_private by the primary */
+ struct rte_eth_dev_data *dev_data; /* back-pointer to the owning port's data */
+ struct mana_process_priv *process_priv;
+
+ /* DPDK port */
+ uint16_t port_id;
+
+ /* IB device port */
+ uint8_t dev_port;
+
+ struct ibv_context *ib_ctx; /* IB context opened for the device at probe time */
+ struct ibv_pd *ib_pd; /* protection domain allocated on ib_ctx */
+ struct ibv_pd *ib_parent_pd; /* parent domain built on ib_pd, carrying the IB port number as its context */
+ void *db_page;
+ int max_rx_queues; /* from the device attribute max_qp */
+ int max_tx_queues; /* from the device attribute max_qp */
+ int max_rx_desc; /* min of the device attributes max_qp_wr and max_cqe */
+ int max_tx_desc; /* min of the device attributes max_qp_wr and max_cqe */
+ int max_send_sge; /* from the device attribute max_sge */
+ int max_recv_sge; /* from the device attribute max_sge */
+ int max_mr; /* from the device attribute max_mr */
+ uint64_t max_mr_size; /* from the device attribute max_mr_size */
+};
+
+extern int mana_logtype_driver;
+extern int mana_logtype_init;
+
+/*
+ * Log helpers: each prefixes the message with the calling function's name
+ * and appends a newline. DRV_LOG uses mana_logtype_driver, PMD_INIT_LOG
+ * uses mana_logtype_init.
+ */
+#define DRV_LOG(level, fmt, args...) \
+ rte_log(RTE_LOG_ ## level, mana_logtype_driver, "%s(): " fmt "\n", \
+ __func__, ## args)
+
+#define PMD_INIT_LOG(level, fmt, args...) \
+ rte_log(RTE_LOG_ ## level, mana_logtype_init, "%s(): " fmt "\n",\
+ __func__, ## args)
+
+#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
+
+uint16_t mana_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
+ uint16_t pkts_n);
+
+uint16_t mana_tx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
+ uint16_t pkts_n);
+
+/** Request timeout for IPC, in seconds. */
+#define MANA_MP_REQ_TIMEOUT_SEC 5
+
+/* Request types for IPC. Values start at 1, so 0 is never a valid type. */
+enum mana_mp_req_type {
+ MANA_MP_REQ_VERBS_CMD_FD = 1, /* answered by the primary with the IB context's cmd_fd */
+ MANA_MP_REQ_CREATE_MR,
+ MANA_MP_REQ_START_RXTX,
+ MANA_MP_REQ_STOP_RXTX,
+};
+
+/* Parameters for IPC, carried in the param area of struct rte_mp_msg. */
+struct mana_mp_param {
+ enum mana_mp_req_type type; /* which request this message carries */
+ int port_id; /* DPDK port the request refers to */
+ int result; /* status filled in by the replying side (0 on success) */
+
+ /* MANA_MP_REQ_CREATE_MR */
+ uintptr_t addr;
+ uint32_t len;
+};
+
+#define MANA_MP_NAME "net_mana_mp"
+int mana_mp_init_primary(void);
+int mana_mp_init_secondary(void);
+void mana_mp_uninit_primary(void);
+void mana_mp_uninit_secondary(void);
+int mana_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev);
+
+void mana_mp_req_on_rxtx(struct rte_eth_dev *dev, enum mana_mp_req_type type);
+
+#endif
new file mode 100644
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 Microsoft Corporation
+
+# The driver is only built for Linux on x86_64.
+if not is_linux or not dpdk_conf.has('RTE_ARCH_X86_64')
+ build = false
+ reason = 'mana is supported on Linux X86_64'
+ subdir_done()
+endif
+
+deps += ['pci', 'bus_pci', 'net', 'eal', 'kvargs']
+
+sources += files(
+ 'mana.c',
+ 'mp.c',
+)
+
+# Both external libraries are mandatory; the build is disabled, with the
+# missing library named in the reason, as soon as one of them is not found.
+libnames = ['ibverbs', 'mana' ]
+foreach libname:libnames
+ lib = cc.find_library(libname, required:false)
+ if lib.found()
+ ext_deps += lib
+ else
+ build = false
+ reason = 'missing dependency, "' + libname + '"'
+ subdir_done()
+ endif
+endforeach
+
+# Header symbols the driver needs from manadv.h; each entry is
+# [header, symbol].
+required_symbols = [
+ ['infiniband/manadv.h', 'manadv_set_context_attr'],
+ ['infiniband/manadv.h', 'manadv_init_obj'],
+ ['infiniband/manadv.h', 'MANADV_CTX_ATTR_BUF_ALLOCATORS'],
+ ['infiniband/manadv.h', 'MANADV_OBJ_QP'],
+ ['infiniband/manadv.h', 'MANADV_OBJ_CQ'],
+ ['infiniband/manadv.h', 'MANADV_OBJ_RWQ'],
+]
+
+# Disable the build if the installed header lacks any required symbol.
+foreach arg:required_symbols
+ if not cc.has_header_symbol(arg[0], arg[1])
+ build = false
+ reason = 'missing symbol "' + arg[1] + '" in "' + arg[0] + '"'
+ subdir_done()
+ endif
+endforeach
new file mode 100644
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Microsoft Corporation
+ */
+
+#include <rte_malloc.h>
+#include <ethdev_driver.h>
+#include <rte_log.h>
+
+#include <infiniband/verbs.h>
+
+#include "mana.h"
+
+extern struct mana_shared_data *mana_shared_data;
+
+/*
+ * Prepare an MP message for the MANA action: set the action name, size the
+ * parameter area as one struct mana_mp_param and record the request type
+ * and port ID in it. Other parameter fields are left untouched.
+ */
+static void
+mp_init_msg(struct rte_mp_msg *msg, enum mana_mp_req_type type, int port_id)
+{
+	struct mana_mp_param *mp_param = (struct mana_mp_param *)msg->param;
+
+	mp_param->type = type;
+	mp_param->port_id = port_id;
+
+	msg->len_param = sizeof(struct mana_mp_param);
+	strlcpy(msg->name, MANA_MP_NAME, sizeof(msg->name));
+}
+
+/*
+ * MP action handler registered by the primary process.
+ *
+ * Checks the port ID carried in the request and, for
+ * MANA_MP_REQ_VERBS_CMD_FD, replies to the peer with the port's verbs
+ * command FD attached. Any other request type is logged and rejected
+ * without sending a reply.
+ *
+ * Returns -ENODEV for an invalid port, -EINVAL for an unknown request type,
+ * otherwise the result of rte_mp_reply().
+ */
+static int
+mana_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+	const struct mana_mp_param *req =
+		(const struct mana_mp_param *)mp_msg->param;
+	struct rte_mp_msg reply = { 0 };
+	struct mana_mp_param *reply_param;
+	struct mana_priv *priv;
+
+	if (!rte_eth_dev_is_valid_port(req->port_id)) {
+		DRV_LOG(ERR, "MP handle port ID %u invalid", req->port_id);
+		return -ENODEV;
+	}
+
+	priv = rte_eth_devices[req->port_id].data->dev_private;
+
+	mp_init_msg(&reply, req->type, req->port_id);
+	reply_param = (struct mana_mp_param *)reply.param;
+
+	if (req->type != MANA_MP_REQ_VERBS_CMD_FD) {
+		DRV_LOG(ERR, "Port %u unknown primary MP type %u",
+			req->port_id, req->type);
+		return -EINVAL;
+	}
+
+	/* Attach the verbs command FD to the reply. */
+	reply.num_fds = 1;
+	reply.fds[0] = priv->ib_ctx->cmd_fd;
+	reply_param->result = 0;
+
+	return rte_mp_reply(&reply, peer);
+}
+
+/*
+ * MP action handler registered by secondary processes.
+ *
+ * MANA_MP_REQ_START_RXTX is acknowledged as-is; MANA_MP_REQ_STOP_RXTX first
+ * replaces the port's burst functions with the "removed" variants. Both
+ * issue a memory barrier and then reply with result 0. Any other request
+ * type is logged and rejected without sending a reply.
+ *
+ * Returns -ENODEV for an invalid port, -EINVAL for an unknown request type,
+ * otherwise the result of rte_mp_reply().
+ */
+static int
+mana_mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+	const struct mana_mp_param *req =
+		(const struct mana_mp_param *)mp_msg->param;
+	struct rte_mp_msg reply = { 0 };
+	struct mana_mp_param *reply_param = (struct mana_mp_param *)reply.param;
+	struct rte_eth_dev *dev;
+
+	if (!rte_eth_dev_is_valid_port(req->port_id)) {
+		DRV_LOG(ERR, "MP handle port ID %u invalid", req->port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[req->port_id];
+
+	mp_init_msg(&reply, req->type, req->port_id);
+
+	switch (req->type) {
+	case MANA_MP_REQ_START_RXTX:
+		DRV_LOG(INFO, "Port %u starting datapath", dev->data->port_id);
+		break;
+
+	case MANA_MP_REQ_STOP_RXTX:
+		DRV_LOG(INFO, "Port %u stopping datapath", dev->data->port_id);
+
+		dev->tx_pkt_burst = mana_tx_burst_removed;
+		dev->rx_pkt_burst = mana_rx_burst_removed;
+		break;
+
+	default:
+		DRV_LOG(ERR, "Port %u unknown secondary MP type %u",
+			req->port_id, req->type);
+		return -EINVAL;
+	}
+
+	/* Common tail of both handled requests: barrier, then acknowledge. */
+	rte_mb();
+
+	reply_param->result = 0;
+	return rte_mp_reply(&reply, peer);
+}
+
+/*
+ * Register the primary-process handler for the MANA MP action.
+ *
+ * A registration failure with rte_errno set to ENOTSUP is tolerated.
+ * Returns 0 on success or tolerated failure, -1 otherwise.
+ */
+int
+mana_mp_init_primary(void)
+{
+	int rc = rte_mp_action_register(MANA_MP_NAME, mana_mp_primary_handle);
+
+	if (rc == 0 || rte_errno == ENOTSUP)
+		return 0;
+
+	DRV_LOG(ERR, "Failed to register primary handler %d %d",
+		rc, rte_errno);
+	return -1;
+}
+
+/* Unregister the MANA MP action handler (primary-process counterpart of mana_mp_init_primary()). */
+void
+mana_mp_uninit_primary(void)
+{
+ rte_mp_action_unregister(MANA_MP_NAME);
+}
+
+/*
+ * Register the secondary-process handler for the MANA MP action.
+ * Returns the value of rte_mp_action_register() unchanged.
+ */
+int
+mana_mp_init_secondary(void)
+{
+	int rc;
+
+	rc = rte_mp_action_register(MANA_MP_NAME, mana_mp_secondary_handle);
+	return rc;
+}
+
+/* Unregister the MANA MP action handler (secondary-process counterpart of mana_mp_init_secondary()). */
+void
+mana_mp_uninit_secondary(void)
+{
+ rte_mp_action_unregister(MANA_MP_NAME);
+}
+
+/*
+ * Request the verbs command FD of the port from the primary process.
+ *
+ * Sends a MANA_MP_REQ_VERBS_CMD_FD request and waits up to
+ * MANA_MP_REQ_TIMEOUT_SEC seconds for the reply.
+ *
+ * Returns the FD received from the primary on success. On failure returns
+ * the rte_mp_request_sync() error value, the non-zero result reported by
+ * the primary, or -EPROTO when the number of replies or of attached FDs is
+ * not exactly one. The reply buffer is freed on every path after a
+ * successful request.
+ */
+int
+mana_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev)
+{
+	struct rte_mp_msg mp_req = { 0 };
+	struct rte_mp_msg *mp_res;
+	struct rte_mp_reply mp_rep;
+	struct mana_mp_param *res;
+	struct timespec ts = {.tv_sec = MANA_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+	int ret;
+
+	mp_init_msg(&mp_req, MANA_MP_REQ_VERBS_CMD_FD, dev->data->port_id);
+
+	ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+	if (ret) {
+		DRV_LOG(ERR, "port %u request to primary process failed",
+			dev->data->port_id);
+		return ret;
+	}
+
+	if (mp_rep.nb_received != 1) {
+		/* The reply count is a signed int, so print it with %d. */
+		DRV_LOG(ERR, "primary replied %d messages", mp_rep.nb_received);
+		ret = -EPROTO;
+		goto exit;
+	}
+
+	mp_res = &mp_rep.msgs[0];
+	res = (struct mana_mp_param *)mp_res->param;
+	if (res->result) {
+		DRV_LOG(ERR, "failed to get CMD FD, port %u",
+			dev->data->port_id);
+		ret = res->result;
+		goto exit;
+	}
+
+	if (mp_res->num_fds != 1) {
+		DRV_LOG(ERR, "got FDs %d unexpected", mp_res->num_fds);
+		ret = -EPROTO;
+		goto exit;
+	}
+
+	ret = mp_res->fds[0];
+	/* Success path: informational, not an error. */
+	DRV_LOG(INFO, "port %u command FD from primary is %d",
+		dev->data->port_id, ret);
+exit:
+	free(mp_rep.msgs);
+	return ret;
+}
+
+/*
+ * Send a start or stop Rx/Tx request for the port and check the replies.
+ *
+ * Only MANA_MP_REQ_START_RXTX and MANA_MP_REQ_STOP_RXTX are accepted; any
+ * other type is logged and ignored. Nothing is sent when the shared
+ * secondary count is zero. Failures are logged, not returned; a failed
+ * request is logged unless rte_errno is ENOTSUP. The reply buffer is freed
+ * once a request has been issued.
+ */
+void
+mana_mp_req_on_rxtx(struct rte_eth_dev *dev, enum mana_mp_req_type type)
+{
+	struct timespec timeout = {.tv_sec = MANA_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+	struct rte_mp_msg request = { 0 };
+	struct rte_mp_reply replies;
+	int idx;
+
+	switch (type) {
+	case MANA_MP_REQ_START_RXTX:
+	case MANA_MP_REQ_STOP_RXTX:
+		break;
+	default:
+		DRV_LOG(ERR, "port %u unknown request (req_type %d)",
+			dev->data->port_id, type);
+		return;
+	}
+
+	if (!mana_shared_data->secondary_cnt)
+		return;
+
+	mp_init_msg(&request, type, dev->data->port_id);
+
+	if (rte_mp_request_sync(&request, &replies, &timeout) != 0) {
+		if (rte_errno != ENOTSUP)
+			DRV_LOG(ERR, "port %u failed to request Rx/Tx (%d)",
+				dev->data->port_id, type);
+		goto out;
+	}
+
+	if (replies.nb_sent != replies.nb_received) {
+		DRV_LOG(ERR, "port %u not all secondaries responded (%d)",
+			dev->data->port_id, type);
+		goto out;
+	}
+
+	/* Stop at the first reply that reports a failure. */
+	for (idx = 0; idx < replies.nb_received; idx++) {
+		const struct mana_mp_param *answer =
+			(const struct mana_mp_param *)replies.msgs[idx].param;
+
+		if (!answer->result)
+			continue;
+
+		DRV_LOG(ERR, "port %u request failed on secondary %d",
+			dev->data->port_id, idx);
+		break;
+	}
+out:
+	free(replies.msgs);
+}
new file mode 100644
@@ -0,0 +1,3 @@
+DPDK_23 {
+ local: *;
+};
@@ -34,6 +34,7 @@ drivers = [
'ixgbe',
'kni',
'liquidio',
+ 'mana',
'memif',
'mlx4',
'mlx5',