[dpdk-dev,8/8] eal: VMBUS infrastructure
Checks
Commit Message
Add support for VMBUS on Hyper-V/Azure. VMBUS is similar to PCI
but has different addressing and internal API's.
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
lib/librte_eal/common/Makefile | 2 +-
lib/librte_eal/common/eal_common_devargs.c | 7 +
lib/librte_eal/common/eal_common_options.c | 38 ++
lib/librte_eal/common/eal_internal_cfg.h | 1 +
lib/librte_eal/common/eal_options.h | 6 +
lib/librte_eal/common/eal_private.h | 5 +
lib/librte_eal/common/include/rte_devargs.h | 8 +
lib/librte_eal/common/include/rte_vmbus.h | 249 ++++++++
lib/librte_eal/linuxapp/eal/Makefile | 6 +
lib/librte_eal/linuxapp/eal/eal.c | 13 +
lib/librte_eal/linuxapp/eal/eal_vmbus.c | 911 ++++++++++++++++++++++++++++
lib/librte_ether/rte_ethdev.c | 90 +++
lib/librte_ether/rte_ethdev.h | 31 +
mk/rte.app.mk | 1 +
14 files changed, 1367 insertions(+), 1 deletion(-)
create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c
Comments
On Sat, Jan 7, 2017 at 7:17 PM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> Add support for VMBUS on Hyper-V/Azure. VMBUS is similar to PCI
> but has different addressing and internal API's.
>
> Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
> ---
> lib/librte_eal/common/Makefile | 2 +-
> lib/librte_eal/common/eal_common_devargs.c | 7 +
> lib/librte_eal/common/eal_common_options.c | 38 ++
> lib/librte_eal/common/eal_internal_cfg.h | 1 +
> lib/librte_eal/common/eal_options.h | 6 +
> lib/librte_eal/common/eal_private.h | 5 +
> lib/librte_eal/common/include/rte_devargs.h | 8 +
> lib/librte_eal/common/include/rte_vmbus.h | 249 ++++++++
> lib/librte_eal/linuxapp/eal/Makefile | 6 +
> lib/librte_eal/linuxapp/eal/eal.c | 13 +
> lib/librte_eal/linuxapp/eal/eal_vmbus.c | 911 ++++++++++++++++++++++++++++
> lib/librte_ether/rte_ethdev.c | 90 +++
> lib/librte_ether/rte_ethdev.h | 31 +
> mk/rte.app.mk | 1 +
> 14 files changed, 1367 insertions(+), 1 deletion(-)
> create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
> create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c
>
> diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
> index 09a3d3af..ceb77bed 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>
> INC := rte_branch_prediction.h rte_common.h
> INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
> -INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
> +INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
> INC += rte_per_lcore.h rte_random.h
> INC += rte_tailq.h rte_interrupts.h rte_alarm.h
> INC += rte_string_fns.h rte_version.h
> diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
> index e403717b..934ca840 100644
> --- a/lib/librte_eal/common/eal_common_devargs.c
> +++ b/lib/librte_eal/common/eal_common_devargs.c
> @@ -113,6 +113,13 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
> goto fail;
>
> break;
> + case RTE_DEVTYPE_WHITELISTED_VMBUS:
> + case RTE_DEVTYPE_BLACKLISTED_VMBUS:
> +#ifdef RTE_LIBRTE_HV_PMD
> + if (uuid_parse(buf, devargs->uuid) == 0)
> + break;
> +#endif
> + goto fail;
> }
>
> free(buf);
> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> index f36bc556..1a2b418c 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -95,6 +95,11 @@ eal_long_options[] = {
> {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
> {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
> {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM },
> +#ifdef RTE_LIBRTE_HV_PMD
> + {OPT_NO_VMBUS, 0, NULL, OPT_NO_VMBUS_NUM },
> + {OPT_VMBUS_BLACKLIST, 1, NULL, OPT_VMBUS_BLACKLIST_NUM },
> + {OPT_VMBUS_WHITELIST, 1, NULL, OPT_VMBUS_WHITELIST_NUM },
> +#endif
> {0, 0, NULL, 0 }
> };
>
> @@ -858,6 +863,21 @@ eal_parse_common_option(int opt, const char *optarg,
> conf->no_pci = 1;
> break;
>
> +#ifdef RTE_LIBRTE_HV_PMD
> + case OPT_NO_VMBUS_NUM:
> + conf->no_vmbus = 1;
> + break;
> + case OPT_VMBUS_BLACKLIST_NUM:
> + if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_VMBUS,
> + optarg) < 0)
> + return -1;
> + break;
> + case OPT_VMBUS_WHITELIST_NUM:
> + if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_VMBUS,
> + optarg) < 0)
> + return -1;
> + break;
> +#endif
> case OPT_NO_HPET_NUM:
> conf->no_hpet = 1;
> break;
> @@ -1017,6 +1037,14 @@ eal_check_common_options(struct internal_config *internal_cfg)
> return -1;
> }
>
> +#ifdef RTE_LIBRTE_HV_PMD
> + if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_VMBUS) != 0 &&
> + rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_VMBUS) != 0) {
> + RTE_LOG(ERR, EAL, "Options vmbus blacklist and whitelist "
> + "cannot be used at the same time\n");
> + return -1;
> + }
> +#endif
> return 0;
> }
>
> @@ -1066,5 +1094,15 @@ eal_common_usage(void)
> " --"OPT_NO_PCI" Disable PCI\n"
> " --"OPT_NO_HPET" Disable HPET\n"
> " --"OPT_NO_SHCONF" No shared config (mmap'd files)\n"
> +#ifdef RTE_LIBRTE_HV_PMD
> + " --"OPT_NO_VMBUS" Disable VMBUS\n"
> + " --"OPT_VMBUS_BLACKLIST" Add a VMBUS device to black list.\n"
> + " Prevent EAL from using this PCI device. The argument\n"
> + " format is device UUID.\n"
> + " --"OPT_VMBUS_WHITELIST" Add a VMBUS device to white list.\n"
> + " Only use the specified VMBUS devices. The argument format\n"
> + " is device UUID This option can be present\n"
> + " several times (once per device).\n"
> +#endif
> "\n", RTE_MAX_LCORE);
> }
> diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
> index 5f1367eb..4b6af937 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -67,6 +67,7 @@ struct internal_config {
> unsigned hugepage_unlink; /**< true to unlink backing files */
> volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
> volatile unsigned no_pci; /**< true to disable PCI */
> + volatile unsigned no_vmbus; /**< true to disable VMBUS */
> volatile unsigned no_hpet; /**< true to disable HPET */
> volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
> * instead of native TSC */
> diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
> index a881c62e..156727e7 100644
> --- a/lib/librte_eal/common/eal_options.h
> +++ b/lib/librte_eal/common/eal_options.h
> @@ -83,6 +83,12 @@ enum {
> OPT_VMWARE_TSC_MAP_NUM,
> #define OPT_XEN_DOM0 "xen-dom0"
> OPT_XEN_DOM0_NUM,
> +#define OPT_NO_VMBUS "no-vmbus"
> + OPT_NO_VMBUS_NUM,
> +#define OPT_VMBUS_BLACKLIST "vmbus-blacklist"
> + OPT_VMBUS_BLACKLIST_NUM,
> +#define OPT_VMBUS_WHITELIST "vmbus-whitelist"
> + OPT_VMBUS_WHITELIST_NUM,
> OPT_LONG_MAX_NUM
> };
>
> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
> index 9e7d8f6b..c856c63e 100644
> --- a/lib/librte_eal/common/eal_private.h
> +++ b/lib/librte_eal/common/eal_private.h
> @@ -210,6 +210,11 @@ int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
> struct mapped_pci_resource *uio_res, int map_idx);
>
> /**
> + * VMBUS related functions and structures
> + */
> +int rte_eal_vmbus_init(void);
> +
> +/**
> * Init tail queues for non-EAL library structures. This is to allow
> * the rings, mempools, etc. lists to be shared among multiple processes
> *
> diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
> index 88120a1c..c079d289 100644
> --- a/lib/librte_eal/common/include/rte_devargs.h
> +++ b/lib/librte_eal/common/include/rte_devargs.h
> @@ -51,6 +51,9 @@ extern "C" {
> #include <stdio.h>
> #include <sys/queue.h>
> #include <rte_pci.h>
> +#ifdef RTE_LIBRTE_HV_PMD
> +#include <uuid/uuid.h>
> +#endif
>
> /**
> * Type of generic device
> @@ -59,6 +62,8 @@ enum rte_devtype {
> RTE_DEVTYPE_WHITELISTED_PCI,
> RTE_DEVTYPE_BLACKLISTED_PCI,
> RTE_DEVTYPE_VIRTUAL,
> + RTE_DEVTYPE_WHITELISTED_VMBUS,
> + RTE_DEVTYPE_BLACKLISTED_VMBUS,
> };
>
> /**
> @@ -88,6 +93,9 @@ struct rte_devargs {
> /** Driver name. */
> char drv_name[32];
> } virt;
> +#ifdef RTE_LIBRTE_HV_PMD
> + uuid_t uuid;
> +#endif
> };
> /** Arguments string as given by user or "" for no argument. */
> char *args;
> diff --git a/lib/librte_eal/common/include/rte_vmbus.h b/lib/librte_eal/common/include/rte_vmbus.h
> new file mode 100644
> index 00000000..f96d753e
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_vmbus.h
> @@ -0,0 +1,249 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
> + * Copyright(c) 2016 Microsoft Corporation
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + */
> +
> +#ifndef _RTE_VMBUS_H_
> +#define _RTE_VMBUS_H_
> +
> +/**
> + * @file
> + *
> + * RTE VMBUS Interface
> + */
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <limits.h>
> +#include <errno.h>
> +#include <uuid/uuid.h>
> +#include <sys/queue.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +
> +#include <rte_debug.h>
> +#include <rte_interrupts.h>
> +#include <rte_dev.h>
> +
> +TAILQ_HEAD(vmbus_device_list, rte_vmbus_device);
> +TAILQ_HEAD(vmbus_driver_list, rte_vmbus_driver);
> +
> +extern struct vmbus_driver_list vmbus_driver_list;
> +extern struct vmbus_device_list vmbus_device_list;
> +
> +/** Pathname of VMBUS devices directory. */
> +#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
> +
> +#define UUID_BUF_SZ (36 + 1)
> +
> +
> +/** Maximum number of VMBUS resources. */
> +#define VMBUS_MAX_RESOURCE 7
> +
> +/**
> + * A structure describing a VMBUS device.
> + */
> +struct rte_vmbus_device {
> + TAILQ_ENTRY(rte_vmbus_device) next; /**< Next probed VMBUS device. */
> + struct rte_device device; /**< Inherit core device */
> + uuid_t device_id; /**< VMBUS device id */
> + uuid_t class_id; /**< VMBUS device type */
> + uint32_t relid; /**< VMBUS id for notification */
> + uint8_t monitor_id;
> + struct rte_intr_handle intr_handle; /**< Interrupt handle */
> + const struct rte_vmbus_driver *driver; /**< Associated driver */
> +
> + struct rte_mem_resource mem_resource[VMBUS_MAX_RESOURCE];
> + /**< VMBUS Memory Resource */
> + char sysfs_name[]; /**< Name in sysfs bus directory */
> +};
> +
> +struct rte_vmbus_driver;
> +
> +/**
> + * Initialisation function for the driver called during VMBUS probing.
> + */
> +typedef int (vmbus_probe_t)(struct rte_vmbus_driver *,
> + struct rte_vmbus_device *);
> +
> +/**
> + * Uninitialisation function for the driver called during hotplugging.
> + */
> +typedef int (vmbus_remove_t)(struct rte_vmbus_device *);
> +
> +/**
> + * A structure describing a VMBUS driver.
> + */
> +struct rte_vmbus_driver {
> + TAILQ_ENTRY(rte_vmbus_driver) next; /**< Next in list. */
> + struct rte_driver driver;
> + vmbus_probe_t *probe; /**< Device Probe function. */
> + vmbus_remove_t *remove; /**< Device Remove function. */
> +
> + const uuid_t *id_table; /**< ID table. */
> +};
> +
> +struct vmbus_map {
> + void *addr;
> + char *path;
> + uint64_t offset;
> + uint64_t size;
> + uint64_t phaddr;
> +};
> +
> +/*
> + * For multi-process we need to reproduce all vmbus mappings in secondary
> + * processes, so save them in a tailq.
> + */
> +struct mapped_vmbus_resource {
> + TAILQ_ENTRY(mapped_vmbus_resource) next;
> +
> + uuid_t uuid;
> + char path[PATH_MAX];
> + int nb_maps;
> + struct vmbus_map maps[VMBUS_MAX_RESOURCE];
> +};
> +
> +TAILQ_HEAD(mapped_vmbus_res_list, mapped_vmbus_resource);
> +
> +/**
> + * Scan the content of the VMBUS bus, and the devices in the devices list
> + *
> + * @return
> + * 0 on success, negative on error
> + */
> +int rte_eal_vmbus_scan(void);
> +
> +/**
> + * Probe the VMBUS bus for registered drivers.
> + *
> + * Scan the content of the VMBUS bus, and call the probe() function for
> + * all registered drivers that have a matching entry in its id_table
> + * for discovered devices.
> + *
> + * @return
> + * - 0 on success.
> + * - Negative on error.
> + */
> +int rte_eal_vmbus_probe(void);
> +
> +/**
> + * Map the VMBUS device resources in user space virtual memory address
> + *
> + * @param dev
> + * A pointer to a rte_vmbus_device structure describing the device
> + * to use
> + *
> + * @return
> + * 0 on success, negative on error and positive if no driver
> + * is found for the device.
> + */
> +int rte_eal_vmbus_map_device(struct rte_vmbus_device *dev);
> +
> +/**
> + * Unmap this device
> + *
> + * @param dev
> + * A pointer to a rte_vmbus_device structure describing the device
> + * to use
> + */
> +void rte_eal_vmbus_unmap_device(struct rte_vmbus_device *dev);
> +
> +/**
> + * Probe the single VMBUS device.
> + *
> + * Scan the content of the VMBUS bus, and find the vmbus device
> + * specified by device uuid, then call the probe() function for
> + * registered driver that has a matching entry in its id_table for
> + * discovered device.
> + *
> + * @param id
> + * The VMBUS device uuid.
> + * @return
> + * - 0 on success.
> + * - Negative on error.
> + */
> +int rte_eal_vmbus_probe_one(uuid_t id);
> +
> +/**
> + * Close the single VMBUS device.
> + *
> + * Scan the content of the VMBUS bus, and find the vmbus device id,
> + * then call the remove() function for registered driver that has a
> + * matching entry in its id_table for discovered device.
> + *
> + * @param id
> + * The VMBUS device uuid.
> + * @return
> + * - 0 on success.
> + * - Negative on error.
> + */
> +int rte_eal_vmbus_detach(uuid_t id);
> +
> +/**
> + * Register a VMBUS driver.
> + *
> + * @param driver
> + * A pointer to a rte_vmbus_driver structure describing the driver
> + * to be registered.
> + */
> +void rte_eal_vmbus_register(struct rte_vmbus_driver *driver);
> +
> +/** Helper for VMBUS device registration from driver nstance */
> +#define RTE_PMD_REGISTER_VMBUS(nm, vmbus_drv) \
> +RTE_INIT(vmbusinitfn_ ##nm); \
> +static void vmbusinitfn_ ##nm(void) \
> +{\
> + (vmbus_drv).driver.name = RTE_STR(nm);\
> + (vmbus_drv).driver.type = PMD_VMBUS; \
> + rte_eal_vmbus_register(&vmbus_drv); \
> +} \
> +RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
> +
> +/**
> + * Unregister a VMBUS driver.
> + *
> + * @param driver
> + * A pointer to a rte_vmbus_driver structure describing the driver
> + * to be unregistered.
> + */
> +void rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_VMBUS_H_ */
> diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
> index 4e206f09..f6ca3848 100644
> --- a/lib/librte_eal/linuxapp/eal/Makefile
> +++ b/lib/librte_eal/linuxapp/eal/Makefile
> @@ -71,6 +71,11 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
>
> +ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
> +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vmbus.c
> +LDLIBS += -luuid
> +endif
> +
> # from common dir
> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
> @@ -114,6 +119,7 @@ CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
> CFLAGS_eal_pci.o := -D_GNU_SOURCE
> CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE
> CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE
> +CFLAGS_eal_vmbux.o := -D_GNU_SOURCE
> CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
> CFLAGS_eal_common_options.o := -D_GNU_SOURCE
> CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
> index 16dd5b9c..1bc0814a 100644
> --- a/lib/librte_eal/linuxapp/eal/eal.c
> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> @@ -70,6 +70,9 @@
> #include <rte_cpuflags.h>
> #include <rte_interrupts.h>
> #include <rte_pci.h>
> +#ifdef RTE_LIBRTE_HV_PMD
> +#include <rte_vmbus.h>
> +#endif
> #include <rte_dev.h>
> #include <rte_devargs.h>
> #include <rte_common.h>
> @@ -830,6 +833,11 @@ rte_eal_init(int argc, char **argv)
>
> eal_check_mem_on_local_socket();
>
> +#ifdef RTE_LIBRTE_HV_PMD
> + if (rte_eal_vmbus_init() < 0)
> + RTE_LOG(ERR, EAL, "Cannot init VMBUS\n");
> +#endif
> +
> if (eal_plugins_init() < 0)
> rte_panic("Cannot init plugins\n");
>
> @@ -884,6 +892,11 @@ rte_eal_init(int argc, char **argv)
> if (rte_eal_pci_probe())
> rte_panic("Cannot probe PCI\n");
>
> +#ifdef RTE_LIBRTE_HV_PMD
> + if (rte_eal_vmbus_probe() < 0)
> + rte_panic("Cannot probe VMBUS\n");
> +#endif
> +
> if (rte_eal_dev_init() < 0)
> rte_panic("Cannot init pmd devices\n");
>
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vmbus.c b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
> new file mode 100644
> index 00000000..729f93a9
> --- /dev/null
> +++ b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
> @@ -0,0 +1,911 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
> + * Copyright(c) 2016 Microsoft Corporation
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + */
> +
> +#include <string.h>
> +#include <unistd.h>
> +#include <dirent.h>
> +#include <fcntl.h>
> +#include <sys/mman.h>
> +
> +#include <rte_eal.h>
> +#include <rte_tailq.h>
> +#include <rte_log.h>
> +#include <rte_devargs.h>
> +#include <rte_vmbus.h>
> +#include <rte_malloc.h>
> +
> +#include "eal_private.h"
> +#include "eal_pci_init.h"
> +#include "eal_filesystem.h"
> +
> +struct vmbus_driver_list vmbus_driver_list =
> + TAILQ_HEAD_INITIALIZER(vmbus_driver_list);
> +struct vmbus_device_list vmbus_device_list =
> + TAILQ_HEAD_INITIALIZER(vmbus_device_list);
> +
> +static void *vmbus_map_addr;
> +
> +static struct rte_tailq_elem rte_vmbus_uio_tailq = {
> + .name = "UIO_RESOURCE_LIST",
> +};
> +EAL_REGISTER_TAILQ(rte_vmbus_uio_tailq);
> +
> +/*
> + * parse a sysfs file containing one integer value
> + * different to the eal version, as it needs to work with 64-bit values
> + */
> +static int
> +vmbus_get_sysfs_uuid(const char *filename, uuid_t uu)
> +{
> + char buf[BUFSIZ];
> + char *cp, *in = buf;
> + FILE *f;
> +
> + f = fopen(filename, "r");
> + if (f == NULL) {
> + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
> + __func__, filename);
> + return -1;
> + }
> +
> + if (fgets(buf, sizeof(buf), f) == NULL) {
> + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
> + __func__, filename);
> + fclose(f);
> + return -1;
> + }
> + fclose(f);
> +
> + cp = strchr(buf, '\n');
> + if (cp)
> + *cp = '\0';
> +
> + /* strip { } notation */
> + if (buf[0] == '{') {
> + in = buf + 1;
> + cp = strchr(in, '}');
> + if (cp)
> + *cp = '\0';
> + }
> +
> + if (uuid_parse(in, uu) < 0) {
> + RTE_LOG(ERR, EAL, "%s %s not a valid UUID\n",
> + filename, buf);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +/* map a particular resource from a file */
> +static void *
> +vmbus_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
> + int flags)
> +{
> + void *mapaddr;
> +
> + /* Map the memory resource of device */
> + mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
> + MAP_SHARED | flags, fd, offset);
> + if (mapaddr == MAP_FAILED ||
> + (requested_addr != NULL && mapaddr != requested_addr)) {
> + RTE_LOG(ERR, EAL,
> + "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s)\n",
> + __func__, fd, requested_addr,
> + (unsigned long)size, (unsigned long)offset,
> + strerror(errno));
> + } else
> + RTE_LOG(DEBUG, EAL, " VMBUS memory mapped at %p\n", mapaddr);
> +
> + return mapaddr;
> +}
> +
> +/* unmap a particular resource */
> +static void
> +vmbus_unmap_resource(void *requested_addr, size_t size)
> +{
> + if (requested_addr == NULL)
> + return;
> +
> + /* Unmap the VMBUS memory resource of device */
> + if (munmap(requested_addr, size)) {
> + RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
> + __func__, requested_addr, (unsigned long)size,
> + strerror(errno));
> + } else
> + RTE_LOG(DEBUG, EAL, " VMBUS memory unmapped at %p\n",
> + requested_addr);
> +}
> +
> +/* Only supports current kernel version
> + * Unlike PCI there is no option (or need) to create UIO device.
> + */
> +static int vmbus_get_uio_dev(const char *name,
> + char *dstbuf, size_t buflen)
> +{
> + char dirname[PATH_MAX];
> + unsigned int uio_num;
> + struct dirent *e;
> + DIR *dir;
> +
> + snprintf(dirname, sizeof(dirname),
> + "/sys/bus/vmbus/devices/%s/uio", name);
> +
> + dir = opendir(dirname);
> + if (dir == NULL) {
> + RTE_LOG(ERR, EAL, "Cannot map uio resources for %s: %s\n",
> + name, strerror(errno));
> + return -1;
> + }
> +
> + /* take the first file starting with "uio" */
> + while ((e = readdir(dir)) != NULL) {
> + if (sscanf(e->d_name, "uio%u", &uio_num) != 1)
> + continue;
> +
> + snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num);
> + break;
> + }
> + closedir(dir);
> +
> + return e ? (int) uio_num : -1;
> +}
> +
> +/*
> + * parse a sysfs file containing one integer value
> + * different to the eal version, as it needs to work with 64-bit values
> + */
> +static int
> +vmbus_parse_sysfs_value(const char *dir, const char *name,
> + uint64_t *val)
> +{
> + char filename[PATH_MAX];
> + FILE *f;
> + char buf[BUFSIZ];
> + char *end = NULL;
> +
> + snprintf(filename, sizeof(filename), "%s/%s", dir, name);
> + f = fopen(filename, "r");
> + if (f == NULL) {
> + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
> + __func__, filename);
> + return -1;
> + }
> +
> + if (fgets(buf, sizeof(buf), f) == NULL) {
> + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
> + __func__, filename);
> + fclose(f);
> + return -1;
> + }
> + fclose(f);
> +
> + *val = strtoull(buf, &end, 0);
> + if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
> + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
> + __func__, filename);
> + return -1;
> + }
> + return 0;
> +}
> +
> +/* Get mappings out of values provided by uio */
> +static int
> +vmbus_uio_get_mappings(const char *uioname,
> + struct vmbus_map maps[])
> +{
> + int i;
> +
> + for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
> + struct vmbus_map *map = &maps[i];
> + char dirname[PATH_MAX];
> +
> + /* check if map directory exists */
> + snprintf(dirname, sizeof(dirname),
> + "%s/maps/map%d", uioname, i);
> +
> + if (access(dirname, F_OK) != 0)
> + break;
> +
> + /* get mapping offset */
> + if (vmbus_parse_sysfs_value(dirname, "offset",
> + &map->offset) < 0)
> + return -1;
> +
> + /* get mapping size */
> + if (vmbus_parse_sysfs_value(dirname, "size",
> + &map->size) < 0)
> + return -1;
> +
> + /* get mapping physical address */
> + if (vmbus_parse_sysfs_value(dirname, "addr",
> + &maps->phaddr) < 0)
> + return -1;
> + }
> +
> + return i;
> +}
> +
> +static void
> +vmbus_uio_free_resource(struct rte_vmbus_device *dev,
> + struct mapped_vmbus_resource *uio_res)
> +{
> + rte_free(uio_res);
> +
> + if (dev->intr_handle.fd) {
> + close(dev->intr_handle.fd);
> + dev->intr_handle.fd = -1;
> + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
> + }
> +}
> +
> +static struct mapped_vmbus_resource *
> +vmbus_uio_alloc_resource(struct rte_vmbus_device *dev)
> +{
> + struct mapped_vmbus_resource *uio_res;
> + char dirname[PATH_MAX], devname[PATH_MAX];
> + int uio_num, nb_maps;
> +
> + uio_num = vmbus_get_uio_dev(dev->sysfs_name, dirname, sizeof(dirname));
> + if (uio_num < 0) {
> + RTE_LOG(WARNING, EAL,
> + " %s not managed by UIO driver, skipping\n",
> + dev->sysfs_name);
> + return NULL;
> + }
> +
> + /* allocate the mapping details for secondary processes*/
> + uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
> + if (uio_res == NULL) {
> + RTE_LOG(ERR, EAL,
> + "%s(): cannot store uio mmap details\n", __func__);
> + goto error;
> + }
> +
> + snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
> + dev->intr_handle.fd = open(devname, O_RDWR);
> + if (dev->intr_handle.fd < 0) {
> + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> + devname, strerror(errno));
> + goto error;
> + }
> +
> + dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
> +
> + snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
> + uuid_copy(uio_res->uuid, dev->device_id);
> +
> + nb_maps = vmbus_uio_get_mappings(dirname, uio_res->maps);
> + if (nb_maps < 0)
> + goto error;
> +
> + RTE_LOG(DEBUG, EAL, "Found %d memory maps for device %s\n",
> + nb_maps, dev->sysfs_name);
> +
> + return uio_res;
> +
> + error:
> + vmbus_uio_free_resource(dev, uio_res);
> + return NULL;
> +}
> +
> +static int
> +vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev,
> + unsigned int res_idx,
> + struct mapped_vmbus_resource *uio_res,
> + unsigned int map_idx)
> +{
> + struct vmbus_map *maps = uio_res->maps;
> + char devname[PATH_MAX];
> + void *mapaddr;
> + int fd;
> +
> + snprintf(devname, sizeof(devname),
> + "/sys/bus/vmbus/%s/resource%u", dev->sysfs_name, res_idx);
> +
> + fd = open(devname, O_RDWR);
> + if (fd < 0) {
> + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> + devname, strerror(errno));
> + return -1;
> + }
> +
> + /* allocate memory to keep path */
> + maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
> + if (maps[map_idx].path == NULL) {
> + RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
> + strerror(errno));
> + return -1;
> + }
> +
> + /* try mapping somewhere close to the end of hugepages */
> + if (vmbus_map_addr == NULL)
> + vmbus_map_addr = pci_find_max_end_va();
> +
> + mapaddr = vmbus_map_resource(vmbus_map_addr, fd, 0,
> + dev->mem_resource[res_idx].len, 0);
> + close(fd);
> + if (mapaddr == MAP_FAILED) {
> + rte_free(maps[map_idx].path);
> + return -1;
> + }
> +
> + vmbus_map_addr = RTE_PTR_ADD(mapaddr,
> + dev->mem_resource[res_idx].len);
> +
> + maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
> + maps[map_idx].size = dev->mem_resource[res_idx].len;
> + maps[map_idx].addr = mapaddr;
> + maps[map_idx].offset = 0;
> + strcpy(maps[map_idx].path, devname);
> + dev->mem_resource[res_idx].addr = mapaddr;
> +
> + return 0;
> +}
> +
> +static void
> +vmbus_uio_unmap(struct mapped_vmbus_resource *uio_res)
> +{
> + int i;
> +
> + if (uio_res == NULL)
> + return;
> +
> + for (i = 0; i != uio_res->nb_maps; i++) {
> + vmbus_unmap_resource(uio_res->maps[i].addr,
> + uio_res->maps[i].size);
> +
> + if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> + rte_free(uio_res->maps[i].path);
> + }
> +}
> +
> +static struct mapped_vmbus_resource *
> +vmbus_uio_find_resource(struct rte_vmbus_device *dev)
> +{
> + struct mapped_vmbus_resource *uio_res;
> + struct mapped_vmbus_res_list *uio_res_list =
> + RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
> + mapped_vmbus_res_list);
> +
> + if (dev == NULL)
> + return NULL;
> +
> + TAILQ_FOREACH(uio_res, uio_res_list, next) {
> + if (uuid_compare(uio_res->uuid, dev->device_id) == 0)
> + return uio_res;
> + }
> + return NULL;
> +}
> +
> +/* unmap the VMBUS resource of a VMBUS device in virtual memory */
> +static void
> +vmbus_uio_unmap_resource(struct rte_vmbus_device *dev)
> +{
> + struct mapped_vmbus_resource *uio_res;
> + struct mapped_vmbus_res_list *uio_res_list =
> + RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
> + mapped_vmbus_res_list);
> +
> + if (dev == NULL)
> + return;
> +
> + /* find an entry for the device */
> + uio_res = vmbus_uio_find_resource(dev);
> + if (uio_res == NULL)
> + return;
> +
> + /* secondary processes - just free maps */
> + if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> + return vmbus_uio_unmap(uio_res);
> +
> + TAILQ_REMOVE(uio_res_list, uio_res, next);
> +
> + /* unmap all resources */
> + vmbus_uio_unmap(uio_res);
> +
> + /* free uio resource */
> + rte_free(uio_res);
> +
> + /* close fd if in primary process */
> + close(dev->intr_handle.fd);
> + if (dev->intr_handle.uio_cfg_fd >= 0) {
> + close(dev->intr_handle.uio_cfg_fd);
> + dev->intr_handle.uio_cfg_fd = -1;
> + }
> +
> + dev->intr_handle.fd = -1;
> + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
> +}
> +
> +static int
> +vmbus_uio_map_secondary(struct rte_vmbus_device *dev)
> +{
> + struct mapped_vmbus_resource *uio_res;
> + struct mapped_vmbus_res_list *uio_res_list =
> + RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
> + mapped_vmbus_res_list);
> +
> + TAILQ_FOREACH(uio_res, uio_res_list, next) {
> + int i;
> +
> + /* skip this element if it doesn't match our id */
> + if (uuid_compare(uio_res->uuid, dev->device_id))
> + continue;
> +
> + for (i = 0; i != uio_res->nb_maps; i++) {
> + void *mapaddr;
> + int fd;
> +
> + fd = open(uio_res->maps[i].path, O_RDWR);
> + if (fd < 0) {
> + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> + uio_res->maps[i].path, strerror(errno));
> + return -1;
> + }
> +
> + mapaddr = vmbus_map_resource(uio_res->maps[i].addr, fd,
> + uio_res->maps[i].offset,
> + uio_res->maps[i].size, 0);
> + /* fd is not needed in slave process, close it */
> + close(fd);
> +
> + if (mapaddr == uio_res->maps[i].addr)
> + continue;
> +
> + RTE_LOG(ERR, EAL,
> + "Cannot mmap device resource file %s to address: %p\n",
> + uio_res->maps[i].path,
> + uio_res->maps[i].addr);
> +
> + /* unmap addrs correctly mapped */
> + while (i != 0) {
> + --i;
> + vmbus_unmap_resource(uio_res->maps[i].addr,
> + uio_res->maps[i].size);
> + }
> + return -1;
> +
> + }
> + return 0;
> + }
> +
> + RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
> + return 1;
> +}
> +
> +/* map the resources of a vmbus device in virtual memory */
> +int
> +rte_eal_vmbus_map_device(struct rte_vmbus_device *dev)
> +{
> + struct mapped_vmbus_resource *uio_res;
> + struct mapped_vmbus_res_list *uio_res_list =
> + RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head, mapped_vmbus_res_list);
> + int i, ret, map_idx = 0;
> +
> + dev->intr_handle.fd = -1;
> + dev->intr_handle.uio_cfg_fd = -1;
> + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
> +
> + /* secondary processes - use already recorded details */
> + if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> + return vmbus_uio_map_secondary(dev);
> +
> + /* allocate uio resource */
> + uio_res = vmbus_uio_alloc_resource(dev);
> + if (uio_res == NULL)
> + return -1;
> +
> + /* Map all BARs */
> + for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
> + uint64_t phaddr;
> +
> + /* skip empty BAR */
> + phaddr = dev->mem_resource[i].phys_addr;
> + if (phaddr == 0)
> + continue;
> +
> + ret = vmbus_uio_map_resource_by_index(dev, i,
> + uio_res, map_idx);
> + if (ret)
> + goto error;
> +
> + map_idx++;
> + }
> +
> + uio_res->nb_maps = map_idx;
> +
> + TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
> +
> + return 0;
> +error:
> + for (i = 0; i < map_idx; i++) {
> + vmbus_unmap_resource(uio_res->maps[i].addr,
> + uio_res->maps[i].size);
> + rte_free(uio_res->maps[i].path);
> + }
> + vmbus_uio_free_resource(dev, uio_res);
> + return -1;
> +}
> +
> +/* Scan one vmbus sysfs entry, and fill the devices list from it. */
> +static int
> +vmbus_scan_one(const char *name)
> +{
> + struct rte_vmbus_device *dev, *dev2;
> + char filename[PATH_MAX];
> + char dirname[PATH_MAX];
> + unsigned long tmp;
> +
> + dev = malloc(sizeof(*dev) + strlen(name) + 1);
> + if (dev == NULL)
> + return -1;
> +
> + memset(dev, 0, sizeof(*dev));
> + strcpy(dev->sysfs_name, name);
> + if (dev->sysfs_name == NULL)
> + goto error;
> +
> + /* sysfs base directory
> + * /sys/bus/vmbus/devices/7a08391f-f5a0-4ac0-9802-d13fd964f8df
> + * or on older kernel
> + * /sys/bus/vmbus/devices/vmbus_1
> + */
> + snprintf(dirname, sizeof(dirname), "%s/%s",
> + SYSFS_VMBUS_DEVICES, name);
> +
> + /* get device id */
> + snprintf(filename, sizeof(filename), "%s/device_id", dirname);
> + if (vmbus_get_sysfs_uuid(filename, dev->device_id) < 0)
> + goto error;
> +
> + /* get device class */
> + snprintf(filename, sizeof(filename), "%s/class_id", dirname);
> + if (vmbus_get_sysfs_uuid(filename, dev->class_id) < 0)
> + goto error;
> +
> + /* get relid */
> + snprintf(filename, sizeof(filename), "%s/id", dirname);
> + if (eal_parse_sysfs_value(filename, &tmp) < 0)
> + goto error;
> + dev->relid = tmp;
> +
> + /* get monitor id */
> + snprintf(filename, sizeof(filename), "%s/monitor_id", dirname);
> + if (eal_parse_sysfs_value(filename, &tmp) < 0)
> + goto error;
> + dev->monitor_id = tmp;
> +
> + /* get numa node */
> + snprintf(filename, sizeof(filename), "%s/numa_node",
> + dirname);
> + if (eal_parse_sysfs_value(filename, &tmp) < 0)
> + /* if no NUMA support, set default to 0 */
> + dev->device.numa_node = 0;
> + else
> + dev->device.numa_node = tmp;
> +
> + /* device is valid, add in list (sorted) */
> + RTE_LOG(DEBUG, EAL, "Adding vmbus device %s\n", name);
> +
> + TAILQ_FOREACH(dev2, &vmbus_device_list, next) {
> + int ret;
> +
> + ret = uuid_compare(dev->device_id, dev->device_id);
> + if (ret > 0)
> + continue;
> +
> + if (ret < 0) {
> + TAILQ_INSERT_BEFORE(dev2, dev, next);
> + rte_eal_device_insert(&dev->device);
> + } else { /* already registered */
> + memmove(dev2->mem_resource, dev->mem_resource,
> + sizeof(dev->mem_resource));
> + free(dev);
> + }
> + return 0;
> + }
> +
> + rte_eal_device_insert(&dev->device);
> + TAILQ_INSERT_TAIL(&vmbus_device_list, dev, next);
> +
> + return 0;
> +error:
> + free(dev);
> + return -1;
> +}
> +
> +/*
> + * Scan the content of the vmbus, and the devices in the devices list
> + */
> +static int
> +vmbus_scan(void)
> +{
> + struct dirent *e;
> + DIR *dir;
> +
> + dir = opendir(SYSFS_VMBUS_DEVICES);
> + if (dir == NULL) {
> + if (errno == ENOENT)
> + return 0;
> +
> + RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
> + __func__, strerror(errno));
> + return -1;
> + }
> +
> + while ((e = readdir(dir)) != NULL) {
> + if (e->d_name[0] == '.')
> + continue;
> +
> + if (vmbus_scan_one(e->d_name) < 0)
> + goto error;
> + }
> + closedir(dir);
> + return 0;
> +
> +error:
> + closedir(dir);
> + return -1;
> +}
> +
> +/* Init the VMBUS EAL subsystem */
> +int rte_eal_vmbus_init(void)
> +{
> + /* VMBUS can be disabled */
> + if (internal_config.no_vmbus)
> + return 0;
> +
> + if (vmbus_scan() < 0) {
> + RTE_LOG(ERR, EAL, "%s(): Cannot scan vmbus\n", __func__);
> + return -1;
> + }
> + return 0;
> +}
> +
> +/* Below is PROBE part of eal_vmbus library */
> +
> +/*
> + * If device ID match, call the devinit() function of the driver.
> + */
> +static int
> +rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
> + struct rte_vmbus_device *dev)
> +{
> + const uuid_t *id_table;
> +
> + RTE_LOG(DEBUG, EAL, " probe driver: %s\n", dr->driver.name);
> +
> + for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
> + struct rte_devargs *args;
> + char guid[UUID_BUF_SZ];
> + int ret;
> +
> + /* skip devices not assocaited with this device class */
> + if (uuid_compare(*id_table, dev->class_id) != 0)
> + continue;
> +
> + uuid_unparse(dev->device_id, guid);
> + RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
> + guid, dev->device.numa_node);
> +
> + /* no initialization when blacklisted, return without error */
> + args = dev->device.devargs;
> + if (args && args->type == RTE_DEVTYPE_BLACKLISTED_VMBUS) {
> + RTE_LOG(INFO, EAL, " Device is blacklisted, not initializing\n");
> + return 1;
> + }
> +
> + RTE_LOG(INFO, EAL, " probe driver: %s\n", dr->driver.name);
> +
> + /* map resources for device */
> + ret = rte_eal_vmbus_map_device(dev);
> + if (ret != 0)
> + return ret;
> +
> + /* reference driver structure */
> + dev->driver = dr;
> +
> + /* call the driver probe() function */
> + ret = dr->probe(dr, dev);
> + if (ret)
> + dev->driver = NULL;
> +
> + return ret;
> + }
> +
> + /* return positive value if driver doesn't support this device */
> + return 1;
> +}
> +
> +
> +/*
> + * If vendor/device ID match, call the remove() function of the
> + * driver.
> + */
> +static int
> +vmbus_detach_dev(struct rte_vmbus_driver *dr,
> + struct rte_vmbus_device *dev)
> +{
> + const uuid_t *id_table;
> +
> + for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
> + char guid[UUID_BUF_SZ];
> +
> + /* skip devices not assocaited with this device class */
> + if (uuid_compare(*id_table, dev->class_id) != 0)
> + continue;
> +
> + uuid_unparse(dev->device_id, guid);
> + RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
> + guid, dev->device.numa_node);
> +
> + RTE_LOG(DEBUG, EAL, " remove driver: %s\n", dr->driver.name);
> +
> + if (dr->remove && (dr->remove(dev) < 0))
> + return -1; /* negative value is an error */
> +
> + /* clear driver structure */
> + dev->driver = NULL;
> +
> + vmbus_uio_unmap_resource(dev);
> + return 0;
> + }
> +
> + /* return positive value if driver doesn't support this device */
> + return 1;
> +}
> +
> +/*
> + * call the devinit() function of all
> + * registered drivers for the vmbus device. Return -1 if no driver is
> + * found for this class of vmbus device.
> + * The present assumption is that we have drivers only for vmbus network
> + * devices. That's why we don't check driver's id_table now.
> + */
> +static int
> +vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
> +{
> + struct rte_vmbus_driver *dr = NULL;
> + int ret;
> +
> + TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
> + ret = rte_eal_vmbus_probe_one_driver(dr, dev);
> + if (ret < 0) {
> + /* negative value is an error */
> + RTE_LOG(ERR, EAL, "Failed to probe driver %s\n",
> + dr->driver.name);
> + return -1;
> + }
> + /* positive value means driver doesn't support it */
> + if (ret > 0)
> + continue;
> +
> + return 0;
> + }
> +
> + return 1;
> +}
> +
> +
> +/*
> + * If device ID matches, call the remove() function of all
> + * registered driver for the given device. Return -1 if initialization
> + * failed, return 1 if no driver is found for this device.
> + */
> +static int
> +vmbus_detach_all_drivers(struct rte_vmbus_device *dev)
> +{
> + struct rte_vmbus_driver *dr;
> + int rc = 0;
> +
> + if (dev == NULL)
> + return -1;
> +
> + TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
> + rc = vmbus_detach_dev(dr, dev);
> + if (rc < 0)
> + /* negative value is an error */
> + return -1;
> + if (rc > 0)
> + /* positive value means driver doesn't support it */
> + continue;
> + return 0;
> + }
> + return 1;
> +}
> +
> +/* Detach device specified by its VMBUS id */
> +int
> +rte_eal_vmbus_detach(uuid_t device_id)
> +{
> + struct rte_vmbus_device *dev;
> + char ubuf[UUID_BUF_SZ];
> +
> + TAILQ_FOREACH(dev, &vmbus_device_list, next) {
> + if (uuid_compare(dev->device_id, device_id) != 0)
> + continue;
> +
> + if (vmbus_detach_all_drivers(dev) < 0)
> + goto err_return;
> +
> + TAILQ_REMOVE(&vmbus_device_list, dev, next);
> + free(dev);
> + return 0;
> + }
> + return -1;
> +
> +err_return:
> + uuid_unparse(device_id, ubuf);
> + RTE_LOG(WARNING, EAL, "Requested device %s cannot be used\n",
> + ubuf);
> + return -1;
> +}
> +
> +/*
> + * Scan the vmbus, and call the devinit() function for
> + * all registered drivers that have a matching entry in its id_table
> + * for discovered devices.
> + */
> +int
> +rte_eal_vmbus_probe(void)
> +{
> + struct rte_vmbus_device *dev = NULL;
> +
> + TAILQ_FOREACH(dev, &vmbus_device_list, next) {
> + char ubuf[UUID_BUF_SZ];
> +
> + uuid_unparse(dev->device_id, ubuf);
> +
> + RTE_LOG(DEBUG, EAL, "Probing driver for device %s ...\n",
> + ubuf);
> + vmbus_probe_all_drivers(dev);
> + }
> + return 0;
> +}
> +
> +/* register vmbus driver */
> +void
> +rte_eal_vmbus_register(struct rte_vmbus_driver *driver)
> +{
> + TAILQ_INSERT_TAIL(&vmbus_driver_list, driver, next);
> +}
> +
> +/* unregister vmbus driver */
> +void
> +rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver)
> +{
> + TAILQ_REMOVE(&vmbus_driver_list, driver, next);
> +}
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index 7c212096..b69af0f0 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -3334,3 +3334,93 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
> -ENOTSUP);
> return (*dev->dev_ops->l2_tunnel_offload_set)(dev, l2_tunnel, mask, en);
> }
> +
> +
> +#ifdef RTE_LIBRTE_HV_PMD
> +int
> +rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
> + struct rte_vmbus_device *vmbus_dev)
> +{
> + struct eth_driver *eth_drv = (struct eth_driver *)vmbus_drv;
> + struct rte_eth_dev *eth_dev;
> + char ustr[UUID_BUF_SZ];
> + int diag;
> +
> + uuid_unparse(vmbus_dev->device_id, ustr);
> +
> + eth_dev = rte_eth_dev_allocate(ustr);
> + if (eth_dev == NULL)
> + return -ENOMEM;
> +
> + if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> + eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
> + eth_drv->dev_private_size,
> + RTE_CACHE_LINE_SIZE);
> + if (eth_dev->data->dev_private == NULL)
> + rte_panic("Cannot allocate memzone for private port data\n");
> + }
> +
> + eth_dev->device = &vmbus_dev->device;
> + eth_dev->driver = eth_drv;
> + eth_dev->data->rx_mbuf_alloc_failed = 0;
> +
> + /* init user callbacks */
> + TAILQ_INIT(&(eth_dev->link_intr_cbs));
> +
> + /*
> + * Set the default maximum frame size.
> + */
> + eth_dev->data->mtu = ETHER_MTU;
Initialization of default values has moved into rte_eth_dev_allocate().
> +
> + /* Invoke PMD device initialization function */
> + diag = (*eth_drv->eth_dev_init)(eth_dev);
> + if (diag == 0)
> + return 0;
> +
> + RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(%s) failed\n",
> + vmbus_drv->driver.name, ustr);
> +
> + if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> + rte_free(eth_dev->data->dev_private);
> +
> + return diag;
> +}
> +
> +int
> +rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev)
> +{
> + const struct eth_driver *eth_drv;
> + struct rte_eth_dev *eth_dev;
> + char ustr[UUID_BUF_SZ];
> + int ret;
> +
> + if (vmbus_dev == NULL)
> + return -EINVAL;
> +
> + uuid_unparse(vmbus_dev->device_id, ustr);
> + eth_dev = rte_eth_dev_allocated(ustr);
> + if (eth_dev == NULL)
> + return -ENODEV;
> +
> + eth_drv = (const struct eth_driver *)vmbus_dev->driver;
> +
> + /* Invoke PMD device uninit function */
> + if (*eth_drv->eth_dev_uninit) {
> + ret = (*eth_drv->eth_dev_uninit)(eth_dev);
> + if (ret)
> + return ret;
> + }
> +
> + /* free ether device */
> + rte_eth_dev_release_port(eth_dev);
> +
> + if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> + rte_free(eth_dev->data->dev_private);
> +
> + eth_dev->device = NULL;
> + eth_dev->driver = NULL;
> + eth_dev->data = NULL;
> +
> + return 0;
> +}
> +#endif
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index 1a62a322..2a8c1eed 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -180,6 +180,9 @@ extern "C" {
> #include <rte_log.h>
> #include <rte_interrupts.h>
> #include <rte_pci.h>
> +#ifdef RTE_LIBRTE_HV_PMD
> +#include <rte_vmbus.h>
> +#endif
> #include <rte_dev.h>
> #include <rte_devargs.h>
> #include <rte_errno.h>
> @@ -1908,6 +1911,17 @@ struct rte_pci_eth_driver {
> struct eth_driver eth_drv; /**< Ethernet driver. */
> };
>
> +#ifdef RTE_LIBRTE_HV_PMD
> +/**
> + * @internal
> + * The structure associated with a PMD VMBUS Ethernet driver.
> + */
> +struct rte_vmbus_eth_driver {
> + struct rte_vmbus_driver vmbus_drv; /**< Underlying VMBUS driver. */
> + struct eth_driver eth_drv; /**< Ethernet driver. */
> +};
> +#endif
> +
> /**
> * Convert a numerical speed in Mbps to a bitmap flag that can be used in
> * the bitmap link_speeds of the struct rte_eth_conf
> @@ -4543,6 +4557,23 @@ int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
> */
> int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev);
>
> +#ifdef RTE_LIBRTE_HV_PMD
> +/**
> + * @internal
> + * Wrapper for use by vmbus drivers as a .probe function to attach to a ethdev
> + * interface.
> + */
> +int rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
> + struct rte_vmbus_device *vmbus_dev);
> +
> +/**
> + * @internal
> + * Wrapper for use by vmbus drivers as a .remove function to detach a ethdev
> + * interface.
> + */
> +int rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev);
> +#endif
I don't think that replicating the PCI probe/remove wrappers is the
right thing to do. To me it looks like this should move into the
rte_vmbus_driver's probe function instead. That way the ethdev header
can decoupled from the low-level device implementations.
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index f75f0e24..6b304084 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -130,6 +130,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
> _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += -lrte_pmd_vhost
> endif # $(CONFIG_RTE_LIBRTE_VHOST)
> _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += -lrte_pmd_vmxnet3_uio
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_HV_PMD) += -luuid
>
> ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
> _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB) += -lrte_pmd_aesni_mb
> --
> 2.11.0
>
On Tue, 10 Jan 2017 18:27:31 +0100
Jan Blunck <jblunck@infradead.org> wrote:
> > +#ifdef RTE_LIBRTE_HV_PMD
> > +/**
> > + * @internal
> > + * Wrapper for use by vmbus drivers as a .probe function to attach to a ethdev
> > + * interface.
> > + */
> > +int rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
> > + struct rte_vmbus_device *vmbus_dev);
> > +
> > +/**
> > + * @internal
> > + * Wrapper for use by vmbus drivers as a .remove function to detach a ethdev
> > + * interface.
> > + */
> > +int rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev);
> > +#endif
>
> I don't think that replicating the PCI probe/remove wrappers is the
> right thing to do. To me it looks like this should move into the
> rte_vmbus_driver's probe function instead. That way the ethdev header
> can decoupled from the low-level device implementations.
With a real bus model. There would be registration of busses. And the probe would
be:
foreach bus
foreach device on bus
...
On Sat, Jan 7, 2017 at 7:17 PM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> Add support for VMBUS on Hyper-V/Azure. VMBUS is similar to PCI
> but has different addressing and internal API's.
>
> Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
> ---
> lib/librte_eal/common/Makefile | 2 +-
> lib/librte_eal/common/eal_common_devargs.c | 7 +
> lib/librte_eal/common/eal_common_options.c | 38 ++
> lib/librte_eal/common/eal_internal_cfg.h | 1 +
> lib/librte_eal/common/eal_options.h | 6 +
> lib/librte_eal/common/eal_private.h | 5 +
> lib/librte_eal/common/include/rte_devargs.h | 8 +
> lib/librte_eal/common/include/rte_vmbus.h | 249 ++++++++
> lib/librte_eal/linuxapp/eal/Makefile | 6 +
> lib/librte_eal/linuxapp/eal/eal.c | 13 +
> lib/librte_eal/linuxapp/eal/eal_vmbus.c | 911 ++++++++++++++++++++++++++++
> lib/librte_ether/rte_ethdev.c | 90 +++
> lib/librte_ether/rte_ethdev.h | 31 +
> mk/rte.app.mk | 1 +
> 14 files changed, 1367 insertions(+), 1 deletion(-)
> create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
> create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c
>
> diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
> index 09a3d3af..ceb77bed 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>
> INC := rte_branch_prediction.h rte_common.h
> INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
> -INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
> +INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
> INC += rte_per_lcore.h rte_random.h
> INC += rte_tailq.h rte_interrupts.h rte_alarm.h
> INC += rte_string_fns.h rte_version.h
> diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
> index e403717b..934ca840 100644
> --- a/lib/librte_eal/common/eal_common_devargs.c
> +++ b/lib/librte_eal/common/eal_common_devargs.c
> @@ -113,6 +113,13 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
> goto fail;
>
> break;
> + case RTE_DEVTYPE_WHITELISTED_VMBUS:
> + case RTE_DEVTYPE_BLACKLISTED_VMBUS:
> +#ifdef RTE_LIBRTE_HV_PMD
> + if (uuid_parse(buf, devargs->uuid) == 0)
> + break;
> +#endif
> + goto fail;
> }
>
> free(buf);
> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> index f36bc556..1a2b418c 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -95,6 +95,11 @@ eal_long_options[] = {
> {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
> {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
> {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM },
> +#ifdef RTE_LIBRTE_HV_PMD
> + {OPT_NO_VMBUS, 0, NULL, OPT_NO_VMBUS_NUM },
> + {OPT_VMBUS_BLACKLIST, 1, NULL, OPT_VMBUS_BLACKLIST_NUM },
> + {OPT_VMBUS_WHITELIST, 1, NULL, OPT_VMBUS_WHITELIST_NUM },
> +#endif
> {0, 0, NULL, 0 }
> };
>
> @@ -858,6 +863,21 @@ eal_parse_common_option(int opt, const char *optarg,
> conf->no_pci = 1;
> break;
>
> +#ifdef RTE_LIBRTE_HV_PMD
> + case OPT_NO_VMBUS_NUM:
> + conf->no_vmbus = 1;
> + break;
> + case OPT_VMBUS_BLACKLIST_NUM:
> + if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_VMBUS,
> + optarg) < 0)
> + return -1;
> + break;
> + case OPT_VMBUS_WHITELIST_NUM:
> + if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_VMBUS,
> + optarg) < 0)
> + return -1;
> + break;
> +#endif
> case OPT_NO_HPET_NUM:
> conf->no_hpet = 1;
> break;
> @@ -1017,6 +1037,14 @@ eal_check_common_options(struct internal_config *internal_cfg)
> return -1;
> }
>
> +#ifdef RTE_LIBRTE_HV_PMD
> + if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_VMBUS) != 0 &&
> + rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_VMBUS) != 0) {
> + RTE_LOG(ERR, EAL, "Options vmbus blacklist and whitelist "
> + "cannot be used at the same time\n");
> + return -1;
> + }
> +#endif
> return 0;
> }
>
> @@ -1066,5 +1094,15 @@ eal_common_usage(void)
> " --"OPT_NO_PCI" Disable PCI\n"
> " --"OPT_NO_HPET" Disable HPET\n"
> " --"OPT_NO_SHCONF" No shared config (mmap'd files)\n"
> +#ifdef RTE_LIBRTE_HV_PMD
> + " --"OPT_NO_VMBUS" Disable VMBUS\n"
> + " --"OPT_VMBUS_BLACKLIST" Add a VMBUS device to black list.\n"
> + " Prevent EAL from using this PCI device. The argument\n"
> + " format is device UUID.\n"
> + " --"OPT_VMBUS_WHITELIST" Add a VMBUS device to white list.\n"
> + " Only use the specified VMBUS devices. The argument format\n"
> + " is device UUID This option can be present\n"
> + " several times (once per device).\n"
> +#endif
> "\n", RTE_MAX_LCORE);
> }
> diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
> index 5f1367eb..4b6af937 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -67,6 +67,7 @@ struct internal_config {
> unsigned hugepage_unlink; /**< true to unlink backing files */
> volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
> volatile unsigned no_pci; /**< true to disable PCI */
> + volatile unsigned no_vmbus; /**< true to disable VMBUS */
> volatile unsigned no_hpet; /**< true to disable HPET */
> volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
> * instead of native TSC */
> diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
> index a881c62e..156727e7 100644
> --- a/lib/librte_eal/common/eal_options.h
> +++ b/lib/librte_eal/common/eal_options.h
> @@ -83,6 +83,12 @@ enum {
> OPT_VMWARE_TSC_MAP_NUM,
> #define OPT_XEN_DOM0 "xen-dom0"
> OPT_XEN_DOM0_NUM,
> +#define OPT_NO_VMBUS "no-vmbus"
> + OPT_NO_VMBUS_NUM,
> +#define OPT_VMBUS_BLACKLIST "vmbus-blacklist"
> + OPT_VMBUS_BLACKLIST_NUM,
> +#define OPT_VMBUS_WHITELIST "vmbus-whitelist"
> + OPT_VMBUS_WHITELIST_NUM,
> OPT_LONG_MAX_NUM
> };
>
> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
> index 9e7d8f6b..c856c63e 100644
> --- a/lib/librte_eal/common/eal_private.h
> +++ b/lib/librte_eal/common/eal_private.h
> @@ -210,6 +210,11 @@ int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
> struct mapped_pci_resource *uio_res, int map_idx);
>
> /**
> + * VMBUS related functions and structures
> + */
> +int rte_eal_vmbus_init(void);
> +
> +/**
> * Init tail queues for non-EAL library structures. This is to allow
> * the rings, mempools, etc. lists to be shared among multiple processes
> *
> diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
> index 88120a1c..c079d289 100644
> --- a/lib/librte_eal/common/include/rte_devargs.h
> +++ b/lib/librte_eal/common/include/rte_devargs.h
> @@ -51,6 +51,9 @@ extern "C" {
> #include <stdio.h>
> #include <sys/queue.h>
> #include <rte_pci.h>
> +#ifdef RTE_LIBRTE_HV_PMD
> +#include <uuid/uuid.h>
> +#endif
>
> /**
> * Type of generic device
> @@ -59,6 +62,8 @@ enum rte_devtype {
> RTE_DEVTYPE_WHITELISTED_PCI,
> RTE_DEVTYPE_BLACKLISTED_PCI,
> RTE_DEVTYPE_VIRTUAL,
> + RTE_DEVTYPE_WHITELISTED_VMBUS,
> + RTE_DEVTYPE_BLACKLISTED_VMBUS,
> };
>
> /**
> @@ -88,6 +93,9 @@ struct rte_devargs {
> /** Driver name. */
> char drv_name[32];
> } virt;
> +#ifdef RTE_LIBRTE_HV_PMD
> + uuid_t uuid;
> +#endif
> };
> /** Arguments string as given by user or "" for no argument. */
> char *args;
> diff --git a/lib/librte_eal/common/include/rte_vmbus.h b/lib/librte_eal/common/include/rte_vmbus.h
> new file mode 100644
> index 00000000..f96d753e
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_vmbus.h
> @@ -0,0 +1,249 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
> + * Copyright(c) 2016 Microsoft Corporation
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + */
> +
> +#ifndef _RTE_VMBUS_H_
> +#define _RTE_VMBUS_H_
> +
> +/**
> + * @file
> + *
> + * RTE VMBUS Interface
> + */
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <limits.h>
> +#include <errno.h>
> +#include <uuid/uuid.h>
> +#include <sys/queue.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +
> +#include <rte_debug.h>
> +#include <rte_interrupts.h>
> +#include <rte_dev.h>
> +
> +TAILQ_HEAD(vmbus_device_list, rte_vmbus_device);
> +TAILQ_HEAD(vmbus_driver_list, rte_vmbus_driver);
> +
> +extern struct vmbus_driver_list vmbus_driver_list;
> +extern struct vmbus_device_list vmbus_device_list;
> +
> +/** Pathname of VMBUS devices directory. */
> +#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
> +
> +#define UUID_BUF_SZ (36 + 1)
> +
> +
> +/** Maximum number of VMBUS resources. */
> +#define VMBUS_MAX_RESOURCE 7
> +
> +/**
> + * A structure describing a VMBUS device.
> + */
> +struct rte_vmbus_device {
> + TAILQ_ENTRY(rte_vmbus_device) next; /**< Next probed VMBUS device. */
> + struct rte_device device; /**< Inherit core device */
> + uuid_t device_id; /**< VMBUS device id */
> + uuid_t class_id; /**< VMBUS device type */
> + uint32_t relid; /**< VMBUS id for notification */
> + uint8_t monitor_id;
> + struct rte_intr_handle intr_handle; /**< Interrupt handle */
> + const struct rte_vmbus_driver *driver; /**< Associated driver */
> +
> + struct rte_mem_resource mem_resource[VMBUS_MAX_RESOURCE];
> + /**< VMBUS Memory Resource */
> + char sysfs_name[]; /**< Name in sysfs bus directory */
> +};
> +
> +struct rte_vmbus_driver;
> +
> +/**
> + * Initialisation function for the driver called during VMBUS probing.
> + */
> +typedef int (vmbus_probe_t)(struct rte_vmbus_driver *,
> + struct rte_vmbus_device *);
> +
> +/**
> + * Uninitialisation function for the driver called during hotplugging.
> + */
> +typedef int (vmbus_remove_t)(struct rte_vmbus_device *);
> +
> +/**
> + * A structure describing a VMBUS driver.
> + */
> +struct rte_vmbus_driver {
> + TAILQ_ENTRY(rte_vmbus_driver) next; /**< Next in list. */
> + struct rte_driver driver;
> + vmbus_probe_t *probe; /**< Device Probe function. */
> + vmbus_remove_t *remove; /**< Device Remove function. */
> +
> + const uuid_t *id_table; /**< ID table. */
> +};
> +
> +struct vmbus_map {
> + void *addr;
> + char *path;
> + uint64_t offset;
> + uint64_t size;
> + uint64_t phaddr;
> +};
> +
> +/*
> + * For multi-process we need to reproduce all vmbus mappings in secondary
> + * processes, so save them in a tailq.
> + */
> +struct mapped_vmbus_resource {
> + TAILQ_ENTRY(mapped_vmbus_resource) next;
> +
> + uuid_t uuid;
> + char path[PATH_MAX];
> + int nb_maps;
> + struct vmbus_map maps[VMBUS_MAX_RESOURCE];
> +};
> +
> +TAILQ_HEAD(mapped_vmbus_res_list, mapped_vmbus_resource);
> +
> +/**
> + * Scan the content of the VMBUS bus, and the devices in the devices list
> + *
> + * @return
> + * 0 on success, negative on error
> + */
> +int rte_eal_vmbus_scan(void);
> +
> +/**
> + * Probe the VMBUS bus for registered drivers.
> + *
> + * Scan the content of the VMBUS bus, and call the probe() function for
> + * all registered drivers that have a matching entry in its id_table
> + * for discovered devices.
> + *
> + * @return
> + * - 0 on success.
> + * - Negative on error.
> + */
> +int rte_eal_vmbus_probe(void);
> +
> +/**
> + * Map the VMBUS device resources in user space virtual memory address
> + *
> + * @param dev
> + * A pointer to a rte_vmbus_device structure describing the device
> + * to use
> + *
> + * @return
> + * 0 on success, negative on error and positive if no driver
> + * is found for the device.
> + */
> +int rte_eal_vmbus_map_device(struct rte_vmbus_device *dev);
> +
> +/**
> + * Unmap this device
> + *
> + * @param dev
> + * A pointer to a rte_vmbus_device structure describing the device
> + * to use
> + */
> +void rte_eal_vmbus_unmap_device(struct rte_vmbus_device *dev);
> +
> +/**
> + * Probe the single VMBUS device.
> + *
> + * Scan the content of the VMBUS bus, and find the vmbus device
> + * specified by device uuid, then call the probe() function for
> + * registered driver that has a matching entry in its id_table for
> + * discovered device.
> + *
> + * @param id
> + * The VMBUS device uuid.
> + * @return
> + * - 0 on success.
> + * - Negative on error.
> + */
> +int rte_eal_vmbus_probe_one(uuid_t id);
> +
> +/**
> + * Close the single VMBUS device.
> + *
> + * Scan the content of the VMBUS bus, and find the vmbus device id,
> + * then call the remove() function for registered driver that has a
> + * matching entry in its id_table for discovered device.
> + *
> + * @param id
> + * The VMBUS device uuid.
> + * @return
> + * - 0 on success.
> + * - Negative on error.
> + */
> +int rte_eal_vmbus_detach(uuid_t id);
> +
> +/**
> + * Register a VMBUS driver.
> + *
> + * @param driver
> + * A pointer to a rte_vmbus_driver structure describing the driver
> + * to be registered.
> + */
> +void rte_eal_vmbus_register(struct rte_vmbus_driver *driver);
> +
> +/** Helper for VMBUS device registration from driver nstance */
> +#define RTE_PMD_REGISTER_VMBUS(nm, vmbus_drv) \
> +RTE_INIT(vmbusinitfn_ ##nm); \
> +static void vmbusinitfn_ ##nm(void) \
> +{\
> + (vmbus_drv).driver.name = RTE_STR(nm);\
> + (vmbus_drv).driver.type = PMD_VMBUS; \
> + rte_eal_vmbus_register(&vmbus_drv); \
> +} \
> +RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
> +
> +/**
> + * Unregister a VMBUS driver.
> + *
> + * @param driver
> + * A pointer to a rte_vmbus_driver structure describing the driver
> + * to be unregistered.
> + */
> +void rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver);
The register/unregister need to get exported via the map file too.
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_VMBUS_H_ */
> diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
> index 4e206f09..f6ca3848 100644
> --- a/lib/librte_eal/linuxapp/eal/Makefile
> +++ b/lib/librte_eal/linuxapp/eal/Makefile
> @@ -71,6 +71,11 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
>
> +ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
> +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vmbus.c
> +LDLIBS += -luuid
> +endif
> +
> # from common dir
> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
> @@ -114,6 +119,7 @@ CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
> CFLAGS_eal_pci.o := -D_GNU_SOURCE
> CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE
> CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE
> +CFLAGS_eal_vmbux.o := -D_GNU_SOURCE
> CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
> CFLAGS_eal_common_options.o := -D_GNU_SOURCE
> CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
> index 16dd5b9c..1bc0814a 100644
> --- a/lib/librte_eal/linuxapp/eal/eal.c
> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> @@ -70,6 +70,9 @@
> #include <rte_cpuflags.h>
> #include <rte_interrupts.h>
> #include <rte_pci.h>
> +#ifdef RTE_LIBRTE_HV_PMD
> +#include <rte_vmbus.h>
> +#endif
> #include <rte_dev.h>
> #include <rte_devargs.h>
> #include <rte_common.h>
> @@ -830,6 +833,11 @@ rte_eal_init(int argc, char **argv)
>
> eal_check_mem_on_local_socket();
>
> +#ifdef RTE_LIBRTE_HV_PMD
> + if (rte_eal_vmbus_init() < 0)
> + RTE_LOG(ERR, EAL, "Cannot init VMBUS\n");
> +#endif
> +
> if (eal_plugins_init() < 0)
> rte_panic("Cannot init plugins\n");
>
> @@ -884,6 +892,11 @@ rte_eal_init(int argc, char **argv)
> if (rte_eal_pci_probe())
> rte_panic("Cannot probe PCI\n");
>
> +#ifdef RTE_LIBRTE_HV_PMD
> + if (rte_eal_vmbus_probe() < 0)
> + rte_panic("Cannot probe VMBUS\n");
> +#endif
> +
> if (rte_eal_dev_init() < 0)
> rte_panic("Cannot init pmd devices\n");
>
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vmbus.c b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
> new file mode 100644
> index 00000000..729f93a9
> --- /dev/null
> +++ b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
> @@ -0,0 +1,911 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
> + * Copyright(c) 2016 Microsoft Corporation
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + */
> +
> +#include <string.h>
> +#include <unistd.h>
> +#include <dirent.h>
> +#include <fcntl.h>
> +#include <sys/mman.h>
> +
> +#include <rte_eal.h>
> +#include <rte_tailq.h>
> +#include <rte_log.h>
> +#include <rte_devargs.h>
> +#include <rte_vmbus.h>
> +#include <rte_malloc.h>
> +
> +#include "eal_private.h"
> +#include "eal_pci_init.h"
> +#include "eal_filesystem.h"
> +
> +struct vmbus_driver_list vmbus_driver_list =
> + TAILQ_HEAD_INITIALIZER(vmbus_driver_list);
> +struct vmbus_device_list vmbus_device_list =
> + TAILQ_HEAD_INITIALIZER(vmbus_device_list);
> +
> +static void *vmbus_map_addr;
> +
> +static struct rte_tailq_elem rte_vmbus_uio_tailq = {
> + .name = "UIO_RESOURCE_LIST",
> +};
> +EAL_REGISTER_TAILQ(rte_vmbus_uio_tailq);
> +
> +/*
> + * parse a sysfs file containing one integer value
> + * different to the eal version, as it needs to work with 64-bit values
> + */
> +static int
> +vmbus_get_sysfs_uuid(const char *filename, uuid_t uu)
> +{
> + char buf[BUFSIZ];
> + char *cp, *in = buf;
> + FILE *f;
> +
> + f = fopen(filename, "r");
> + if (f == NULL) {
> + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
> + __func__, filename);
> + return -1;
> + }
> +
> + if (fgets(buf, sizeof(buf), f) == NULL) {
> + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
> + __func__, filename);
> + fclose(f);
> + return -1;
> + }
> + fclose(f);
> +
> + cp = strchr(buf, '\n');
> + if (cp)
> + *cp = '\0';
> +
> + /* strip { } notation */
> + if (buf[0] == '{') {
> + in = buf + 1;
> + cp = strchr(in, '}');
> + if (cp)
> + *cp = '\0';
> + }
> +
> + if (uuid_parse(in, uu) < 0) {
> + RTE_LOG(ERR, EAL, "%s %s not a valid UUID\n",
> + filename, buf);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +/* map a particular resource from a file */
> +static void *
> +vmbus_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
> + int flags)
> +{
> + void *mapaddr;
> +
> + /* Map the memory resource of device */
> + mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
> + MAP_SHARED | flags, fd, offset);
> + if (mapaddr == MAP_FAILED ||
> + (requested_addr != NULL && mapaddr != requested_addr)) {
> + RTE_LOG(ERR, EAL,
> + "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s)\n",
> + __func__, fd, requested_addr,
> + (unsigned long)size, (unsigned long)offset,
> + strerror(errno));
> + } else
> + RTE_LOG(DEBUG, EAL, " VMBUS memory mapped at %p\n", mapaddr);
> +
> + return mapaddr;
> +}
> +
> +/* unmap a particular resource */
> +static void
> +vmbus_unmap_resource(void *requested_addr, size_t size)
> +{
> + if (requested_addr == NULL)
> + return;
> +
> + /* Unmap the VMBUS memory resource of device */
> + if (munmap(requested_addr, size)) {
> + RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
> + __func__, requested_addr, (unsigned long)size,
> + strerror(errno));
> + } else
> + RTE_LOG(DEBUG, EAL, " VMBUS memory unmapped at %p\n",
> + requested_addr);
> +}
> +
> +/* Only supports current kernel version
> + * Unlike PCI there is no option (or need) to create UIO device.
> + */
> +static int vmbus_get_uio_dev(const char *name,
> + char *dstbuf, size_t buflen)
> +{
> + char dirname[PATH_MAX];
> + unsigned int uio_num;
> + struct dirent *e;
> + DIR *dir;
> +
> + snprintf(dirname, sizeof(dirname),
> + "/sys/bus/vmbus/devices/%s/uio", name);
> +
> + dir = opendir(dirname);
> + if (dir == NULL) {
> + RTE_LOG(ERR, EAL, "Cannot map uio resources for %s: %s\n",
> + name, strerror(errno));
> + return -1;
> + }
> +
> + /* take the first file starting with "uio" */
> + while ((e = readdir(dir)) != NULL) {
> + if (sscanf(e->d_name, "uio%u", &uio_num) != 1)
> + continue;
> +
> + snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num);
> + break;
> + }
> + closedir(dir);
> +
> + return e ? (int) uio_num : -1;
> +}
> +
> +/*
> + * parse a sysfs file containing one integer value
> + * different to the eal version, as it needs to work with 64-bit values
> + */
> +static int
> +vmbus_parse_sysfs_value(const char *dir, const char *name,
> + uint64_t *val)
> +{
> + char filename[PATH_MAX];
> + FILE *f;
> + char buf[BUFSIZ];
> + char *end = NULL;
> +
> + snprintf(filename, sizeof(filename), "%s/%s", dir, name);
> + f = fopen(filename, "r");
> + if (f == NULL) {
> + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
> + __func__, filename);
> + return -1;
> + }
> +
> + if (fgets(buf, sizeof(buf), f) == NULL) {
> + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
> + __func__, filename);
> + fclose(f);
> + return -1;
> + }
> + fclose(f);
> +
> + *val = strtoull(buf, &end, 0);
> + if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
> + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
> + __func__, filename);
> + return -1;
> + }
> + return 0;
> +}
> +
> +/* Get mappings out of values provided by uio */
> +static int
> +vmbus_uio_get_mappings(const char *uioname,
> + struct vmbus_map maps[])
> +{
> + int i;
> +
> + for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
> + struct vmbus_map *map = &maps[i];
> + char dirname[PATH_MAX];
> +
> + /* check if map directory exists */
> + snprintf(dirname, sizeof(dirname),
> + "%s/maps/map%d", uioname, i);
> +
> + if (access(dirname, F_OK) != 0)
> + break;
> +
> + /* get mapping offset */
> + if (vmbus_parse_sysfs_value(dirname, "offset",
> + &map->offset) < 0)
> + return -1;
> +
> + /* get mapping size */
> + if (vmbus_parse_sysfs_value(dirname, "size",
> + &map->size) < 0)
> + return -1;
> +
> + /* get mapping physical address */
> + if (vmbus_parse_sysfs_value(dirname, "addr",
> + &maps->phaddr) < 0)
> + return -1;
> + }
> +
> + return i;
> +}
> +
> +static void
> +vmbus_uio_free_resource(struct rte_vmbus_device *dev,
> + struct mapped_vmbus_resource *uio_res)
> +{
> + rte_free(uio_res);
> +
> + if (dev->intr_handle.fd) {
> + close(dev->intr_handle.fd);
> + dev->intr_handle.fd = -1;
> + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
> + }
> +}
> +
> +static struct mapped_vmbus_resource *
> +vmbus_uio_alloc_resource(struct rte_vmbus_device *dev)
> +{
> + struct mapped_vmbus_resource *uio_res;
> + char dirname[PATH_MAX], devname[PATH_MAX];
> + int uio_num, nb_maps;
> +
> + uio_num = vmbus_get_uio_dev(dev->sysfs_name, dirname, sizeof(dirname));
> + if (uio_num < 0) {
> + RTE_LOG(WARNING, EAL,
> + " %s not managed by UIO driver, skipping\n",
> + dev->sysfs_name);
> + return NULL;
> + }
> +
> + /* allocate the mapping details for secondary processes*/
> + uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
> + if (uio_res == NULL) {
> + RTE_LOG(ERR, EAL,
> + "%s(): cannot store uio mmap details\n", __func__);
> + goto error;
> + }
> +
> + snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
> + dev->intr_handle.fd = open(devname, O_RDWR);
> + if (dev->intr_handle.fd < 0) {
> + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> + devname, strerror(errno));
> + goto error;
> + }
> +
> + dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
> +
> + snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
> + uuid_copy(uio_res->uuid, dev->device_id);
> +
> + nb_maps = vmbus_uio_get_mappings(dirname, uio_res->maps);
> + if (nb_maps < 0)
> + goto error;
> +
> + RTE_LOG(DEBUG, EAL, "Found %d memory maps for device %s\n",
> + nb_maps, dev->sysfs_name);
> +
> + return uio_res;
> +
> + error:
> + vmbus_uio_free_resource(dev, uio_res);
> + return NULL;
> +}
> +
> +static int
> +vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev,
> + unsigned int res_idx,
> + struct mapped_vmbus_resource *uio_res,
> + unsigned int map_idx)
> +{
> + struct vmbus_map *maps = uio_res->maps;
> + char devname[PATH_MAX];
> + void *mapaddr;
> + int fd;
> +
> + snprintf(devname, sizeof(devname),
> + "/sys/bus/vmbus/%s/resource%u", dev->sysfs_name, res_idx);
> +
> + fd = open(devname, O_RDWR);
> + if (fd < 0) {
> + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> + devname, strerror(errno));
> + return -1;
> + }
> +
> + /* allocate memory to keep path */
> + maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
> + if (maps[map_idx].path == NULL) {
> + RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
> + strerror(errno));
> + return -1;
> + }
> +
> + /* try mapping somewhere close to the end of hugepages */
> + if (vmbus_map_addr == NULL)
> + vmbus_map_addr = pci_find_max_end_va();
> +
> + mapaddr = vmbus_map_resource(vmbus_map_addr, fd, 0,
> + dev->mem_resource[res_idx].len, 0);
> + close(fd);
> + if (mapaddr == MAP_FAILED) {
> + rte_free(maps[map_idx].path);
> + return -1;
> + }
> +
> + vmbus_map_addr = RTE_PTR_ADD(mapaddr,
> + dev->mem_resource[res_idx].len);
> +
> + maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
> + maps[map_idx].size = dev->mem_resource[res_idx].len;
> + maps[map_idx].addr = mapaddr;
> + maps[map_idx].offset = 0;
> + strcpy(maps[map_idx].path, devname);
> + dev->mem_resource[res_idx].addr = mapaddr;
> +
> + return 0;
> +}
> +
> +static void
> +vmbus_uio_unmap(struct mapped_vmbus_resource *uio_res)
> +{
> + int i;
> +
> + if (uio_res == NULL)
> + return;
> +
> + for (i = 0; i != uio_res->nb_maps; i++) {
> + vmbus_unmap_resource(uio_res->maps[i].addr,
> + uio_res->maps[i].size);
> +
> + if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> + rte_free(uio_res->maps[i].path);
> + }
> +}
> +
> +static struct mapped_vmbus_resource *
> +vmbus_uio_find_resource(struct rte_vmbus_device *dev)
> +{
> + struct mapped_vmbus_resource *uio_res;
> + struct mapped_vmbus_res_list *uio_res_list =
> + RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
> + mapped_vmbus_res_list);
> +
> + if (dev == NULL)
> + return NULL;
> +
> + TAILQ_FOREACH(uio_res, uio_res_list, next) {
> + if (uuid_compare(uio_res->uuid, dev->device_id) == 0)
> + return uio_res;
> + }
> + return NULL;
> +}
> +
> +/* unmap the VMBUS resource of a VMBUS device in virtual memory */
> +static void
> +vmbus_uio_unmap_resource(struct rte_vmbus_device *dev)
> +{
> + struct mapped_vmbus_resource *uio_res;
> + struct mapped_vmbus_res_list *uio_res_list =
> + RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
> + mapped_vmbus_res_list);
> +
> + if (dev == NULL)
> + return;
> +
> + /* find an entry for the device */
> + uio_res = vmbus_uio_find_resource(dev);
> + if (uio_res == NULL)
> + return;
> +
> + /* secondary processes - just free maps */
> + if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> + return vmbus_uio_unmap(uio_res);
> +
> + TAILQ_REMOVE(uio_res_list, uio_res, next);
> +
> + /* unmap all resources */
> + vmbus_uio_unmap(uio_res);
> +
> + /* free uio resource */
> + rte_free(uio_res);
> +
> + /* close fd if in primary process */
> + close(dev->intr_handle.fd);
> + if (dev->intr_handle.uio_cfg_fd >= 0) {
> + close(dev->intr_handle.uio_cfg_fd);
> + dev->intr_handle.uio_cfg_fd = -1;
> + }
> +
> + dev->intr_handle.fd = -1;
> + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
> +}
> +
> +static int
> +vmbus_uio_map_secondary(struct rte_vmbus_device *dev)
> +{
> + struct mapped_vmbus_resource *uio_res;
> + struct mapped_vmbus_res_list *uio_res_list =
> + RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
> + mapped_vmbus_res_list);
> +
> + TAILQ_FOREACH(uio_res, uio_res_list, next) {
> + int i;
> +
> + /* skip this element if it doesn't match our id */
> + if (uuid_compare(uio_res->uuid, dev->device_id))
> + continue;
> +
> + for (i = 0; i != uio_res->nb_maps; i++) {
> + void *mapaddr;
> + int fd;
> +
> + fd = open(uio_res->maps[i].path, O_RDWR);
> + if (fd < 0) {
> + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> + uio_res->maps[i].path, strerror(errno));
> + return -1;
> + }
> +
> + mapaddr = vmbus_map_resource(uio_res->maps[i].addr, fd,
> + uio_res->maps[i].offset,
> + uio_res->maps[i].size, 0);
> + /* fd is not needed in slave process, close it */
> + close(fd);
> +
> + if (mapaddr == uio_res->maps[i].addr)
> + continue;
> +
> + RTE_LOG(ERR, EAL,
> + "Cannot mmap device resource file %s to address: %p\n",
> + uio_res->maps[i].path,
> + uio_res->maps[i].addr);
> +
> + /* unmap addrs correctly mapped */
> + while (i != 0) {
> + --i;
> + vmbus_unmap_resource(uio_res->maps[i].addr,
> + uio_res->maps[i].size);
> + }
> + return -1;
> +
> + }
> + return 0;
> + }
> +
> + RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
> + return 1;
> +}
> +
> +/* map the resources of a vmbus device in virtual memory */
> +int
> +rte_eal_vmbus_map_device(struct rte_vmbus_device *dev)
> +{
> + struct mapped_vmbus_resource *uio_res;
> + struct mapped_vmbus_res_list *uio_res_list =
> + RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head, mapped_vmbus_res_list);
> + int i, ret, map_idx = 0;
> +
> + dev->intr_handle.fd = -1;
> + dev->intr_handle.uio_cfg_fd = -1;
> + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
> +
> + /* secondary processes - use already recorded details */
> + if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> + return vmbus_uio_map_secondary(dev);
> +
> + /* allocate uio resource */
> + uio_res = vmbus_uio_alloc_resource(dev);
> + if (uio_res == NULL)
> + return -1;
> +
> + /* Map all BARs */
> + for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
> + uint64_t phaddr;
> +
> + /* skip empty BAR */
> + phaddr = dev->mem_resource[i].phys_addr;
> + if (phaddr == 0)
> + continue;
> +
> + ret = vmbus_uio_map_resource_by_index(dev, i,
> + uio_res, map_idx);
> + if (ret)
> + goto error;
> +
> + map_idx++;
> + }
> +
> + uio_res->nb_maps = map_idx;
> +
> + TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
> +
> + return 0;
> +error:
> + for (i = 0; i < map_idx; i++) {
> + vmbus_unmap_resource(uio_res->maps[i].addr,
> + uio_res->maps[i].size);
> + rte_free(uio_res->maps[i].path);
> + }
> + vmbus_uio_free_resource(dev, uio_res);
> + return -1;
> +}
> +
> +/* Scan one vmbus sysfs entry, and fill the devices list from it. */
> +static int
> +vmbus_scan_one(const char *name)
> +{
> + struct rte_vmbus_device *dev, *dev2;
> + char filename[PATH_MAX];
> + char dirname[PATH_MAX];
> + unsigned long tmp;
> +
> + dev = malloc(sizeof(*dev) + strlen(name) + 1);
> + if (dev == NULL)
> + return -1;
> +
> + memset(dev, 0, sizeof(*dev));
> + strcpy(dev->sysfs_name, name);
> + if (dev->sysfs_name == NULL)
> + goto error;
> +
> + /* sysfs base directory
> + * /sys/bus/vmbus/devices/7a08391f-f5a0-4ac0-9802-d13fd964f8df
> + * or on older kernel
> + * /sys/bus/vmbus/devices/vmbus_1
> + */
> + snprintf(dirname, sizeof(dirname), "%s/%s",
> + SYSFS_VMBUS_DEVICES, name);
> +
> + /* get device id */
> + snprintf(filename, sizeof(filename), "%s/device_id", dirname);
> + if (vmbus_get_sysfs_uuid(filename, dev->device_id) < 0)
> + goto error;
> +
> + /* get device class */
> + snprintf(filename, sizeof(filename), "%s/class_id", dirname);
> + if (vmbus_get_sysfs_uuid(filename, dev->class_id) < 0)
> + goto error;
> +
> + /* get relid */
> + snprintf(filename, sizeof(filename), "%s/id", dirname);
> + if (eal_parse_sysfs_value(filename, &tmp) < 0)
> + goto error;
> + dev->relid = tmp;
> +
> + /* get monitor id */
> + snprintf(filename, sizeof(filename), "%s/monitor_id", dirname);
> + if (eal_parse_sysfs_value(filename, &tmp) < 0)
> + goto error;
> + dev->monitor_id = tmp;
> +
> + /* get numa node */
> + snprintf(filename, sizeof(filename), "%s/numa_node",
> + dirname);
> + if (eal_parse_sysfs_value(filename, &tmp) < 0)
> + /* if no NUMA support, set default to 0 */
> + dev->device.numa_node = 0;
> + else
> + dev->device.numa_node = tmp;
> +
> + /* device is valid, add in list (sorted) */
> + RTE_LOG(DEBUG, EAL, "Adding vmbus device %s\n", name);
> +
> + TAILQ_FOREACH(dev2, &vmbus_device_list, next) {
> + int ret;
> +
> + ret = uuid_compare(dev->device_id, dev->device_id);
> + if (ret > 0)
> + continue;
> +
> + if (ret < 0) {
> + TAILQ_INSERT_BEFORE(dev2, dev, next);
> + rte_eal_device_insert(&dev->device);
> + } else { /* already registered */
> + memmove(dev2->mem_resource, dev->mem_resource,
> + sizeof(dev->mem_resource));
> + free(dev);
> + }
> + return 0;
> + }
> +
> + rte_eal_device_insert(&dev->device);
> + TAILQ_INSERT_TAIL(&vmbus_device_list, dev, next);
> +
> + return 0;
> +error:
> + free(dev);
> + return -1;
> +}
> +
> +/*
> + * Scan the content of the vmbus, and the devices in the devices list
> + */
> +static int
> +vmbus_scan(void)
> +{
> + struct dirent *e;
> + DIR *dir;
> +
> + dir = opendir(SYSFS_VMBUS_DEVICES);
> + if (dir == NULL) {
> + if (errno == ENOENT)
> + return 0;
> +
> + RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
> + __func__, strerror(errno));
> + return -1;
> + }
> +
> + while ((e = readdir(dir)) != NULL) {
> + if (e->d_name[0] == '.')
> + continue;
> +
> + if (vmbus_scan_one(e->d_name) < 0)
> + goto error;
> + }
> + closedir(dir);
> + return 0;
> +
> +error:
> + closedir(dir);
> + return -1;
> +}
> +
> +/* Init the VMBUS EAL subsystem */
> +int rte_eal_vmbus_init(void)
> +{
> + /* VMBUS can be disabled */
> + if (internal_config.no_vmbus)
> + return 0;
> +
> + if (vmbus_scan() < 0) {
> + RTE_LOG(ERR, EAL, "%s(): Cannot scan vmbus\n", __func__);
> + return -1;
> + }
> + return 0;
> +}
> +
> +/* Below is PROBE part of eal_vmbus library */
> +
> +/*
> + * If device ID match, call the devinit() function of the driver.
> + */
> +static int
> +rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
> + struct rte_vmbus_device *dev)
> +{
> + const uuid_t *id_table;
> +
> + RTE_LOG(DEBUG, EAL, " probe driver: %s\n", dr->driver.name);
> +
> + for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
> + struct rte_devargs *args;
> + char guid[UUID_BUF_SZ];
> + int ret;
> +
> + /* skip devices not assocaited with this device class */
> + if (uuid_compare(*id_table, dev->class_id) != 0)
> + continue;
> +
> + uuid_unparse(dev->device_id, guid);
> + RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
> + guid, dev->device.numa_node);
> +
> + /* no initialization when blacklisted, return without error */
> + args = dev->device.devargs;
> + if (args && args->type == RTE_DEVTYPE_BLACKLISTED_VMBUS) {
> + RTE_LOG(INFO, EAL, " Device is blacklisted, not initializing\n");
> + return 1;
> + }
> +
> + RTE_LOG(INFO, EAL, " probe driver: %s\n", dr->driver.name);
> +
> + /* map resources for device */
> + ret = rte_eal_vmbus_map_device(dev);
> + if (ret != 0)
> + return ret;
> +
> + /* reference driver structure */
> + dev->driver = dr;
> +
> + /* call the driver probe() function */
> + ret = dr->probe(dr, dev);
> + if (ret)
> + dev->driver = NULL;
> +
> + return ret;
> + }
> +
> + /* return positive value if driver doesn't support this device */
> + return 1;
> +}
> +
> +
> +/*
> + * If vendor/device ID match, call the remove() function of the
> + * driver.
> + */
> +static int
> +vmbus_detach_dev(struct rte_vmbus_driver *dr,
> + struct rte_vmbus_device *dev)
> +{
> + const uuid_t *id_table;
> +
> + for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
> + char guid[UUID_BUF_SZ];
> +
> + /* skip devices not assocaited with this device class */
> + if (uuid_compare(*id_table, dev->class_id) != 0)
> + continue;
> +
> + uuid_unparse(dev->device_id, guid);
> + RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
> + guid, dev->device.numa_node);
> +
> + RTE_LOG(DEBUG, EAL, " remove driver: %s\n", dr->driver.name);
> +
> + if (dr->remove && (dr->remove(dev) < 0))
> + return -1; /* negative value is an error */
> +
> + /* clear driver structure */
> + dev->driver = NULL;
> +
> + vmbus_uio_unmap_resource(dev);
> + return 0;
> + }
> +
> + /* return positive value if driver doesn't support this device */
> + return 1;
> +}
> +
> +/*
> + * call the devinit() function of all
> + * registered drivers for the vmbus device. Return -1 if no driver is
> + * found for this class of vmbus device.
> + * The present assumption is that we have drivers only for vmbus network
> + * devices. That's why we don't check driver's id_table now.
> + */
> +static int
> +vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
> +{
> + struct rte_vmbus_driver *dr = NULL;
> + int ret;
> +
> + TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
> + ret = rte_eal_vmbus_probe_one_driver(dr, dev);
> + if (ret < 0) {
> + /* negative value is an error */
> + RTE_LOG(ERR, EAL, "Failed to probe driver %s\n",
> + dr->driver.name);
> + return -1;
> + }
> + /* positive value means driver doesn't support it */
> + if (ret > 0)
> + continue;
> +
> + return 0;
> + }
> +
> + return 1;
> +}
> +
> +
> +/*
> + * If device ID matches, call the remove() function of all
> + * registered driver for the given device. Return -1 if initialization
> + * failed, return 1 if no driver is found for this device.
> + */
> +static int
> +vmbus_detach_all_drivers(struct rte_vmbus_device *dev)
> +{
> + struct rte_vmbus_driver *dr;
> + int rc = 0;
> +
> + if (dev == NULL)
> + return -1;
> +
> + TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
> + rc = vmbus_detach_dev(dr, dev);
> + if (rc < 0)
> + /* negative value is an error */
> + return -1;
> + if (rc > 0)
> + /* positive value means driver doesn't support it */
> + continue;
> + return 0;
> + }
> + return 1;
> +}
> +
> +/* Detach device specified by its VMBUS id */
> +int
> +rte_eal_vmbus_detach(uuid_t device_id)
> +{
> + struct rte_vmbus_device *dev;
> + char ubuf[UUID_BUF_SZ];
> +
> + TAILQ_FOREACH(dev, &vmbus_device_list, next) {
> + if (uuid_compare(dev->device_id, device_id) != 0)
> + continue;
> +
> + if (vmbus_detach_all_drivers(dev) < 0)
> + goto err_return;
> +
> + TAILQ_REMOVE(&vmbus_device_list, dev, next);
> + free(dev);
> + return 0;
> + }
> + return -1;
> +
> +err_return:
> + uuid_unparse(device_id, ubuf);
> + RTE_LOG(WARNING, EAL, "Requested device %s cannot be used\n",
> + ubuf);
> + return -1;
> +}
> +
> +/*
> + * Scan the vmbus, and call the devinit() function for
> + * all registered drivers that have a matching entry in its id_table
> + * for discovered devices.
> + */
> +int
> +rte_eal_vmbus_probe(void)
> +{
> + struct rte_vmbus_device *dev = NULL;
> +
> + TAILQ_FOREACH(dev, &vmbus_device_list, next) {
> + char ubuf[UUID_BUF_SZ];
> +
> + uuid_unparse(dev->device_id, ubuf);
> +
> + RTE_LOG(DEBUG, EAL, "Probing driver for device %s ...\n",
> + ubuf);
> + vmbus_probe_all_drivers(dev);
> + }
> + return 0;
> +}
> +
> +/* register vmbus driver */
> +void
> +rte_eal_vmbus_register(struct rte_vmbus_driver *driver)
> +{
> + TAILQ_INSERT_TAIL(&vmbus_driver_list, driver, next);
> +}
> +
> +/* unregister vmbus driver */
> +void
> +rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver)
> +{
> + TAILQ_REMOVE(&vmbus_driver_list, driver, next);
> +}
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index 7c212096..b69af0f0 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -3334,3 +3334,93 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
> -ENOTSUP);
> return (*dev->dev_ops->l2_tunnel_offload_set)(dev, l2_tunnel, mask, en);
> }
> +
> +
> +#ifdef RTE_LIBRTE_HV_PMD
> +int
> +rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
> + struct rte_vmbus_device *vmbus_dev)
> +{
> + struct eth_driver *eth_drv = (struct eth_driver *)vmbus_drv;
> + struct rte_eth_dev *eth_dev;
> + char ustr[UUID_BUF_SZ];
> + int diag;
> +
> + uuid_unparse(vmbus_dev->device_id, ustr);
> +
> + eth_dev = rte_eth_dev_allocate(ustr);
> + if (eth_dev == NULL)
> + return -ENOMEM;
> +
> + if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> + eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
> + eth_drv->dev_private_size,
> + RTE_CACHE_LINE_SIZE);
> + if (eth_dev->data->dev_private == NULL)
> + rte_panic("Cannot allocate memzone for private port data\n");
> + }
> +
> + eth_dev->device = &vmbus_dev->device;
> + eth_dev->driver = eth_drv;
> + eth_dev->data->rx_mbuf_alloc_failed = 0;
> +
> + /* init user callbacks */
> + TAILQ_INIT(&(eth_dev->link_intr_cbs));
> +
> + /*
> + * Set the default maximum frame size.
> + */
> + eth_dev->data->mtu = ETHER_MTU;
> +
> + /* Invoke PMD device initialization function */
> + diag = (*eth_drv->eth_dev_init)(eth_dev);
> + if (diag == 0)
> + return 0;
> +
> + RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(%s) failed\n",
> + vmbus_drv->driver.name, ustr);
> +
> + if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> + rte_free(eth_dev->data->dev_private);
> +
> + return diag;
> +}
> +
> +int
> +rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev)
> +{
> + const struct eth_driver *eth_drv;
> + struct rte_eth_dev *eth_dev;
> + char ustr[UUID_BUF_SZ];
> + int ret;
> +
> + if (vmbus_dev == NULL)
> + return -EINVAL;
> +
> + uuid_unparse(vmbus_dev->device_id, ustr);
> + eth_dev = rte_eth_dev_allocated(ustr);
> + if (eth_dev == NULL)
> + return -ENODEV;
> +
> + eth_drv = (const struct eth_driver *)vmbus_dev->driver;
> +
> + /* Invoke PMD device uninit function */
> + if (*eth_drv->eth_dev_uninit) {
> + ret = (*eth_drv->eth_dev_uninit)(eth_dev);
> + if (ret)
> + return ret;
> + }
> +
> + /* free ether device */
> + rte_eth_dev_release_port(eth_dev);
> +
> + if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> + rte_free(eth_dev->data->dev_private);
> +
> + eth_dev->device = NULL;
> + eth_dev->driver = NULL;
> + eth_dev->data = NULL;
> +
> + return 0;
> +}
> +#endif
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index 1a62a322..2a8c1eed 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -180,6 +180,9 @@ extern "C" {
> #include <rte_log.h>
> #include <rte_interrupts.h>
> #include <rte_pci.h>
> +#ifdef RTE_LIBRTE_HV_PMD
> +#include <rte_vmbus.h>
> +#endif
> #include <rte_dev.h>
> #include <rte_devargs.h>
> #include <rte_errno.h>
> @@ -1908,6 +1911,17 @@ struct rte_pci_eth_driver {
> struct eth_driver eth_drv; /**< Ethernet driver. */
> };
>
> +#ifdef RTE_LIBRTE_HV_PMD
> +/**
> + * @internal
> + * The structure associated with a PMD VMBUS Ethernet driver.
> + */
> +struct rte_vmbus_eth_driver {
> + struct rte_vmbus_driver vmbus_drv; /**< Underlying VMBUS driver. */
> + struct eth_driver eth_drv; /**< Ethernet driver. */
> +};
> +#endif
> +
> /**
> * Convert a numerical speed in Mbps to a bitmap flag that can be used in
> * the bitmap link_speeds of the struct rte_eth_conf
> @@ -4543,6 +4557,23 @@ int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
> */
> int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev);
>
> +#ifdef RTE_LIBRTE_HV_PMD
> +/**
> + * @internal
> + * Wrapper for use by vmbus drivers as a .probe function to attach to a ethdev
> + * interface.
> + */
> +int rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
> + struct rte_vmbus_device *vmbus_dev);
> +
> +/**
> + * @internal
> + * Wrapper for use by vmbus drivers as a .remove function to detach a ethdev
> + * interface.
> + */
> +int rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev);
> +#endif
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index f75f0e24..6b304084 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -130,6 +130,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
> _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += -lrte_pmd_vhost
> endif # $(CONFIG_RTE_LIBRTE_VHOST)
> _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += -lrte_pmd_vmxnet3_uio
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_HV_PMD) += -luuid
>
> ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
> _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB) += -lrte_pmd_aesni_mb
> --
> 2.11.0
>
On Wed, Jan 11, 2017 at 3:49 PM, Jan Blunck <jblunck@infradead.org> wrote:
> On Sat, Jan 7, 2017 at 7:17 PM, Stephen Hemminger
> <stephen@networkplumber.org> wrote:
>> Add support for VMBUS on Hyper-V/Azure. VMBUS is similar to PCI
>> but has different addressing and internal API's.
>>
>> Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
>> ---
>> lib/librte_eal/common/Makefile | 2 +-
>> lib/librte_eal/common/eal_common_devargs.c | 7 +
>> lib/librte_eal/common/eal_common_options.c | 38 ++
>> lib/librte_eal/common/eal_internal_cfg.h | 1 +
>> lib/librte_eal/common/eal_options.h | 6 +
>> lib/librte_eal/common/eal_private.h | 5 +
>> lib/librte_eal/common/include/rte_devargs.h | 8 +
>> lib/librte_eal/common/include/rte_vmbus.h | 249 ++++++++
>> lib/librte_eal/linuxapp/eal/Makefile | 6 +
>> lib/librte_eal/linuxapp/eal/eal.c | 13 +
>> lib/librte_eal/linuxapp/eal/eal_vmbus.c | 911 ++++++++++++++++++++++++++++
>> lib/librte_ether/rte_ethdev.c | 90 +++
>> lib/librte_ether/rte_ethdev.h | 31 +
>> mk/rte.app.mk | 1 +
>> 14 files changed, 1367 insertions(+), 1 deletion(-)
>> create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
>> create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c
>>
>> diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
>> index 09a3d3af..ceb77bed 100644
>> --- a/lib/librte_eal/common/Makefile
>> +++ b/lib/librte_eal/common/Makefile
>> @@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>>
>> INC := rte_branch_prediction.h rte_common.h
>> INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
>> -INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
>> +INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
>> INC += rte_per_lcore.h rte_random.h
>> INC += rte_tailq.h rte_interrupts.h rte_alarm.h
>> INC += rte_string_fns.h rte_version.h
>> diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
>> index e403717b..934ca840 100644
>> --- a/lib/librte_eal/common/eal_common_devargs.c
>> +++ b/lib/librte_eal/common/eal_common_devargs.c
>> @@ -113,6 +113,13 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
>> goto fail;
>>
>> break;
>> + case RTE_DEVTYPE_WHITELISTED_VMBUS:
>> + case RTE_DEVTYPE_BLACKLISTED_VMBUS:
>> +#ifdef RTE_LIBRTE_HV_PMD
>> + if (uuid_parse(buf, devargs->uuid) == 0)
>> + break;
>> +#endif
>> + goto fail;
>> }
>>
>> free(buf);
>> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
>> index f36bc556..1a2b418c 100644
>> --- a/lib/librte_eal/common/eal_common_options.c
>> +++ b/lib/librte_eal/common/eal_common_options.c
>> @@ -95,6 +95,11 @@ eal_long_options[] = {
>> {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
>> {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
>> {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM },
>> +#ifdef RTE_LIBRTE_HV_PMD
>> + {OPT_NO_VMBUS, 0, NULL, OPT_NO_VMBUS_NUM },
>> + {OPT_VMBUS_BLACKLIST, 1, NULL, OPT_VMBUS_BLACKLIST_NUM },
>> + {OPT_VMBUS_WHITELIST, 1, NULL, OPT_VMBUS_WHITELIST_NUM },
>> +#endif
>> {0, 0, NULL, 0 }
>> };
>>
>> @@ -858,6 +863,21 @@ eal_parse_common_option(int opt, const char *optarg,
>> conf->no_pci = 1;
>> break;
>>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> + case OPT_NO_VMBUS_NUM:
>> + conf->no_vmbus = 1;
>> + break;
>> + case OPT_VMBUS_BLACKLIST_NUM:
>> + if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_VMBUS,
>> + optarg) < 0)
>> + return -1;
>> + break;
>> + case OPT_VMBUS_WHITELIST_NUM:
>> + if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_VMBUS,
>> + optarg) < 0)
>> + return -1;
>> + break;
>> +#endif
>> case OPT_NO_HPET_NUM:
>> conf->no_hpet = 1;
>> break;
>> @@ -1017,6 +1037,14 @@ eal_check_common_options(struct internal_config *internal_cfg)
>> return -1;
>> }
>>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> + if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_VMBUS) != 0 &&
>> + rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_VMBUS) != 0) {
>> + RTE_LOG(ERR, EAL, "Options vmbus blacklist and whitelist "
>> + "cannot be used at the same time\n");
>> + return -1;
>> + }
>> +#endif
>> return 0;
>> }
>>
>> @@ -1066,5 +1094,15 @@ eal_common_usage(void)
>> " --"OPT_NO_PCI" Disable PCI\n"
>> " --"OPT_NO_HPET" Disable HPET\n"
>> " --"OPT_NO_SHCONF" No shared config (mmap'd files)\n"
>> +#ifdef RTE_LIBRTE_HV_PMD
>> + " --"OPT_NO_VMBUS" Disable VMBUS\n"
>> + " --"OPT_VMBUS_BLACKLIST" Add a VMBUS device to black list.\n"
>> + " Prevent EAL from using this PCI device. The argument\n"
>> + " format is device UUID.\n"
>> + " --"OPT_VMBUS_WHITELIST" Add a VMBUS device to white list.\n"
>> + " Only use the specified VMBUS devices. The argument format\n"
>> + " is device UUID This option can be present\n"
>> + " several times (once per device).\n"
>> +#endif
>> "\n", RTE_MAX_LCORE);
>> }
>> diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
>> index 5f1367eb..4b6af937 100644
>> --- a/lib/librte_eal/common/eal_internal_cfg.h
>> +++ b/lib/librte_eal/common/eal_internal_cfg.h
>> @@ -67,6 +67,7 @@ struct internal_config {
>> unsigned hugepage_unlink; /**< true to unlink backing files */
>> volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
>> volatile unsigned no_pci; /**< true to disable PCI */
>> + volatile unsigned no_vmbus; /**< true to disable VMBUS */
>> volatile unsigned no_hpet; /**< true to disable HPET */
>> volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
>> * instead of native TSC */
>> diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
>> index a881c62e..156727e7 100644
>> --- a/lib/librte_eal/common/eal_options.h
>> +++ b/lib/librte_eal/common/eal_options.h
>> @@ -83,6 +83,12 @@ enum {
>> OPT_VMWARE_TSC_MAP_NUM,
>> #define OPT_XEN_DOM0 "xen-dom0"
>> OPT_XEN_DOM0_NUM,
>> +#define OPT_NO_VMBUS "no-vmbus"
>> + OPT_NO_VMBUS_NUM,
>> +#define OPT_VMBUS_BLACKLIST "vmbus-blacklist"
>> + OPT_VMBUS_BLACKLIST_NUM,
>> +#define OPT_VMBUS_WHITELIST "vmbus-whitelist"
>> + OPT_VMBUS_WHITELIST_NUM,
>> OPT_LONG_MAX_NUM
>> };
>>
>> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
>> index 9e7d8f6b..c856c63e 100644
>> --- a/lib/librte_eal/common/eal_private.h
>> +++ b/lib/librte_eal/common/eal_private.h
>> @@ -210,6 +210,11 @@ int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
>> struct mapped_pci_resource *uio_res, int map_idx);
>>
>> /**
>> + * VMBUS related functions and structures
>> + */
>> +int rte_eal_vmbus_init(void);
>> +
>> +/**
>> * Init tail queues for non-EAL library structures. This is to allow
>> * the rings, mempools, etc. lists to be shared among multiple processes
>> *
>> diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
>> index 88120a1c..c079d289 100644
>> --- a/lib/librte_eal/common/include/rte_devargs.h
>> +++ b/lib/librte_eal/common/include/rte_devargs.h
>> @@ -51,6 +51,9 @@ extern "C" {
>> #include <stdio.h>
>> #include <sys/queue.h>
>> #include <rte_pci.h>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +#include <uuid/uuid.h>
>> +#endif
>>
>> /**
>> * Type of generic device
>> @@ -59,6 +62,8 @@ enum rte_devtype {
>> RTE_DEVTYPE_WHITELISTED_PCI,
>> RTE_DEVTYPE_BLACKLISTED_PCI,
>> RTE_DEVTYPE_VIRTUAL,
>> + RTE_DEVTYPE_WHITELISTED_VMBUS,
>> + RTE_DEVTYPE_BLACKLISTED_VMBUS,
>> };
>>
>> /**
>> @@ -88,6 +93,9 @@ struct rte_devargs {
>> /** Driver name. */
>> char drv_name[32];
>> } virt;
>> +#ifdef RTE_LIBRTE_HV_PMD
>> + uuid_t uuid;
>> +#endif
>> };
>> /** Arguments string as given by user or "" for no argument. */
>> char *args;
>> diff --git a/lib/librte_eal/common/include/rte_vmbus.h b/lib/librte_eal/common/include/rte_vmbus.h
>> new file mode 100644
>> index 00000000..f96d753e
>> --- /dev/null
>> +++ b/lib/librte_eal/common/include/rte_vmbus.h
>> @@ -0,0 +1,249 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
>> + * Copyright(c) 2016 Microsoft Corporation
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + *
>> + */
>> +
>> +#ifndef _RTE_VMBUS_H_
>> +#define _RTE_VMBUS_H_
>> +
>> +/**
>> + * @file
>> + *
>> + * RTE VMBUS Interface
>> + */
>> +#ifdef __cplusplus
>> +extern "C" {
>> +#endif
>> +
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <limits.h>
>> +#include <errno.h>
>> +#include <uuid/uuid.h>
>> +#include <sys/queue.h>
>> +#include <stdint.h>
>> +#include <inttypes.h>
>> +
>> +#include <rte_debug.h>
>> +#include <rte_interrupts.h>
>> +#include <rte_dev.h>
>> +
>> +TAILQ_HEAD(vmbus_device_list, rte_vmbus_device);
>> +TAILQ_HEAD(vmbus_driver_list, rte_vmbus_driver);
>> +
>> +extern struct vmbus_driver_list vmbus_driver_list;
>> +extern struct vmbus_device_list vmbus_device_list;
>> +
>> +/** Pathname of VMBUS devices directory. */
>> +#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
>> +
>> +#define UUID_BUF_SZ (36 + 1)
>> +
>> +
>> +/** Maximum number of VMBUS resources. */
>> +#define VMBUS_MAX_RESOURCE 7
>> +
>> +/**
>> + * A structure describing a VMBUS device.
>> + */
>> +struct rte_vmbus_device {
>> + TAILQ_ENTRY(rte_vmbus_device) next; /**< Next probed VMBUS device. */
>> + struct rte_device device; /**< Inherit core device */
>> + uuid_t device_id; /**< VMBUS device id */
>> + uuid_t class_id; /**< VMBUS device type */
>> + uint32_t relid; /**< VMBUS id for notification */
>> + uint8_t monitor_id;
>> + struct rte_intr_handle intr_handle; /**< Interrupt handle */
>> + const struct rte_vmbus_driver *driver; /**< Associated driver */
>> +
>> + struct rte_mem_resource mem_resource[VMBUS_MAX_RESOURCE];
>> + /**< VMBUS Memory Resource */
>> + char sysfs_name[]; /**< Name in sysfs bus directory */
>> +};
>> +
>> +struct rte_vmbus_driver;
>> +
>> +/**
>> + * Initialisation function for the driver called during VMBUS probing.
>> + */
>> +typedef int (vmbus_probe_t)(struct rte_vmbus_driver *,
>> + struct rte_vmbus_device *);
>> +
>> +/**
>> + * Uninitialisation function for the driver called during hotplugging.
>> + */
>> +typedef int (vmbus_remove_t)(struct rte_vmbus_device *);
>> +
>> +/**
>> + * A structure describing a VMBUS driver.
>> + */
>> +struct rte_vmbus_driver {
>> + TAILQ_ENTRY(rte_vmbus_driver) next; /**< Next in list. */
>> + struct rte_driver driver;
>> + vmbus_probe_t *probe; /**< Device Probe function. */
>> + vmbus_remove_t *remove; /**< Device Remove function. */
>> +
>> + const uuid_t *id_table; /**< ID table. */
>> +};
>> +
>> +struct vmbus_map {
>> + void *addr;
>> + char *path;
>> + uint64_t offset;
>> + uint64_t size;
>> + uint64_t phaddr;
>> +};
>> +
>> +/*
>> + * For multi-process we need to reproduce all vmbus mappings in secondary
>> + * processes, so save them in a tailq.
>> + */
>> +struct mapped_vmbus_resource {
>> + TAILQ_ENTRY(mapped_vmbus_resource) next;
>> +
>> + uuid_t uuid;
>> + char path[PATH_MAX];
>> + int nb_maps;
>> + struct vmbus_map maps[VMBUS_MAX_RESOURCE];
>> +};
>> +
>> +TAILQ_HEAD(mapped_vmbus_res_list, mapped_vmbus_resource);
>> +
>> +/**
>> + * Scan the content of the VMBUS bus, and the devices in the devices list
>> + *
>> + * @return
>> + * 0 on success, negative on error
>> + */
>> +int rte_eal_vmbus_scan(void);
>> +
>> +/**
>> + * Probe the VMBUS bus for registered drivers.
>> + *
>> + * Scan the content of the VMBUS bus, and call the probe() function for
>> + * all registered drivers that have a matching entry in its id_table
>> + * for discovered devices.
>> + *
>> + * @return
>> + * - 0 on success.
>> + * - Negative on error.
>> + */
>> +int rte_eal_vmbus_probe(void);
>> +
>> +/**
>> + * Map the VMBUS device resources in user space virtual memory address
>> + *
>> + * @param dev
>> + * A pointer to a rte_vmbus_device structure describing the device
>> + * to use
>> + *
>> + * @return
>> + * 0 on success, negative on error and positive if no driver
>> + * is found for the device.
>> + */
>> +int rte_eal_vmbus_map_device(struct rte_vmbus_device *dev);
>> +
>> +/**
>> + * Unmap this device
>> + *
>> + * @param dev
>> + * A pointer to a rte_vmbus_device structure describing the device
>> + * to use
>> + */
>> +void rte_eal_vmbus_unmap_device(struct rte_vmbus_device *dev);
>> +
>> +/**
>> + * Probe the single VMBUS device.
>> + *
>> + * Scan the content of the VMBUS bus, and find the vmbus device
>> + * specified by device uuid, then call the probe() function for
>> + * registered driver that has a matching entry in its id_table for
>> + * discovered device.
>> + *
>> + * @param id
>> + * The VMBUS device uuid.
>> + * @return
>> + * - 0 on success.
>> + * - Negative on error.
>> + */
>> +int rte_eal_vmbus_probe_one(uuid_t id);
>> +
>> +/**
>> + * Close the single VMBUS device.
>> + *
>> + * Scan the content of the VMBUS bus, and find the vmbus device id,
>> + * then call the remove() function for registered driver that has a
>> + * matching entry in its id_table for discovered device.
>> + *
>> + * @param id
>> + * The VMBUS device uuid.
>> + * @return
>> + * - 0 on success.
>> + * - Negative on error.
>> + */
>> +int rte_eal_vmbus_detach(uuid_t id);
>> +
>> +/**
>> + * Register a VMBUS driver.
>> + *
>> + * @param driver
>> + * A pointer to a rte_vmbus_driver structure describing the driver
>> + * to be registered.
>> + */
>> +void rte_eal_vmbus_register(struct rte_vmbus_driver *driver);
>> +
>> +/** Helper for VMBUS device registration from driver nstance */
>> +#define RTE_PMD_REGISTER_VMBUS(nm, vmbus_drv) \
>> +RTE_INIT(vmbusinitfn_ ##nm); \
>> +static void vmbusinitfn_ ##nm(void) \
>> +{\
>> + (vmbus_drv).driver.name = RTE_STR(nm);\
>> + (vmbus_drv).driver.type = PMD_VMBUS; \
>> + rte_eal_vmbus_register(&vmbus_drv); \
>> +} \
>> +RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
>> +
>> +/**
>> + * Unregister a VMBUS driver.
>> + *
>> + * @param driver
>> + * A pointer to a rte_vmbus_driver structure describing the driver
>> + * to be unregistered.
>> + */
>> +void rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver);
>
> The register/unregister need to get exported via the map file too.
>
>> +
>> +#ifdef __cplusplus
>> +}
>> +#endif
>> +
>> +#endif /* _RTE_VMBUS_H_ */
>> diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
>> index 4e206f09..f6ca3848 100644
>> --- a/lib/librte_eal/linuxapp/eal/Makefile
>> +++ b/lib/librte_eal/linuxapp/eal/Makefile
>> @@ -71,6 +71,11 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
>> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
>> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
>>
>> +ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
>> +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vmbus.c
>> +LDLIBS += -luuid
>> +endif
>> +
>> # from common dir
>> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
>> SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
>> @@ -114,6 +119,7 @@ CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
>> CFLAGS_eal_pci.o := -D_GNU_SOURCE
>> CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE
>> CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE
>> +CFLAGS_eal_vmbux.o := -D_GNU_SOURCE
>> CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
>> CFLAGS_eal_common_options.o := -D_GNU_SOURCE
>> CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
>> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
>> index 16dd5b9c..1bc0814a 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal.c
>> @@ -70,6 +70,9 @@
>> #include <rte_cpuflags.h>
>> #include <rte_interrupts.h>
>> #include <rte_pci.h>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +#include <rte_vmbus.h>
>> +#endif
>> #include <rte_dev.h>
>> #include <rte_devargs.h>
>> #include <rte_common.h>
>> @@ -830,6 +833,11 @@ rte_eal_init(int argc, char **argv)
>>
>> eal_check_mem_on_local_socket();
>>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> + if (rte_eal_vmbus_init() < 0)
>> + RTE_LOG(ERR, EAL, "Cannot init VMBUS\n");
>> +#endif
>> +
>> if (eal_plugins_init() < 0)
>> rte_panic("Cannot init plugins\n");
>>
>> @@ -884,6 +892,11 @@ rte_eal_init(int argc, char **argv)
>> if (rte_eal_pci_probe())
>> rte_panic("Cannot probe PCI\n");
>>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> + if (rte_eal_vmbus_probe() < 0)
>> + rte_panic("Cannot probe VMBUS\n");
>> +#endif
>> +
>> if (rte_eal_dev_init() < 0)
>> rte_panic("Cannot init pmd devices\n");
>>
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_vmbus.c b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
>> new file mode 100644
>> index 00000000..729f93a9
>> --- /dev/null
>> +++ b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
>> @@ -0,0 +1,911 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
>> + * Copyright(c) 2016 Microsoft Corporation
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + *
>> + */
>> +
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include <dirent.h>
>> +#include <fcntl.h>
>> +#include <sys/mman.h>
>> +
>> +#include <rte_eal.h>
>> +#include <rte_tailq.h>
>> +#include <rte_log.h>
>> +#include <rte_devargs.h>
>> +#include <rte_vmbus.h>
>> +#include <rte_malloc.h>
>> +
>> +#include "eal_private.h"
>> +#include "eal_pci_init.h"
>> +#include "eal_filesystem.h"
>> +
>> +struct vmbus_driver_list vmbus_driver_list =
>> + TAILQ_HEAD_INITIALIZER(vmbus_driver_list);
>> +struct vmbus_device_list vmbus_device_list =
>> + TAILQ_HEAD_INITIALIZER(vmbus_device_list);
>> +
>> +static void *vmbus_map_addr;
>> +
>> +static struct rte_tailq_elem rte_vmbus_uio_tailq = {
>> + .name = "UIO_RESOURCE_LIST",
This should be VMBUS_UIO_RESOURCE_LIST to not collide with rte_uio_tailq.
>> +};
>> +EAL_REGISTER_TAILQ(rte_vmbus_uio_tailq);
>> +
>> +/*
>> + * parse a sysfs file containing one integer value
>> + * different to the eal version, as it needs to work with 64-bit values
>> + */
>> +static int
>> +vmbus_get_sysfs_uuid(const char *filename, uuid_t uu)
>> +{
>> + char buf[BUFSIZ];
>> + char *cp, *in = buf;
>> + FILE *f;
>> +
>> + f = fopen(filename, "r");
>> + if (f == NULL) {
>> + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
>> + __func__, filename);
>> + return -1;
>> + }
>> +
>> + if (fgets(buf, sizeof(buf), f) == NULL) {
>> + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
>> + __func__, filename);
>> + fclose(f);
>> + return -1;
>> + }
>> + fclose(f);
>> +
>> + cp = strchr(buf, '\n');
>> + if (cp)
>> + *cp = '\0';
>> +
>> + /* strip { } notation */
>> + if (buf[0] == '{') {
>> + in = buf + 1;
>> + cp = strchr(in, '}');
>> + if (cp)
>> + *cp = '\0';
>> + }
>> +
>> + if (uuid_parse(in, uu) < 0) {
>> + RTE_LOG(ERR, EAL, "%s %s not a valid UUID\n",
>> + filename, buf);
>> + return -1;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +/* map a particular resource from a file */
>> +static void *
>> +vmbus_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
>> + int flags)
>> +{
>> + void *mapaddr;
>> +
>> + /* Map the memory resource of device */
>> + mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
>> + MAP_SHARED | flags, fd, offset);
>> + if (mapaddr == MAP_FAILED ||
>> + (requested_addr != NULL && mapaddr != requested_addr)) {
>> + RTE_LOG(ERR, EAL,
>> + "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s)\n",
>> + __func__, fd, requested_addr,
>> + (unsigned long)size, (unsigned long)offset,
>> + strerror(errno));
>> + } else
>> + RTE_LOG(DEBUG, EAL, " VMBUS memory mapped at %p\n", mapaddr);
>> +
>> + return mapaddr;
>> +}
>> +
>> +/* unmap a particular resource */
>> +static void
>> +vmbus_unmap_resource(void *requested_addr, size_t size)
>> +{
>> + if (requested_addr == NULL)
>> + return;
>> +
>> + /* Unmap the VMBUS memory resource of device */
>> + if (munmap(requested_addr, size)) {
>> + RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
>> + __func__, requested_addr, (unsigned long)size,
>> + strerror(errno));
>> + } else
>> + RTE_LOG(DEBUG, EAL, " VMBUS memory unmapped at %p\n",
>> + requested_addr);
>> +}
>> +
>> +/* Only supports current kernel version
>> + * Unlike PCI there is no option (or need) to create UIO device.
>> + */
>> +static int vmbus_get_uio_dev(const char *name,
>> + char *dstbuf, size_t buflen)
>> +{
>> + char dirname[PATH_MAX];
>> + unsigned int uio_num;
>> + struct dirent *e;
>> + DIR *dir;
>> +
>> + snprintf(dirname, sizeof(dirname),
>> + "/sys/bus/vmbus/devices/%s/uio", name);
>> +
>> + dir = opendir(dirname);
>> + if (dir == NULL) {
>> + RTE_LOG(ERR, EAL, "Cannot map uio resources for %s: %s\n",
>> + name, strerror(errno));
>> + return -1;
>> + }
>> +
>> + /* take the first file starting with "uio" */
>> + while ((e = readdir(dir)) != NULL) {
>> + if (sscanf(e->d_name, "uio%u", &uio_num) != 1)
>> + continue;
>> +
>> + snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num);
>> + break;
>> + }
>> + closedir(dir);
>> +
>> + return e ? (int) uio_num : -1;
>> +}
>> +
>> +/*
>> + * parse a sysfs file containing one integer value
>> + * different to the eal version, as it needs to work with 64-bit values
>> + */
>> +static int
>> +vmbus_parse_sysfs_value(const char *dir, const char *name,
>> + uint64_t *val)
>> +{
>> + char filename[PATH_MAX];
>> + FILE *f;
>> + char buf[BUFSIZ];
>> + char *end = NULL;
>> +
>> + snprintf(filename, sizeof(filename), "%s/%s", dir, name);
>> + f = fopen(filename, "r");
>> + if (f == NULL) {
>> + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
>> + __func__, filename);
>> + return -1;
>> + }
>> +
>> + if (fgets(buf, sizeof(buf), f) == NULL) {
>> + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
>> + __func__, filename);
>> + fclose(f);
>> + return -1;
>> + }
>> + fclose(f);
>> +
>> + *val = strtoull(buf, &end, 0);
>> + if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
>> + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
>> + __func__, filename);
>> + return -1;
>> + }
>> + return 0;
>> +}
>> +
>> +/* Get mappings out of values provided by uio */
>> +static int
>> +vmbus_uio_get_mappings(const char *uioname,
>> + struct vmbus_map maps[])
>> +{
>> + int i;
>> +
>> + for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
>> + struct vmbus_map *map = &maps[i];
>> + char dirname[PATH_MAX];
>> +
>> + /* check if map directory exists */
>> + snprintf(dirname, sizeof(dirname),
>> + "%s/maps/map%d", uioname, i);
>> +
>> + if (access(dirname, F_OK) != 0)
>> + break;
>> +
>> + /* get mapping offset */
>> + if (vmbus_parse_sysfs_value(dirname, "offset",
>> + &map->offset) < 0)
>> + return -1;
>> +
>> + /* get mapping size */
>> + if (vmbus_parse_sysfs_value(dirname, "size",
>> + &map->size) < 0)
>> + return -1;
>> +
>> + /* get mapping physical address */
>> + if (vmbus_parse_sysfs_value(dirname, "addr",
>> + &maps->phaddr) < 0)
>> + return -1;
>> + }
>> +
>> + return i;
>> +}
>> +
>> +static void
>> +vmbus_uio_free_resource(struct rte_vmbus_device *dev,
>> + struct mapped_vmbus_resource *uio_res)
>> +{
>> + rte_free(uio_res);
>> +
>> + if (dev->intr_handle.fd) {
>> + close(dev->intr_handle.fd);
>> + dev->intr_handle.fd = -1;
>> + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
>> + }
>> +}
>> +
>> +static struct mapped_vmbus_resource *
>> +vmbus_uio_alloc_resource(struct rte_vmbus_device *dev)
>> +{
>> + struct mapped_vmbus_resource *uio_res;
>> + char dirname[PATH_MAX], devname[PATH_MAX];
>> + int uio_num, nb_maps;
>> +
>> + uio_num = vmbus_get_uio_dev(dev->sysfs_name, dirname, sizeof(dirname));
>> + if (uio_num < 0) {
>> + RTE_LOG(WARNING, EAL,
>> + " %s not managed by UIO driver, skipping\n",
>> + dev->sysfs_name);
>> + return NULL;
>> + }
>> +
>> + /* allocate the mapping details for secondary processes*/
>> + uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
>> + if (uio_res == NULL) {
>> + RTE_LOG(ERR, EAL,
>> + "%s(): cannot store uio mmap details\n", __func__);
>> + goto error;
>> + }
>> +
>> + snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
>> + dev->intr_handle.fd = open(devname, O_RDWR);
>> + if (dev->intr_handle.fd < 0) {
>> + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
>> + devname, strerror(errno));
>> + goto error;
>> + }
>> +
>> + dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
>> +
>> + snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
>> + uuid_copy(uio_res->uuid, dev->device_id);
>> +
>> + nb_maps = vmbus_uio_get_mappings(dirname, uio_res->maps);
>> + if (nb_maps < 0)
>> + goto error;
>> +
>> + RTE_LOG(DEBUG, EAL, "Found %d memory maps for device %s\n",
>> + nb_maps, dev->sysfs_name);
>> +
>> + return uio_res;
>> +
>> + error:
>> + vmbus_uio_free_resource(dev, uio_res);
>> + return NULL;
>> +}
>> +
>> +static int
>> +vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev,
>> + unsigned int res_idx,
>> + struct mapped_vmbus_resource *uio_res,
>> + unsigned int map_idx)
>> +{
>> + struct vmbus_map *maps = uio_res->maps;
>> + char devname[PATH_MAX];
>> + void *mapaddr;
>> + int fd;
>> +
>> + snprintf(devname, sizeof(devname),
>> + "/sys/bus/vmbus/%s/resource%u", dev->sysfs_name, res_idx);
>> +
>> + fd = open(devname, O_RDWR);
>> + if (fd < 0) {
>> + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
>> + devname, strerror(errno));
>> + return -1;
>> + }
>> +
>> + /* allocate memory to keep path */
>> + maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
>> + if (maps[map_idx].path == NULL) {
>> + RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
>> + strerror(errno));
>> + return -1;
>> + }
>> +
>> + /* try mapping somewhere close to the end of hugepages */
>> + if (vmbus_map_addr == NULL)
>> + vmbus_map_addr = pci_find_max_end_va();
>> +
>> + mapaddr = vmbus_map_resource(vmbus_map_addr, fd, 0,
>> + dev->mem_resource[res_idx].len, 0);
>> + close(fd);
>> + if (mapaddr == MAP_FAILED) {
>> + rte_free(maps[map_idx].path);
>> + return -1;
>> + }
>> +
>> + vmbus_map_addr = RTE_PTR_ADD(mapaddr,
>> + dev->mem_resource[res_idx].len);
>> +
>> + maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
>> + maps[map_idx].size = dev->mem_resource[res_idx].len;
>> + maps[map_idx].addr = mapaddr;
>> + maps[map_idx].offset = 0;
>> + strcpy(maps[map_idx].path, devname);
>> + dev->mem_resource[res_idx].addr = mapaddr;
>> +
>> + return 0;
>> +}
>> +
>> +static void
>> +vmbus_uio_unmap(struct mapped_vmbus_resource *uio_res)
>> +{
>> + int i;
>> +
>> + if (uio_res == NULL)
>> + return;
>> +
>> + for (i = 0; i != uio_res->nb_maps; i++) {
>> + vmbus_unmap_resource(uio_res->maps[i].addr,
>> + uio_res->maps[i].size);
>> +
>> + if (rte_eal_process_type() == RTE_PROC_PRIMARY)
>> + rte_free(uio_res->maps[i].path);
>> + }
>> +}
>> +
>> +static struct mapped_vmbus_resource *
>> +vmbus_uio_find_resource(struct rte_vmbus_device *dev)
>> +{
>> + struct mapped_vmbus_resource *uio_res;
>> + struct mapped_vmbus_res_list *uio_res_list =
>> + RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
>> + mapped_vmbus_res_list);
>> +
>> + if (dev == NULL)
>> + return NULL;
>> +
>> + TAILQ_FOREACH(uio_res, uio_res_list, next) {
>> + if (uuid_compare(uio_res->uuid, dev->device_id) == 0)
>> + return uio_res;
>> + }
>> + return NULL;
>> +}
>> +
>> +/* unmap the VMBUS resource of a VMBUS device in virtual memory */
>> +static void
>> +vmbus_uio_unmap_resource(struct rte_vmbus_device *dev)
>> +{
>> + struct mapped_vmbus_resource *uio_res;
>> + struct mapped_vmbus_res_list *uio_res_list =
>> + RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
>> + mapped_vmbus_res_list);
>> +
>> + if (dev == NULL)
>> + return;
>> +
>> + /* find an entry for the device */
>> + uio_res = vmbus_uio_find_resource(dev);
>> + if (uio_res == NULL)
>> + return;
>> +
>> + /* secondary processes - just free maps */
>> + if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>> + return vmbus_uio_unmap(uio_res);
>> +
>> + TAILQ_REMOVE(uio_res_list, uio_res, next);
>> +
>> + /* unmap all resources */
>> + vmbus_uio_unmap(uio_res);
>> +
>> + /* free uio resource */
>> + rte_free(uio_res);
>> +
>> + /* close fd if in primary process */
>> + close(dev->intr_handle.fd);
>> + if (dev->intr_handle.uio_cfg_fd >= 0) {
>> + close(dev->intr_handle.uio_cfg_fd);
>> + dev->intr_handle.uio_cfg_fd = -1;
>> + }
>> +
>> + dev->intr_handle.fd = -1;
>> + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
>> +}
>> +
>> +static int
>> +vmbus_uio_map_secondary(struct rte_vmbus_device *dev)
>> +{
>> + struct mapped_vmbus_resource *uio_res;
>> + struct mapped_vmbus_res_list *uio_res_list =
>> + RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
>> + mapped_vmbus_res_list);
>> +
>> + TAILQ_FOREACH(uio_res, uio_res_list, next) {
>> + int i;
>> +
>> + /* skip this element if it doesn't match our id */
>> + if (uuid_compare(uio_res->uuid, dev->device_id))
>> + continue;
>> +
>> + for (i = 0; i != uio_res->nb_maps; i++) {
>> + void *mapaddr;
>> + int fd;
>> +
>> + fd = open(uio_res->maps[i].path, O_RDWR);
>> + if (fd < 0) {
>> + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
>> + uio_res->maps[i].path, strerror(errno));
>> + return -1;
>> + }
>> +
>> + mapaddr = vmbus_map_resource(uio_res->maps[i].addr, fd,
>> + uio_res->maps[i].offset,
>> + uio_res->maps[i].size, 0);
>> + /* fd is not needed in slave process, close it */
>> + close(fd);
>> +
>> + if (mapaddr == uio_res->maps[i].addr)
>> + continue;
>> +
>> + RTE_LOG(ERR, EAL,
>> + "Cannot mmap device resource file %s to address: %p\n",
>> + uio_res->maps[i].path,
>> + uio_res->maps[i].addr);
>> +
>> + /* unmap addrs correctly mapped */
>> + while (i != 0) {
>> + --i;
>> + vmbus_unmap_resource(uio_res->maps[i].addr,
>> + uio_res->maps[i].size);
>> + }
>> + return -1;
>> +
>> + }
>> + return 0;
>> + }
>> +
>> + RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
>> + return 1;
>> +}
>> +
>> +/* map the resources of a vmbus device in virtual memory */
>> +int
>> +rte_eal_vmbus_map_device(struct rte_vmbus_device *dev)
>> +{
>> + struct mapped_vmbus_resource *uio_res;
>> + struct mapped_vmbus_res_list *uio_res_list =
>> + RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head, mapped_vmbus_res_list);
>> + int i, ret, map_idx = 0;
>> +
>> + dev->intr_handle.fd = -1;
>> + dev->intr_handle.uio_cfg_fd = -1;
>> + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
>> +
>> + /* secondary processes - use already recorded details */
>> + if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>> + return vmbus_uio_map_secondary(dev);
>> +
>> + /* allocate uio resource */
>> + uio_res = vmbus_uio_alloc_resource(dev);
>> + if (uio_res == NULL)
>> + return -1;
>> +
>> + /* Map all BARs */
>> + for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
>> + uint64_t phaddr;
>> +
>> + /* skip empty BAR */
>> + phaddr = dev->mem_resource[i].phys_addr;
>> + if (phaddr == 0)
>> + continue;
>> +
>> + ret = vmbus_uio_map_resource_by_index(dev, i,
>> + uio_res, map_idx);
>> + if (ret)
>> + goto error;
>> +
>> + map_idx++;
>> + }
>> +
>> + uio_res->nb_maps = map_idx;
>> +
>> + TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
>> +
>> + return 0;
>> +error:
>> + for (i = 0; i < map_idx; i++) {
>> + vmbus_unmap_resource(uio_res->maps[i].addr,
>> + uio_res->maps[i].size);
>> + rte_free(uio_res->maps[i].path);
>> + }
>> + vmbus_uio_free_resource(dev, uio_res);
>> + return -1;
>> +}
>> +
>> +/* Scan one vmbus sysfs entry, and fill the devices list from it. */
>> +static int
>> +vmbus_scan_one(const char *name)
>> +{
>> + struct rte_vmbus_device *dev, *dev2;
>> + char filename[PATH_MAX];
>> + char dirname[PATH_MAX];
>> + unsigned long tmp;
>> +
>> + dev = malloc(sizeof(*dev) + strlen(name) + 1);
>> + if (dev == NULL)
>> + return -1;
>> +
>> + memset(dev, 0, sizeof(*dev));
>> + strcpy(dev->sysfs_name, name);
>> + if (dev->sysfs_name == NULL)
>> + goto error;
>> +
>> + /* sysfs base directory
>> + * /sys/bus/vmbus/devices/7a08391f-f5a0-4ac0-9802-d13fd964f8df
>> + * or on older kernel
>> + * /sys/bus/vmbus/devices/vmbus_1
>> + */
>> + snprintf(dirname, sizeof(dirname), "%s/%s",
>> + SYSFS_VMBUS_DEVICES, name);
>> +
>> + /* get device id */
>> + snprintf(filename, sizeof(filename), "%s/device_id", dirname);
>> + if (vmbus_get_sysfs_uuid(filename, dev->device_id) < 0)
>> + goto error;
>> +
>> + /* get device class */
>> + snprintf(filename, sizeof(filename), "%s/class_id", dirname);
>> + if (vmbus_get_sysfs_uuid(filename, dev->class_id) < 0)
>> + goto error;
>> +
>> + /* get relid */
>> + snprintf(filename, sizeof(filename), "%s/id", dirname);
>> + if (eal_parse_sysfs_value(filename, &tmp) < 0)
>> + goto error;
>> + dev->relid = tmp;
>> +
>> + /* get monitor id */
>> + snprintf(filename, sizeof(filename), "%s/monitor_id", dirname);
>> + if (eal_parse_sysfs_value(filename, &tmp) < 0)
>> + goto error;
>> + dev->monitor_id = tmp;
>> +
>> + /* get numa node */
>> + snprintf(filename, sizeof(filename), "%s/numa_node",
>> + dirname);
>> + if (eal_parse_sysfs_value(filename, &tmp) < 0)
>> + /* if no NUMA support, set default to 0 */
>> + dev->device.numa_node = 0;
>> + else
>> + dev->device.numa_node = tmp;
>> +
>> + /* device is valid, add in list (sorted) */
>> + RTE_LOG(DEBUG, EAL, "Adding vmbus device %s\n", name);
>> +
>> + TAILQ_FOREACH(dev2, &vmbus_device_list, next) {
>> + int ret;
>> +
>> + ret = uuid_compare(dev->device_id, dev->device_id);
>> + if (ret > 0)
>> + continue;
>> +
>> + if (ret < 0) {
>> + TAILQ_INSERT_BEFORE(dev2, dev, next);
>> + rte_eal_device_insert(&dev->device);
>> + } else { /* already registered */
>> + memmove(dev2->mem_resource, dev->mem_resource,
>> + sizeof(dev->mem_resource));
>> + free(dev);
>> + }
>> + return 0;
>> + }
>> +
>> + rte_eal_device_insert(&dev->device);
>> + TAILQ_INSERT_TAIL(&vmbus_device_list, dev, next);
>> +
>> + return 0;
>> +error:
>> + free(dev);
>> + return -1;
>> +}
>> +
>> +/*
>> + * Scan the content of the vmbus, and the devices in the devices list
>> + */
>> +static int
>> +vmbus_scan(void)
>> +{
>> + struct dirent *e;
>> + DIR *dir;
>> +
>> + dir = opendir(SYSFS_VMBUS_DEVICES);
>> + if (dir == NULL) {
>> + if (errno == ENOENT)
>> + return 0;
>> +
>> + RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
>> + __func__, strerror(errno));
>> + return -1;
>> + }
>> +
>> + while ((e = readdir(dir)) != NULL) {
>> + if (e->d_name[0] == '.')
>> + continue;
>> +
>> + if (vmbus_scan_one(e->d_name) < 0)
>> + goto error;
>> + }
>> + closedir(dir);
>> + return 0;
>> +
>> +error:
>> + closedir(dir);
>> + return -1;
>> +}
>> +
>> +/* Init the VMBUS EAL subsystem */
>> +int rte_eal_vmbus_init(void)
>> +{
>> + /* VMBUS can be disabled */
>> + if (internal_config.no_vmbus)
>> + return 0;
>> +
>> + if (vmbus_scan() < 0) {
>> + RTE_LOG(ERR, EAL, "%s(): Cannot scan vmbus\n", __func__);
>> + return -1;
>> + }
>> + return 0;
>> +}
>> +
>> +/* Below is PROBE part of eal_vmbus library */
>> +
>> +/*
>> + * If device ID match, call the devinit() function of the driver.
>> + */
>> +static int
>> +rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
>> + struct rte_vmbus_device *dev)
>> +{
>> + const uuid_t *id_table;
>> +
>> + RTE_LOG(DEBUG, EAL, " probe driver: %s\n", dr->driver.name);
>> +
>> + for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
>> + struct rte_devargs *args;
>> + char guid[UUID_BUF_SZ];
>> + int ret;
>> +
>> + /* skip devices not assocaited with this device class */
>> + if (uuid_compare(*id_table, dev->class_id) != 0)
>> + continue;
>> +
>> + uuid_unparse(dev->device_id, guid);
>> + RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
>> + guid, dev->device.numa_node);
>> +
>> + /* no initialization when blacklisted, return without error */
>> + args = dev->device.devargs;
>> + if (args && args->type == RTE_DEVTYPE_BLACKLISTED_VMBUS) {
>> + RTE_LOG(INFO, EAL, " Device is blacklisted, not initializing\n");
>> + return 1;
>> + }
>> +
>> + RTE_LOG(INFO, EAL, " probe driver: %s\n", dr->driver.name);
>> +
>> + /* map resources for device */
>> + ret = rte_eal_vmbus_map_device(dev);
>> + if (ret != 0)
>> + return ret;
>> +
>> + /* reference driver structure */
>> + dev->driver = dr;
>> +
>> + /* call the driver probe() function */
>> + ret = dr->probe(dr, dev);
>> + if (ret)
>> + dev->driver = NULL;
>> +
>> + return ret;
>> + }
>> +
>> + /* return positive value if driver doesn't support this device */
>> + return 1;
>> +}
>> +
>> +
>> +/*
>> + * If vendor/device ID match, call the remove() function of the
>> + * driver.
>> + */
>> +static int
>> +vmbus_detach_dev(struct rte_vmbus_driver *dr,
>> + struct rte_vmbus_device *dev)
>> +{
>> + const uuid_t *id_table;
>> +
>> + for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
>> + char guid[UUID_BUF_SZ];
>> +
>> + /* skip devices not assocaited with this device class */
>> + if (uuid_compare(*id_table, dev->class_id) != 0)
>> + continue;
>> +
>> + uuid_unparse(dev->device_id, guid);
>> + RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
>> + guid, dev->device.numa_node);
>> +
>> + RTE_LOG(DEBUG, EAL, " remove driver: %s\n", dr->driver.name);
>> +
>> + if (dr->remove && (dr->remove(dev) < 0))
>> + return -1; /* negative value is an error */
>> +
>> + /* clear driver structure */
>> + dev->driver = NULL;
>> +
>> + vmbus_uio_unmap_resource(dev);
>> + return 0;
>> + }
>> +
>> + /* return positive value if driver doesn't support this device */
>> + return 1;
>> +}
>> +
>> +/*
>> + * call the devinit() function of all
>> + * registered drivers for the vmbus device. Return -1 if no driver is
>> + * found for this class of vmbus device.
>> + * The present assumption is that we have drivers only for vmbus network
>> + * devices. That's why we don't check driver's id_table now.
>> + */
>> +static int
>> +vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
>> +{
>> + struct rte_vmbus_driver *dr = NULL;
>> + int ret;
>> +
>> + TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
>> + ret = rte_eal_vmbus_probe_one_driver(dr, dev);
>> + if (ret < 0) {
>> + /* negative value is an error */
>> + RTE_LOG(ERR, EAL, "Failed to probe driver %s\n",
>> + dr->driver.name);
>> + return -1;
>> + }
>> + /* positive value means driver doesn't support it */
>> + if (ret > 0)
>> + continue;
>> +
>> + return 0;
>> + }
>> +
>> + return 1;
>> +}
>> +
>> +
>> +/*
>> + * If device ID matches, call the remove() function of all
>> + * registered driver for the given device. Return -1 if initialization
>> + * failed, return 1 if no driver is found for this device.
>> + */
>> +static int
>> +vmbus_detach_all_drivers(struct rte_vmbus_device *dev)
>> +{
>> + struct rte_vmbus_driver *dr;
>> + int rc = 0;
>> +
>> + if (dev == NULL)
>> + return -1;
>> +
>> + TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
>> + rc = vmbus_detach_dev(dr, dev);
>> + if (rc < 0)
>> + /* negative value is an error */
>> + return -1;
>> + if (rc > 0)
>> + /* positive value means driver doesn't support it */
>> + continue;
>> + return 0;
>> + }
>> + return 1;
>> +}
>> +
>> +/* Detach device specified by its VMBUS id */
>> +int
>> +rte_eal_vmbus_detach(uuid_t device_id)
>> +{
>> + struct rte_vmbus_device *dev;
>> + char ubuf[UUID_BUF_SZ];
>> +
>> + TAILQ_FOREACH(dev, &vmbus_device_list, next) {
>> + if (uuid_compare(dev->device_id, device_id) != 0)
>> + continue;
>> +
>> + if (vmbus_detach_all_drivers(dev) < 0)
>> + goto err_return;
>> +
>> + TAILQ_REMOVE(&vmbus_device_list, dev, next);
>> + free(dev);
>> + return 0;
>> + }
>> + return -1;
>> +
>> +err_return:
>> + uuid_unparse(device_id, ubuf);
>> + RTE_LOG(WARNING, EAL, "Requested device %s cannot be used\n",
>> + ubuf);
>> + return -1;
>> +}
>> +
>> +/*
>> + * Scan the vmbus, and call the devinit() function for
>> + * all registered drivers that have a matching entry in its id_table
>> + * for discovered devices.
>> + */
>> +int
>> +rte_eal_vmbus_probe(void)
>> +{
>> + struct rte_vmbus_device *dev = NULL;
>> +
>> + TAILQ_FOREACH(dev, &vmbus_device_list, next) {
>> + char ubuf[UUID_BUF_SZ];
>> +
>> + uuid_unparse(dev->device_id, ubuf);
>> +
>> + RTE_LOG(DEBUG, EAL, "Probing driver for device %s ...\n",
>> + ubuf);
>> + vmbus_probe_all_drivers(dev);
>> + }
>> + return 0;
>> +}
>> +
>> +/* register vmbus driver */
>> +void
>> +rte_eal_vmbus_register(struct rte_vmbus_driver *driver)
>> +{
>> + TAILQ_INSERT_TAIL(&vmbus_driver_list, driver, next);
>> +}
>> +
>> +/* unregister vmbus driver */
>> +void
>> +rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver)
>> +{
>> + TAILQ_REMOVE(&vmbus_driver_list, driver, next);
>> +}
>> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
>> index 7c212096..b69af0f0 100644
>> --- a/lib/librte_ether/rte_ethdev.c
>> +++ b/lib/librte_ether/rte_ethdev.c
>> @@ -3334,3 +3334,93 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
>> -ENOTSUP);
>> return (*dev->dev_ops->l2_tunnel_offload_set)(dev, l2_tunnel, mask, en);
>> }
>> +
>> +
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +int
>> +rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
>> + struct rte_vmbus_device *vmbus_dev)
>> +{
>> + struct eth_driver *eth_drv = (struct eth_driver *)vmbus_drv;
>> + struct rte_eth_dev *eth_dev;
>> + char ustr[UUID_BUF_SZ];
>> + int diag;
>> +
>> + uuid_unparse(vmbus_dev->device_id, ustr);
>> +
>> + eth_dev = rte_eth_dev_allocate(ustr);
>> + if (eth_dev == NULL)
>> + return -ENOMEM;
>> +
>> + if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
>> + eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
>> + eth_drv->dev_private_size,
>> + RTE_CACHE_LINE_SIZE);
>> + if (eth_dev->data->dev_private == NULL)
>> + rte_panic("Cannot allocate memzone for private port data\n");
>> + }
>> +
>> + eth_dev->device = &vmbus_dev->device;
>> + eth_dev->driver = eth_drv;
>> + eth_dev->data->rx_mbuf_alloc_failed = 0;
>> +
>> + /* init user callbacks */
>> + TAILQ_INIT(&(eth_dev->link_intr_cbs));
>> +
>> + /*
>> + * Set the default maximum frame size.
>> + */
>> + eth_dev->data->mtu = ETHER_MTU;
>> +
>> + /* Invoke PMD device initialization function */
>> + diag = (*eth_drv->eth_dev_init)(eth_dev);
>> + if (diag == 0)
>> + return 0;
>> +
>> + RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(%s) failed\n",
>> + vmbus_drv->driver.name, ustr);
>> +
>> + if (rte_eal_process_type() == RTE_PROC_PRIMARY)
>> + rte_free(eth_dev->data->dev_private);
>> +
>> + return diag;
>> +}
>> +
>> +int
>> +rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev)
>> +{
>> + const struct eth_driver *eth_drv;
>> + struct rte_eth_dev *eth_dev;
>> + char ustr[UUID_BUF_SZ];
>> + int ret;
>> +
>> + if (vmbus_dev == NULL)
>> + return -EINVAL;
>> +
>> + uuid_unparse(vmbus_dev->device_id, ustr);
>> + eth_dev = rte_eth_dev_allocated(ustr);
>> + if (eth_dev == NULL)
>> + return -ENODEV;
>> +
>> + eth_drv = (const struct eth_driver *)vmbus_dev->driver;
>> +
>> + /* Invoke PMD device uninit function */
>> + if (*eth_drv->eth_dev_uninit) {
>> + ret = (*eth_drv->eth_dev_uninit)(eth_dev);
>> + if (ret)
>> + return ret;
>> + }
>> +
>> + /* free ether device */
>> + rte_eth_dev_release_port(eth_dev);
>> +
>> + if (rte_eal_process_type() == RTE_PROC_PRIMARY)
>> + rte_free(eth_dev->data->dev_private);
>> +
>> + eth_dev->device = NULL;
>> + eth_dev->driver = NULL;
>> + eth_dev->data = NULL;
>> +
>> + return 0;
>> +}
>> +#endif
>> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
>> index 1a62a322..2a8c1eed 100644
>> --- a/lib/librte_ether/rte_ethdev.h
>> +++ b/lib/librte_ether/rte_ethdev.h
>> @@ -180,6 +180,9 @@ extern "C" {
>> #include <rte_log.h>
>> #include <rte_interrupts.h>
>> #include <rte_pci.h>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +#include <rte_vmbus.h>
>> +#endif
>> #include <rte_dev.h>
>> #include <rte_devargs.h>
>> #include <rte_errno.h>
>> @@ -1908,6 +1911,17 @@ struct rte_pci_eth_driver {
>> struct eth_driver eth_drv; /**< Ethernet driver. */
>> };
>>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +/**
>> + * @internal
>> + * The structure associated with a PMD VMBUS Ethernet driver.
>> + */
>> +struct rte_vmbus_eth_driver {
>> + struct rte_vmbus_driver vmbus_drv; /**< Underlying VMBUS driver. */
>> + struct eth_driver eth_drv; /**< Ethernet driver. */
>> +};
>> +#endif
>> +
>> /**
>> * Convert a numerical speed in Mbps to a bitmap flag that can be used in
>> * the bitmap link_speeds of the struct rte_eth_conf
>> @@ -4543,6 +4557,23 @@ int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
>> */
>> int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev);
>>
>> +#ifdef RTE_LIBRTE_HV_PMD
>> +/**
>> + * @internal
>> + * Wrapper for use by vmbus drivers as a .probe function to attach to a ethdev
>> + * interface.
>> + */
>> +int rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
>> + struct rte_vmbus_device *vmbus_dev);
>> +
>> +/**
>> + * @internal
>> + * Wrapper for use by vmbus drivers as a .remove function to detach a ethdev
>> + * interface.
>> + */
>> +int rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev);
>> +#endif
>> +
>> #ifdef __cplusplus
>> }
>> #endif
>> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
>> index f75f0e24..6b304084 100644
>> --- a/mk/rte.app.mk
>> +++ b/mk/rte.app.mk
>> @@ -130,6 +130,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
>> _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += -lrte_pmd_vhost
>> endif # $(CONFIG_RTE_LIBRTE_VHOST)
>> _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += -lrte_pmd_vmxnet3_uio
>> +_LDLIBS-$(CONFIG_RTE_LIBRTE_HV_PMD) += -luuid
>>
>> ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
>> _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB) += -lrte_pmd_aesni_mb
>> --
>> 2.11.0
>>
On Wed, 11 Jan 2017 22:13:32 +0100
Jan Blunck <jblunck@infradead.org> wrote:
> >> +static void *vmbus_map_addr;
> >> +
> >> +static struct rte_tailq_elem rte_vmbus_uio_tailq = {
> >> + .name = "UIO_RESOURCE_LIST",
>
> This should be VMBUS_UIO_RESOURCE_LIST to not collide with rte_uio_tailq.
Ok, please trim review comments. Trying to find comment in middle of
patch is a nuisance.
@@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
INC := rte_branch_prediction.h rte_common.h
INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
-INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
INC += rte_per_lcore.h rte_random.h
INC += rte_tailq.h rte_interrupts.h rte_alarm.h
INC += rte_string_fns.h rte_version.h
@@ -113,6 +113,13 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
goto fail;
break;
+ case RTE_DEVTYPE_WHITELISTED_VMBUS:
+ case RTE_DEVTYPE_BLACKLISTED_VMBUS:
+#ifdef RTE_LIBRTE_HV_PMD
+ if (uuid_parse(buf, devargs->uuid) == 0)
+ break;
+#endif
+ goto fail;
}
free(buf);
@@ -95,6 +95,11 @@ eal_long_options[] = {
{OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
{OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
{OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM },
+#ifdef RTE_LIBRTE_HV_PMD
+ {OPT_NO_VMBUS, 0, NULL, OPT_NO_VMBUS_NUM },
+ {OPT_VMBUS_BLACKLIST, 1, NULL, OPT_VMBUS_BLACKLIST_NUM },
+ {OPT_VMBUS_WHITELIST, 1, NULL, OPT_VMBUS_WHITELIST_NUM },
+#endif
{0, 0, NULL, 0 }
};
@@ -858,6 +863,21 @@ eal_parse_common_option(int opt, const char *optarg,
conf->no_pci = 1;
break;
+#ifdef RTE_LIBRTE_HV_PMD
+ case OPT_NO_VMBUS_NUM:
+ conf->no_vmbus = 1;
+ break;
+ case OPT_VMBUS_BLACKLIST_NUM:
+ if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_VMBUS,
+ optarg) < 0)
+ return -1;
+ break;
+ case OPT_VMBUS_WHITELIST_NUM:
+ if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_VMBUS,
+ optarg) < 0)
+ return -1;
+ break;
+#endif
case OPT_NO_HPET_NUM:
conf->no_hpet = 1;
break;
@@ -1017,6 +1037,14 @@ eal_check_common_options(struct internal_config *internal_cfg)
return -1;
}
+#ifdef RTE_LIBRTE_HV_PMD
+ if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_VMBUS) != 0 &&
+ rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_VMBUS) != 0) {
+ RTE_LOG(ERR, EAL, "Options vmbus blacklist and whitelist "
+ "cannot be used at the same time\n");
+ return -1;
+ }
+#endif
return 0;
}
@@ -1066,5 +1094,15 @@ eal_common_usage(void)
" --"OPT_NO_PCI" Disable PCI\n"
" --"OPT_NO_HPET" Disable HPET\n"
" --"OPT_NO_SHCONF" No shared config (mmap'd files)\n"
+#ifdef RTE_LIBRTE_HV_PMD
+ " --"OPT_NO_VMBUS" Disable VMBUS\n"
+ " --"OPT_VMBUS_BLACKLIST" Add a VMBUS device to black list.\n"
+ " Prevent EAL from using this PCI device. The argument\n"
+ " format is device UUID.\n"
+ " --"OPT_VMBUS_WHITELIST" Add a VMBUS device to white list.\n"
+ " Only use the specified VMBUS devices. The argument format\n"
+ " is device UUID This option can be present\n"
+ " several times (once per device).\n"
+#endif
"\n", RTE_MAX_LCORE);
}
@@ -67,6 +67,7 @@ struct internal_config {
unsigned hugepage_unlink; /**< true to unlink backing files */
volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
volatile unsigned no_pci; /**< true to disable PCI */
+ volatile unsigned no_vmbus; /**< true to disable VMBUS */
volatile unsigned no_hpet; /**< true to disable HPET */
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
* instead of native TSC */
@@ -83,6 +83,12 @@ enum {
OPT_VMWARE_TSC_MAP_NUM,
#define OPT_XEN_DOM0 "xen-dom0"
OPT_XEN_DOM0_NUM,
+#define OPT_NO_VMBUS "no-vmbus"
+ OPT_NO_VMBUS_NUM,
+#define OPT_VMBUS_BLACKLIST "vmbus-blacklist"
+ OPT_VMBUS_BLACKLIST_NUM,
+#define OPT_VMBUS_WHITELIST "vmbus-whitelist"
+ OPT_VMBUS_WHITELIST_NUM,
OPT_LONG_MAX_NUM
};
@@ -210,6 +210,11 @@ int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
struct mapped_pci_resource *uio_res, int map_idx);
/**
+ * VMBUS related functions and structures
+ */
+int rte_eal_vmbus_init(void);
+
+/**
* Init tail queues for non-EAL library structures. This is to allow
* the rings, mempools, etc. lists to be shared among multiple processes
*
@@ -51,6 +51,9 @@ extern "C" {
#include <stdio.h>
#include <sys/queue.h>
#include <rte_pci.h>
+#ifdef RTE_LIBRTE_HV_PMD
+#include <uuid/uuid.h>
+#endif
/**
* Type of generic device
@@ -59,6 +62,8 @@ enum rte_devtype {
RTE_DEVTYPE_WHITELISTED_PCI,
RTE_DEVTYPE_BLACKLISTED_PCI,
RTE_DEVTYPE_VIRTUAL,
+ RTE_DEVTYPE_WHITELISTED_VMBUS,
+ RTE_DEVTYPE_BLACKLISTED_VMBUS,
};
/**
@@ -88,6 +93,9 @@ struct rte_devargs {
/** Driver name. */
char drv_name[32];
} virt;
+#ifdef RTE_LIBRTE_HV_PMD
+ uuid_t uuid;
+#endif
};
/** Arguments string as given by user or "" for no argument. */
char *args;
new file mode 100644
@@ -0,0 +1,249 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
+ * Copyright(c) 2016 Microsoft Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_VMBUS_H_
+#define _RTE_VMBUS_H_
+
+/**
+ * @file
+ *
+ * RTE VMBUS Interface
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include <uuid/uuid.h>
+#include <sys/queue.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_debug.h>
+#include <rte_interrupts.h>
+#include <rte_dev.h>
+
+TAILQ_HEAD(vmbus_device_list, rte_vmbus_device);
+TAILQ_HEAD(vmbus_driver_list, rte_vmbus_driver);
+
+extern struct vmbus_driver_list vmbus_driver_list;
+extern struct vmbus_device_list vmbus_device_list;
+
+/** Pathname of VMBUS devices directory. */
+#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
+
+#define UUID_BUF_SZ (36 + 1)
+
+
+/** Maximum number of VMBUS resources. */
+#define VMBUS_MAX_RESOURCE 7
+
+/**
+ * A structure describing a VMBUS device.
+ */
+struct rte_vmbus_device {
+ TAILQ_ENTRY(rte_vmbus_device) next; /**< Next probed VMBUS device. */
+ struct rte_device device; /**< Inherit core device */
+ uuid_t device_id; /**< VMBUS device id */
+ uuid_t class_id; /**< VMBUS device type */
+ uint32_t relid; /**< VMBUS id for notification */
+ uint8_t monitor_id;
+ struct rte_intr_handle intr_handle; /**< Interrupt handle */
+ const struct rte_vmbus_driver *driver; /**< Associated driver */
+
+ struct rte_mem_resource mem_resource[VMBUS_MAX_RESOURCE];
+ /**< VMBUS Memory Resource */
+ char sysfs_name[]; /**< Name in sysfs bus directory */
+};
+
+struct rte_vmbus_driver;
+
+/**
+ * Initialisation function for the driver called during VMBUS probing.
+ */
+typedef int (vmbus_probe_t)(struct rte_vmbus_driver *,
+ struct rte_vmbus_device *);
+
+/**
+ * Uninitialisation function for the driver called during hotplugging.
+ */
+typedef int (vmbus_remove_t)(struct rte_vmbus_device *);
+
+/**
+ * A structure describing a VMBUS driver.
+ */
+struct rte_vmbus_driver {
+ TAILQ_ENTRY(rte_vmbus_driver) next; /**< Next in list. */
+ struct rte_driver driver;
+ vmbus_probe_t *probe; /**< Device Probe function. */
+ vmbus_remove_t *remove; /**< Device Remove function. */
+
+ const uuid_t *id_table; /**< ID table. */
+};
+
+struct vmbus_map {
+ void *addr;
+ char *path;
+ uint64_t offset;
+ uint64_t size;
+ uint64_t phaddr;
+};
+
+/*
+ * For multi-process we need to reproduce all vmbus mappings in secondary
+ * processes, so save them in a tailq.
+ */
+struct mapped_vmbus_resource {
+ TAILQ_ENTRY(mapped_vmbus_resource) next;
+
+ uuid_t uuid;
+ char path[PATH_MAX];
+ int nb_maps;
+ struct vmbus_map maps[VMBUS_MAX_RESOURCE];
+};
+
+TAILQ_HEAD(mapped_vmbus_res_list, mapped_vmbus_resource);
+
+/**
+ * Scan the content of the VMBUS bus, and the devices in the devices list
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_vmbus_scan(void);
+
+/**
+ * Probe the VMBUS bus for registered drivers.
+ *
+ * Scan the content of the VMBUS bus, and call the probe() function for
+ * all registered drivers that have a matching entry in its id_table
+ * for discovered devices.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int rte_eal_vmbus_probe(void);
+
+/**
+ * Map the VMBUS device resources in user space virtual memory address
+ *
+ * @param dev
+ * A pointer to a rte_vmbus_device structure describing the device
+ * to use
+ *
+ * @return
+ * 0 on success, negative on error and positive if no driver
+ * is found for the device.
+ */
+int rte_eal_vmbus_map_device(struct rte_vmbus_device *dev);
+
+/**
+ * Unmap this device
+ *
+ * @param dev
+ * A pointer to a rte_vmbus_device structure describing the device
+ * to use
+ */
+void rte_eal_vmbus_unmap_device(struct rte_vmbus_device *dev);
+
+/**
+ * Probe the single VMBUS device.
+ *
+ * Scan the content of the VMBUS bus, and find the vmbus device
+ * specified by device uuid, then call the probe() function for
+ * registered driver that has a matching entry in its id_table for
+ * discovered device.
+ *
+ * @param id
+ * The VMBUS device uuid.
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int rte_eal_vmbus_probe_one(uuid_t id);
+
+/**
+ * Close the single VMBUS device.
+ *
+ * Scan the content of the VMBUS bus, and find the vmbus device id,
+ * then call the remove() function for registered driver that has a
+ * matching entry in its id_table for discovered device.
+ *
+ * @param id
+ * The VMBUS device uuid.
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int rte_eal_vmbus_detach(uuid_t id);
+
+/**
+ * Register a VMBUS driver.
+ *
+ * @param driver
+ * A pointer to a rte_vmbus_driver structure describing the driver
+ * to be registered.
+ */
+void rte_eal_vmbus_register(struct rte_vmbus_driver *driver);
+
+/** Helper for VMBUS device registration from driver nstance */
+#define RTE_PMD_REGISTER_VMBUS(nm, vmbus_drv) \
+RTE_INIT(vmbusinitfn_ ##nm); \
+static void vmbusinitfn_ ##nm(void) \
+{\
+ (vmbus_drv).driver.name = RTE_STR(nm);\
+ (vmbus_drv).driver.type = PMD_VMBUS; \
+ rte_eal_vmbus_register(&vmbus_drv); \
+} \
+RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
+
+/**
+ * Unregister a VMBUS driver.
+ *
+ * @param driver
+ * A pointer to a rte_vmbus_driver structure describing the driver
+ * to be unregistered.
+ */
+void rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_VMBUS_H_ */
@@ -71,6 +71,11 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
+ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vmbus.c
+LDLIBS += -luuid
+endif
+
# from common dir
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
@@ -114,6 +119,7 @@ CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
CFLAGS_eal_pci.o := -D_GNU_SOURCE
CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE
CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE
+CFLAGS_eal_vmbux.o := -D_GNU_SOURCE
CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
CFLAGS_eal_common_options.o := -D_GNU_SOURCE
CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
@@ -70,6 +70,9 @@
#include <rte_cpuflags.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
+#ifdef RTE_LIBRTE_HV_PMD
+#include <rte_vmbus.h>
+#endif
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_common.h>
@@ -830,6 +833,11 @@ rte_eal_init(int argc, char **argv)
eal_check_mem_on_local_socket();
+#ifdef RTE_LIBRTE_HV_PMD
+ if (rte_eal_vmbus_init() < 0)
+ RTE_LOG(ERR, EAL, "Cannot init VMBUS\n");
+#endif
+
if (eal_plugins_init() < 0)
rte_panic("Cannot init plugins\n");
@@ -884,6 +892,11 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_pci_probe())
rte_panic("Cannot probe PCI\n");
+#ifdef RTE_LIBRTE_HV_PMD
+ if (rte_eal_vmbus_probe() < 0)
+ rte_panic("Cannot probe VMBUS\n");
+#endif
+
if (rte_eal_dev_init() < 0)
rte_panic("Cannot init pmd devices\n");
new file mode 100644
@@ -0,0 +1,911 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
+ * Copyright(c) 2016 Microsoft Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include <rte_eal.h>
+#include <rte_tailq.h>
+#include <rte_log.h>
+#include <rte_devargs.h>
+#include <rte_vmbus.h>
+#include <rte_malloc.h>
+
+#include "eal_private.h"
+#include "eal_pci_init.h"
+#include "eal_filesystem.h"
+
+struct vmbus_driver_list vmbus_driver_list =
+ TAILQ_HEAD_INITIALIZER(vmbus_driver_list);
+struct vmbus_device_list vmbus_device_list =
+ TAILQ_HEAD_INITIALIZER(vmbus_device_list);
+
+static void *vmbus_map_addr;
+
+static struct rte_tailq_elem rte_vmbus_uio_tailq = {
+ .name = "UIO_RESOURCE_LIST",
+};
+EAL_REGISTER_TAILQ(rte_vmbus_uio_tailq);
+
+/*
+ * parse a sysfs file containing one integer value
+ * different to the eal version, as it needs to work with 64-bit values
+ */
+static int
+vmbus_get_sysfs_uuid(const char *filename, uuid_t uu)
+{
+ char buf[BUFSIZ];
+ char *cp, *in = buf;
+ FILE *f;
+
+ f = fopen(filename, "r");
+ if (f == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+ __func__, filename);
+ return -1;
+ }
+
+ if (fgets(buf, sizeof(buf), f) == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+ __func__, filename);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+
+ cp = strchr(buf, '\n');
+ if (cp)
+ *cp = '\0';
+
+ /* strip { } notation */
+ if (buf[0] == '{') {
+ in = buf + 1;
+ cp = strchr(in, '}');
+ if (cp)
+ *cp = '\0';
+ }
+
+ if (uuid_parse(in, uu) < 0) {
+ RTE_LOG(ERR, EAL, "%s %s not a valid UUID\n",
+ filename, buf);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* map a particular resource from a file */
+static void *
+vmbus_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
+ int flags)
+{
+ void *mapaddr;
+
+ /* Map the memory resource of device */
+ mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | flags, fd, offset);
+ if (mapaddr == MAP_FAILED ||
+ (requested_addr != NULL && mapaddr != requested_addr)) {
+ RTE_LOG(ERR, EAL,
+ "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s)\n",
+ __func__, fd, requested_addr,
+ (unsigned long)size, (unsigned long)offset,
+ strerror(errno));
+ } else
+ RTE_LOG(DEBUG, EAL, " VMBUS memory mapped at %p\n", mapaddr);
+
+ return mapaddr;
+}
+
+/* unmap a particular resource */
+static void
+vmbus_unmap_resource(void *requested_addr, size_t size)
+{
+ if (requested_addr == NULL)
+ return;
+
+ /* Unmap the VMBUS memory resource of device */
+ if (munmap(requested_addr, size)) {
+ RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
+ __func__, requested_addr, (unsigned long)size,
+ strerror(errno));
+ } else
+ RTE_LOG(DEBUG, EAL, " VMBUS memory unmapped at %p\n",
+ requested_addr);
+}
+
+/* Only supports current kernel version
+ * Unlike PCI there is no option (or need) to create UIO device.
+ */
+static int vmbus_get_uio_dev(const char *name,
+ char *dstbuf, size_t buflen)
+{
+ char dirname[PATH_MAX];
+ unsigned int uio_num;
+ struct dirent *e;
+ DIR *dir;
+
+ snprintf(dirname, sizeof(dirname),
+ "/sys/bus/vmbus/devices/%s/uio", name);
+
+ dir = opendir(dirname);
+ if (dir == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot map uio resources for %s: %s\n",
+ name, strerror(errno));
+ return -1;
+ }
+
+ /* take the first file starting with "uio" */
+ while ((e = readdir(dir)) != NULL) {
+ if (sscanf(e->d_name, "uio%u", &uio_num) != 1)
+ continue;
+
+ snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num);
+ break;
+ }
+ closedir(dir);
+
+ return e ? (int) uio_num : -1;
+}
+
+/*
+ * parse a sysfs file containing one integer value
+ * different to the eal version, as it needs to work with 64-bit values
+ */
+static int
+vmbus_parse_sysfs_value(const char *dir, const char *name,
+ uint64_t *val)
+{
+ char filename[PATH_MAX];
+ FILE *f;
+ char buf[BUFSIZ];
+ char *end = NULL;
+
+ snprintf(filename, sizeof(filename), "%s/%s", dir, name);
+ f = fopen(filename, "r");
+ if (f == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+ __func__, filename);
+ return -1;
+ }
+
+ if (fgets(buf, sizeof(buf), f) == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+ __func__, filename);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+
+ *val = strtoull(buf, &end, 0);
+ if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+ RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+ __func__, filename);
+ return -1;
+ }
+ return 0;
+}
+
+/* Get mappings out of values provided by uio */
+static int
+vmbus_uio_get_mappings(const char *uioname,
+ struct vmbus_map maps[])
+{
+ int i;
+
+ for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
+ struct vmbus_map *map = &maps[i];
+ char dirname[PATH_MAX];
+
+ /* check if map directory exists */
+ snprintf(dirname, sizeof(dirname),
+ "%s/maps/map%d", uioname, i);
+
+ if (access(dirname, F_OK) != 0)
+ break;
+
+ /* get mapping offset */
+ if (vmbus_parse_sysfs_value(dirname, "offset",
+ &map->offset) < 0)
+ return -1;
+
+ /* get mapping size */
+ if (vmbus_parse_sysfs_value(dirname, "size",
+ &map->size) < 0)
+ return -1;
+
+ /* get mapping physical address */
+ if (vmbus_parse_sysfs_value(dirname, "addr",
+ &maps->phaddr) < 0)
+ return -1;
+ }
+
+ return i;
+}
+
+static void
+vmbus_uio_free_resource(struct rte_vmbus_device *dev,
+ struct mapped_vmbus_resource *uio_res)
+{
+ rte_free(uio_res);
+
+ if (dev->intr_handle.fd) {
+ close(dev->intr_handle.fd);
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ }
+}
+
+static struct mapped_vmbus_resource *
+vmbus_uio_alloc_resource(struct rte_vmbus_device *dev)
+{
+ struct mapped_vmbus_resource *uio_res;
+ char dirname[PATH_MAX], devname[PATH_MAX];
+ int uio_num, nb_maps;
+
+ uio_num = vmbus_get_uio_dev(dev->sysfs_name, dirname, sizeof(dirname));
+ if (uio_num < 0) {
+ RTE_LOG(WARNING, EAL,
+ " %s not managed by UIO driver, skipping\n",
+ dev->sysfs_name);
+ return NULL;
+ }
+
+ /* allocate the mapping details for secondary processes*/
+ uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
+ if (uio_res == NULL) {
+ RTE_LOG(ERR, EAL,
+ "%s(): cannot store uio mmap details\n", __func__);
+ goto error;
+ }
+
+ snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
+ dev->intr_handle.fd = open(devname, O_RDWR);
+ if (dev->intr_handle.fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ devname, strerror(errno));
+ goto error;
+ }
+
+ dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
+
+ snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
+ uuid_copy(uio_res->uuid, dev->device_id);
+
+ nb_maps = vmbus_uio_get_mappings(dirname, uio_res->maps);
+ if (nb_maps < 0)
+ goto error;
+
+ RTE_LOG(DEBUG, EAL, "Found %d memory maps for device %s\n",
+ nb_maps, dev->sysfs_name);
+
+ return uio_res;
+
+ error:
+ vmbus_uio_free_resource(dev, uio_res);
+ return NULL;
+}
+
+static int
+vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev,
+ unsigned int res_idx,
+ struct mapped_vmbus_resource *uio_res,
+ unsigned int map_idx)
+{
+ struct vmbus_map *maps = uio_res->maps;
+ char devname[PATH_MAX];
+ void *mapaddr;
+ int fd;
+
+ snprintf(devname, sizeof(devname),
+ "/sys/bus/vmbus/%s/resource%u", dev->sysfs_name, res_idx);
+
+ fd = open(devname, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ devname, strerror(errno));
+ return -1;
+ }
+
+ /* allocate memory to keep path */
+ maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
+ if (maps[map_idx].path == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ /* try mapping somewhere close to the end of hugepages */
+ if (vmbus_map_addr == NULL)
+ vmbus_map_addr = pci_find_max_end_va();
+
+ mapaddr = vmbus_map_resource(vmbus_map_addr, fd, 0,
+ dev->mem_resource[res_idx].len, 0);
+ close(fd);
+ if (mapaddr == MAP_FAILED) {
+ rte_free(maps[map_idx].path);
+ return -1;
+ }
+
+ vmbus_map_addr = RTE_PTR_ADD(mapaddr,
+ dev->mem_resource[res_idx].len);
+
+ maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
+ maps[map_idx].size = dev->mem_resource[res_idx].len;
+ maps[map_idx].addr = mapaddr;
+ maps[map_idx].offset = 0;
+ strcpy(maps[map_idx].path, devname);
+ dev->mem_resource[res_idx].addr = mapaddr;
+
+ return 0;
+}
+
+static void
+vmbus_uio_unmap(struct mapped_vmbus_resource *uio_res)
+{
+ int i;
+
+ if (uio_res == NULL)
+ return;
+
+ for (i = 0; i != uio_res->nb_maps; i++) {
+ vmbus_unmap_resource(uio_res->maps[i].addr,
+ uio_res->maps[i].size);
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_free(uio_res->maps[i].path);
+ }
+}
+
+static struct mapped_vmbus_resource *
+vmbus_uio_find_resource(struct rte_vmbus_device *dev)
+{
+ struct mapped_vmbus_resource *uio_res;
+ struct mapped_vmbus_res_list *uio_res_list =
+ RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
+ mapped_vmbus_res_list);
+
+ if (dev == NULL)
+ return NULL;
+
+ TAILQ_FOREACH(uio_res, uio_res_list, next) {
+ if (uuid_compare(uio_res->uuid, dev->device_id) == 0)
+ return uio_res;
+ }
+ return NULL;
+}
+
+/* unmap the VMBUS resource of a VMBUS device in virtual memory */
+static void
+vmbus_uio_unmap_resource(struct rte_vmbus_device *dev)
+{
+ struct mapped_vmbus_resource *uio_res;
+ struct mapped_vmbus_res_list *uio_res_list =
+ RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
+ mapped_vmbus_res_list);
+
+ if (dev == NULL)
+ return;
+
+ /* find an entry for the device */
+ uio_res = vmbus_uio_find_resource(dev);
+ if (uio_res == NULL)
+ return;
+
+ /* secondary processes - just free maps */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return vmbus_uio_unmap(uio_res);
+
+ TAILQ_REMOVE(uio_res_list, uio_res, next);
+
+ /* unmap all resources */
+ vmbus_uio_unmap(uio_res);
+
+ /* free uio resource */
+ rte_free(uio_res);
+
+ /* close fd if in primary process */
+ close(dev->intr_handle.fd);
+ if (dev->intr_handle.uio_cfg_fd >= 0) {
+ close(dev->intr_handle.uio_cfg_fd);
+ dev->intr_handle.uio_cfg_fd = -1;
+ }
+
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+}
+
+static int
+vmbus_uio_map_secondary(struct rte_vmbus_device *dev)
+{
+ struct mapped_vmbus_resource *uio_res;
+ struct mapped_vmbus_res_list *uio_res_list =
+ RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head,
+ mapped_vmbus_res_list);
+
+ TAILQ_FOREACH(uio_res, uio_res_list, next) {
+ int i;
+
+ /* skip this element if it doesn't match our id */
+ if (uuid_compare(uio_res->uuid, dev->device_id))
+ continue;
+
+ for (i = 0; i != uio_res->nb_maps; i++) {
+ void *mapaddr;
+ int fd;
+
+ fd = open(uio_res->maps[i].path, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ uio_res->maps[i].path, strerror(errno));
+ return -1;
+ }
+
+ mapaddr = vmbus_map_resource(uio_res->maps[i].addr, fd,
+ uio_res->maps[i].offset,
+ uio_res->maps[i].size, 0);
+ /* fd is not needed in slave process, close it */
+ close(fd);
+
+ if (mapaddr == uio_res->maps[i].addr)
+ continue;
+
+ RTE_LOG(ERR, EAL,
+ "Cannot mmap device resource file %s to address: %p\n",
+ uio_res->maps[i].path,
+ uio_res->maps[i].addr);
+
+ /* unmap addrs correctly mapped */
+ while (i != 0) {
+ --i;
+ vmbus_unmap_resource(uio_res->maps[i].addr,
+ uio_res->maps[i].size);
+ }
+ return -1;
+
+ }
+ return 0;
+ }
+
+ RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
+ return 1;
+}
+
+/* map the resources of a vmbus device in virtual memory */
+int
+rte_eal_vmbus_map_device(struct rte_vmbus_device *dev)
+{
+ struct mapped_vmbus_resource *uio_res;
+ struct mapped_vmbus_res_list *uio_res_list =
+ RTE_TAILQ_CAST(rte_vmbus_uio_tailq.head, mapped_vmbus_res_list);
+ int i, ret, map_idx = 0;
+
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.uio_cfg_fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+ /* secondary processes - use already recorded details */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return vmbus_uio_map_secondary(dev);
+
+ /* allocate uio resource */
+ uio_res = vmbus_uio_alloc_resource(dev);
+ if (uio_res == NULL)
+ return -1;
+
+ /* Map all BARs */
+ for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
+ uint64_t phaddr;
+
+ /* skip empty BAR */
+ phaddr = dev->mem_resource[i].phys_addr;
+ if (phaddr == 0)
+ continue;
+
+ ret = vmbus_uio_map_resource_by_index(dev, i,
+ uio_res, map_idx);
+ if (ret)
+ goto error;
+
+ map_idx++;
+ }
+
+ uio_res->nb_maps = map_idx;
+
+ TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
+
+ return 0;
+error:
+ for (i = 0; i < map_idx; i++) {
+ vmbus_unmap_resource(uio_res->maps[i].addr,
+ uio_res->maps[i].size);
+ rte_free(uio_res->maps[i].path);
+ }
+ vmbus_uio_free_resource(dev, uio_res);
+ return -1;
+}
+
+/* Scan one vmbus sysfs entry, and fill the devices list from it. */
+static int
+vmbus_scan_one(const char *name)
+{
+ struct rte_vmbus_device *dev, *dev2;
+ char filename[PATH_MAX];
+ char dirname[PATH_MAX];
+ unsigned long tmp;
+
+ dev = malloc(sizeof(*dev) + strlen(name) + 1);
+ if (dev == NULL)
+ return -1;
+
+ memset(dev, 0, sizeof(*dev));
+ strcpy(dev->sysfs_name, name);
+ if (dev->sysfs_name == NULL)
+ goto error;
+
+ /* sysfs base directory
+ * /sys/bus/vmbus/devices/7a08391f-f5a0-4ac0-9802-d13fd964f8df
+ * or on older kernel
+ * /sys/bus/vmbus/devices/vmbus_1
+ */
+ snprintf(dirname, sizeof(dirname), "%s/%s",
+ SYSFS_VMBUS_DEVICES, name);
+
+ /* get device id */
+ snprintf(filename, sizeof(filename), "%s/device_id", dirname);
+ if (vmbus_get_sysfs_uuid(filename, dev->device_id) < 0)
+ goto error;
+
+ /* get device class */
+ snprintf(filename, sizeof(filename), "%s/class_id", dirname);
+ if (vmbus_get_sysfs_uuid(filename, dev->class_id) < 0)
+ goto error;
+
+ /* get relid */
+ snprintf(filename, sizeof(filename), "%s/id", dirname);
+ if (eal_parse_sysfs_value(filename, &tmp) < 0)
+ goto error;
+ dev->relid = tmp;
+
+ /* get monitor id */
+ snprintf(filename, sizeof(filename), "%s/monitor_id", dirname);
+ if (eal_parse_sysfs_value(filename, &tmp) < 0)
+ goto error;
+ dev->monitor_id = tmp;
+
+ /* get numa node */
+ snprintf(filename, sizeof(filename), "%s/numa_node",
+ dirname);
+ if (eal_parse_sysfs_value(filename, &tmp) < 0)
+ /* if no NUMA support, set default to 0 */
+ dev->device.numa_node = 0;
+ else
+ dev->device.numa_node = tmp;
+
+ /* device is valid, add in list (sorted) */
+ RTE_LOG(DEBUG, EAL, "Adding vmbus device %s\n", name);
+
+ TAILQ_FOREACH(dev2, &vmbus_device_list, next) {
+ int ret;
+
+ ret = uuid_compare(dev->device_id, dev->device_id);
+ if (ret > 0)
+ continue;
+
+ if (ret < 0) {
+ TAILQ_INSERT_BEFORE(dev2, dev, next);
+ rte_eal_device_insert(&dev->device);
+ } else { /* already registered */
+ memmove(dev2->mem_resource, dev->mem_resource,
+ sizeof(dev->mem_resource));
+ free(dev);
+ }
+ return 0;
+ }
+
+ rte_eal_device_insert(&dev->device);
+ TAILQ_INSERT_TAIL(&vmbus_device_list, dev, next);
+
+ return 0;
+error:
+ free(dev);
+ return -1;
+}
+
+/*
+ * Scan the content of the vmbus, and the devices in the devices list
+ */
+static int
+vmbus_scan(void)
+{
+ struct dirent *e;
+ DIR *dir;
+
+ dir = opendir(SYSFS_VMBUS_DEVICES);
+ if (dir == NULL) {
+ if (errno == ENOENT)
+ return 0;
+
+ RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ while ((e = readdir(dir)) != NULL) {
+ if (e->d_name[0] == '.')
+ continue;
+
+ if (vmbus_scan_one(e->d_name) < 0)
+ goto error;
+ }
+ closedir(dir);
+ return 0;
+
+error:
+ closedir(dir);
+ return -1;
+}
+
+/* Init the VMBUS EAL subsystem */
+int rte_eal_vmbus_init(void)
+{
+ /* VMBUS can be disabled */
+ if (internal_config.no_vmbus)
+ return 0;
+
+ if (vmbus_scan() < 0) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot scan vmbus\n", __func__);
+ return -1;
+ }
+ return 0;
+}
+
+/* Below is PROBE part of eal_vmbus library */
+
+/*
+ * If device ID match, call the devinit() function of the driver.
+ */
+static int
+rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+ struct rte_vmbus_device *dev)
+{
+ const uuid_t *id_table;
+
+ RTE_LOG(DEBUG, EAL, " probe driver: %s\n", dr->driver.name);
+
+ for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
+ struct rte_devargs *args;
+ char guid[UUID_BUF_SZ];
+ int ret;
+
+ /* skip devices not assocaited with this device class */
+ if (uuid_compare(*id_table, dev->class_id) != 0)
+ continue;
+
+ uuid_unparse(dev->device_id, guid);
+ RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
+ guid, dev->device.numa_node);
+
+ /* no initialization when blacklisted, return without error */
+ args = dev->device.devargs;
+ if (args && args->type == RTE_DEVTYPE_BLACKLISTED_VMBUS) {
+ RTE_LOG(INFO, EAL, " Device is blacklisted, not initializing\n");
+ return 1;
+ }
+
+ RTE_LOG(INFO, EAL, " probe driver: %s\n", dr->driver.name);
+
+ /* map resources for device */
+ ret = rte_eal_vmbus_map_device(dev);
+ if (ret != 0)
+ return ret;
+
+ /* reference driver structure */
+ dev->driver = dr;
+
+ /* call the driver probe() function */
+ ret = dr->probe(dr, dev);
+ if (ret)
+ dev->driver = NULL;
+
+ return ret;
+ }
+
+ /* return positive value if driver doesn't support this device */
+ return 1;
+}
+
+
+/*
+ * If vendor/device ID match, call the remove() function of the
+ * driver.
+ */
+static int
+vmbus_detach_dev(struct rte_vmbus_driver *dr,
+ struct rte_vmbus_device *dev)
+{
+ const uuid_t *id_table;
+
+ for (id_table = dr->id_table; !uuid_is_null(*id_table); ++id_table) {
+ char guid[UUID_BUF_SZ];
+
+ /* skip devices not assocaited with this device class */
+ if (uuid_compare(*id_table, dev->class_id) != 0)
+ continue;
+
+ uuid_unparse(dev->device_id, guid);
+ RTE_LOG(INFO, EAL, "VMBUS device %s on NUMA socket %i\n",
+ guid, dev->device.numa_node);
+
+ RTE_LOG(DEBUG, EAL, " remove driver: %s\n", dr->driver.name);
+
+ if (dr->remove && (dr->remove(dev) < 0))
+ return -1; /* negative value is an error */
+
+ /* clear driver structure */
+ dev->driver = NULL;
+
+ vmbus_uio_unmap_resource(dev);
+ return 0;
+ }
+
+ /* return positive value if driver doesn't support this device */
+ return 1;
+}
+
+/*
+ * call the devinit() function of all
+ * registered drivers for the vmbus device. Return -1 if no driver is
+ * found for this class of vmbus device.
+ * The present assumption is that we have drivers only for vmbus network
+ * devices. That's why we don't check driver's id_table now.
+ */
+static int
+vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
+{
+ struct rte_vmbus_driver *dr = NULL;
+ int ret;
+
+ TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
+ ret = rte_eal_vmbus_probe_one_driver(dr, dev);
+ if (ret < 0) {
+ /* negative value is an error */
+ RTE_LOG(ERR, EAL, "Failed to probe driver %s\n",
+ dr->driver.name);
+ return -1;
+ }
+ /* positive value means driver doesn't support it */
+ if (ret > 0)
+ continue;
+
+ return 0;
+ }
+
+ return 1;
+}
+
+
+/*
+ * If device ID matches, call the remove() function of all
+ * registered driver for the given device. Return -1 if initialization
+ * failed, return 1 if no driver is found for this device.
+ */
+static int
+vmbus_detach_all_drivers(struct rte_vmbus_device *dev)
+{
+ struct rte_vmbus_driver *dr;
+ int rc = 0;
+
+ if (dev == NULL)
+ return -1;
+
+ TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
+ rc = vmbus_detach_dev(dr, dev);
+ if (rc < 0)
+ /* negative value is an error */
+ return -1;
+ if (rc > 0)
+ /* positive value means driver doesn't support it */
+ continue;
+ return 0;
+ }
+ return 1;
+}
+
+/* Detach device specified by its VMBUS id */
+int
+rte_eal_vmbus_detach(uuid_t device_id)
+{
+ struct rte_vmbus_device *dev;
+ char ubuf[UUID_BUF_SZ];
+
+ TAILQ_FOREACH(dev, &vmbus_device_list, next) {
+ if (uuid_compare(dev->device_id, device_id) != 0)
+ continue;
+
+ if (vmbus_detach_all_drivers(dev) < 0)
+ goto err_return;
+
+ TAILQ_REMOVE(&vmbus_device_list, dev, next);
+ free(dev);
+ return 0;
+ }
+ return -1;
+
+err_return:
+ uuid_unparse(device_id, ubuf);
+ RTE_LOG(WARNING, EAL, "Requested device %s cannot be used\n",
+ ubuf);
+ return -1;
+}
+
+/*
+ * Scan the vmbus, and call the devinit() function for
+ * all registered drivers that have a matching entry in its id_table
+ * for discovered devices.
+ */
+int
+rte_eal_vmbus_probe(void)
+{
+ struct rte_vmbus_device *dev = NULL;
+
+ TAILQ_FOREACH(dev, &vmbus_device_list, next) {
+ char ubuf[UUID_BUF_SZ];
+
+ uuid_unparse(dev->device_id, ubuf);
+
+ RTE_LOG(DEBUG, EAL, "Probing driver for device %s ...\n",
+ ubuf);
+ vmbus_probe_all_drivers(dev);
+ }
+ return 0;
+}
+
+/* register vmbus driver */
+void
+rte_eal_vmbus_register(struct rte_vmbus_driver *driver)
+{
+ TAILQ_INSERT_TAIL(&vmbus_driver_list, driver, next);
+}
+
+/* unregister vmbus driver */
+void
+rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver)
+{
+ TAILQ_REMOVE(&vmbus_driver_list, driver, next);
+}
@@ -3334,3 +3334,93 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
-ENOTSUP);
return (*dev->dev_ops->l2_tunnel_offload_set)(dev, l2_tunnel, mask, en);
}
+
+
+#ifdef RTE_LIBRTE_HV_PMD
+int
+rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
+ struct rte_vmbus_device *vmbus_dev)
+{
+ struct eth_driver *eth_drv = (struct eth_driver *)vmbus_drv;
+ struct rte_eth_dev *eth_dev;
+ char ustr[UUID_BUF_SZ];
+ int diag;
+
+ uuid_unparse(vmbus_dev->device_id, ustr);
+
+ eth_dev = rte_eth_dev_allocate(ustr);
+ if (eth_dev == NULL)
+ return -ENOMEM;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
+ eth_drv->dev_private_size,
+ RTE_CACHE_LINE_SIZE);
+ if (eth_dev->data->dev_private == NULL)
+ rte_panic("Cannot allocate memzone for private port data\n");
+ }
+
+ eth_dev->device = &vmbus_dev->device;
+ eth_dev->driver = eth_drv;
+ eth_dev->data->rx_mbuf_alloc_failed = 0;
+
+ /* init user callbacks */
+ TAILQ_INIT(&(eth_dev->link_intr_cbs));
+
+ /*
+ * Set the default maximum frame size.
+ */
+ eth_dev->data->mtu = ETHER_MTU;
+
+ /* Invoke PMD device initialization function */
+ diag = (*eth_drv->eth_dev_init)(eth_dev);
+ if (diag == 0)
+ return 0;
+
+ RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(%s) failed\n",
+ vmbus_drv->driver.name, ustr);
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_free(eth_dev->data->dev_private);
+
+ return diag;
+}
+
+int
+rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev)
+{
+ const struct eth_driver *eth_drv;
+ struct rte_eth_dev *eth_dev;
+ char ustr[UUID_BUF_SZ];
+ int ret;
+
+ if (vmbus_dev == NULL)
+ return -EINVAL;
+
+ uuid_unparse(vmbus_dev->device_id, ustr);
+ eth_dev = rte_eth_dev_allocated(ustr);
+ if (eth_dev == NULL)
+ return -ENODEV;
+
+ eth_drv = (const struct eth_driver *)vmbus_dev->driver;
+
+ /* Invoke PMD device uninit function */
+ if (*eth_drv->eth_dev_uninit) {
+ ret = (*eth_drv->eth_dev_uninit)(eth_dev);
+ if (ret)
+ return ret;
+ }
+
+ /* free ether device */
+ rte_eth_dev_release_port(eth_dev);
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_free(eth_dev->data->dev_private);
+
+ eth_dev->device = NULL;
+ eth_dev->driver = NULL;
+ eth_dev->data = NULL;
+
+ return 0;
+}
+#endif
@@ -180,6 +180,9 @@ extern "C" {
#include <rte_log.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
+#ifdef RTE_LIBRTE_HV_PMD
+#include <rte_vmbus.h>
+#endif
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_errno.h>
@@ -1908,6 +1911,17 @@ struct rte_pci_eth_driver {
struct eth_driver eth_drv; /**< Ethernet driver. */
};
+#ifdef RTE_LIBRTE_HV_PMD
+/**
+ * @internal
+ * The structure associated with a PMD VMBUS Ethernet driver.
+ */
+struct rte_vmbus_eth_driver {
+ struct rte_vmbus_driver vmbus_drv; /**< Underlying VMBUS driver. */
+ struct eth_driver eth_drv; /**< Ethernet driver. */
+};
+#endif
+
/**
* Convert a numerical speed in Mbps to a bitmap flag that can be used in
* the bitmap link_speeds of the struct rte_eth_conf
@@ -4543,6 +4557,23 @@ int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
*/
int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev);
+#ifdef RTE_LIBRTE_HV_PMD
+/**
+ * @internal
+ * Wrapper for use by vmbus drivers as a .probe function to attach to a ethdev
+ * interface.
+ */
+int rte_eth_dev_vmbus_probe(struct rte_vmbus_driver *vmbus_drv,
+ struct rte_vmbus_device *vmbus_dev);
+
+/**
+ * @internal
+ * Wrapper for use by vmbus drivers as a .remove function to detach a ethdev
+ * interface.
+ */
+int rte_eth_dev_vmbus_remove(struct rte_vmbus_device *vmbus_dev);
+#endif
+
#ifdef __cplusplus
}
#endif
@@ -130,6 +130,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += -lrte_pmd_vhost
endif # $(CONFIG_RTE_LIBRTE_VHOST)
_LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += -lrte_pmd_vmxnet3_uio
+_LDLIBS-$(CONFIG_RTE_LIBRTE_HV_PMD) += -luuid
ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB) += -lrte_pmd_aesni_mb