@@ -9,6 +9,7 @@ drivers = [
'ifpga',
'pci',
'platform',
+ 'shared_mem',
'vdev',
'vmbus',
]
new file mode 100644
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2023 Intel Corporation
+
+# The driver source uses AF_UNIX sockets and mmap(), and the build is
+# switched off for Windows targets.
+if is_windows
+ build = false
+ reason = 'not supported on Windows'
+endif
+
+# Single source file implementing the bus.
+sources = files('shared_mem_bus.c')
+# NOTE(review): presumably declares that this driver does not rely on the IOVA
+# field of the mbuf - confirm against the DPDK driver build documentation.
+require_iova_in_mbuf = false
+# DPDK libraries this driver is built against in addition to the defaults.
+deps += ['mbuf', 'net']
new file mode 100644
@@ -0,0 +1,323 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Intel Corporation
+ */
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+
+#include <rte_log.h>
+#include <rte_lcore.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_devargs.h>
+#include <rte_mbuf_pool_ops.h>
+
+#include <bus_driver.h>
+#include <dev_driver.h>
+#include "shared_mem_bus.h"
+
+/*
+ * Log type for this bus, registered with a default level of DEBUG.
+ * NOTE(review): DEBUG is a very verbose default - confirm it is intended
+ * outside of development.
+ */
+RTE_LOG_REGISTER_DEFAULT(shared_mem_bus_logtype, DEBUG);
+/*
+ * Logging helpers. BUS_LOG prefixes each message with a bus tag and the name
+ * of the calling function and appends a newline, so format strings passed to
+ * BUS_ERR, BUS_INFO and BUS_DEBUG should not end in a newline themselves.
+ */
+#define BUS_LOG(level, fmt, args...) rte_log(RTE_LOG_ ## level, \
+ shared_mem_bus_logtype, "## SHARED MEM BUS: %s(): " fmt "\n", __func__, ##args)
+#define BUS_ERR(fmt, args...) BUS_LOG(ERR, fmt, ## args)
+#define BUS_INFO(fmt, args...) BUS_LOG(INFO, fmt, ## args)
+#define BUS_DEBUG(fmt, args...) BUS_LOG(DEBUG, fmt, ## args)
+
+/*
+ * Bus callbacks. They are declared ahead of their definitions because the
+ * shared_mem_bus initialiser takes their addresses.
+ */
+static int dev_scan(void);
+static int dev_probe(void);
+static struct rte_device *find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
+		const void *data);
+static enum rte_iova_mode get_iommu_class(void);
+/* name: devargs device name; addr: optional output buffer for the socket path */
+static int addr_parse(const char *name, void *addr);
+
+/** Per-socket device state; one instance is created in dev_scan() for each devargs entry. */
+struct socket_device {
+ struct rte_device rte_device; /**< generic device; name, numa_node and bus are filled in by dev_scan() */
+ TAILQ_ENTRY(socket_device) next; /**< linkage in shared_mem_bus.socket_list */
+ int fd; /**< connected AF_UNIX SOCK_SEQPACKET socket to the peer */
+ uintptr_t membase; /**< address at which the received memory fd is mmap'ed; set by dev_probe() */
+ uintptr_t memlen; /**< length in bytes of that mapping; set by dev_probe() */
+};
+
+/** List head types used by the bus */
+/* socket_list: socket_device entries appended by dev_scan() */
+TAILQ_HEAD(socket_list, socket_device);
+/*
+ * device_list: generic rte_device entries.
+ * NOTE(review): nothing in this file inserts into a device_list - confirm it is needed.
+ */
+TAILQ_HEAD(device_list, rte_device);
+
+/** Bus-private state wrapping the generic rte_bus object. */
+struct shared_mem_bus {
+ struct rte_bus bus; /**< generic bus object handed to RTE_REGISTER_BUS */
+ struct socket_list socket_list; /**< sockets connected by dev_scan() */
+ struct shared_mem_drv *ethdrv; /**< driver stored by shared_mem_register_driver(), NULL before that */
+ struct device_list device_list; /**< initialised empty; not otherwise used in this file */
+};
+
+/*
+ * The single bus instance. Only the scan, probe, find_device,
+ * get_iommu_class and parse callbacks are provided, and both lists start
+ * out empty.
+ */
+static struct shared_mem_bus shared_mem_bus = {
+ .bus = {
+ .scan = dev_scan,
+ .probe = dev_probe,
+ .find_device = find_device,
+ .get_iommu_class = get_iommu_class,
+ .parse = addr_parse,
+ },
+
+ .socket_list = TAILQ_HEAD_INITIALIZER(shared_mem_bus.socket_list),
+ .device_list = TAILQ_HEAD_INITIALIZER(shared_mem_bus.device_list),
+};
+
+/* Register the bus under the name "shared_mem". */
+RTE_REGISTER_BUS(shared_mem, shared_mem_bus.bus);
+
+/**
+ * Send a message to the peer over the first socket on the bus.
+ *
+ * @param msg
+ *   Buffer holding the message to send.
+ * @param msglen
+ *   Number of bytes to send from msg.
+ * @return
+ *   The result of send(): the number of bytes sent, or -1 with errno set.
+ *   When no socket has been connected yet, -1 is returned with errno set
+ *   to ENODEV.
+ */
+int
+rte_shm_bus_send_message(void *msg, size_t msglen)
+{
+	struct socket_device *sdev = TAILQ_FIRST(&shared_mem_bus.socket_list);
+
+	/* nothing scanned yet: fail cleanly rather than dereference NULL */
+	if (sdev == NULL) {
+		errno = ENODEV;
+		return -1;
+	}
+	return send(sdev->fd, msg, msglen, 0);
+}
+
+/**
+ * Receive a message from the peer over the first socket on the bus.
+ *
+ * @param msg
+ *   Buffer the received message is written to.
+ * @param msglen
+ *   Size of the buffer in bytes.
+ * @return
+ *   The result of recv(): the number of bytes received, or -1 with errno
+ *   set. When no socket has been connected yet, -1 is returned with errno
+ *   set to ENODEV.
+ */
+int
+rte_shm_bus_recv_message(void *msg, size_t msglen)
+{
+	struct socket_device *sdev = TAILQ_FIRST(&shared_mem_bus.socket_list);
+
+	/* nothing scanned yet: fail cleanly rather than dereference NULL */
+	if (sdev == NULL) {
+		errno = ENODEV;
+		return -1;
+	}
+	return recv(sdev->fd, msg, msglen, 0);
+}
+
+/**
+ * Translate a local pointer into an offset within a shared memory mapping.
+ *
+ * @param ptr
+ *   Address to translate.
+ * @return
+ *   Offset of ptr from the base of the first listed device whose mapping
+ *   contains it (0 for the base address itself), or (uintptr_t)-1 when ptr
+ *   lies in no mapping.
+ */
+uintptr_t
+rte_shm_bus_get_mem_offset(void *ptr)
+{
+	const uintptr_t pval = (uintptr_t)ptr;
+	struct socket_device *dev;
+
+	TAILQ_FOREACH(dev, &shared_mem_bus.socket_list, next) {
+		/*
+		 * Test the range by subtraction: this accepts the first byte
+		 * of the mapping and cannot overflow the way base + len can.
+		 */
+		if (pval >= dev->membase && pval - dev->membase < dev->memlen)
+			return pval - dev->membase;
+	}
+	return (uintptr_t)-1;
+}
+
+/**
+ * Translate an offset into a local pointer.
+ *
+ * @param offset
+ *   Offset from the start of a shared memory mapping.
+ * @return
+ *   Address at that offset inside the first listed device whose mapping is
+ *   longer than offset, or (void *)-1 when no device qualifies.
+ */
+void *
+rte_shm_bus_get_mem_ptr(uintptr_t offset)
+{
+	struct socket_device *sdev = TAILQ_FIRST(&shared_mem_bus.socket_list);
+
+	for (; sdev != NULL; sdev = TAILQ_NEXT(sdev, next)) {
+		/* offset falls beyond this mapping, try the next device */
+		if (offset >= sdev->memlen)
+			continue;
+		return RTE_PTR_ADD(sdev->membase, offset);
+	}
+	return (void *)-1;
+}
+
+/**
+ * Bus scan callback: connect to the socket named by every devargs entry of
+ * this bus and queue one socket_device per connection.
+ *
+ * Nothing is done unless the bus is in allow-list scan mode.
+ *
+ * @return
+ *   0 on success, -EINVAL for a device name that cannot be parsed,
+ *   -ENOMEM on allocation failure, or the negated errno of a failing
+ *   socket()/connect(). The socket of the failing entry is closed before
+ *   returning; entries queued earlier are left in place.
+ */
+static int
+dev_scan(void)
+{
+	struct rte_devargs *devargs;
+
+	if (shared_mem_bus.bus.conf.scan_mode != RTE_BUS_SCAN_ALLOWLIST)
+		return 0;
+
+	RTE_EAL_DEVARGS_FOREACH(shared_mem_bus.bus.name, devargs) {
+		struct sockaddr_un sun = {.sun_family = AF_UNIX};
+		struct socket_device *sdev;
+		size_t namelen;
+		int fd, err;
+
+		/*
+		 * Validate the name before spending a descriptor on it. The
+		 * "sock:" prefix is 5 characters; checking the length first
+		 * keeps the subtraction from wrapping on shorter names.
+		 */
+		namelen = strlen(devargs->name);
+		if (namelen < 5 || namelen - 5 >= sizeof(sun.sun_path) ||
+				addr_parse(devargs->name, sun.sun_path) != 0) {
+			BUS_ERR("Error parsing device address");
+			return -EINVAL;
+		}
+
+		fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+		if (fd < 0) {
+			err = errno; /* save it: logging may overwrite errno */
+			BUS_ERR("Error creating socket");
+			return -err;
+		}
+
+		if (connect(fd, (const struct sockaddr *)&sun, sizeof(sun)) != 0) {
+			err = errno;
+			BUS_ERR("Error connecting to socket");
+			close(fd);
+			return -err;
+		}
+
+		/* zeroed so that unset rte_device fields, membase and memlen are well defined */
+		sdev = calloc(1, sizeof(*sdev));
+		if (sdev == NULL) {
+			BUS_ERR("Error with malloc");
+			close(fd);
+			return -ENOMEM;
+		}
+		BUS_INFO("Allocating dev for %s", devargs->name);
+		sdev->rte_device.name = devargs->name;
+		sdev->rte_device.numa_node = rte_socket_id();
+		sdev->rte_device.bus = &shared_mem_bus.bus;
+		sdev->fd = fd;
+		TAILQ_INSERT_TAIL(&shared_mem_bus.socket_list, sdev, next);
+	}
+
+	return 0;
+}
+
+/**
+ * Receive the shared memory descriptor and its parameters from the peer.
+ *
+ * One message is read from the socket. Its payload carries the memory size,
+ * the starting IOVA and the page size; its SCM_RIGHTS ancillary data carries
+ * the file descriptor of the memory.
+ *
+ * @param from
+ *   Connected socket to read from.
+ * @param memsize
+ *   Output: size in bytes of the memory behind the descriptor.
+ * @param iova
+ *   Output: IOVA of the start of the memory.
+ * @param pg_size
+ *   Output: page size of the memory.
+ * @return
+ *   The received file descriptor (owned by the caller) on success, -1 on
+ *   failure. The outputs are only written on success, and no received
+ *   descriptor is left open on failure.
+ */
+static int
+recv_fd(int from, uint64_t *memsize, rte_iova_t *iova, uint64_t *pg_size)
+{
+	struct {
+		uint64_t fd_size;
+		rte_iova_t iova;
+		uint64_t pg_size;
+	} data_message;
+	/* the union gives the on-stack control buffer cmsghdr alignment */
+	union {
+		struct cmsghdr align;
+		char buf[CMSG_SPACE(sizeof(int))];
+	} control;
+	struct iovec iov = {
+		.iov_base = &data_message,
+		.iov_len = sizeof(data_message),
+	};
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = control.buf,
+		.msg_controllen = sizeof(control.buf),
+	};
+	struct cmsghdr *cmhdr;
+	ssize_t nbytes;
+	size_t nfds, i;
+	int fd;
+
+	memset(&control, 0, sizeof(control));
+	nbytes = recvmsg(from, &msg, 0);
+	if (nbytes < 0) {
+		BUS_ERR("recvmsg error %s", strerror(errno));
+		return -1;
+	}
+
+	/* only trust the ancillary data if it really is a descriptor transfer */
+	cmhdr = CMSG_FIRSTHDR(&msg);
+	if (cmhdr == NULL || cmhdr->cmsg_level != SOL_SOCKET ||
+			cmhdr->cmsg_type != SCM_RIGHTS ||
+			cmhdr->cmsg_len < CMSG_LEN(sizeof(fd))) {
+		BUS_ERR("Error with fd on message received");
+		return -1;
+	}
+	/* memcpy rather than a cast: CMSG_DATA need not be aligned for int */
+	memcpy(&fd, CMSG_DATA(cmhdr), sizeof(fd));
+
+	/* only one descriptor is expected; close any extra ones the peer sent */
+	nfds = (cmhdr->cmsg_len - CMSG_LEN(0)) / sizeof(fd);
+	for (i = 1; i < nfds; i++) {
+		int extra;
+
+		memcpy(&extra, CMSG_DATA(cmhdr) + i * sizeof(extra), sizeof(extra));
+		close(extra);
+	}
+
+	if ((size_t)nbytes != sizeof(data_message)) {
+		BUS_ERR("recvmsg error: short message (%zd bytes)", nbytes);
+		close(fd);
+		return -1;
+	}
+
+	*memsize = data_message.fd_size;
+	*iova = data_message.iova;
+	*pg_size = data_message.pg_size;
+	return fd;
+}
+
+/*
+ * Create an external malloc heap backed by the mapping [mem, mem + memsize)
+ * and swap it in as the memory of the current socket.
+ * The caller guarantees pgsize != 0 and that memsize / pgsize fits in an
+ * unsigned int. Returns 0 on success, -1 on failure.
+ * NOTE(review): the heap name depends only on the socket id, so probing a
+ * second device on the same socket fails at heap creation - confirm whether
+ * more than one device is meant to be supported.
+ */
+static int
+shm_add_to_heap(void *mem, uint64_t memsize, rte_iova_t iova, uint64_t pgsize)
+{
+	const unsigned int nb_pages = (unsigned int)(memsize / pgsize);
+	char malloc_heap_name[32];
+	rte_iova_t *iovas;
+	unsigned int i;
+
+	/* calloc checks the count * size multiplication for overflow */
+	iovas = calloc(nb_pages, sizeof(iovas[0]));
+	if (iovas == NULL) {
+		BUS_ERR("Error with malloc");
+		return -1;
+	}
+	/* the memory is IOVA-contiguous: one entry per page, pgsize apart */
+	for (i = 0; i < nb_pages; i++)
+		iovas[i] = iova + (rte_iova_t)i * pgsize;
+
+	snprintf(malloc_heap_name, sizeof(malloc_heap_name),
+			"socket_%d_ext", (int)rte_socket_id());
+	if (rte_malloc_heap_create(malloc_heap_name) != 0) {
+		BUS_ERR("Error creating heap %s", malloc_heap_name);
+		free(iovas);
+		return -1;
+	}
+
+	BUS_DEBUG("Attempting to add memory to heap: %s", malloc_heap_name);
+	if (rte_malloc_heap_memory_add(malloc_heap_name, mem, memsize,
+			iovas, nb_pages, pgsize) < 0) {
+		BUS_ERR("Error adding to malloc heap: %s", strerror(rte_errno));
+		rte_malloc_heap_destroy(malloc_heap_name);
+		free(iovas);
+		return -1;
+	}
+	free(iovas);
+	BUS_DEBUG("Added memory to heap");
+	rte_malloc_heap_swap_socket(rte_socket_id(),
+			rte_malloc_heap_get_socket(malloc_heap_name));
+	BUS_DEBUG("Swapped in memory as socket %d memory", rte_socket_id());
+	return 0;
+}
+
+/*
+ * Receive the memory descriptor for one socket device, map it, report the
+ * mapping address to the peer and add the memory to a malloc heap.
+ * Returns 0 on success, -1 on failure; on failure the descriptor is closed
+ * and the mapping (if any) is removed again.
+ */
+static int
+shm_probe_socket(struct socket_device *sdev)
+{
+	uint64_t memsize = 0;
+	uint64_t pgsize = 0;
+	rte_iova_t iova = 0;
+	void *mem;
+	int memfd;
+
+	memfd = recv_fd(sdev->fd, &memsize, &iova, &pgsize);
+	/* check memfd is valid, the size is non-zero and multiple of 2MB */
+	if (memfd < 0 || memsize == 0 || memsize % (1 << 21) != 0) {
+		BUS_ERR("Error getting memfd and size");
+		if (memfd >= 0)
+			close(memfd);
+		return -1;
+	}
+	/* pgsize comes from the peer: it is used as a divisor and a page count */
+	if (pgsize == 0 || memsize % pgsize != 0 || memsize / pgsize > UINT_MAX) {
+		BUS_ERR("Invalid page size %"PRIu64" for memsize %"PRIu64, pgsize, memsize);
+		close(memfd);
+		return -1;
+	}
+	BUS_DEBUG("Received fd %d with memsize %"PRIu64" and pgsize %"PRIu64,
+			memfd, memsize, pgsize);
+
+	mem = mmap(NULL, memsize, PROT_READ | PROT_WRITE, MAP_SHARED, memfd, 0);
+	/* the mapping stays valid after close, so the descriptor is not kept */
+	close(memfd);
+	if (mem == MAP_FAILED) {
+		BUS_ERR("Error mmapping the received fd");
+		return -1;
+	}
+	BUS_DEBUG("%u MB of memory mapped at %p", (unsigned int)(memsize >> 20), mem);
+	sdev->membase = (uintptr_t)mem;
+	sdev->memlen = memsize;
+
+	struct eth_shared_mem_msg msg = {
+		.type = MSG_TYPE_MMAP_BASE_ADDR,
+		.offset = sdev->membase,
+	};
+	/* report on this device's own socket, not on the first one in the list */
+	if (send(sdev->fd, &msg, sizeof(msg), 0) != (ssize_t)sizeof(msg)) {
+		BUS_ERR("Error sending mmap base address to peer");
+		goto err_unmap;
+	}
+
+	if (shm_add_to_heap(mem, memsize, iova, pgsize) != 0)
+		goto err_unmap;
+	return 0;
+
+err_unmap:
+	munmap(mem, memsize);
+	sdev->membase = 0;
+	sdev->memlen = 0;
+	return -1;
+}
+
+/*
+ * Allocate the rte_device for the registered driver and call its probe
+ * function. Returns 0 on success, -1 on allocation or probe failure.
+ * NOTE(review): the device is not linked into any bus list, so it cannot be
+ * looked up or released later - confirm the intended ownership.
+ */
+static int
+shm_probe_ethdev(void)
+{
+	struct shared_mem_drv *drv = shared_mem_bus.ethdrv;
+	struct rte_device *ethdev = malloc(sizeof(*ethdev));
+
+	if (ethdev == NULL)
+		return -1;
+	*ethdev = (struct rte_device){
+		.name = "shared_mem_ethdev",
+		.driver = &drv->driver,
+		.bus = &shared_mem_bus.bus,
+		.numa_node = SOCKET_ID_ANY,
+	};
+	if (drv->probe(drv, ethdev) < 0) {
+		BUS_ERR("Error probing shared memory ethdev");
+		free(ethdev);
+		return -1;
+	}
+	return 0;
+}
+
+/**
+ * Bus probe callback.
+ *
+ * When at least one socket was connected by the scan, "shared_mem" is made
+ * the platform default mempool ops. Then, for every socket device, the
+ * shared memory is received, mapped and added to a malloc heap, and the
+ * registered driver (if any) is probed.
+ *
+ * @return
+ *   0 on success or when there are no devices, -1 on the first failure.
+ */
+static int
+dev_probe(void)
+{
+	struct socket_device *sdev;
+
+	if (TAILQ_EMPTY(&shared_mem_bus.socket_list))
+		return 0;
+
+	if (rte_mbuf_set_platform_mempool_ops("shared_mem") != 0) {
+		BUS_ERR("Error setting default mempool ops");
+		return -1;
+	}
+	BUS_INFO("Set default mempool ops to 'shared_mem'");
+
+	TAILQ_FOREACH(sdev, &shared_mem_bus.socket_list, next) {
+		if (shm_probe_socket(sdev) != 0)
+			return -1;
+		if (shared_mem_bus.ethdrv != NULL && shm_probe_ethdev() != 0)
+			return -1;
+	}
+	return 0;
+}
+
+/**
+ * Bus find_device callback: search the scanned socket devices.
+ *
+ * @param start
+ *   Device after which the search resumes, or NULL to search from the head
+ *   of the list. A start pointer that is not on the list yields no match.
+ * @param cmp
+ *   Comparison callback; a return value of 0 means the device matches.
+ * @param data
+ *   Opaque argument passed through to cmp.
+ * @return
+ *   The first matching device, or NULL when none matches.
+ */
+static struct rte_device *
+find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
+		const void *data)
+{
+	struct socket_device *sdev;
+	int searching = (start == NULL);
+
+	TAILQ_FOREACH(sdev, &shared_mem_bus.socket_list, next) {
+		if (!searching) {
+			/*
+			 * Walk up to start instead of converting it back to
+			 * its container, so a foreign pointer is harmless.
+			 */
+			if (&sdev->rte_device == start)
+				searching = 1;
+			continue;
+		}
+		if (cmp(&sdev->rte_device, data) == 0)
+			return &sdev->rte_device;
+	}
+	return NULL;
+}
+
+/**
+ * Bus get_iommu_class callback.
+ *
+ * @return
+ *   RTE_IOVA_VA as soon as one socket device has been scanned,
+ *   RTE_IOVA_DC (don't care) while the bus has no devices.
+ */
+static enum rte_iova_mode
+get_iommu_class(void)
+{
+	if (!TAILQ_EMPTY(&shared_mem_bus.socket_list))
+		return RTE_IOVA_VA;
+
+	return RTE_IOVA_DC;
+}
+
+/**
+ * Bus parse callback: recognise device names of the form "sock:<path>".
+ *
+ * @param name
+ *   Device name to examine.
+ * @param addr
+ *   When not NULL, receives a copy of <path> including its terminator; the
+ *   caller must supply a buffer large enough for it.
+ * @return
+ *   0 when the name carries the prefix and <path> is an existing socket,
+ *   -1 otherwise.
+ */
+static int
+addr_parse(const char *name, void *addr)
+{
+	static const char prefix[] = "sock:";
+	const size_t prefix_len = sizeof(prefix) - 1;
+	const char *path;
+	struct stat st;
+
+	if (strncmp(name, prefix, prefix_len) != 0) {
+		BUS_DEBUG("no sock: prefix on %s", name);
+		return -1;
+	}
+	path = name + prefix_len;
+
+	/* the path has to exist and has to be a socket */
+	if (stat(path, &st) < 0 || !S_ISSOCK(st.st_mode)) {
+		BUS_ERR("stat failed, or not a socket, %s", path);
+		return -1;
+	}
+
+	if (addr != NULL)
+		strcpy(addr, path);
+	BUS_DEBUG("Matched filename: %s", path);
+	return 0;
+}
+
+/**
+ * Record the driver that dev_probe() will probe.
+ *
+ * @param drv
+ *   Driver to register; it replaces any driver registered earlier.
+ * @return
+ *   0 on success, -1 (leaving the current driver untouched) when drv has
+ *   no probe function.
+ */
+int
+shared_mem_register_driver(struct shared_mem_drv *drv)
+{
+	const int has_probe = (drv->probe != NULL);
+
+	if (has_probe)
+		shared_mem_bus.ethdrv = drv;
+
+	return has_probe ? 0 : -1;
+}
+
new file mode 100644
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Intel Corporation
+ */
+
+#ifndef DRIVERS_BUS_SHARED_MEM_H_
+#define DRIVERS_BUS_SHARED_MEM_H_
+
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_ether.h>
+#include <dev_driver.h>
+
+/**
+ * Types of message carried in struct eth_shared_mem_msg.
+ * The bus code itself only sends MSG_TYPE_MMAP_BASE_ADDR.
+ * NOTE(review): the meaning of the remaining values is only implied by their
+ * names - confirm against the driver that uses this bus.
+ */
+enum shared_mem_msg_type {
+ MSG_TYPE_ACK = 0,
+ MSG_TYPE_MMAP_BASE_ADDR, /**< sent by the bus probe with the local mmap base address in offset */
+ MSG_TYPE_MEMPOOL_OFFSET,
+ MSG_TYPE_RX_RING_OFFSET,
+ MSG_TYPE_TX_RING_OFFSET,
+ MSG_TYPE_START,
+ MSG_TYPE_GET_MAC,
+ MSG_TYPE_REPORT_MAC,
+};
+
+/** Message exchanged over the bus socket: a type, one type-dependent value and optional trailing data. */
+struct eth_shared_mem_msg {
+ enum shared_mem_msg_type type; /* type implicitly defines which union member is used */
+ union {
+ uintptr_t offset; /* for many messages, just pass an offset */
+ struct rte_ether_addr ethaddr; /* allows passing a MAC address */
+ uintptr_t datalen; /* NOTE(review): presumably the length of the payload in data[] - confirm */
+ };
+ char data[]; /* flexible array member for a variable-length payload */
+};
+
+struct shared_mem_drv;
+
+/**
+ * Initialisation function for the driver
+ *
+ * @param drv
+ *   The registered driver whose probe function is being called.
+ * @param dev
+ *   The device the bus allocated for the driver.
+ */
+typedef int (c_eth_probe_t)(struct shared_mem_drv *drv, struct rte_device *dev);
+
+/** A driver for the shared memory bus. */
+struct shared_mem_drv {
+ struct rte_driver driver; /**< Generic driver; its name is set by RTE_PMD_REGISTER_SHMEM_DRV. */
+ c_eth_probe_t *probe; /**< Device probe function. */
+};
+
+/**
+ * Helper for shared memory bus driver registration from a driver instance.
+ *
+ * Defines a constructor that sets the driver name to the stringified nm and
+ * passes the driver to shared_mem_register_driver(), and exports the PMD
+ * name.
+ *
+ * @param nm
+ *   Driver name, given as a bare identifier.
+ * @param c_drv
+ *   The struct shared_mem_drv object to register (an lvalue, not a pointer).
+ */
+#define RTE_PMD_REGISTER_SHMEM_DRV(nm, c_drv) \
+RTE_INIT(shared_mem_initfn_ ##nm) \
+{\
+	(c_drv).driver.name = RTE_STR(nm);\
+	shared_mem_register_driver(&(c_drv)); \
+} \
+RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
+
+/**
+ * Register the driver to be probed by the shared memory bus.
+ *
+ * @param drv
+ *   Driver to register.
+ * @return
+ *   0 on success, -1 if the driver has no probe function.
+ */
+__rte_internal
+int
+shared_mem_register_driver(struct shared_mem_drv *drv);
+
+/**
+ * Send a message on the first socket of the bus.
+ *
+ * @param msg
+ *   Message buffer.
+ * @param msglen
+ *   Number of bytes to send.
+ * @return
+ *   The return value of the underlying send() call.
+ */
+__rte_internal
+int
+rte_shm_bus_send_message(void *msg, size_t msglen);
+
+/**
+ * Receive a message from the first socket of the bus.
+ *
+ * @param msg
+ *   Buffer for the received message.
+ * @param msglen
+ *   Size of the buffer in bytes.
+ * @return
+ *   The return value of the underlying recv() call.
+ */
+__rte_internal
+int
+rte_shm_bus_recv_message(void *msg, size_t msglen);
+
+/**
+ * Convert a pointer into an offset within a shared memory mapping.
+ *
+ * @param ptr
+ *   Address inside a mapping of one of the bus devices.
+ * @return
+ *   Offset of ptr from the base of the mapping that contains it, or
+ *   (uintptr_t)-1 if no mapping contains it.
+ */
+__rte_internal
+uintptr_t
+rte_shm_bus_get_mem_offset(void *ptr);
+
+/**
+ * Convert an offset within a shared memory mapping into a pointer.
+ *
+ * @param offset
+ *   Offset from the base of a mapping.
+ * @return
+ *   Address at that offset in the first device whose mapping is longer than
+ *   offset, or (void *)-1 if there is none.
+ */
+__rte_internal
+void *
+rte_shm_bus_get_mem_ptr(uintptr_t offset);
+
+#endif /* DRIVERS_BUS_SHARED_MEM_H_ */
new file mode 100644
@@ -0,0 +1,11 @@
+INTERNAL {
+	global:
+
+	# symbols used by drivers built on this bus; keep in alphabetical order
+	rte_shm_bus_get_mem_offset;
+	rte_shm_bus_get_mem_ptr;
+	rte_shm_bus_recv_message;
+	rte_shm_bus_send_message;
+	shared_mem_register_driver;
+
+	local: *;
+};