@@ -45,7 +45,7 @@ client.
"socket=/tmp/memif.sock", "Socket filename", "/tmp/memif.sock", "string len 108"
"mac=01:23:45:ab:cd:ef", "Mac address", "01:ab:23:cd:45:ef", ""
"secret=abc123", "Secret is an optional security option, which if specified, must be matched by peer", "", "string len 24"
- "zero-copy=yes", "Enable/disable zero-copy slave mode", "no", "yes|no"
+ "zero-copy=yes", "Enable/disable zero-copy slave mode. Only relevant to slave, requires '--single-file-segments' eal argument", "no", "yes|no"
**Connection establishment**
@@ -171,6 +171,42 @@ Files
- net/memif/memif.h *- descriptor and ring definitions*
- net/memif/rte_eth_memif.c *- eth_memif_rx() eth_memif_tx()*
+Zero-copy slave
+~~~~~~~~~~~~~~~
+
+Zero-copy slave can be enabled with memif configuration option 'zero-copy=yes'. This option
+is only relevant to slave and requires eal argument '--single-file-segments'.
+This limitation is in place, because it is too expensive to identify memseg
+for each packet buffer, resulting in worse performance than with zero-copy disabled.
+With single file segments we can calculate offset from the beginning of the file
+for each packet buffer.
+
+**Shared memory format**
+
+Region 0 is created by memif driver and contains rings. Slave interface exposes DPDK memory (memseg).
+Instead of using memfd_create() to create new shared file, existing memsegs are used.
+Master interface functions the same as with zero-copy disabled.
+
+region 0:
+
++-----------------------+
+| Rings |
++-----------+-----------+
+| S2M rings | M2S rings |
++-----------+-----------+
+
+region n:
+
++-----------------+
+| Buffers |
++-----------------+
+|memseg |
++-----------------+
+
+Buffers are dequeued and enqueued as needed. Offset descriptor field is calculated at tx.
+Only single file segments mode (EAL option --single-file-segments) is supported, as calculating
+offset from multiple segments is too expensive.
+
Example: testpmd
----------------------------
In this example we run two instances of testpmd application and transmit packets over memif.
@@ -183,6 +219,10 @@ Now create ``slave`` interface (master must be already running so the slave will
#./build/app/testpmd -l 2-3 --proc-type=primary --file-prefix=pmd2 --vdev=net_memif -- -i
+You can also enable ``zero-copy`` on ``slave`` interface::
+
+ #./build/app/testpmd -l 2-3 --proc-type=primary --file-prefix=pmd2 --vdev=net_memif,zero-copy=yes --single-file-segments -- -i
+
Start forwarding packets::
Slave:
@@ -20,6 +20,7 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API
# - rte_mp_action_register
# - rte_mp_reply
# - rte_mp_request_sync
+# - rte_mcfg_get_single_file_segments
LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool
LDLIBS += -lrte_ethdev -lrte_kvargs -lrte_net
LDLIBS += -lrte_hash
@@ -175,8 +175,7 @@ memif_msg_receive_hello(struct rte_eth_dev *dev, memif_msg_t *msg)
strlcpy(pmd->remote_name, (char *)h->name, sizeof(pmd->remote_name));
- MIF_LOG(DEBUG, "%s: Connecting to %s.",
- rte_vdev_device_name(pmd->vdev), pmd->remote_name);
+ MIF_LOG(DEBUG, "Connecting to %s.", pmd->remote_name);
return 0;
}
@@ -338,8 +337,7 @@ memif_msg_receive_connect(struct rte_eth_dev *dev, memif_msg_t *msg)
strlcpy(pmd->remote_if_name, (char *)c->if_name,
sizeof(pmd->remote_if_name));
- MIF_LOG(INFO, "%s: Remote interface %s connected.",
- rte_vdev_device_name(pmd->vdev), pmd->remote_if_name);
+ MIF_LOG(INFO, "Remote interface %s connected.", pmd->remote_if_name);
return 0;
}
@@ -357,8 +355,7 @@ memif_msg_receive_connected(struct rte_eth_dev *dev, memif_msg_t *msg)
strlcpy(pmd->remote_if_name, (char *)c->if_name,
sizeof(pmd->remote_if_name));
- MIF_LOG(INFO, "%s: Remote interface %s connected.",
- rte_vdev_device_name(pmd->vdev), pmd->remote_if_name);
+ MIF_LOG(INFO, "Remote interface %s connected.", pmd->remote_if_name);
return 0;
}
@@ -369,14 +366,13 @@ memif_msg_receive_disconnect(struct rte_eth_dev *dev, memif_msg_t *msg)
struct pmd_internals *pmd = dev->data->dev_private;
memif_msg_disconnect_t *d = &msg->disconnect;
- memset(pmd->remote_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
+ memset(pmd->remote_disc_string, 0, sizeof(pmd->remote_disc_string));
strlcpy(pmd->remote_disc_string, (char *)d->string,
sizeof(pmd->remote_disc_string));
- MIF_LOG(INFO, "%s: Disconnect received: %s",
- rte_vdev_device_name(pmd->vdev), pmd->remote_disc_string);
+ MIF_LOG(INFO, "Disconnect received: %s", pmd->remote_disc_string);
- memset(pmd->local_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
+ memset(pmd->local_disc_string, 0, 96);
memif_disconnect(dev);
return 0;
}
@@ -472,7 +468,6 @@ memif_msg_enq_connect(struct rte_eth_dev *dev)
{
struct pmd_internals *pmd = dev->data->dev_private;
struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
- const char *name = rte_vdev_device_name(pmd->vdev);
memif_msg_connect_t *c;
if (e == NULL)
@@ -480,7 +475,7 @@ memif_msg_enq_connect(struct rte_eth_dev *dev)
c = &e->msg.connect;
e->msg.type = MEMIF_MSG_TYPE_CONNECT;
- strlcpy((char *)c->if_name, name, sizeof(c->if_name));
+ strlcpy((char *)c->if_name, dev->data->name, sizeof(c->if_name));
return 0;
}
@@ -490,7 +485,6 @@ memif_msg_enq_connected(struct rte_eth_dev *dev)
{
struct pmd_internals *pmd = dev->data->dev_private;
struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
- const char *name = rte_vdev_device_name(pmd->vdev);
memif_msg_connected_t *c;
if (e == NULL)
@@ -498,7 +492,7 @@ memif_msg_enq_connected(struct rte_eth_dev *dev)
c = &e->msg.connected;
e->msg.type = MEMIF_MSG_TYPE_CONNECTED;
- strlcpy((char *)c->if_name, name, sizeof(c->if_name));
+ strlcpy((char *)c->if_name, dev->data->name, sizeof(c->if_name));
return 0;
}
@@ -524,7 +518,6 @@ void
memif_disconnect(struct rte_eth_dev *dev)
{
struct pmd_internals *pmd = dev->data->dev_private;
- struct pmd_process_private *proc_private = dev->process_private;
struct memif_msg_queue_elt *elt, *next;
struct memif_queue *mq;
struct rte_intr_handle *ih;
@@ -614,7 +607,7 @@ memif_disconnect(struct rte_eth_dev *dev)
}
}
- memif_free_regions(proc_private);
+ memif_free_regions(dev);
/* reset connection configuration */
memset(&pmd->run, 0, sizeof(pmd->run));
@@ -661,7 +654,7 @@ memif_msg_receive(struct memif_control_channel *cc)
if (cmsg->cmsg_type == SCM_CREDENTIALS)
cr = (struct ucred *)CMSG_DATA(cmsg);
else if (cmsg->cmsg_type == SCM_RIGHTS)
- memcpy(&afd, CMSG_DATA(cmsg), sizeof(int));
+ rte_memcpy(&afd, CMSG_DATA(cmsg), sizeof(int));
}
cmsg = CMSG_NXTHDR(&mh, cmsg);
}
@@ -860,8 +853,7 @@ memif_listener_handler(void *arg)
}
static struct memif_socket *
-memif_socket_create(struct pmd_internals *pmd,
- const char *key, uint8_t listener)
+memif_socket_create(char *key, uint8_t listener)
{
struct memif_socket *sock;
struct sockaddr_un un;
@@ -900,17 +892,15 @@ memif_socket_create(struct pmd_internals *pmd,
if (ret < 0)
goto error;
- MIF_LOG(DEBUG, "%s: Memif listener socket %s created.",
- rte_vdev_device_name(pmd->vdev), sock->filename);
+ MIF_LOG(DEBUG, "Memif listener socket %s created.", sock->filename);
sock->intr_handle.fd = sockfd;
sock->intr_handle.type = RTE_INTR_HANDLE_EXT;
ret = rte_intr_callback_register(&sock->intr_handle,
memif_listener_handler, sock);
if (ret < 0) {
- MIF_LOG(ERR, "%s: Failed to register interrupt "
- "callback for listener socket",
- rte_vdev_device_name(pmd->vdev));
+ MIF_LOG(ERR, "Failed to register interrupt "
+ "callback for listener socket");
return NULL;
}
}
@@ -918,9 +908,7 @@ memif_socket_create(struct pmd_internals *pmd,
return sock;
error:
- MIF_LOG(ERR, "%s: Failed to setup socket %s: %s",
- rte_vdev_device_name(pmd->vdev) ?
- rte_vdev_device_name(pmd->vdev) : "NULL", key, strerror(errno));
+ MIF_LOG(ERR, "Failed to setup socket %s: %s", key, strerror(errno));
if (sock != NULL)
rte_free(sock);
if (sockfd >= 0)
@@ -965,9 +953,8 @@ memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename)
strlcpy(key, socket_filename, MEMIF_SOCKET_UN_SIZE);
ret = rte_hash_lookup_data(hash, key, (void **)&socket);
if (ret < 0) {
- socket = memif_socket_create(pmd, key,
- (pmd->role ==
- MEMIF_ROLE_SLAVE) ? 0 : 1);
+ socket = memif_socket_create(key,
+ (pmd->role == MEMIF_ROLE_SLAVE) ? 0 : 1);
if (socket == NULL)
return -1;
ret = rte_hash_add_key_data(hash, key, socket);
@@ -998,8 +985,7 @@ memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename)
elt = rte_malloc("pmd-queue", sizeof(struct memif_socket_dev_list_elt), 0);
if (elt == NULL) {
- MIF_LOG(ERR, "%s: Failed to add device to socket device list.",
- rte_vdev_device_name(pmd->vdev));
+ MIF_LOG(ERR, "Failed to add device to socket device list.");
return -1;
}
elt->dev = dev;
@@ -1077,8 +1063,7 @@ memif_connect_slave(struct rte_eth_dev *dev)
sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
if (sockfd < 0) {
- MIF_LOG(ERR, "%s: Failed to open socket.",
- rte_vdev_device_name(pmd->vdev));
+ MIF_LOG(ERR, "Failed to open socket.");
return -1;
}
@@ -1089,19 +1074,16 @@ memif_connect_slave(struct rte_eth_dev *dev)
ret = connect(sockfd, (struct sockaddr *)&sun,
sizeof(struct sockaddr_un));
if (ret < 0) {
- MIF_LOG(ERR, "%s: Failed to connect socket: %s.",
- rte_vdev_device_name(pmd->vdev), pmd->socket_filename);
+ MIF_LOG(ERR, "Failed to connect socket: %s.", pmd->socket_filename);
goto error;
}
- MIF_LOG(DEBUG, "%s: Memif socket: %s connected.",
- rte_vdev_device_name(pmd->vdev), pmd->socket_filename);
+ MIF_LOG(DEBUG, "Memif socket: %s connected.", pmd->socket_filename);
pmd->cc = rte_zmalloc("memif-cc",
sizeof(struct memif_control_channel), 0);
if (pmd->cc == NULL) {
- MIF_LOG(ERR, "%s: Failed to allocate control channel.",
- rte_vdev_device_name(pmd->vdev));
+ MIF_LOG(ERR, "Failed to allocate control channel.");
goto error;
}
@@ -1114,8 +1096,7 @@ memif_connect_slave(struct rte_eth_dev *dev)
ret = rte_intr_callback_register(&pmd->cc->intr_handle,
memif_intr_handler, pmd->cc);
if (ret < 0) {
- MIF_LOG(ERR, "%s: Failed to register interrupt callback "
- "for control fd", rte_vdev_device_name(pmd->vdev));
+ MIF_LOG(ERR, "Failed to register interrupt callback for control fd");
goto error;
}
@@ -15,5 +15,6 @@ allow_experimental_apis = true
# - rte_mp_action_register
# - rte_mp_reply
# - rte_mp_request_sync
+# - rte_mcfg_get_single_file_segments
deps += ['hash']
@@ -23,6 +23,10 @@
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal_memconfig.h>
#include "rte_eth_memif.h"
#include "memif_socket.h"
@@ -50,6 +54,10 @@ static const char * const valid_arguments[] = {
#define MEMIF_MP_SEND_REGION "memif_mp_send_region"
+
+static int memif_region_init_zc(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, void *arg);
+
const char *
memif_version(void)
{
@@ -116,10 +124,14 @@ memif_mp_request_regions(struct rte_eth_dev *dev)
struct mp_region_msg *reply_param;
struct memif_region *r;
struct pmd_process_private *proc_private = dev->process_private;
+ struct pmd_internals *pmd = dev->data->dev_private;
+ /* in case of zero-copy slave, only request region 0 */
+ uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ?
+ 1 : ETH_MEMIF_MAX_REGION_NUM;
MIF_LOG(DEBUG, "Requesting memory regions");
- for (i = 0; i < ETH_MEMIF_MAX_REGION_NUM; i++) {
+ for (i = 0; i < max_region_num; i++) {
/* Prepare the message */
memset(&msg, 0, sizeof(msg));
strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name));
@@ -161,6 +173,12 @@ memif_mp_request_regions(struct rte_eth_dev *dev)
free(reply);
}
+ if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
+ ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private);
+ if (ret < 0)
+ return ret;
+ }
+
return memif_connect(dev);
}
@@ -220,6 +238,23 @@ memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d)
return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset);
}
+/* Free mbufs received by master */
+static void
+memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq)
+{
+ uint16_t mask = (1 << mq->log2_ring_size) - 1;
+ memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
+
+ /* FIXME: improve performance */
+ while (mq->last_tail != ring->tail) {
+ RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
+ /* Decrement refcnt and free mbuf. (current segment) */
+ rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1);
+ rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]);
+ mq->last_tail++;
+ }
+}
+
static int
memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
struct rte_mbuf *tail)
@@ -334,8 +369,8 @@ eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off),
- (uint8_t *)memif_get_buffer(proc_private, d0) +
- src_off, cp_len);
+ (uint8_t *)memif_get_buffer(proc_private, d0) + src_off,
+ cp_len);
src_off += cp_len;
dst_off += cp_len;
@@ -378,6 +413,120 @@ eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
return n_rx_pkts;
}
+static uint16_t
+eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+ struct memif_queue *mq = queue;
+ struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
+ struct pmd_process_private *proc_private =
+ rte_eth_devices[mq->in_port].process_private;
+ memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
+ uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0, head;
+ uint16_t n_rx_pkts = 0;
+ memif_desc_t *d0;
+ struct rte_mbuf *mbuf, *mbuf_tail;
+ struct rte_mbuf *mbuf_head = NULL;
+ int ret;
+ struct rte_eth_link link;
+
+ if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
+ return 0;
+ if (unlikely(ring == NULL)) {
+ /* Secondary process will attempt to request regions. */
+ rte_eth_link_get(mq->in_port, &link);
+ return 0;
+ }
+
+ /* consume interrupt */
+ if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
+ uint64_t b;
+ ssize_t size __rte_unused;
+ size = read(mq->intr_handle.fd, &b, sizeof(b));
+ }
+
+ ring_size = 1 << mq->log2_ring_size;
+ mask = ring_size - 1;
+
+ cur_slot = mq->last_tail;
+ last_slot = ring->tail;
+ if (cur_slot == last_slot)
+ goto refill;
+ n_slots = last_slot - cur_slot;
+
+ while (n_slots && n_rx_pkts < nb_pkts) {
+ s0 = cur_slot & mask;
+
+ d0 = &ring->desc[s0];
+ mbuf_head = mq->buffers[s0];
+ mbuf = mbuf_head;
+
+next_slot:
+ /* prefetch next descriptor */
+ if (n_rx_pkts + 1 < nb_pkts)
+ rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]);
+
+ mbuf->port = mq->in_port;
+ rte_pktmbuf_data_len(mbuf) = d0->length;
+ rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
+
+ mq->n_bytes += rte_pktmbuf_data_len(mbuf);
+
+ cur_slot++;
+ n_slots--;
+ if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
+ s0 = cur_slot & mask;
+ d0 = &ring->desc[s0];
+ mbuf_tail = mbuf;
+ mbuf = mq->buffers[s0];
+ ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
+ if (unlikely(ret < 0)) {
+ MIF_LOG(ERR, "number-of-segments-overflow");
+ goto refill;
+ }
+ goto next_slot;
+ }
+
+ *bufs++ = mbuf_head;
+ n_rx_pkts++;
+ }
+
+ mq->last_tail = cur_slot;
+
+/* Supply master with new buffers */
+refill:
+ head = ring->head;
+ n_slots = ring_size - head + mq->last_tail;
+
+ if (n_slots < 32)
+ goto no_free_mbufs;
+
+ ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots);
+ if (unlikely(ret < 0))
+ goto no_free_mbufs;
+
+ while (n_slots--) {
+ s0 = head++ & mask;
+ if (n_slots > 0)
+ rte_prefetch0(mq->buffers[head & mask]);
+ d0 = &ring->desc[s0];
+ /* store buffer header */
+ mbuf = mq->buffers[s0];
+ /* populate descriptor */
+ d0->length = rte_pktmbuf_data_room_size(mq->mempool) -
+ RTE_PKTMBUF_HEADROOM;
+ d0->region = 1;
+ d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
+ (uint8_t *)proc_private->regions[d0->region]->addr;
+ }
+no_free_mbufs:
+ rte_mb();
+ ring->head = head;
+
+ mq->n_pkts += n_rx_pkts;
+
+ return n_rx_pkts;
+}
+
static uint16_t
eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
@@ -498,19 +647,173 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
return n_tx_pkts;
}
+
+static int
+memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
+ memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
+ uint16_t slot, uint16_t n_free)
+{
+ memif_desc_t *d0;
+ int used_slots = 1;
+
+next_in_chain:
+ /* store pointer to mbuf to free it later */
+ mq->buffers[slot & mask] = mbuf;
+ /* Increment refcnt to make sure the buffer is not freed before master
+ * receives it. (current segment)
+ */
+ rte_mbuf_refcnt_update(mbuf, 1);
+ /* populate descriptor */
+ d0 = &ring->desc[slot & mask];
+ d0->length = rte_pktmbuf_data_len(mbuf);
+ /* FIXME: get region index */
+ d0->region = 1;
+ d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
+ (uint8_t *)proc_private->regions[d0->region]->addr;
+ d0->flags = 0;
+
+ /* check if buffer is chained */
+ if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
+ if (n_free < 2)
+ return 0;
+ /* mark buffer as chained */
+ d0->flags |= MEMIF_DESC_FLAG_NEXT;
+ /* advance mbuf */
+ mbuf = mbuf->next;
+ /* update counters */
+ used_slots++;
+ slot++;
+ n_free--;
+ goto next_in_chain;
+ }
+ return used_slots;
+}
+
+static uint16_t
+eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+ struct memif_queue *mq = queue;
+ struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
+ struct pmd_process_private *proc_private =
+ rte_eth_devices[mq->in_port].process_private;
+ memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
+ uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0;
+ memif_ring_type_t type = mq->type;
+ struct rte_eth_link link;
+
+ if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
+ return 0;
+ if (unlikely(ring == NULL)) {
+ /* Secondary process will attempt to request regions. */
+ rte_eth_link_get(mq->in_port, &link);
+ return 0;
+ }
+
+ ring_size = 1 << mq->log2_ring_size;
+ mask = ring_size - 1;
+
+ /* free mbufs received by master */
+ memif_free_stored_mbufs(proc_private, mq);
+
+ /* ring type always MEMIF_RING_S2M */
+ slot = ring->head;
+ n_free = ring_size - ring->head + mq->last_tail;
+
+ int used_slots;
+
+ while (n_free && (n_tx_pkts < nb_pkts)) {
+ while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) {
+ if ((nb_pkts - n_tx_pkts) > 8) {
+ rte_prefetch0(*bufs + 4);
+ rte_prefetch0(*bufs + 5);
+ rte_prefetch0(*bufs + 6);
+ rte_prefetch0(*bufs + 7);
+ }
+ used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
+ mask, slot, n_free);
+ if (unlikely(used_slots < 1))
+ goto no_free_slots;
+ n_tx_pkts++;
+ slot += used_slots;
+ n_free -= used_slots;
+
+ used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
+ mask, slot, n_free);
+ if (unlikely(used_slots < 1))
+ goto no_free_slots;
+ n_tx_pkts++;
+ slot += used_slots;
+ n_free -= used_slots;
+
+ used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
+ mask, slot, n_free);
+ if (unlikely(used_slots < 1))
+ goto no_free_slots;
+ n_tx_pkts++;
+ slot += used_slots;
+ n_free -= used_slots;
+
+ used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
+ mask, slot, n_free);
+ if (unlikely(used_slots < 1))
+ goto no_free_slots;
+ n_tx_pkts++;
+ slot += used_slots;
+ n_free -= used_slots;
+ }
+ used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
+ mask, slot, n_free);
+ if (unlikely(used_slots < 1))
+ goto no_free_slots;
+ n_tx_pkts++;
+ slot += used_slots;
+ n_free -= used_slots;
+ }
+
+no_free_slots:
+ rte_mb();
+ /* update ring pointers */
+ if (type == MEMIF_RING_S2M)
+ ring->head = slot;
+ else
+ ring->tail = slot;
+
+ /* Send interrupt, if enabled. */
+ if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
+ uint64_t a = 1;
+ ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a));
+ if (unlikely(size < 0)) {
+ MIF_LOG(WARNING,
+ "Failed to send interrupt. %s", strerror(errno));
+ }
+ }
+
+ /* increment queue counters */
+ mq->n_pkts += n_tx_pkts;
+
+ return n_tx_pkts;
+}
+
void
-memif_free_regions(struct pmd_process_private *proc_private)
+memif_free_regions(struct rte_eth_dev *dev)
{
+ struct pmd_process_private *proc_private = dev->process_private;
+ struct pmd_internals *pmd = dev->data->dev_private;
int i;
struct memif_region *r;
- MIF_LOG(DEBUG, "Free memory regions");
/* regions are allocated contiguously, so it's
* enough to loop until 'proc_private->regions_num'
*/
for (i = 0; i < proc_private->regions_num; i++) {
r = proc_private->regions[i];
if (r != NULL) {
+ /* This is memzone */
+ if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
+ r->addr = NULL;
+ if (r->fd > 0)
+ close(r->fd);
+ }
if (r->addr != NULL) {
munmap(r->addr, r->region_size);
if (r->fd > 0) {
@@ -525,6 +828,45 @@ memif_free_regions(struct pmd_process_private *proc_private)
proc_private->regions_num = 0;
}
+static int
+memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg)
+{
+ struct pmd_process_private *proc_private = (struct pmd_process_private *)arg;
+ struct memif_region *r;
+
+ if (proc_private->regions_num < 1) {
+ MIF_LOG(ERR, "Missing descriptor region");
+ return -1;
+ }
+
+ r = proc_private->regions[proc_private->regions_num - 1];
+
+ if (r->addr != msl->base_va)
+ r = proc_private->regions[++proc_private->regions_num - 1];
+
+ if (r == NULL) {
+ r = rte_zmalloc("region", sizeof(struct memif_region), 0);
+ if (r == NULL) {
+ MIF_LOG(ERR, "Failed to alloc memif region.");
+ return -ENOMEM;
+ }
+
+ r->addr = msl->base_va;
+ r->region_size = ms->len;
+ r->fd = rte_memseg_get_fd(ms);
+ if (r->fd < 0)
+ return -1;
+ r->pkt_buffer_offset = 0;
+
+ proc_private->regions[proc_private->regions_num - 1] = r;
+ } else {
+ r->region_size += ms->len;
+ }
+
+ return 0;
+}
+
static int
memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers)
{
@@ -605,12 +947,29 @@ memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers)
static int
memif_regions_init(struct rte_eth_dev *dev)
{
+ struct pmd_internals *pmd = dev->data->dev_private;
int ret;
- /* create one buffer region */
- ret = memif_region_init_shm(dev, /* has buffer */ 1);
- if (ret < 0)
- return ret;
+ /*
+ * Zero-copy exposes dpdk memory.
+ * Each memseg list will be represented by memif region.
+ * Zero-copy regions indexing: memseg list idx + 1,
+ * as we already have region 0 reserved for descriptors.
+ */
+ if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
+ /* create region idx 0 containing descriptors */
+ ret = memif_region_init_shm(dev, 0);
+ if (ret < 0)
+ return ret;
+ ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private);
+ if (ret < 0)
+ return ret;
+ } else {
+ /* create one memory region contaning rings and buffers */
+ ret = memif_region_init_shm(dev, /* has buffers */ 1);
+ if (ret < 0)
+ return ret;
+ }
return 0;
}
@@ -630,6 +989,10 @@ memif_init_rings(struct rte_eth_dev *dev)
__atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
ring->cookie = MEMIF_COOKIE;
ring->flags = 0;
+
+ if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
+ continue;
+
for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
slot = i * (1 << pmd->run.log2_ring_size) + j;
ring->desc[j].region = 0;
@@ -646,6 +1009,10 @@ memif_init_rings(struct rte_eth_dev *dev)
__atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
ring->cookie = MEMIF_COOKIE;
ring->flags = 0;
+
+ if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
+ continue;
+
for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
slot = (i + pmd->run.num_s2m_rings) *
(1 << pmd->run.log2_ring_size) + j;
@@ -659,7 +1026,7 @@ memif_init_rings(struct rte_eth_dev *dev)
}
/* called only by slave */
-static void
+static int
memif_init_queues(struct rte_eth_dev *dev)
{
struct pmd_internals *pmd = dev->data->dev_private;
@@ -680,6 +1047,13 @@ memif_init_queues(struct rte_eth_dev *dev)
"Failed to create eventfd for tx queue %d: %s.", i,
strerror(errno));
}
+ mq->buffers = NULL;
+ if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
+ mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
+ (1 << mq->log2_ring_size), 0);
+ if (mq->buffers == NULL)
+ return -ENOMEM;
+ }
}
for (i = 0; i < pmd->run.num_m2s_rings; i++) {
@@ -696,7 +1070,15 @@ memif_init_queues(struct rte_eth_dev *dev)
"Failed to create eventfd for rx queue %d: %s.", i,
strerror(errno));
}
+ mq->buffers = NULL;
+ if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
+ mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
+ (1 << mq->log2_ring_size), 0);
+ if (mq->buffers == NULL)
+ return -ENOMEM;
+ }
}
+ return 0;
}
int
@@ -710,7 +1092,9 @@ memif_init_regions_and_queues(struct rte_eth_dev *dev)
memif_init_rings(dev);
- memif_init_queues(dev);
+ ret = memif_init_queues(dev);
+ if (ret < 0)
+ return ret;
return 0;
}
@@ -734,8 +1118,16 @@ memif_connect(struct rte_eth_dev *dev)
mr->addr = mmap(NULL, mr->region_size,
PROT_READ | PROT_WRITE,
MAP_SHARED, mr->fd, 0);
- if (mr->addr == NULL)
+ if (mr->addr == MAP_FAILED) {
+ MIF_LOG(ERR, "mmap failed: %s\n",
+ strerror(errno));
return -1;
+ }
+ }
+ if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
+ /* close memseg file */
+ close(mr->fd);
+ mr->fd = -1;
}
}
}
@@ -796,8 +1188,7 @@ memif_dev_start(struct rte_eth_dev *dev)
ret = memif_connect_master(dev);
break;
default:
- MIF_LOG(ERR, "%s: Unknown role: %d.",
- rte_vdev_device_name(pmd->vdev), pmd->role);
+ MIF_LOG(ERR, "Unknown role: %d.", pmd->role);
ret = -1;
break;
}
@@ -862,8 +1253,7 @@ memif_tx_queue_setup(struct rte_eth_dev *dev,
mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0);
if (mq == NULL) {
- MIF_LOG(ERR, "%s: Failed to allocate tx queue id: %u",
- rte_vdev_device_name(pmd->vdev), qid);
+ MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid);
return -ENOMEM;
}
@@ -892,8 +1282,7 @@ memif_rx_queue_setup(struct rte_eth_dev *dev,
mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0);
if (mq == NULL) {
- MIF_LOG(ERR, "%s: Failed to allocate rx queue id: %u",
- rte_vdev_device_name(pmd->vdev), qid);
+ MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid);
return -ENOMEM;
}
@@ -933,7 +1322,7 @@ memif_link_update(struct rte_eth_dev *dev,
memif_mp_request_regions(dev);
} else if (dev->data->dev_link.link_status == ETH_LINK_DOWN &&
proc_private->regions_num > 0) {
- memif_free_regions(proc_private);
+ memif_free_regions(dev);
}
}
return 0;
@@ -1054,11 +1443,6 @@ memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
const unsigned int numa_node = vdev->device.numa_node;
const char *name = rte_vdev_device_name(vdev);
- if (flags & ETH_MEMIF_FLAG_ZERO_COPY) {
- MIF_LOG(ERR, "Zero-copy slave not supported.");
- return -1;
- }
-
eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
if (eth_dev == NULL) {
MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
@@ -1082,6 +1466,9 @@ memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
pmd->flags = flags;
pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
pmd->role = role;
+ /* Zero-copy flag irelevant to master. */
+ if (pmd->role == MEMIF_ROLE_MASTER)
+ pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
ret = memif_socket_init(eth_dev, socket_filename);
if (ret < 0)
@@ -1105,8 +1492,14 @@ memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
eth_dev->dev_ops = &ops;
eth_dev->device = &vdev->device;
- eth_dev->rx_pkt_burst = eth_memif_rx;
- eth_dev->tx_pkt_burst = eth_memif_tx;
+ if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
+ eth_dev->rx_pkt_burst = eth_memif_rx_zc;
+ eth_dev->tx_pkt_burst = eth_memif_tx_zc;
+ } else {
+ eth_dev->rx_pkt_burst = eth_memif_rx;
+ eth_dev->tx_pkt_burst = eth_memif_tx;
+ }
+
eth_dev->data->dev_flags &= RTE_ETH_DEV_CLOSE_REMOVE;
@@ -1138,6 +1531,10 @@ memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
uint32_t *flags = (uint32_t *)extra_args;
if (strstr(value, "yes") != NULL) {
+ if (!rte_mcfg_get_single_file_segments()) {
+ MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments.");
+ return -ENOTSUP;
+ }
*flags |= ETH_MEMIF_FLAG_ZERO_COPY;
} else if (strstr(value, "no") != NULL) {
*flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
@@ -63,12 +63,15 @@ struct memif_queue {
uint16_t last_head; /**< last ring head */
uint16_t last_tail; /**< last ring tail */
+ struct rte_mbuf **buffers;
+ /**< Stored mbufs. Used in zero-copy tx. Slave stores transmitted
+ * mbufs to free them once master has received them.
+ */
+
/* rx/tx info */
uint64_t n_pkts; /**< number of rx/tx packets */
uint64_t n_bytes; /**< number of rx/tx bytes */
- memif_ring_t *ring; /**< pointer to ring */
-
struct rte_intr_handle intr_handle; /**< interrupt handle */
memif_log2_ring_size_t log2_ring_size; /**< log2 of ring size */
@@ -115,8 +118,6 @@ struct pmd_internals {
/**< local disconnect reason */
char remote_disc_string[ETH_MEMIF_DISC_STRING_SIZE];
/**< remote disconnect reason */
-
- struct rte_vdev_device *vdev; /**< vdev handle */
};
struct pmd_process_private {
@@ -131,7 +132,7 @@ struct pmd_process_private {
* @param proc_private
* device process private data
*/
-void memif_free_regions(struct pmd_process_private *proc_private);
+void memif_free_regions(struct rte_eth_dev *dev);
/**
* Finalize connection establishment process. Map shared memory file
@@ -162,3 +162,10 @@ rte_mcfg_timer_unlock(void)
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
rte_spinlock_unlock(&mcfg->tlock);
}
+
+bool
+rte_mcfg_get_single_file_segments(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ return (bool)mcfg->single_file_segments;
+}
@@ -5,6 +5,8 @@
#ifndef _RTE_EAL_MEMCONFIG_H_
#define _RTE_EAL_MEMCONFIG_H_
+#include <stdbool.h>
+
#include <rte_compat.h>
/**
@@ -109,6 +111,17 @@ __rte_experimental
void
rte_mcfg_timer_unlock(void);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * If true, pages are put in single files (per memseg list),
+ * as opposed to creating a file per page.
+ */
+__rte_experimental
+bool
+rte_mcfg_get_single_file_segments(void);
+
#ifdef __cplusplus
}
#endif
@@ -429,4 +429,5 @@ EXPERIMENTAL {
# added in 19.11
rte_log_get_stream;
+ rte_mcfg_get_single_file_segments;
};