Message ID | 20190905090118.17406-1-ciara.loftus@intel.com
---|---
State | Superseded, archived
Delegated to | Ferruh Yigit
Series | [v2] net/af_xdp: enable support for unaligned umem chunks
Context | Check | Description
---|---|---
ci/checkpatch | success | coding style OK |
ci/Intel-compilation | success | Compilation OK |
ci/iol-dpdk_compile_ovs | success | Compile Testing PASS |
ci/iol-dpdk_compile | success | Compile Testing PASS |
ci/iol-dpdk_compile_spdk | success | Compile Testing PASS |
ci/intel-Performance | success | Performance Testing PASS |
ci/mellanox-Performance | success | Performance Testing PASS |
Hi, Ciara On 09/05, Ciara Loftus wrote: >This patch enables the unaligned chunks feature for AF_XDP which allows >chunks to be placed at arbitrary places in the umem, as opposed to them >being required to be aligned to 2k. This allows for DPDK application >mempools to be mapped directly into the umem and in turn enable zero copy >transfer between umem and the PMD. > >This patch replaces the zero copy via external mbuf mechanism introduced >in commit e9ff8bb71943 ("net/af_xdp: enable zero copy by external mbuf"). >The pmd_zero copy vdev argument is also removed as now the PMD will >auto-detect presence of the unaligned chunks feature and enable it if so >and otherwise fall back to copy mode if not detected. > >When enabled, this feature significantly improves single-core performance >of the PMD. > >Signed-off-by: Ciara Loftus <ciara.loftus@intel.com> >Signed-off-by: Kevin Laatz <kevin.laatz@intel.com> >--- >v2: >* Updated af_xdp.rst with kernel dependency info >* Fixed case on rx where mbuf alloc fails > > doc/guides/nics/af_xdp.rst | 2 +- > doc/guides/rel_notes/release_19_11.rst | 9 + > drivers/net/af_xdp/rte_eth_af_xdp.c | 306 ++++++++++++++++++------- > 3 files changed, 234 insertions(+), 83 deletions(-) > >diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst >index ec46f08f0..b434b25df 100644 >--- a/doc/guides/nics/af_xdp.rst >+++ b/doc/guides/nics/af_xdp.rst >@@ -35,7 +35,6 @@ The following options can be provided to set up an af_xdp port in DPDK. > * ``iface`` - name of the Kernel interface to attach to (required); > * ``start_queue`` - starting netdev queue id (optional, default 0); > * ``queue_count`` - total netdev queue number (optional, default 1); >-* ``pmd_zero_copy`` - enable zero copy or not (optional, default 0); > > Prerequisites > ------------- >@@ -48,6 +47,7 @@ This is a Linux-specific PMD, thus the following prerequisites apply: > <kernel src tree>/tools/lib/bpf; > * A Kernel bound interface to attach to; > * For need_wakeup feature, it requires kernel version later than v5.3-rc1; >+* For PMD zero copy, it requires kernel version later than v5.4-rc1; > > Set up an af_xdp interface > ----------------------------- >diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst >index 8490d897c..28a8e5372 100644 >--- a/doc/guides/rel_notes/release_19_11.rst >+++ b/doc/guides/rel_notes/release_19_11.rst >@@ -56,6 +56,13 @@ New Features > Also, make sure to start the actual text at the margin. > ========================================================= > >+* **Updated the AF_XDP PMD.** >+ >+ Updated the AF_XDP PMD. The new features include: >+ >+ * Enabled zero copy between application mempools and UMEM by enabling the >+ XDP_UMEM_UNALIGNED_CHUNKS UMEM flag. >+ > > Removed Items > ------------- >@@ -69,6 +76,8 @@ Removed Items > Also, make sure to start the actual text at the margin. > ========================================================= > >+* Removed AF_XDP pmd_zero copy vdev argument. Support is now auto-detected. 
>+ > > API Changes > ----------- >diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c >index 41ed5b2af..e1089e40c 100644 >--- a/drivers/net/af_xdp/rte_eth_af_xdp.c >+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c >@@ -58,7 +58,13 @@ static int af_xdp_logtype; > > #define ETH_AF_XDP_FRAME_SIZE 2048 > #define ETH_AF_XDP_NUM_BUFFERS 4096 >+#ifdef XDP_UMEM_UNALIGNED_CHUNK_FLAG >+#define ETH_AF_XDP_MBUF_OVERHEAD 128 /* sizeof(struct rte_mbuf) */ >+#define ETH_AF_XDP_DATA_HEADROOM \ >+ (ETH_AF_XDP_MBUF_OVERHEAD + RTE_PKTMBUF_HEADROOM) >+#else > #define ETH_AF_XDP_DATA_HEADROOM 0 >+#endif > #define ETH_AF_XDP_DFLT_NUM_DESCS XSK_RING_CONS__DEFAULT_NUM_DESCS > #define ETH_AF_XDP_DFLT_START_QUEUE_IDX 0 > #define ETH_AF_XDP_DFLT_QUEUE_COUNT 1 >@@ -73,7 +79,8 @@ struct xsk_umem_info { > struct xsk_umem *umem; > struct rte_ring *buf_ring; > const struct rte_memzone *mz; >- int pmd_zc; >+ struct rte_mempool *mb_pool; >+ void *buffer; > }; > > struct rx_stats { >@@ -98,10 +105,12 @@ struct pkt_rx_queue { > struct tx_stats { > uint64_t tx_pkts; > uint64_t tx_bytes; >+ uint64_t tx_dropped; > }; > > struct pkt_tx_queue { > struct xsk_ring_prod tx; >+ struct xsk_umem_info *umem; > > struct tx_stats stats; > >@@ -117,7 +126,6 @@ struct pmd_internals { > int max_queue_cnt; > int combined_queue_cnt; > >- int pmd_zc; > struct rte_ether_addr eth_addr; > > struct pkt_rx_queue *rx_queues; >@@ -127,13 +135,11 @@ struct pmd_internals { > #define ETH_AF_XDP_IFACE_ARG "iface" > #define ETH_AF_XDP_START_QUEUE_ARG "start_queue" > #define ETH_AF_XDP_QUEUE_COUNT_ARG "queue_count" >-#define ETH_AF_XDP_PMD_ZC_ARG "pmd_zero_copy" > > static const char * const valid_arguments[] = { > ETH_AF_XDP_IFACE_ARG, > ETH_AF_XDP_START_QUEUE_ARG, > ETH_AF_XDP_QUEUE_COUNT_ARG, >- ETH_AF_XDP_PMD_ZC_ARG, > NULL > }; > >@@ -148,9 +154,32 @@ static inline int > reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size) > { > struct xsk_ring_prod *fq = &umem->fq; >- void *addrs[reserve_size]; > uint32_t idx; > uint16_t i; >+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) >+ >+ if (unlikely(!xsk_ring_prod__reserve(fq, reserve_size, &idx))) { >+ AF_XDP_LOG(DEBUG, "Failed to reserve enough fq descs.\n"); >+ return -1; >+ } >+ >+ for (i = 0; i < reserve_size; i++) { >+ struct rte_mbuf *mbuf; >+ __u64 *fq_addr; >+ uint64_t addr; >+ >+ mbuf = rte_pktmbuf_alloc(umem->mb_pool); >+ if (unlikely(mbuf == NULL)) { >+ reserve_size = i; If this case happens, the new reserve_sizei (n) will be less than its original value (N), so we would end up with reserve N slots, but only submit n slots later, then we would get an wrong number of available slots later. My previous way of solving this inconsistent issue is to alloc a bulk of mbuf at first, if alloc fails, directly return before reserving any slots, in this way we can make sure the reserve/submit consistent, there should also be some performance benefit from the bulk allocation. 
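A minimal sketch of the bulk-allocation approach described above, for the unaligned-chunk path only; the helper name `reserve_fill_queue_zc`, the rollback on a failed reserve and the log messages are illustrative assumptions rather than part of this patch:

```c
static inline int
reserve_fill_queue_zc(struct xsk_umem_info *umem, uint16_t reserve_size)
{
	struct xsk_ring_prod *fq = &umem->fq;
	struct rte_mbuf *bufs[reserve_size];
	uint32_t idx;
	uint16_t i;

	/* Allocate all buffers up front so we never reserve more fq slots
	 * than we are able to submit.
	 */
	if (rte_pktmbuf_alloc_bulk(umem->mb_pool, bufs, reserve_size) != 0) {
		AF_XDP_LOG(DEBUG, "Failed to get enough buffers for fq.\n");
		return -1;
	}

	if (unlikely(!xsk_ring_prod__reserve(fq, reserve_size, &idx))) {
		AF_XDP_LOG(DEBUG, "Failed to reserve enough fq descs.\n");
		rte_mempool_put_bulk(umem->mb_pool, (void **)bufs, reserve_size);
		return -1;
	}

	for (i = 0; i < reserve_size; i++) {
		__u64 *fq_addr = xsk_ring_prod__fill_addr(fq, idx++);

		*fq_addr = (uint64_t)bufs[i] - (uint64_t)umem->buffer;
	}

	xsk_ring_prod__submit(fq, reserve_size);

	return 0;
}
```

With this shape the reserved and submitted counts always match, and the bulk allocation saves one mempool operation per buffer.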
>+ break; >+ } >+ >+ fq_addr = xsk_ring_prod__fill_addr(fq, idx++); >+ addr = (uint64_t)mbuf - (uint64_t)umem->buffer; >+ *fq_addr = addr; >+ } >+#else >+ void *addrs[reserve_size]; > > if (rte_ring_dequeue_bulk(umem->buf_ring, addrs, reserve_size, NULL) > != reserve_size) { >@@ -171,21 +200,13 @@ reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size) > fq_addr = xsk_ring_prod__fill_addr(fq, idx++); > *fq_addr = (uint64_t)addrs[i]; > } >+#endif > > xsk_ring_prod__submit(fq, reserve_size); > > return 0; > } > >-static void >-umem_buf_release_to_fq(void *addr, void *opaque) >-{ >- struct xsk_umem_info *umem = (struct xsk_umem_info *)opaque; >- uint64_t umem_addr = (uint64_t)addr - umem->mz->addr_64; >- >- rte_ring_enqueue(umem->buf_ring, (void *)umem_addr); >-} >- > static uint16_t > eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) > { >@@ -194,12 +215,53 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) > struct xsk_umem_info *umem = rxq->umem; > struct xsk_ring_prod *fq = &umem->fq; > uint32_t idx_rx = 0; >- uint32_t free_thresh = fq->size >> 1; >- int pmd_zc = umem->pmd_zc; >- struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE]; > unsigned long dropped = 0; > unsigned long rx_bytes = 0; > int rcvd, i; >+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) What about we extract out some functions to make rx more readable? Some thing like: #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) af_xdp_rx_zc() #else af_xdp_rx_cp() #endif I think this also applies for tx. Otherwise, people will easily get lost in multiple #if/#else/#endif, since we have have XDP_USE_NEED_WAKEUP here. >+ >+ rcvd = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx); >+ if (rcvd == 0) { >+#if defined(XDP_USE_NEED_WAKEUP) >+ if (xsk_ring_prod__needs_wakeup(fq)) >+ (void)poll(rxq->fds, 1, 1000); >+#endif >+ >+ return rcvd; >+ } >+ >+ for (i = 0; i < rcvd; i++) { >+ const struct xdp_desc *desc; >+ uint64_t addr; >+ uint32_t len; >+ uint64_t offset; >+ >+ desc = xsk_ring_cons__rx_desc(rx, idx_rx++); >+ addr = desc->addr; >+ len = desc->len; >+ >+ offset = xsk_umem__extract_offset(addr); >+ addr = xsk_umem__extract_addr(addr); >+ >+ bufs[i] = (struct rte_mbuf *) >+ xsk_umem__get_data(umem->buffer, addr); >+ bufs[i]->data_off = offset - sizeof(struct rte_mbuf); >+ >+ rte_pktmbuf_pkt_len(bufs[i]) = len; >+ rte_pktmbuf_data_len(bufs[i]) = len; >+ rx_bytes += len; >+ } >+ >+ xsk_ring_cons__release(rx, rcvd); >+ >+ /* statistics */ >+ rxq->stats.rx_pkts += (rcvd - dropped); >+ rxq->stats.rx_bytes += rx_bytes; >+ >+ (void)reserve_fill_queue(umem, rcvd); Since both zc and cp mode would call reserve_fill_queue, how about just move it to the common part of rx to avoid duplication? 
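A rough sketch of that factoring, where af_xdp_rx_zc() and af_xdp_rx_cp() are hypothetical helpers holding the current #if/#else bodies and the fill-queue refill moves to the shared tail:

```c
/* Hypothetical helpers carrying today's conditional code paths. */
static uint16_t af_xdp_rx_zc(struct pkt_rx_queue *rxq,
			     struct rte_mbuf **bufs, uint16_t nb_pkts);
static uint16_t af_xdp_rx_cp(struct pkt_rx_queue *rxq,
			     struct rte_mbuf **bufs, uint16_t nb_pkts);

static uint16_t
eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct pkt_rx_queue *rxq = queue;
	uint16_t rcvd;

#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
	rcvd = af_xdp_rx_zc(rxq, bufs, nb_pkts);
#else
	rcvd = af_xdp_rx_cp(rxq, bufs, nb_pkts);
#endif

	/* Common tail: refill the fill queue for whatever was consumed. */
	if (rcvd > 0)
		(void)reserve_fill_queue(rxq->umem, rcvd);

	return rcvd;
}
```

The copy path today refills only once half the fill queue is free, so the shared tail would either keep that threshold check inside af_xdp_rx_cp() or adopt the per-burst refill shown here.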
>+#else >+ uint32_t free_thresh = fq->size >> 1; >+ struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE]; > > nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE); > >@@ -224,25 +286,14 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) > uint64_t addr; > uint32_t len; > void *pkt; >- uint16_t buf_len = ETH_AF_XDP_FRAME_SIZE; >- struct rte_mbuf_ext_shared_info *shinfo; > > desc = xsk_ring_cons__rx_desc(rx, idx_rx++); > addr = desc->addr; > len = desc->len; > pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr); > >- if (pmd_zc) { >- shinfo = rte_pktmbuf_ext_shinfo_init_helper(pkt, >- &buf_len, umem_buf_release_to_fq, umem); >- >- rte_pktmbuf_attach_extbuf(mbufs[i], pkt, 0, buf_len, >- shinfo); >- } else { >- rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), >- pkt, len); >- rte_ring_enqueue(umem->buf_ring, (void *)addr); >- } >+ rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len); >+ rte_ring_enqueue(umem->buf_ring, (void *)addr); > rte_pktmbuf_pkt_len(mbufs[i]) = len; > rte_pktmbuf_data_len(mbufs[i]) = len; > rx_bytes += len; >@@ -259,6 +310,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) > if (rcvd != nb_pkts) > rte_mempool_put_bulk(rxq->mb_pool, (void **)&mbufs[rcvd], > nb_pkts - rcvd); >+#endif > > return rcvd; > } >@@ -275,7 +327,14 @@ pull_umem_cq(struct xsk_umem_info *umem, int size) > for (i = 0; i < n; i++) { > uint64_t addr; > addr = *xsk_ring_cons__comp_addr(cq, idx_cq++); >+ >+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) >+ addr = xsk_umem__extract_addr(addr); >+ rte_pktmbuf_free((struct rte_mbuf *) >+ xsk_umem__get_data(umem->buffer, addr)); >+#else > rte_ring_enqueue(umem->buf_ring, (void *)addr); >+#endif > } > > xsk_ring_cons__release(cq, n); >@@ -284,7 +343,7 @@ pull_umem_cq(struct xsk_umem_info *umem, int size) > static void > kick_tx(struct pkt_tx_queue *txq) > { >- struct xsk_umem_info *umem = txq->pair->umem; >+ struct xsk_umem_info *umem = txq->umem; > > #if defined(XDP_USE_NEED_WAKEUP) > if (xsk_ring_prod__needs_wakeup(&txq->tx)) >@@ -299,28 +358,70 @@ kick_tx(struct pkt_tx_queue *txq) > if (errno == EAGAIN) > pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE); > } >- pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE); >-} >- >-static inline bool >-in_umem_range(struct xsk_umem_info *umem, uint64_t addr) >-{ >- uint64_t mz_base_addr = umem->mz->addr_64; >- >- return addr >= mz_base_addr && addr < mz_base_addr + umem->mz->len; > } > > static uint16_t > eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) > { > struct pkt_tx_queue *txq = queue; >- struct xsk_umem_info *umem = txq->pair->umem; >+ struct xsk_umem_info *umem = txq->umem; > struct rte_mbuf *mbuf; >- int pmd_zc = umem->pmd_zc; >- void *addrs[ETH_AF_XDP_TX_BATCH_SIZE]; > unsigned long tx_bytes = 0; > int i; > uint32_t idx_tx; >+ uint16_t dropped = 0; >+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) >+ >+ pull_umem_cq(umem, nb_pkts); >+ >+ nb_pkts = xsk_ring_prod__reserve(&txq->tx, nb_pkts, &idx_tx); >+ >+ for (i = 0; i < nb_pkts; i++) { >+ struct xdp_desc *desc; >+ uint64_t addr, offset; >+ >+ desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i); >+ mbuf = bufs[i]; >+ desc->len = mbuf->pkt_len; >+ >+ if (mbuf->pool == umem->mb_pool) { >+ addr = (uint64_t)mbuf - (uint64_t)umem->buffer; >+ offset = rte_pktmbuf_mtod(mbuf, uint64_t) - >+ (uint64_t)mbuf; >+ offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT; >+ desc->addr = addr; >+ desc->addr |= offset; What about desc->addr = addr | offset; >+ } else { >+ struct rte_mbuf *local_mbuf = >+ rte_pktmbuf_alloc(umem->mb_pool); 
>+ void *pkt; >+ >+ if (!local_mbuf) { Better to user style like 'local_mbuf == NULL' to follow the convention of this file, and this is also preferred according to dpdk's coding style. >+ rte_pktmbuf_free(mbuf); >+ dropped++; >+ continue; >+ } >+ >+ addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer; >+ offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) - >+ (uint64_t)local_mbuf; >+ pkt = xsk_umem__get_data(umem->buffer, addr + offset); >+ offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT; >+ desc->addr = addr; >+ desc->addr |= offset; desc->addr = addr | offset; >+ rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), >+ desc->len); >+ rte_pktmbuf_free(mbuf); >+ } >+ >+ tx_bytes += mbuf->pkt_len; >+ } >+ >+ xsk_ring_prod__submit(&txq->tx, nb_pkts - dropped); Similar inconsistent reserve/submit may happen in tx. Thanks, Xiaolong >+ >+ kick_tx(txq); >+#else >+ void *addrs[ETH_AF_XDP_TX_BATCH_SIZE]; > > nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE); > >@@ -333,6 +434,7 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) > > if (xsk_ring_prod__reserve(&txq->tx, nb_pkts, &idx_tx) != nb_pkts) { > kick_tx(txq); >+ pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE); > rte_ring_enqueue_bulk(umem->buf_ring, addrs, nb_pkts, NULL); > return 0; > } >@@ -345,35 +447,23 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) > mbuf = bufs[i]; > desc->len = mbuf->pkt_len; > >- /* >- * We need to make sure the external mbuf address is within >- * current port's umem memzone range >- */ >- if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) && >- in_umem_range(umem, (uint64_t)mbuf->buf_addr)) { >- desc->addr = (uint64_t)mbuf->buf_addr - >- umem->mz->addr_64; >- mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr, >- (uint64_t)addrs[i]); >- } else { >- desc->addr = (uint64_t)addrs[i]; >- pkt = xsk_umem__get_data(umem->mz->addr, >- desc->addr); >- rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), >- desc->len); >- } >+ desc->addr = (uint64_t)addrs[i]; >+ pkt = xsk_umem__get_data(umem->mz->addr, >+ desc->addr); >+ rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len); > tx_bytes += mbuf->pkt_len; >+ rte_pktmbuf_free(mbuf); > } > > xsk_ring_prod__submit(&txq->tx, nb_pkts); > > kick_tx(txq); >+ pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE); >+#endif > >- txq->stats.tx_pkts += nb_pkts; >+ txq->stats.tx_pkts += nb_pkts - dropped; > txq->stats.tx_bytes += tx_bytes; >- >- for (i = 0; i < nb_pkts; i++) >- rte_pktmbuf_free(bufs[i]); >+ txq->stats.tx_dropped += dropped; > > return nb_pkts; > } >@@ -446,6 +536,7 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) > stats->ipackets += stats->q_ipackets[i]; > stats->ibytes += stats->q_ibytes[i]; > stats->imissed += rxq->stats.rx_dropped; >+ stats->oerrors += txq->stats.tx_dropped; > ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP, > XDP_STATISTICS, &xdp_stats, &optlen); > if (ret != 0) { >@@ -492,11 +583,16 @@ remove_xdp_program(struct pmd_internals *internals) > static void > xdp_umem_destroy(struct xsk_umem_info *umem) > { >+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) >+ rte_mempool_free(umem->mb_pool); >+ umem->mb_pool = NULL; >+#else > rte_memzone_free(umem->mz); > umem->mz = NULL; > > rte_ring_free(umem->buf_ring); > umem->buf_ring = NULL; >+#endif > > rte_free(umem); > umem = NULL; >@@ -546,6 +642,55 @@ eth_link_update(struct rte_eth_dev *dev __rte_unused, > return 0; > } > >+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) >+static inline uint64_t get_base_addr(struct rte_mempool *mp) >+{ >+ struct rte_mempool_memhdr *memhdr; >+ 
>+ memhdr = STAILQ_FIRST(&mp->mem_list); >+ return (uint64_t)memhdr->addr & ~(getpagesize() - 1); >+} >+ >+static struct >+xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals __rte_unused, >+ struct pkt_rx_queue *rxq) >+{ >+ struct xsk_umem_info *umem; >+ int ret; >+ struct xsk_umem_config usr_config = { >+ .fill_size = ETH_AF_XDP_DFLT_NUM_DESCS, >+ .comp_size = ETH_AF_XDP_DFLT_NUM_DESCS, >+ .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG}; >+ void *base_addr = NULL; >+ struct rte_mempool *mb_pool = rxq->mb_pool; >+ >+ usr_config.frame_size = rte_pktmbuf_data_room_size(mb_pool) + >+ ETH_AF_XDP_MBUF_OVERHEAD + >+ mb_pool->private_data_size; >+ usr_config.frame_headroom = ETH_AF_XDP_DATA_HEADROOM + >+ mb_pool->private_data_size; >+ >+ umem = rte_zmalloc_socket("umem", sizeof(*umem), 0, rte_socket_id()); >+ if (umem == NULL) { >+ AF_XDP_LOG(ERR, "Failed to allocate umem info"); >+ return NULL; >+ } >+ >+ umem->mb_pool = mb_pool; >+ base_addr = (void *)get_base_addr(mb_pool); >+ >+ ret = xsk_umem__create(&umem->umem, base_addr, >+ mb_pool->populated_size * usr_config.frame_size, >+ &umem->fq, &umem->cq, >+ &usr_config); >+ >+ if (ret) { >+ AF_XDP_LOG(ERR, "Failed to create umem"); >+ goto err; >+ } >+ umem->buffer = base_addr; >+ >+#else > static struct > xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, > struct pkt_rx_queue *rxq) >@@ -606,6 +751,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, > } > umem->mz = mz; > >+#endif > return umem; > > err: >@@ -625,6 +771,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, > rxq->umem = xdp_umem_configure(internals, rxq); > if (rxq->umem == NULL) > return -ENOMEM; >+ txq->umem = rxq->umem; > > cfg.rx_size = ring_size; > cfg.tx_size = ring_size; >@@ -669,7 +816,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, > struct rte_mempool *mb_pool) > { > struct pmd_internals *internals = dev->data->dev_private; >- uint32_t buf_size, data_size; > struct pkt_rx_queue *rxq; > int ret; > >@@ -677,6 +823,10 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, > > AF_XDP_LOG(INFO, "Set up rx queue, rx queue id: %d, xsk queue id: %d\n", > rx_queue_id, rxq->xsk_queue_idx); >+ >+#ifndef XDP_UMEM_UNALIGNED_CHUNK_FLAG >+ uint32_t buf_size, data_size; >+ > /* Now get the space available for data in the mbuf */ > buf_size = rte_pktmbuf_data_room_size(mb_pool) - > RTE_PKTMBUF_HEADROOM; >@@ -688,6 +838,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, > ret = -ENOMEM; > goto err; > } >+#endif > > rxq->mb_pool = mb_pool; > >@@ -700,8 +851,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, > rxq->fds[0].fd = xsk_socket__fd(rxq->xsk); > rxq->fds[0].events = POLLIN; > >- rxq->umem->pmd_zc = internals->pmd_zc; >- > dev->data->rx_queues[rx_queue_id] = rxq; > return 0; > >@@ -877,7 +1026,7 @@ xdp_get_channels_info(const char *if_name, int *max_queues, > > static int > parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, >- int *queue_cnt, int *pmd_zc) >+ int *queue_cnt) > { > int ret; > >@@ -898,11 +1047,6 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, > goto free_kvlist; > } > >- ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG, >- &parse_integer_arg, pmd_zc); >- if (ret < 0) >- goto free_kvlist; >- > free_kvlist: > rte_kvargs_free(kvlist); > return ret; >@@ -940,7 +1084,7 @@ get_iface_info(const char *if_name, > > static struct rte_eth_dev * > init_internals(struct rte_vdev_device *dev, const char *if_name, >- int start_queue_idx, int queue_cnt, int 
pmd_zc) >+ int start_queue_idx, int queue_cnt) > { > const char *name = rte_vdev_device_name(dev); > const unsigned int numa_node = dev->device.numa_node; >@@ -955,7 +1099,6 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, > > internals->start_queue_idx = start_queue_idx; > internals->queue_cnt = queue_cnt; >- internals->pmd_zc = pmd_zc; > strlcpy(internals->if_name, if_name, IFNAMSIZ); > > if (xdp_get_channels_info(if_name, &internals->max_queue_cnt, >@@ -1011,8 +1154,9 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, > /* Let rte_eth_dev_close() release the port resources. */ > eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; > >- if (internals->pmd_zc) >- AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n"); >+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) >+ AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n"); >+#endif > > return eth_dev; > >@@ -1034,7 +1178,6 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) > int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT; > struct rte_eth_dev *eth_dev = NULL; > const char *name; >- int pmd_zc = 0; > > AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", > rte_vdev_device_name(dev)); >@@ -1062,7 +1205,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) > dev->device.numa_node = rte_socket_id(); > > if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx, >- &xsk_queue_cnt, &pmd_zc) < 0) { >+ &xsk_queue_cnt) < 0) { > AF_XDP_LOG(ERR, "Invalid kvargs value\n"); > return -EINVAL; > } >@@ -1073,7 +1216,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) > } > > eth_dev = init_internals(dev, if_name, xsk_start_queue_idx, >- xsk_queue_cnt, pmd_zc); >+ xsk_queue_cnt); > if (eth_dev == NULL) { > AF_XDP_LOG(ERR, "Failed to init internals\n"); > return -1; >@@ -1116,8 +1259,7 @@ RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv); > RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp, > "iface=<string> " > "start_queue=<int> " >- "queue_count=<int> " >- "pmd_zero_copy=<0|1>"); >+ "queue_count=<int>"); > > RTE_INIT(af_xdp_init_log) > { >-- >2.17.1 >
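For reference, the unaligned-chunks mode used throughout this patch packs the intra-chunk offset into the upper bits of the 64-bit descriptor address, which is what the addr | (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT) expressions and the xsk_umem__extract_addr()/xsk_umem__extract_offset() calls rely on. A small standalone illustration with made-up values (the two macros are copied here only so the example compiles on its own):

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* From the kernel's if_xdp.h when XDP_UMEM_UNALIGNED_CHUNK_FLAG is available. */
#define XSK_UNALIGNED_BUF_OFFSET_SHIFT	48
#define XSK_UNALIGNED_BUF_ADDR_MASK \
	((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)

int main(void)
{
	/* Made-up values: mbuf address relative to umem->buffer, and the
	 * headroom between the mbuf header and the packet data.
	 */
	uint64_t base = 0x12340;
	uint64_t offset = 192;
	uint64_t desc_addr = base | (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);

	/* What xsk_umem__extract_addr()/xsk_umem__extract_offset() recover. */
	printf("addr=0x%" PRIx64 " offset=%" PRIu64 "\n",
	       desc_addr & XSK_UNALIGNED_BUF_ADDR_MASK,
	       desc_addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT);

	return 0;
}
```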