diff mbox series

[3/3] net/af_xdp: enable support for unaligned umem chunks

Message ID 20190919141520.4227-4-ciara.loftus@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers show
Series AF_XDP tx halt fix, IRQ pinning and unaligned chunks | expand

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Ciara Loftus Sept. 19, 2019, 2:15 p.m. UTC
This patch enables the unaligned chunks feature for AF_XDP which allows
chunks to be placed at arbitrary places in the umem, as opposed to them
being required to be aligned to 2k. This allows for DPDK application
mempools to be mapped directly into the umem and in turn enable zero copy
transfer between umem and the PMD.

This patch replaces the zero copy via external mbuf mechanism introduced
in commit e9ff8bb71943 ("net/af_xdp: enable zero copy by external mbuf").
The pmd_zero copy vdev argument is also removed as now the PMD will
auto-detect presence of the unaligned chunks feature and enable it if so
and otherwise fall back to copy mode if not detected.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
---
 doc/guides/nics/af_xdp.rst             |   2 +-
 doc/guides/rel_notes/release_19_11.rst |   4 +
 drivers/net/af_xdp/rte_eth_af_xdp.c    | 375 ++++++++++++++++++++-----
 3 files changed, 305 insertions(+), 76 deletions(-)
diff mbox series

Patch

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index a255ba4e7..40a35a822 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -35,7 +35,6 @@  The following options can be provided to set up an af_xdp port in DPDK.
 *   ``iface`` - name of the Kernel interface to attach to (required);
 *   ``start_queue`` - starting netdev queue id (optional, default 0);
 *   ``queue_count`` - total netdev queue number (optional, default 1);
-*   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
 *   ``queue_irq`` - pin queue irqs to specified core <queue:core> (optional,
     default no pinning). The queue argument refers to the ethdev queue as
     opposed to the netdev queue. These values are the same unless a value
@@ -53,6 +52,7 @@  This is a Linux-specific PMD, thus the following prerequisites apply:
    <kernel src tree>/tools/lib/bpf;
 *  A Kernel bound interface to attach to;
 *  For need_wakeup feature, it requires kernel version later than v5.3-rc1;
+*  For PMD zero copy, it requires kernel version later than v5.4-rc1;
 
 Set up an af_xdp interface
 -----------------------------
diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
index 06bf57c42..22369107c 100644
--- a/doc/guides/rel_notes/release_19_11.rst
+++ b/doc/guides/rel_notes/release_19_11.rst
@@ -62,6 +62,8 @@  New Features
 
   * Support for pinning netdev queue IRQs to cores specified by the user.
     Available for ixgbe, i40e and mlx5 drivers.
+  * Enabled zero copy between application mempools and UMEM by enabling the
+    XDP_UMEM_UNALIGNED_CHUNKS UMEM flag.
 
 
 Removed Items
@@ -85,6 +87,8 @@  Removed Items
      "port config <port_id> rx_offload crc_strip|scatter|ipv4_cksum|udp_cksum|tcp_cksum|
      timestamp|vlan_strip|vlan_filter|vlan_extend on|off"
 
+   * Removed AF_XDP pmd_zero copy vdev argument. Support is now auto-detected.
+
 
 API Changes
 -----------
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index a00eb6460..9abb9d9ae 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -62,7 +62,13 @@  static int af_xdp_logtype;
 
 #define ETH_AF_XDP_FRAME_SIZE		2048
 #define ETH_AF_XDP_NUM_BUFFERS		4096
+#ifdef XDP_UMEM_UNALIGNED_CHUNK_FLAG
+#define ETH_AF_XDP_MBUF_OVERHEAD	128 /* sizeof(struct rte_mbuf) */
+#define ETH_AF_XDP_DATA_HEADROOM \
+	(ETH_AF_XDP_MBUF_OVERHEAD + RTE_PKTMBUF_HEADROOM)
+#else
 #define ETH_AF_XDP_DATA_HEADROOM	0
+#endif
 #define ETH_AF_XDP_DFLT_NUM_DESCS	XSK_RING_CONS__DEFAULT_NUM_DESCS
 #define ETH_AF_XDP_DFLT_START_QUEUE_IDX	0
 #define ETH_AF_XDP_DFLT_QUEUE_COUNT	1
@@ -77,7 +83,8 @@  struct xsk_umem_info {
 	struct xsk_umem *umem;
 	struct rte_ring *buf_ring;
 	const struct rte_memzone *mz;
-	int pmd_zc;
+	struct rte_mempool *mb_pool;
+	void *buffer;
 };
 
 struct rx_stats {
@@ -102,10 +109,12 @@  struct pkt_rx_queue {
 struct tx_stats {
 	uint64_t tx_pkts;
 	uint64_t tx_bytes;
+	uint64_t tx_dropped;
 };
 
 struct pkt_tx_queue {
 	struct xsk_ring_prod tx;
+	struct xsk_umem_info *umem;
 
 	struct tx_stats stats;
 
@@ -122,7 +131,6 @@  struct pmd_internals {
 	int combined_queue_cnt;
 	int queue_irqs[RTE_MAX_QUEUES_PER_PORT];
 
-	int pmd_zc;
 	struct rte_ether_addr eth_addr;
 
 	struct pkt_rx_queue *rx_queues;
@@ -132,14 +140,12 @@  struct pmd_internals {
 #define ETH_AF_XDP_IFACE_ARG			"iface"
 #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
 #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
-#define ETH_AF_XDP_PMD_ZC_ARG			"pmd_zero_copy"
 #define ETH_AF_XDP_QUEUE_IRQ_ARG		"queue_irq"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
 	ETH_AF_XDP_START_QUEUE_ARG,
 	ETH_AF_XDP_QUEUE_COUNT_ARG,
-	ETH_AF_XDP_PMD_ZC_ARG,
 	ETH_AF_XDP_QUEUE_IRQ_ARG,
 	NULL
 };
@@ -166,8 +172,43 @@  int (*generate_driver_regex_func)(char *iface_regex_str,
 				  struct pmd_internals *internals,
 				  uint16_t netdev_qid);
 
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 static inline int
-reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
+reserve_fill_queue_zc(struct xsk_umem_info *umem, uint16_t reserve_size)
+{
+	struct xsk_ring_prod *fq = &umem->fq;
+	uint32_t idx;
+	uint16_t i;
+	struct rte_mbuf *bufs[reserve_size];
+
+	if (rte_pktmbuf_alloc_bulk(umem->mb_pool, bufs, reserve_size)) {
+		AF_XDP_LOG(DEBUG, "Failed to get enough buffers for fq.\n");
+		return -1;
+	}
+
+	if (unlikely(!xsk_ring_prod__reserve(fq, reserve_size, &idx))) {
+		for (i = 0; i < reserve_size; i++)
+			rte_pktmbuf_free(bufs[i]);
+		AF_XDP_LOG(DEBUG, "Failed to reserve enough fq descs.\n");
+		return -1;
+	}
+
+	for (i = 0; i < reserve_size; i++) {
+		__u64 *fq_addr;
+		uint64_t addr;
+
+		fq_addr = xsk_ring_prod__fill_addr(fq, idx++);
+		addr = (uint64_t)bufs[i] - (uint64_t)umem->buffer;
+		*fq_addr = addr;
+	}
+
+	xsk_ring_prod__submit(fq, reserve_size);
+
+	return 0;
+}
+#else
+static inline int
+reserve_fill_queue_cp(struct xsk_umem_info *umem, uint16_t reserve_size)
 {
 	struct xsk_ring_prod *fq = &umem->fq;
 	void *addrs[reserve_size];
@@ -198,30 +239,87 @@  reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
 
 	return 0;
 }
+#endif
 
-static void
-umem_buf_release_to_fq(void *addr, void *opaque)
+static inline int
+reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
 {
-	struct xsk_umem_info *umem = (struct xsk_umem_info *)opaque;
-	uint64_t umem_addr = (uint64_t)addr - umem->mz->addr_64;
-
-	rte_ring_enqueue(umem->buf_ring, (void *)umem_addr);
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+	return reserve_fill_queue_zc(umem, reserve_size);
+#else
+	return reserve_fill_queue_cp(umem, reserve_size);
+#endif
 }
 
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 static uint16_t
-eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct pkt_rx_queue *rxq = queue;
 	struct xsk_ring_cons *rx = &rxq->rx;
 	struct xsk_umem_info *umem = rxq->umem;
 	struct xsk_ring_prod *fq = &umem->fq;
 	uint32_t idx_rx = 0;
+	unsigned long rx_bytes = 0;
+	int rcvd, i;
 	uint32_t free_thresh = fq->size >> 1;
-	int pmd_zc = umem->pmd_zc;
-	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
-	unsigned long dropped = 0;
+
+	rcvd = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);
+	if (rcvd == 0) {
+#if defined(XDP_USE_NEED_WAKEUP)
+		if (xsk_ring_prod__needs_wakeup(fq))
+			(void)poll(rxq->fds, 1, 1000);
+#endif
+
+		return rcvd;
+	}
+
+	if (xsk_prod_nb_free(fq, free_thresh) >= free_thresh)
+		(void)reserve_fill_queue(umem, ETH_AF_XDP_RX_BATCH_SIZE);
+
+	for (i = 0; i < rcvd; i++) {
+		const struct xdp_desc *desc;
+		uint64_t addr;
+		uint32_t len;
+		uint64_t offset;
+
+		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
+		addr = desc->addr;
+		len = desc->len;
+
+		offset = xsk_umem__extract_offset(addr);
+		addr = xsk_umem__extract_addr(addr);
+
+		bufs[i] = (struct rte_mbuf *)
+				xsk_umem__get_data(umem->buffer, addr);
+		bufs[i]->data_off = offset - sizeof(struct rte_mbuf);
+
+		rte_pktmbuf_pkt_len(bufs[i]) = len;
+		rte_pktmbuf_data_len(bufs[i]) = len;
+		rx_bytes += len;
+	}
+
+	xsk_ring_cons__release(rx, rcvd);
+
+	/* statistics */
+	rxq->stats.rx_pkts += rcvd;
+	rxq->stats.rx_bytes += rx_bytes;
+
+	return rcvd;
+}
+#else
+static uint16_t
+af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct pkt_rx_queue *rxq = queue;
+	struct xsk_ring_cons *rx = &rxq->rx;
+	struct xsk_umem_info *umem = rxq->umem;
+	struct xsk_ring_prod *fq = &umem->fq;
+	uint32_t idx_rx = 0;
 	unsigned long rx_bytes = 0;
 	int rcvd, i;
+	uint32_t free_thresh = fq->size >> 1;
+	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
 
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE);
 
@@ -246,25 +344,14 @@  eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		uint64_t addr;
 		uint32_t len;
 		void *pkt;
-		uint16_t buf_len = ETH_AF_XDP_FRAME_SIZE;
-		struct rte_mbuf_ext_shared_info *shinfo;
 
 		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
 		addr = desc->addr;
 		len = desc->len;
 		pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr);
 
-		if (pmd_zc) {
-			shinfo = rte_pktmbuf_ext_shinfo_init_helper(pkt,
-					&buf_len, umem_buf_release_to_fq, umem);
-
-			rte_pktmbuf_attach_extbuf(mbufs[i], pkt, 0, buf_len,
-						  shinfo);
-		} else {
-			rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *),
-							pkt, len);
-			rte_ring_enqueue(umem->buf_ring, (void *)addr);
-		}
+		rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
+		rte_ring_enqueue(umem->buf_ring, (void *)addr);
 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
 		rte_pktmbuf_data_len(mbufs[i]) = len;
 		rx_bytes += len;
@@ -274,7 +361,7 @@  eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	xsk_ring_cons__release(rx, rcvd);
 
 	/* statistics */
-	rxq->stats.rx_pkts += (rcvd - dropped);
+	rxq->stats.rx_pkts += rcvd;
 	rxq->stats.rx_bytes += rx_bytes;
 
 out:
@@ -284,6 +371,17 @@  eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	return rcvd;
 }
+#endif
+
+static uint16_t
+eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+	return af_xdp_rx_zc(queue, bufs, nb_pkts);
+#else
+	return af_xdp_rx_cp(queue, bufs, nb_pkts);
+#endif
+}
 
 static void
 pull_umem_cq(struct xsk_umem_info *umem, int size)
@@ -297,7 +395,13 @@  pull_umem_cq(struct xsk_umem_info *umem, int size)
 	for (i = 0; i < n; i++) {
 		uint64_t addr;
 		addr = *xsk_ring_cons__comp_addr(cq, idx_cq++);
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+		addr = xsk_umem__extract_addr(addr);
+		rte_pktmbuf_free((struct rte_mbuf *)
+					xsk_umem__get_data(umem->buffer, addr));
+#else
 		rte_ring_enqueue(umem->buf_ring, (void *)addr);
+#endif
 	}
 
 	xsk_ring_cons__release(cq, n);
@@ -306,7 +410,7 @@  pull_umem_cq(struct xsk_umem_info *umem, int size)
 static void
 kick_tx(struct pkt_tx_queue *txq)
 {
-	struct xsk_umem_info *umem = txq->pair->umem;
+	struct xsk_umem_info *umem = txq->umem;
 
 	while (send(xsk_socket__fd(txq->pair->xsk), NULL,
 		    0, MSG_DONTWAIT) < 0) {
@@ -318,24 +422,97 @@  kick_tx(struct pkt_tx_queue *txq)
 		if (errno == EAGAIN)
 			pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
 	}
+#ifndef XDP_UMEM_UNALIGNED_CHUNK_FLAG
 	pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
+#endif
 }
 
-static inline bool
-in_umem_range(struct xsk_umem_info *umem, uint64_t addr)
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+static uint16_t
+af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
-	uint64_t mz_base_addr = umem->mz->addr_64;
+	struct pkt_tx_queue *txq = queue;
+	struct xsk_umem_info *umem = txq->umem;
+	struct rte_mbuf *mbuf;
+	unsigned long tx_bytes = 0;
+	int i;
+	uint32_t idx_tx;
+	uint16_t count = 0;
+	struct xdp_desc *desc;
+	uint64_t addr, offset;
 
-	return addr >= mz_base_addr && addr < mz_base_addr + umem->mz->len;
-}
+	pull_umem_cq(umem, nb_pkts);
+
+	for (i = 0; i < nb_pkts; i++) {
+		mbuf = bufs[i];
+
+		if (mbuf->pool == umem->mb_pool) {
+			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
+				kick_tx(txq);
+				goto out;
+			}
+			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
+			desc->len = mbuf->pkt_len;
+			addr = (uint64_t)mbuf - (uint64_t)umem->buffer;
+			offset = rte_pktmbuf_mtod(mbuf, uint64_t) -
+					(uint64_t)mbuf;
+			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+			desc->addr = addr | offset;
+			count++;
+		} else {
+			struct rte_mbuf *local_mbuf =
+					rte_pktmbuf_alloc(umem->mb_pool);
+			void *pkt;
+
+			if (local_mbuf == NULL) {
+				rte_pktmbuf_free(mbuf);
+				goto out;
+			}
 
+			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
+				kick_tx(txq);
+				goto out;
+			}
+
+			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
+			desc->len = mbuf->pkt_len;
+
+			addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer;
+			offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) -
+					(uint64_t)local_mbuf;
+			pkt = xsk_umem__get_data(umem->buffer, addr + offset);
+			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+			desc->addr = addr | offset;
+			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
+					desc->len);
+			rte_pktmbuf_free(mbuf);
+			count++;
+		}
+
+		tx_bytes += mbuf->pkt_len;
+	}
+
+#if defined(XDP_USE_NEED_WAKEUP)
+	if (xsk_ring_prod__needs_wakeup(&txq->tx))
+#endif
+		kick_tx(txq);
+
+out:
+	xsk_ring_prod__submit(&txq->tx, count);
+
+	txq->stats.tx_pkts += count;
+	txq->stats.tx_bytes += tx_bytes;
+	txq->stats.tx_dropped += nb_pkts - count;
+
+	return count;
+}
+#else
 static uint16_t
-eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct pkt_tx_queue *txq = queue;
-	struct xsk_umem_info *umem = txq->pair->umem;
+	struct xsk_umem_info *umem = txq->umem;
 	struct rte_mbuf *mbuf;
-	int pmd_zc = umem->pmd_zc;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
 	unsigned long tx_bytes = 0;
 	int i;
@@ -364,24 +541,12 @@  eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		mbuf = bufs[i];
 		desc->len = mbuf->pkt_len;
 
-		/*
-		 * We need to make sure the external mbuf address is within
-		 * current port's umem memzone range
-		 */
-		if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) &&
-				in_umem_range(umem, (uint64_t)mbuf->buf_addr)) {
-			desc->addr = (uint64_t)mbuf->buf_addr -
-				umem->mz->addr_64;
-			mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr,
-					(uint64_t)addrs[i]);
-		} else {
-			desc->addr = (uint64_t)addrs[i];
-			pkt = xsk_umem__get_data(umem->mz->addr,
-					desc->addr);
-			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
-					desc->len);
-		}
+		desc->addr = (uint64_t)addrs[i];
+		pkt = xsk_umem__get_data(umem->mz->addr,
+					 desc->addr);
+		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
 		tx_bytes += mbuf->pkt_len;
+		rte_pktmbuf_free(mbuf);
 	}
 
 	xsk_ring_prod__submit(&txq->tx, nb_pkts);
@@ -394,11 +559,19 @@  eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	txq->stats.tx_pkts += nb_pkts;
 	txq->stats.tx_bytes += tx_bytes;
 
-	for (i = 0; i < nb_pkts; i++)
-		rte_pktmbuf_free(bufs[i]);
-
 	return nb_pkts;
 }
+#endif
+
+static uint16_t
+eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+	return af_xdp_tx_zc(queue, bufs, nb_pkts);
+#else
+	return af_xdp_tx_cp(queue, bufs, nb_pkts);
+#endif
+}
 
 static int
 eth_dev_start(struct rte_eth_dev *dev)
@@ -468,6 +641,7 @@  eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 		stats->ipackets += stats->q_ipackets[i];
 		stats->ibytes += stats->q_ibytes[i];
 		stats->imissed += rxq->stats.rx_dropped;
+		stats->oerrors += txq->stats.tx_dropped;
 		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
 				XDP_STATISTICS, &xdp_stats, &optlen);
 		if (ret != 0) {
@@ -514,11 +688,16 @@  remove_xdp_program(struct pmd_internals *internals)
 static void
 xdp_umem_destroy(struct xsk_umem_info *umem)
 {
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+	rte_mempool_free(umem->mb_pool);
+	umem->mb_pool = NULL;
+#else
 	rte_memzone_free(umem->mz);
 	umem->mz = NULL;
 
 	rte_ring_free(umem->buf_ring);
 	umem->buf_ring = NULL;
+#endif
 
 	rte_free(umem);
 	umem = NULL;
@@ -568,6 +747,55 @@  eth_link_update(struct rte_eth_dev *dev __rte_unused,
 	return 0;
 }
 
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+static inline uint64_t get_base_addr(struct rte_mempool *mp)
+{
+	struct rte_mempool_memhdr *memhdr;
+
+	memhdr = STAILQ_FIRST(&mp->mem_list);
+	return (uint64_t)memhdr->addr & ~(getpagesize() - 1);
+}
+
+static struct
+xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals __rte_unused,
+				  struct pkt_rx_queue *rxq)
+{
+	struct xsk_umem_info *umem;
+	int ret;
+	struct xsk_umem_config usr_config = {
+		.fill_size = ETH_AF_XDP_DFLT_NUM_DESCS,
+		.comp_size = ETH_AF_XDP_DFLT_NUM_DESCS,
+		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG};
+	void *base_addr = NULL;
+	struct rte_mempool *mb_pool = rxq->mb_pool;
+
+	usr_config.frame_size = rte_pktmbuf_data_room_size(mb_pool) +
+					ETH_AF_XDP_MBUF_OVERHEAD +
+					mb_pool->private_data_size;
+	usr_config.frame_headroom = ETH_AF_XDP_DATA_HEADROOM +
+					mb_pool->private_data_size;
+
+	umem = rte_zmalloc_socket("umem", sizeof(*umem), 0, rte_socket_id());
+	if (umem == NULL) {
+		AF_XDP_LOG(ERR, "Failed to allocate umem info");
+		return NULL;
+	}
+
+	umem->mb_pool = mb_pool;
+	base_addr = (void *)get_base_addr(mb_pool);
+
+	ret = xsk_umem__create(&umem->umem, base_addr,
+			       mb_pool->populated_size * usr_config.frame_size,
+			       &umem->fq, &umem->cq,
+			       &usr_config);
+
+	if (ret) {
+		AF_XDP_LOG(ERR, "Failed to create umem");
+		goto err;
+	}
+	umem->buffer = base_addr;
+
+#else
 static struct
 xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
 				  struct pkt_rx_queue *rxq)
@@ -628,6 +856,7 @@  xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
 	}
 	umem->mz = mz;
 
+#endif
 	return umem;
 
 err:
@@ -647,6 +876,7 @@  xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	rxq->umem = xdp_umem_configure(internals, rxq);
 	if (rxq->umem == NULL)
 		return -ENOMEM;
+	txq->umem = rxq->umem;
 
 	cfg.rx_size = ring_size;
 	cfg.tx_size = ring_size;
@@ -968,7 +1198,6 @@  eth_rx_queue_setup(struct rte_eth_dev *dev,
 		   struct rte_mempool *mb_pool)
 {
 	struct pmd_internals *internals = dev->data->dev_private;
-	uint32_t buf_size, data_size;
 	struct pkt_rx_queue *rxq;
 	int ret;
 
@@ -976,6 +1205,10 @@  eth_rx_queue_setup(struct rte_eth_dev *dev,
 
 	AF_XDP_LOG(INFO, "Set up rx queue, rx queue id: %d, xsk queue id: %d\n",
 		   rx_queue_id, rxq->xsk_queue_idx);
+
+#ifndef XDP_UMEM_UNALIGNED_CHUNK_FLAG
+	uint32_t buf_size, data_size;
+
 	/* Now get the space available for data in the mbuf */
 	buf_size = rte_pktmbuf_data_room_size(mb_pool) -
 		RTE_PKTMBUF_HEADROOM;
@@ -987,6 +1220,7 @@  eth_rx_queue_setup(struct rte_eth_dev *dev,
 		ret = -ENOMEM;
 		goto err;
 	}
+#endif
 
 	rxq->mb_pool = mb_pool;
 
@@ -1001,8 +1235,6 @@  eth_rx_queue_setup(struct rte_eth_dev *dev,
 	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
 	rxq->fds[0].events = POLLIN;
 
-	rxq->umem->pmd_zc = internals->pmd_zc;
-
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
 
@@ -1211,7 +1443,7 @@  xdp_get_channels_info(const char *if_name, int *max_queues,
 
 static int
 parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
-			int *queue_cnt, int *pmd_zc,
+			int *queue_cnt,
 			int (*queue_irqs)[RTE_MAX_QUEUES_PER_PORT])
 {
 	int ret;
@@ -1233,11 +1465,6 @@  parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
 		goto free_kvlist;
 	}
 
-	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG,
-				 &parse_integer_arg, pmd_zc);
-	if (ret < 0)
-		goto free_kvlist;
-
 	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_IRQ_ARG,
 				 &parse_queue_irq_arg, queue_irqs);
 	if (ret < 0)
@@ -1280,7 +1507,7 @@  get_iface_info(const char *if_name,
 
 static struct rte_eth_dev *
 init_internals(struct rte_vdev_device *dev, const char *if_name,
-			int start_queue_idx, int queue_cnt, int pmd_zc,
+			int start_queue_idx, int queue_cnt,
 			int queue_irqs[RTE_MAX_QUEUES_PER_PORT])
 {
 	const char *name = rte_vdev_device_name(dev);
@@ -1296,7 +1523,6 @@  init_internals(struct rte_vdev_device *dev, const char *if_name,
 
 	internals->start_queue_idx = start_queue_idx;
 	internals->queue_cnt = queue_cnt;
-	internals->pmd_zc = pmd_zc;
 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
 	memcpy(internals->queue_irqs, queue_irqs,
 		sizeof(int) * RTE_MAX_QUEUES_PER_PORT);
@@ -1354,8 +1580,9 @@  init_internals(struct rte_vdev_device *dev, const char *if_name,
 	/* Let rte_eth_dev_close() release the port resources. */
 	eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
 
-	if (internals->pmd_zc)
-		AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
+#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+	AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
+#endif
 
 	return eth_dev;
 
@@ -1377,7 +1604,6 @@  rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
 	struct rte_eth_dev *eth_dev = NULL;
 	const char *name;
-	int pmd_zc = 0;
 	int queue_irqs[RTE_MAX_QUEUES_PER_PORT];
 
 	memset(queue_irqs, -1, sizeof(int) * RTE_MAX_QUEUES_PER_PORT);
@@ -1408,7 +1634,7 @@  rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		dev->device.numa_node = rte_socket_id();
 
 	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
-			     &xsk_queue_cnt, &pmd_zc, &queue_irqs) < 0) {
+			     &xsk_queue_cnt, &queue_irqs) < 0) {
 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
 		return -EINVAL;
 	}
@@ -1419,7 +1645,7 @@  rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	}
 
 	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
-					xsk_queue_cnt, pmd_zc, queue_irqs);
+					xsk_queue_cnt, queue_irqs);
 	if (eth_dev == NULL) {
 		AF_XDP_LOG(ERR, "Failed to init internals\n");
 		return -1;
@@ -1463,7 +1689,6 @@  RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
 			      "iface=<string> "
 			      "start_queue=<int> "
 			      "queue_count=<int> "
-			      "pmd_zero_copy=<0|1> "
 			      "queue_irq=<int>:<int>");
 
 RTE_INIT(af_xdp_init_log)