[v1,3/3] net/af_xdp: add busy poll support

Message ID 20190515083842.15116-4-xiaolong.ye@intel.com
State New
Headers show
Series
  • add more features for AF_XDP pmd
Related show

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

Xiaolong Ye May 15, 2019, 8:38 a.m.
This patch enables busy-poll support for AF_XDP pmd. With busy-poll, the
kernel driver is executed in process context by calling the poll() syscall.

The main advantage of busy-poll feature is that all processing occurs on a
single core. This eliminates the core-to-core cache transfers that occur
between the application and the softirqd processing on another core.

The drawback of busy-poll is that it will downgrade the max throughput due
to syscall, but from a per-core perspective, the performance is better as
normal mode runs on two cores and busy-poll only runs on a single core.

Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
---
 doc/guides/nics/af_xdp.rst          |  1 +
 drivers/net/af_xdp/rte_eth_af_xdp.c | 48 ++++++++++++++++++++++++++---
 2 files changed, 45 insertions(+), 4 deletions(-)

Patch

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index 18defcda3..e42065170 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -29,6 +29,7 @@  The following options can be provided to set up an af_xdp port in DPDK.
 *   ``iface`` - name of the Kernel interface to attach to (required);
 *   ``start_queue`` - starting netdev queue id (optional, default 0);
 *   ``queue_count`` - total netdev queue number (optional, default 1);
+*   ``busy_poll_size`` - busy poll batch size (optional, default 0);
 *   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
 
 Prerequisites
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 9a4510701..1e46a4ef4 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -6,6 +6,7 @@ 
 #include <stdlib.h>
 #include <string.h>
 #include <netinet/in.h>
+#include <poll.h>
 #include <net/if.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>
@@ -72,6 +73,7 @@  struct xsk_umem_info {
 	struct rte_ring *buf_ring;
 	const struct rte_memzone *mz;
 	int pmd_zc;
+	int busy_poll;
 };
 
 struct rx_stats {
@@ -114,6 +116,7 @@  struct pmd_internals {
 	int queue_cnt;
 
 	int pmd_zc;
+	int busy_poll_size;
 	struct ether_addr eth_addr;
 	struct xsk_umem_info *umem;
 	struct rte_mempool *mb_pool_share;
@@ -126,12 +129,14 @@  struct pmd_internals {
 #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
 #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
 #define ETH_AF_XDP_PMD_ZC_ARG			"pmd_zero_copy"
+#define ETH_AF_XDP_BUSY_POLL_SIZE_ARG		"busy_poll_size"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
 	ETH_AF_XDP_START_QUEUE_ARG,
 	ETH_AF_XDP_QUEUE_COUNT_ARG,
 	ETH_AF_XDP_PMD_ZC_ARG,
+	ETH_AF_XDP_BUSY_POLL_SIZE_ARG,
 	NULL
 };
 
@@ -191,6 +196,7 @@  eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_ring_cons *rx = &rxq->rx;
 	struct xsk_umem_info *umem = rxq->umem;
 	struct xsk_ring_prod *fq = &umem->fq;
+	struct pollfd pfds[1];
 	uint32_t idx_rx = 0;
 	uint32_t free_thresh = fq->size >> 1;
 	int pmd_zc = umem->pmd_zc;
@@ -199,6 +205,15 @@  eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	unsigned long rx_bytes = 0;
 	int rcvd, i;
 
+	if (umem->busy_poll) {
+		memset(pfds, 0, sizeof(pfds));
+		pfds[0].fd = xsk_socket__fd(rxq->xsk);
+		pfds[0].events = POLLIN;
+
+		if (poll(pfds, 1, 0) <= 0)
+			return 0;
+	}
+
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE);
 
 	if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, nb_pkts) != 0))
@@ -305,12 +320,23 @@  eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->pair->umem;
 	struct rte_mbuf *mbuf;
+	struct pollfd pfds[1];
 	int pmd_zc = umem->pmd_zc;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
 	unsigned long tx_bytes = 0;
 	int i;
 	uint32_t idx_tx;
 
+	if (umem->busy_poll) {
+		memset(pfds, 0, sizeof(pfds));
+		pfds[0].fd = xsk_socket__fd(txq->pair->xsk);
+		pfds[0].events = POLLOUT;
+		if (poll(pfds, 1, 0) <= 0)
+			return 0;
+		if (!(pfds[0].revents & POLLOUT))
+			return 0;
+	}
+
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);
 
 	pull_umem_cq(umem, nb_pkts);
@@ -615,6 +641,7 @@  xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	cfg.rx_size = ring_size;
 	cfg.tx_size = ring_size;
 	cfg.libbpf_flags = 0;
+	cfg.busy_poll = internals->busy_poll_size;
 	cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 	cfg.bind_flags = 0;
 	ret = xsk_socket__create(&rxq->xsk, internals->if_name,
@@ -680,10 +707,14 @@  eth_rx_queue_setup(struct rte_eth_dev *dev,
 
 	internals->umem = rxq->umem;
 	internals->umem->pmd_zc = internals->pmd_zc;
+	internals->umem->busy_poll = internals->busy_poll_size ? 1 : 0;
 
 	if (internals->umem->pmd_zc)
 		AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
 
+	if (internals->umem->busy_poll)
+		AF_XDP_LOG(INFO, "Busy poll enabled.\n");
+
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
 
@@ -818,7 +849,7 @@  parse_name_arg(const char *key __rte_unused,
 
 static int
 parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
-			int *queue_cnt, int *pmd_zc)
+			int *queue_cnt, int *pmd_zc, int *busy_poll_size)
 {
 	int ret;
 
@@ -844,6 +875,11 @@  parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
 	if (ret < 0)
 		goto free_kvlist;
 
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_BUSY_POLL_SIZE_ARG,
+				 &parse_integer_arg, busy_poll_size);
+	if (ret < 0)
+		goto free_kvlist;
+
 free_kvlist:
 	rte_kvargs_free(kvlist);
 	return ret;
@@ -881,7 +917,8 @@  get_iface_info(const char *if_name,
 
 static struct rte_eth_dev *
 init_internals(struct rte_vdev_device *dev, const char *if_name,
-			int start_queue_idx, int queue_cnt, int pmd_zc)
+			int start_queue_idx, int queue_cnt, int pmd_zc,
+				int busy_poll_size)
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
@@ -897,6 +934,7 @@  init_internals(struct rte_vdev_device *dev, const char *if_name,
 	internals->start_queue_idx = start_queue_idx;
 	internals->queue_cnt = queue_cnt;
 	internals->pmd_zc = pmd_zc;
+	internals->busy_poll_size = busy_poll_size;
 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
 
 	for (i = 0; i < queue_cnt; i++) {
@@ -941,6 +979,7 @@  rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	struct rte_eth_dev *eth_dev = NULL;
 	const char *name;
 	int pmd_zc = 0;
+	int busy_poll_size = 0;
 
 	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
 		rte_vdev_device_name(dev));
@@ -968,7 +1007,7 @@  rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		dev->device.numa_node = rte_socket_id();
 
 	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
-			     &xsk_queue_cnt, &pmd_zc) < 0) {
+			     &xsk_queue_cnt, &pmd_zc, &busy_poll_size) < 0) {
 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
 		return -EINVAL;
 	}
@@ -979,7 +1018,7 @@  rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	}
 
 	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
-					xsk_queue_cnt, pmd_zc);
+				 xsk_queue_cnt, pmd_zc, busy_poll_size);
 	if (eth_dev == NULL) {
 		AF_XDP_LOG(ERR, "Failed to init internals\n");
 		return -1;
@@ -1023,6 +1062,7 @@  RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
 			      "iface=<string> "
 			      "start_queue=<int> "
 			      "queue_count=<int> "
+			      "busy_poll_size=<int> "
 			      "pmd_zero_copy=<0|1>");
 
 RTE_INIT(af_xdp_init_log)