[RFC,v3] tap: do not duplicate fd's

Message ID 20240309181554.289317-1-stephen@networkplumber.org (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series [RFC,v3] tap: do not duplicate fd's |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/intel-Functional success Functional PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS

Commit Message

Stephen Hemminger March 9, 2024, 6:12 p.m. UTC
  The TAP devic can use the same file descriptor for both rx and tx queues.
This allows up to 8 queues (versus 4) and reduces some resource consumption.
Also, reduce the TAP_MAX_QUEUES to what the multi-process restrictions
will allow.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
v3 - This is more limited patch, only addresses tap device
     and only gets tap up from 4 to 8 queues.

     Still better to fix underlying EAL issue, but that requires
     overriding strict ABI rules.

 drivers/net/tap/meson.build   |   2 +-
 drivers/net/tap/rte_eth_tap.c | 197 +++++++++++++++-------------------
 drivers/net/tap/rte_eth_tap.h |   3 +-
 drivers/net/tap/tap_flow.c    |   3 +-
 drivers/net/tap/tap_intr.c    |  12 ++-
 5 files changed, 95 insertions(+), 122 deletions(-)
  

Patch

diff --git a/drivers/net/tap/meson.build b/drivers/net/tap/meson.build
index 5099ccdff11b..9cd124d53e23 100644
--- a/drivers/net/tap/meson.build
+++ b/drivers/net/tap/meson.build
@@ -16,7 +16,7 @@  sources = files(
 
 deps = ['bus_vdev', 'gso', 'hash']
 
-cflags += '-DTAP_MAX_QUEUES=16'
+cflags += '-DTAP_MAX_QUEUES=8'
 
 # input array for meson symbol search:
 # [ "MACRO to define if found", "header for the search",
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 69d9da695bed..38a1b2d825f9 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -124,8 +124,7 @@  enum ioctl_mode {
 /* Message header to synchronize queues via IPC */
 struct ipc_queues {
 	char port_name[RTE_DEV_NAME_MAX_LEN];
-	int rxq_count;
-	int txq_count;
+	int q_count;
 	/*
 	 * The file descriptors are in the dedicated part
 	 * of the Unix message to be translated by the kernel.
@@ -446,7 +445,7 @@  pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		uint16_t data_off = rte_pktmbuf_headroom(mbuf);
 		int len;
 
-		len = readv(process_private->rxq_fds[rxq->queue_id],
+		len = readv(process_private->fds[rxq->queue_id],
 			*rxq->iovecs,
 			1 + (rxq->rxmode->offloads & RTE_ETH_RX_OFFLOAD_SCATTER ?
 			     rxq->nb_rx_desc : 1));
@@ -643,7 +642,7 @@  tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 		}
 
 		/* copy the tx frame data */
-		n = writev(process_private->txq_fds[txq->queue_id], iovecs, k);
+		n = writev(process_private->fds[txq->queue_id], iovecs, k);
 		if (n <= 0)
 			return -1;
 
@@ -851,7 +850,6 @@  tap_mp_req_on_rxtx(struct rte_eth_dev *dev)
 	struct rte_mp_msg msg;
 	struct ipc_queues *request_param = (struct ipc_queues *)msg.param;
 	int err;
-	int fd_iterator = 0;
 	struct pmd_process_private *process_private = dev->process_private;
 	int i;
 
@@ -859,16 +857,13 @@  tap_mp_req_on_rxtx(struct rte_eth_dev *dev)
 	strlcpy(msg.name, TAP_MP_REQ_START_RXTX, sizeof(msg.name));
 	strlcpy(request_param->port_name, dev->data->name, sizeof(request_param->port_name));
 	msg.len_param = sizeof(*request_param);
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		msg.fds[fd_iterator++] = process_private->txq_fds[i];
-		msg.num_fds++;
-		request_param->txq_count++;
-	}
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		msg.fds[fd_iterator++] = process_private->rxq_fds[i];
-		msg.num_fds++;
-		request_param->rxq_count++;
-	}
+
+	/* rx and tx share file descriptors and nb_tx_queues == nb_rx_queues */
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		msg.fds[i] = process_private->fds[i];
+
+	request_param->q_count = dev->data->nb_rx_queues;
+	msg.num_fds = dev->data->nb_rx_queues;
 
 	err = rte_mp_sendmsg(&msg);
 	if (err < 0) {
@@ -910,8 +905,6 @@  tap_mp_req_start_rxtx(const struct rte_mp_msg *request, __rte_unused const void
 	struct rte_eth_dev *dev;
 	const struct ipc_queues *request_param =
 		(const struct ipc_queues *)request->param;
-	int fd_iterator;
-	int queue;
 	struct pmd_process_private *process_private;
 
 	dev = rte_eth_dev_get_by_name(request_param->port_name);
@@ -920,14 +913,13 @@  tap_mp_req_start_rxtx(const struct rte_mp_msg *request, __rte_unused const void
 			request_param->port_name);
 		return -1;
 	}
+
 	process_private = dev->process_private;
-	fd_iterator = 0;
-	TAP_LOG(DEBUG, "tap_attach rx_q:%d tx_q:%d\n", request_param->rxq_count,
-		request_param->txq_count);
-	for (queue = 0; queue < request_param->txq_count; queue++)
-		process_private->txq_fds[queue] = request->fds[fd_iterator++];
-	for (queue = 0; queue < request_param->rxq_count; queue++)
-		process_private->rxq_fds[queue] = request->fds[fd_iterator++];
+	TAP_LOG(DEBUG, "tap_attach q:%d\n", request_param->q_count);
+
+	for (int q = 0; q < request_param->q_count; q++)
+		process_private->fds[q] = request->fds[q];
+
 
 	return 0;
 }
@@ -1121,7 +1113,6 @@  tap_dev_close(struct rte_eth_dev *dev)
 	int i;
 	struct pmd_internals *internals = dev->data->dev_private;
 	struct pmd_process_private *process_private = dev->process_private;
-	struct rx_queue *rxq;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
 		rte_free(dev->process_private);
@@ -1141,19 +1132,18 @@  tap_dev_close(struct rte_eth_dev *dev)
 	}
 
 	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		if (process_private->rxq_fds[i] != -1) {
-			rxq = &internals->rxq[i];
-			close(process_private->rxq_fds[i]);
-			process_private->rxq_fds[i] = -1;
-			tap_rxq_pool_free(rxq->pool);
-			rte_free(rxq->iovecs);
-			rxq->pool = NULL;
-			rxq->iovecs = NULL;
-		}
-		if (process_private->txq_fds[i] != -1) {
-			close(process_private->txq_fds[i]);
-			process_private->txq_fds[i] = -1;
-		}
+		struct rx_queue *rxq = &internals->rxq[i];
+
+		if (process_private->fds[i] == -1)
+			continue;
+
+		close(process_private->fds[i]);
+		process_private->fds[i] = -1;
+
+		tap_rxq_pool_free(rxq->pool);
+		rte_free(rxq->iovecs);
+		rxq->pool = NULL;
+		rxq->iovecs = NULL;
 	}
 
 	if (internals->remote_if_index) {
@@ -1198,6 +1188,15 @@  tap_dev_close(struct rte_eth_dev *dev)
 	return 0;
 }
 
+static void
+tap_queue_close(struct pmd_process_private *process_private, uint16_t qid)
+{
+	if (process_private->fds[qid] != -1) {
+		close(process_private->fds[qid]);
+		process_private->fds[qid] = -1;
+	}
+}
+
 static void
 tap_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 {
@@ -1206,15 +1205,16 @@  tap_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 
 	if (!rxq)
 		return;
+
 	process_private = rte_eth_devices[rxq->in_port].process_private;
-	if (process_private->rxq_fds[rxq->queue_id] != -1) {
-		close(process_private->rxq_fds[rxq->queue_id]);
-		process_private->rxq_fds[rxq->queue_id] = -1;
-		tap_rxq_pool_free(rxq->pool);
-		rte_free(rxq->iovecs);
-		rxq->pool = NULL;
-		rxq->iovecs = NULL;
-	}
+
+	tap_rxq_pool_free(rxq->pool);
+	rte_free(rxq->iovecs);
+	rxq->pool = NULL;
+	rxq->iovecs = NULL;
+
+	if (dev->data->tx_queues[qid] == NULL)
+		tap_queue_close(process_private, qid);
 }
 
 static void
@@ -1225,12 +1225,10 @@  tap_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 
 	if (!txq)
 		return;
-	process_private = rte_eth_devices[txq->out_port].process_private;
 
-	if (process_private->txq_fds[txq->queue_id] != -1) {
-		close(process_private->txq_fds[txq->queue_id]);
-		process_private->txq_fds[txq->queue_id] = -1;
-	}
+	process_private = rte_eth_devices[txq->out_port].process_private;
+	if (dev->data->rx_queues[qid] == NULL)
+		tap_queue_close(process_private, qid);
 }
 
 static int
@@ -1482,52 +1480,34 @@  tap_setup_queue(struct rte_eth_dev *dev,
 		uint16_t qid,
 		int is_rx)
 {
-	int ret;
-	int *fd;
-	int *other_fd;
-	const char *dir;
+	int fd, ret;
 	struct pmd_internals *pmd = dev->data->dev_private;
 	struct pmd_process_private *process_private = dev->process_private;
 	struct rx_queue *rx = &internals->rxq[qid];
 	struct tx_queue *tx = &internals->txq[qid];
-	struct rte_gso_ctx *gso_ctx;
+	struct rte_gso_ctx *gso_ctx = NULL;
+	const char *dir = is_rx ? "rx" : "tx";
 
-	if (is_rx) {
-		fd = &process_private->rxq_fds[qid];
-		other_fd = &process_private->txq_fds[qid];
-		dir = "rx";
-		gso_ctx = NULL;
-	} else {
-		fd = &process_private->txq_fds[qid];
-		other_fd = &process_private->rxq_fds[qid];
-		dir = "tx";
+	if (is_rx)
 		gso_ctx = &tx->gso_ctx;
-	}
-	if (*fd != -1) {
+
+	fd = process_private->fds[qid];
+	if (fd != -1) {
 		/* fd for this queue already exists */
 		TAP_LOG(DEBUG, "%s: fd %d for %s queue qid %d exists",
-			pmd->name, *fd, dir, qid);
+			pmd->name, fd, dir, qid);
 		gso_ctx = NULL;
-	} else if (*other_fd != -1) {
-		/* Only other_fd exists. dup it */
-		*fd = dup(*other_fd);
-		if (*fd < 0) {
-			*fd = -1;
-			TAP_LOG(ERR, "%s: dup() failed.", pmd->name);
-			return -1;
-		}
-		TAP_LOG(DEBUG, "%s: dup fd %d for %s queue qid %d (%d)",
-			pmd->name, *other_fd, dir, qid, *fd);
 	} else {
-		/* Both RX and TX fds do not exist (equal -1). Create fd */
-		*fd = tun_alloc(pmd, 0, 0);
-		if (*fd < 0) {
-			*fd = -1; /* restore original value */
+		fd = tun_alloc(pmd, 0, 0);
+		if (fd < 0) {
 			TAP_LOG(ERR, "%s: tun_alloc() failed.", pmd->name);
 			return -1;
 		}
+
 		TAP_LOG(DEBUG, "%s: add %s queue for qid %d fd %d",
-			pmd->name, dir, qid, *fd);
+			pmd->name, dir, qid, fd);
+
+		process_private->fds[qid] = fd;
 	}
 
 	tx->mtu = &dev->data->mtu;
@@ -1540,7 +1520,7 @@  tap_setup_queue(struct rte_eth_dev *dev,
 
 	tx->type = pmd->type;
 
-	return *fd;
+	return fd;
 }
 
 static int
@@ -1620,7 +1600,7 @@  tap_rx_queue_setup(struct rte_eth_dev *dev,
 
 	TAP_LOG(DEBUG, "  RX TUNTAP device name %s, qid %d on fd %d",
 		internals->name, rx_queue_id,
-		process_private->rxq_fds[rx_queue_id]);
+		process_private->fds[rx_queue_id]);
 
 	return 0;
 
@@ -1664,7 +1644,7 @@  tap_tx_queue_setup(struct rte_eth_dev *dev,
 	TAP_LOG(DEBUG,
 		"  TX TUNTAP device name %s, qid %d on fd %d csum %s",
 		internals->name, tx_queue_id,
-		process_private->txq_fds[tx_queue_id],
+		process_private->fds[tx_queue_id],
 		txq->csum ? "on" : "off");
 
 	return 0;
@@ -2001,10 +1981,9 @@  eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
 	dev->intr_handle = pmd->intr_handle;
 
 	/* Presetup the fds to -1 as being not valid */
-	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		process_private->rxq_fds[i] = -1;
-		process_private->txq_fds[i] = -1;
-	}
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++)
+		process_private->fds[i] = -1;
+
 
 	if (pmd->type == ETH_TUNTAP_TYPE_TAP) {
 		if (rte_is_zero_ether_addr(mac_addr))
@@ -2332,7 +2311,6 @@  tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
 	struct ipc_queues *request_param = (struct ipc_queues *)request.param;
 	struct ipc_queues *reply_param;
 	struct pmd_process_private *process_private = dev->process_private;
-	int queue, fd_iterator;
 
 	/* Prepare the request */
 	memset(&request, 0, sizeof(request));
@@ -2352,18 +2330,17 @@  tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
 	TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
 
 	/* Attach the queues from received file descriptors */
-	if (reply_param->rxq_count + reply_param->txq_count != reply->num_fds) {
+	if (reply_param->q_count != reply->num_fds) {
 		TAP_LOG(ERR, "Unexpected number of fds received");
 		return -1;
 	}
 
-	dev->data->nb_rx_queues = reply_param->rxq_count;
-	dev->data->nb_tx_queues = reply_param->txq_count;
-	fd_iterator = 0;
-	for (queue = 0; queue < reply_param->rxq_count; queue++)
-		process_private->rxq_fds[queue] = reply->fds[fd_iterator++];
-	for (queue = 0; queue < reply_param->txq_count; queue++)
-		process_private->txq_fds[queue] = reply->fds[fd_iterator++];
+	dev->data->nb_rx_queues = reply_param->q_count;
+	dev->data->nb_tx_queues = reply_param->q_count;
+
+	for (int q = 0; q < reply_param->q_count; q++)
+		process_private->fds[q] = reply->fds[q];
+
 	free(reply);
 	return 0;
 }
@@ -2393,25 +2370,19 @@  tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
 
 	/* Fill file descriptors for all queues */
 	reply.num_fds = 0;
-	reply_param->rxq_count = 0;
-	if (dev->data->nb_rx_queues + dev->data->nb_tx_queues >
-			RTE_MP_MAX_FD_NUM){
-		TAP_LOG(ERR, "Number of rx/tx queues exceeds max number of fds");
+	reply_param->q_count = 0;
+
+	RTE_ASSERT(dev->data->nb_rx_queues == dev->data->nb_tx_queues);
+	if (dev->data->nb_rx_queues > RTE_MP_MAX_FD_NUM) {
+		TAP_LOG(ERR, "Number of rx/tx queues %u exceeds max number of fds %u",
+			dev->data->nb_rx_queues, RTE_MP_MAX_FD_NUM);
 		return -1;
 	}
 
 	for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
-		reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
-		reply_param->rxq_count++;
-	}
-	RTE_ASSERT(reply_param->rxq_count == dev->data->nb_rx_queues);
-
-	reply_param->txq_count = 0;
-	for (queue = 0; queue < dev->data->nb_tx_queues; queue++) {
-		reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
-		reply_param->txq_count++;
+		reply.fds[reply.num_fds++] = process_private->fds[queue];
+		reply_param->q_count++;
 	}
-	RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
 
 	/* Send reply */
 	strlcpy(reply.name, request->name, sizeof(reply.name));
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index 5ac93f93e961..dc8201020b5f 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -96,8 +96,7 @@  struct pmd_internals {
 };
 
 struct pmd_process_private {
-	int rxq_fds[RTE_PMD_TAP_MAX_QUEUES];
-	int txq_fds[RTE_PMD_TAP_MAX_QUEUES];
+	int fds[RTE_PMD_TAP_MAX_QUEUES];
 };
 
 /* tap_intr.c */
diff --git a/drivers/net/tap/tap_flow.c b/drivers/net/tap/tap_flow.c
index fa50fe45d7b7..a78fd50cd494 100644
--- a/drivers/net/tap/tap_flow.c
+++ b/drivers/net/tap/tap_flow.c
@@ -1595,8 +1595,9 @@  tap_flow_isolate(struct rte_eth_dev *dev,
 	 * If netdevice is there, setup appropriate flow rules immediately.
 	 * Otherwise it will be set when bringing up the netdevice (tun_alloc).
 	 */
-	if (!process_private->rxq_fds[0])
+	if (process_private->fds[0] == -1)
 		return 0;
+
 	if (set) {
 		struct rte_flow *remote_flow;
 
diff --git a/drivers/net/tap/tap_intr.c b/drivers/net/tap/tap_intr.c
index a9097def1a32..bc953791635e 100644
--- a/drivers/net/tap/tap_intr.c
+++ b/drivers/net/tap/tap_intr.c
@@ -68,20 +68,22 @@  tap_rx_intr_vec_install(struct rte_eth_dev *dev)
 	}
 	for (i = 0; i < n; i++) {
 		struct rx_queue *rxq = pmd->dev->data->rx_queues[i];
+		int fd = process_private->fds[i];
 
 		/* Skip queues that cannot request interrupts. */
-		if (!rxq || process_private->rxq_fds[i] == -1) {
+		if (!rxq || fd == -1) {
 			/* Use invalid intr_vec[] index to disable entry. */
 			if (rte_intr_vec_list_index_set(intr_handle, i,
-			RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID))
+						RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID))
 				return -rte_errno;
 			continue;
 		}
+
 		if (rte_intr_vec_list_index_set(intr_handle, i,
-					RTE_INTR_VEC_RXTX_OFFSET + count))
+						RTE_INTR_VEC_RXTX_OFFSET + count))
 			return -rte_errno;
-		if (rte_intr_efds_index_set(intr_handle, count,
-						   process_private->rxq_fds[i]))
+
+		if (rte_intr_efds_index_set(intr_handle, count, fd))
 			return -rte_errno;
 		count++;
 	}