[dpdk-dev,RFC] net/tap: add queues when attaching from secondary process

Message ID 1528374591-26126-1-git-send-email-rasland@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series [dpdk-dev,RFC] net/tap: add queues when attaching from secondary process |

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

Raslan Darawsheh June 7, 2018, 12:29 p.m. UTC
  In the case where the device is created by the primary process.
Currently, secondary process only contains the eth dev without being
able to do any Rx/Tx.

When attaching the device from secondary process this patch adds queues
info got from IPC massaging.

Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
---
 drivers/net/tap/Makefile      |   2 +
 drivers/net/tap/rte_eth_tap.c | 105 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 106 insertions(+), 1 deletion(-)
  

Comments

Wiles, Keith June 7, 2018, 7:09 p.m. UTC | #1
> On Jun 7, 2018, at 5:29 AM, Raslan Darawsheh <rasland@mellanox.com> wrote:
> 
> In the case where the device is created by the primary process.
> Currently, secondary process only contains the eth dev without being
> able to do any Rx/Tx.
> 
> When attaching the device from secondary process this patch adds queues
> info got from IPC massaging.

Can you explain this details a bit more here, not sure I follow the real problem and the solution?

> 
> Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
> ---
> drivers/net/tap/Makefile      |   2 +
> drivers/net/tap/rte_eth_tap.c | 105 +++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 106 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
> index ccc5c5f..913423c 100644
> --- a/drivers/net/tap/Makefile
> +++ b/drivers/net/tap/Makefile
> @@ -27,6 +27,8 @@ LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
> LDLIBS += -lrte_bus_vdev
> 
> CFLAGS += -DTAP_MAX_QUEUES=$(TAP_MAX_QUEUES)
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
> +
> 
> #
> # all source are stored in SRCS-y
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index 5531fe9..0f4c8d9 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -16,6 +16,8 @@
> #include <rte_debug.h>
> #include <rte_ip.h>
> #include <rte_string_fns.h>
> +#include <rte_ethdev.h>
> +#include <rte_errno.h>
> 
> #include <sys/types.h>
> #include <sys/stat.h>
> @@ -55,6 +57,9 @@
> #define ETH_TAP_CMP_MAC_FMT     "0123456789ABCDEFabcdef"
> #define ETH_TAP_MAC_ARG_FMT     ETH_TAP_MAC_FIXED "|" ETH_TAP_USR_MAC_FMT
> 
> +/* IPC key for communication of queue fds between processes. */
> +#define TAP_MP_KEY     "tap_mp_exchange_fds"
> +
> static struct rte_vdev_driver pmd_tap_drv;
> static struct rte_vdev_driver pmd_tun_drv;
> 
> @@ -93,6 +98,15 @@ enum ioctl_mode {
> 	REMOTE_ONLY,
> };
> 
> +/* To communicate queue infos between processes */
> +struct queues_fds {
> +	char name[RTE_DEV_NAME_MAX_LEN];
> +	int num_rxq_fds;
> +	int num_txq_fds;
> +	int rxq_fds[RTE_PMD_TAP_MAX_QUEUES];
> +	int txq_fds[RTE_PMD_TAP_MAX_QUEUES];
> +};
> +
> static int tap_intr_handle_set(struct rte_eth_dev *dev, int set);
> 
> /**
> @@ -1731,6 +1745,47 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev)
> 	return ret;
> }
> 
> +/*
> + * Send the queues fds from the primary process to secondary.
> + */
> +static int
> +tap_exchange_queues_fds(const struct rte_mp_msg *mp_msg, const void *peer)
> +{
> +	struct rte_eth_dev *eth_dev;
> +	struct rte_mp_msg mp_resp;
> +	struct queues_fds *out = (struct queues_fds *)&mp_resp.param;
> +	const struct queues_fds *in = (const struct queues_fds *)mp_msg->param;
> +	uint16_t port_id;
> +	int i, ret;
> +
> +	TAP_LOG(DEBUG, "received request");
> +	strlcpy(out->name, in->name, sizeof(out->name));
> +	ret = rte_eth_dev_get_port_by_name(in->name, &port_id);
> +	if (ret) {
> +		TAP_LOG(ERR, "Failed to get dev %s", in->name);
> +		return -1;
> +	}
> +	eth_dev = &rte_eth_devices[port_id];
> +	struct pmd_internals *pmd = eth_dev->data->dev_private;
> +
> +	/* fill the queues fds data in the reply msg */
> +	strlcpy(mp_resp.name, TAP_MP_KEY, sizeof(mp_resp.name));
> +	out->num_rxq_fds = eth_dev->data->nb_rx_queues;
> +	for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
> +		out->rxq_fds[i] = pmd->rxq[i].fd;
> +	out->num_txq_fds = eth_dev->data->nb_tx_queues;
> +	for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
> +		out->txq_fds[i] = pmd->txq[i].fd;
> +	mp_resp.len_param = sizeof(*out);
> +	mp_resp.num_fds = 0;
> +	if (rte_mp_reply(&mp_resp, peer) < 0) {
> +		TAP_LOG(ERR, "Failed to reply a fds request");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> /* Open a TAP interface device.
>  */
> static int
> @@ -1744,6 +1799,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 	char remote_iface[RTE_ETH_NAME_MAX_LEN];
> 	struct ether_addr user_mac = { .addr_bytes = {0} };
> 	struct rte_eth_dev *eth_dev;
> +	int queue_id;
> 
> 	strcpy(tuntap_name, "TAP");
> 
> @@ -1757,8 +1813,46 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 			TAP_LOG(ERR, "Failed to probe %s", name);
> 			return -1;
> 		}
> -		/* TODO: request info from primary to set up Rx and Tx */
> 		eth_dev->dev_ops = &ops;
> +		/* request a sync from the primary process to get queues fds */
> +		eth_dev->rx_pkt_burst = pmd_rx_burst;
> +		eth_dev->tx_pkt_burst = pmd_tx_burst;
> +		if (!rte_eal_primary_proc_alive(NULL)) {
> +			TAP_LOG(ERR, "cannot initialize secondary process "
> +				"without a primary one");
> +			return  -1;
> +		}
> +		struct rte_mp_msg mp_req, *mp_rep;
> +		struct rte_mp_reply mp_reply;
> +		struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
> +		struct queues_fds *req = (struct queues_fds *)mp_req.param;
> +		struct queues_fds *resp;

What is the rule for DPDK coding style of having declares in the middle of a block, I thought we only wanted them at the beginning of block of code.

> +
> +		strlcpy(req->name, name, sizeof(mp_req.name));
> +		strlcpy(mp_req.name, TAP_MP_KEY, sizeof(mp_req.name));
> +		mp_req.len_param = sizeof(*req);
> +		/* request for sync from primary process to get queues fds. */
> +		if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
> +		    mp_reply.nb_received == 1) {
> +			mp_rep = &mp_reply.msgs[0];
> +			resp = (struct queues_fds *)mp_rep->param;
> +			TAP_LOG(INFO, "Received fds for %d rx_queues and "
> +				"%d tx_queues", resp->num_rxq_fds,
> +				resp->num_txq_fds);
> +		} else {
> +			TAP_LOG(ERR, "Failed to request queues from primary");
> +			return -1;
> +		}

I thought passing a FD from process to process you had to have the kernel convert the FD to the local process value. At least that was the way it was done in mmap memory FD. Is this the same problem and needs to be send in a message using a control structure with the correct flags set?

> +
> +		struct pmd_internals *pmd = eth_dev->data->dev_private;
> +		for (queue_id = 0; queue_id < resp->num_rxq_fds; queue_id++)
> +			pmd->rxq[queue_id].fd = resp->rxq_fds[queue_id];
> +
> +		for (queue_id = 0; queue_id < resp->num_txq_fds; queue_id++)
> +			pmd->txq[queue_id].fd = resp->txq_fds[queue_id];
> +
> +		TAP_LOG(NOTICE, "Initializing secondary process pmd_tap for %s",
> +			name);
> 		rte_eth_dev_probing_finish(eth_dev);
> 		return 0;
> 	}
> @@ -1806,6 +1900,14 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 	TAP_LOG(NOTICE, "Initializing pmd_tap for %s as %s",
> 		name, tap_name);
> 
> +	/* register for mp communication between secondary and primary */
> +	if (rte_mp_action_register(TAP_MP_KEY, tap_exchange_queues_fds) &&
> +	    rte_errno != EEXIST) {
> +		TAP_LOG(ERR, "%s : %s fail to register MP action : %s",
> +			tuntap_name, name, strerror(errno));
> +		return  -1;
> +	}
> +
> 	ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
> 		ETH_TUNTAP_TYPE_TAP);
> 
> @@ -1813,6 +1915,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 	if (ret == -1) {
> 		TAP_LOG(ERR, "Failed to create pmd for %s as %s",
> 			name, tap_name);
> +		rte_mp_action_unregister(TAP_MP_KEY);
> 		tap_unit--;		/* Restore the unit number */
> 	}
> 	rte_kvargs_free(kvlist);
> -- 
> 2.7.4
> 

Regards,
Keith
  
Raslan Darawsheh June 7, 2018, 11:24 p.m. UTC | #2
Hi,

As you know that currently TAP pmd support attaching a secondary process to a primary process. 
But, it's still lacking the ability to do Rx/Tx burst since it's lacking the necessary fds for RX/TX queues,
And the setting of Rx/Tx burst function.

This patch the main purpose is to exchange the fds between the processes throw the IPC massages
And to set the Rx/Tx functions for the secondary.

I hope I explained it properly, please let me know if you still didn't get it. 

Kindest regards,
Raslan Darawsheh

-----Original Message-----
From: Wiles, Keith [mailto:keith.wiles@intel.com] 
Sent: Thursday, June 7, 2018 10:10 PM
To: Raslan Darawsheh <rasland@mellanox.com>
Cc: Thomas Monjalon <thomas@monjalon.net>; dev@dpdk.org
Subject: Re: [dpdk-dev] [RFC] net/tap: add queues when attaching from secondary process



> On Jun 7, 2018, at 5:29 AM, Raslan Darawsheh <rasland@mellanox.com> wrote:
> 
> In the case where the device is created by the primary process.
> Currently, secondary process only contains the eth dev without being
> able to do any Rx/Tx.
> 
> When attaching the device from secondary process this patch adds queues
> info got from IPC massaging.

Can you explain this details a bit more here, not sure I follow the real problem and the solution?

> 
> Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
> ---
> drivers/net/tap/Makefile      |   2 +
> drivers/net/tap/rte_eth_tap.c | 105 +++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 106 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
> index ccc5c5f..913423c 100644
> --- a/drivers/net/tap/Makefile
> +++ b/drivers/net/tap/Makefile
> @@ -27,6 +27,8 @@ LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
> LDLIBS += -lrte_bus_vdev
> 
> CFLAGS += -DTAP_MAX_QUEUES=$(TAP_MAX_QUEUES)
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
> +
> 
> #
> # all source are stored in SRCS-y
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index 5531fe9..0f4c8d9 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -16,6 +16,8 @@
> #include <rte_debug.h>
> #include <rte_ip.h>
> #include <rte_string_fns.h>
> +#include <rte_ethdev.h>
> +#include <rte_errno.h>
> 
> #include <sys/types.h>
> #include <sys/stat.h>
> @@ -55,6 +57,9 @@
> #define ETH_TAP_CMP_MAC_FMT     "0123456789ABCDEFabcdef"
> #define ETH_TAP_MAC_ARG_FMT     ETH_TAP_MAC_FIXED "|" ETH_TAP_USR_MAC_FMT
> 
> +/* IPC key for communication of queue fds between processes. */
> +#define TAP_MP_KEY     "tap_mp_exchange_fds"
> +
> static struct rte_vdev_driver pmd_tap_drv;
> static struct rte_vdev_driver pmd_tun_drv;
> 
> @@ -93,6 +98,15 @@ enum ioctl_mode {
> 	REMOTE_ONLY,
> };
> 
> +/* To communicate queue infos between processes */
> +struct queues_fds {
> +	char name[RTE_DEV_NAME_MAX_LEN];
> +	int num_rxq_fds;
> +	int num_txq_fds;
> +	int rxq_fds[RTE_PMD_TAP_MAX_QUEUES];
> +	int txq_fds[RTE_PMD_TAP_MAX_QUEUES];
> +};
> +
> static int tap_intr_handle_set(struct rte_eth_dev *dev, int set);
> 
> /**
> @@ -1731,6 +1745,47 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev)
> 	return ret;
> }
> 
> +/*
> + * Send the queues fds from the primary process to secondary.
> + */
> +static int
> +tap_exchange_queues_fds(const struct rte_mp_msg *mp_msg, const void *peer)
> +{
> +	struct rte_eth_dev *eth_dev;
> +	struct rte_mp_msg mp_resp;
> +	struct queues_fds *out = (struct queues_fds *)&mp_resp.param;
> +	const struct queues_fds *in = (const struct queues_fds *)mp_msg->param;
> +	uint16_t port_id;
> +	int i, ret;
> +
> +	TAP_LOG(DEBUG, "received request");
> +	strlcpy(out->name, in->name, sizeof(out->name));
> +	ret = rte_eth_dev_get_port_by_name(in->name, &port_id);
> +	if (ret) {
> +		TAP_LOG(ERR, "Failed to get dev %s", in->name);
> +		return -1;
> +	}
> +	eth_dev = &rte_eth_devices[port_id];
> +	struct pmd_internals *pmd = eth_dev->data->dev_private;
> +
> +	/* fill the queues fds data in the reply msg */
> +	strlcpy(mp_resp.name, TAP_MP_KEY, sizeof(mp_resp.name));
> +	out->num_rxq_fds = eth_dev->data->nb_rx_queues;
> +	for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
> +		out->rxq_fds[i] = pmd->rxq[i].fd;
> +	out->num_txq_fds = eth_dev->data->nb_tx_queues;
> +	for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
> +		out->txq_fds[i] = pmd->txq[i].fd;
> +	mp_resp.len_param = sizeof(*out);
> +	mp_resp.num_fds = 0;
> +	if (rte_mp_reply(&mp_resp, peer) < 0) {
> +		TAP_LOG(ERR, "Failed to reply a fds request");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> /* Open a TAP interface device.
>  */
> static int
> @@ -1744,6 +1799,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 	char remote_iface[RTE_ETH_NAME_MAX_LEN];
> 	struct ether_addr user_mac = { .addr_bytes = {0} };
> 	struct rte_eth_dev *eth_dev;
> +	int queue_id;
> 
> 	strcpy(tuntap_name, "TAP");
> 
> @@ -1757,8 +1813,46 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 			TAP_LOG(ERR, "Failed to probe %s", name);
> 			return -1;
> 		}
> -		/* TODO: request info from primary to set up Rx and Tx */
> 		eth_dev->dev_ops = &ops;
> +		/* request a sync from the primary process to get queues fds */
> +		eth_dev->rx_pkt_burst = pmd_rx_burst;
> +		eth_dev->tx_pkt_burst = pmd_tx_burst;
> +		if (!rte_eal_primary_proc_alive(NULL)) {
> +			TAP_LOG(ERR, "cannot initialize secondary process "
> +				"without a primary one");
> +			return  -1;
> +		}
> +		struct rte_mp_msg mp_req, *mp_rep;
> +		struct rte_mp_reply mp_reply;
> +		struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
> +		struct queues_fds *req = (struct queues_fds *)mp_req.param;
> +		struct queues_fds *resp;

What is the rule for DPDK coding style of having declares in the middle of a block, I thought we only wanted them at the beginning of block of code.

> +
> +		strlcpy(req->name, name, sizeof(mp_req.name));
> +		strlcpy(mp_req.name, TAP_MP_KEY, sizeof(mp_req.name));
> +		mp_req.len_param = sizeof(*req);
> +		/* request for sync from primary process to get queues fds. */
> +		if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
> +		    mp_reply.nb_received == 1) {
> +			mp_rep = &mp_reply.msgs[0];
> +			resp = (struct queues_fds *)mp_rep->param;
> +			TAP_LOG(INFO, "Received fds for %d rx_queues and "
> +				"%d tx_queues", resp->num_rxq_fds,
> +				resp->num_txq_fds);
> +		} else {
> +			TAP_LOG(ERR, "Failed to request queues from primary");
> +			return -1;
> +		}

I thought passing a FD from process to process you had to have the kernel convert the FD to the local process value. At least that was the way it was done in mmap memory FD. Is this the same problem and needs to be send in a message using a control structure with the correct flags set?

> +
> +		struct pmd_internals *pmd = eth_dev->data->dev_private;
> +		for (queue_id = 0; queue_id < resp->num_rxq_fds; queue_id++)
> +			pmd->rxq[queue_id].fd = resp->rxq_fds[queue_id];
> +
> +		for (queue_id = 0; queue_id < resp->num_txq_fds; queue_id++)
> +			pmd->txq[queue_id].fd = resp->txq_fds[queue_id];
> +
> +		TAP_LOG(NOTICE, "Initializing secondary process pmd_tap for %s",
> +			name);
> 		rte_eth_dev_probing_finish(eth_dev);
> 		return 0;
> 	}
> @@ -1806,6 +1900,14 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 	TAP_LOG(NOTICE, "Initializing pmd_tap for %s as %s",
> 		name, tap_name);
> 
> +	/* register for mp communication between secondary and primary */
> +	if (rte_mp_action_register(TAP_MP_KEY, tap_exchange_queues_fds) &&
> +	    rte_errno != EEXIST) {
> +		TAP_LOG(ERR, "%s : %s fail to register MP action : %s",
> +			tuntap_name, name, strerror(errno));
> +		return  -1;
> +	}
> +
> 	ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
> 		ETH_TUNTAP_TYPE_TAP);
> 
> @@ -1813,6 +1915,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 	if (ret == -1) {
> 		TAP_LOG(ERR, "Failed to create pmd for %s as %s",
> 			name, tap_name);
> +		rte_mp_action_unregister(TAP_MP_KEY);
> 		tap_unit--;		/* Restore the unit number */
> 	}
> 	rte_kvargs_free(kvlist);
> -- 
> 2.7.4
> 

Regards,
Keith
  
Wiles, Keith June 8, 2018, 2:52 a.m. UTC | #3
On Jun 7, 2018, at 4:24 PM, Raslan Darawsheh <rasland@mellanox.com<mailto:rasland@mellanox.com>> wrote:

Hi,

As you know that currently TAP pmd support attaching a secondary process to a primary process.
But, it's still lacking the ability to do Rx/Tx burst since it's lacking the necessary fds for RX/TX queues,
And the setting of Rx/Tx burst function.

This patch the main purpose is to exchange the fds between the processes throw the IPC massages
And to set the Rx/Tx functions for the secondary.

I hope I explained it properly, please let me know if you still didn't get it.

I see the code sending the FD’s of primary and secondary to each other and the code looks fine. The problem I see is what I asked before in the comments below, which is the FDs on one process can not be used on another process without the kernel converting the FD for the given process. Is this the case here or not?

https://stackoverflow.com/questions/8037746/is-there-an-easier-way-to-share-file-descriptors-between-unrelated-processes-on


Kindest regards,
Raslan Darawsheh

-----Original Message-----
From: Wiles, Keith [mailto:keith.wiles@intel.com]
Sent: Thursday, June 7, 2018 10:10 PM
To: Raslan Darawsheh <rasland@mellanox.com<mailto:rasland@mellanox.com>>
Cc: Thomas Monjalon <thomas@monjalon.net<mailto:thomas@monjalon.net>>; dev@dpdk.org<mailto:dev@dpdk.org>
Subject: Re: [dpdk-dev] [RFC] net/tap: add queues when attaching from secondary process



On Jun 7, 2018, at 5:29 AM, Raslan Darawsheh <rasland@mellanox.com<mailto:rasland@mellanox.com>> wrote:

In the case where the device is created by the primary process.
Currently, secondary process only contains the eth dev without being
able to do any Rx/Tx.

When attaching the device from secondary process this patch adds queues
info got from IPC massaging.

Can you explain this details a bit more here, not sure I follow the real problem and the solution?


Signed-off-by: Raslan Darawsheh <rasland@mellanox.com<mailto:rasland@mellanox.com>>
---
drivers/net/tap/Makefile      |   2 +
drivers/net/tap/rte_eth_tap.c | 105 +++++++++++++++++++++++++++++++++++++++++-
2 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
index ccc5c5f..913423c 100644
--- a/drivers/net/tap/Makefile
+++ b/drivers/net/tap/Makefile
@@ -27,6 +27,8 @@ LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
LDLIBS += -lrte_bus_vdev

CFLAGS += -DTAP_MAX_QUEUES=$(TAP_MAX_QUEUES)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+

#
# all source are stored in SRCS-y
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 5531fe9..0f4c8d9 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -16,6 +16,8 @@
#include <rte_debug.h>
#include <rte_ip.h>
#include <rte_string_fns.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>

#include <sys/types.h>
#include <sys/stat.h>
@@ -55,6 +57,9 @@
#define ETH_TAP_CMP_MAC_FMT     "0123456789ABCDEFabcdef"
#define ETH_TAP_MAC_ARG_FMT     ETH_TAP_MAC_FIXED "|" ETH_TAP_USR_MAC_FMT

+/* IPC key for communication of queue fds between processes. */
+#define TAP_MP_KEY     "tap_mp_exchange_fds"
+
static struct rte_vdev_driver pmd_tap_drv;
static struct rte_vdev_driver pmd_tun_drv;

@@ -93,6 +98,15 @@ enum ioctl_mode {
REMOTE_ONLY,
};

+/* To communicate queue infos between processes */
+struct queues_fds {
+ char name[RTE_DEV_NAME_MAX_LEN];
+ int num_rxq_fds;
+ int num_txq_fds;
+ int rxq_fds[RTE_PMD_TAP_MAX_QUEUES];
+ int txq_fds[RTE_PMD_TAP_MAX_QUEUES];
+};
+
static int tap_intr_handle_set(struct rte_eth_dev *dev, int set);

/**
@@ -1731,6 +1745,47 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev)
return ret;
}

+/*
+ * Send the queues fds from the primary process to secondary.
+ */
+static int
+tap_exchange_queues_fds(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+ struct rte_eth_dev *eth_dev;
+ struct rte_mp_msg mp_resp;
+ struct queues_fds *out = (struct queues_fds *)&mp_resp.param;
+ const struct queues_fds *in = (const struct queues_fds *)mp_msg->param;
+ uint16_t port_id;
+ int i, ret;
+
+ TAP_LOG(DEBUG, "received request");
+ strlcpy(out->name, in->name, sizeof(out->name));
+ ret = rte_eth_dev_get_port_by_name(in->name, &port_id);
+ if (ret) {
+ TAP_LOG(ERR, "Failed to get dev %s", in->name);
+ return -1;
+ }
+ eth_dev = &rte_eth_devices[port_id];
+ struct pmd_internals *pmd = eth_dev->data->dev_private;
+
+ /* fill the queues fds data in the reply msg */
+ strlcpy(mp_resp.name, TAP_MP_KEY, sizeof(mp_resp.name));
+ out->num_rxq_fds = eth_dev->data->nb_rx_queues;
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+ out->rxq_fds[i] = pmd->rxq[i].fd;
+ out->num_txq_fds = eth_dev->data->nb_tx_queues;
+ for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+ out->txq_fds[i] = pmd->txq[i].fd;
+ mp_resp.len_param = sizeof(*out);
+ mp_resp.num_fds = 0;
+ if (rte_mp_reply(&mp_resp, peer) < 0) {
+ TAP_LOG(ERR, "Failed to reply a fds request");
+ return -1;
+ }
+
+ return 0;
+}
+
/* Open a TAP interface device.
*/
static int
@@ -1744,6 +1799,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
char remote_iface[RTE_ETH_NAME_MAX_LEN];
struct ether_addr user_mac = { .addr_bytes = {0} };
struct rte_eth_dev *eth_dev;
+ int queue_id;

strcpy(tuntap_name, "TAP");

@@ -1757,8 +1813,46 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
TAP_LOG(ERR, "Failed to probe %s", name);
return -1;
}
- /* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+ /* request a sync from the primary process to get queues fds */
+ eth_dev->rx_pkt_burst = pmd_rx_burst;
+ eth_dev->tx_pkt_burst = pmd_tx_burst;
+ if (!rte_eal_primary_proc_alive(NULL)) {
+ TAP_LOG(ERR, "cannot initialize secondary process "
+ "without a primary one");
+ return  -1;
+ }
+ struct rte_mp_msg mp_req, *mp_rep;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ struct queues_fds *req = (struct queues_fds *)mp_req.param;
+ struct queues_fds *resp;

What is the rule for DPDK coding style of having declares in the middle of a block, I thought we only wanted them at the beginning of block of code.

+
+ strlcpy(req->name, name, sizeof(mp_req.name));
+ strlcpy(mp_req.name, TAP_MP_KEY, sizeof(mp_req.name));
+ mp_req.len_param = sizeof(*req);
+ /* request for sync from primary process to get queues fds. */
+ if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+    mp_reply.nb_received == 1) {
+ mp_rep = &mp_reply.msgs[0];
+ resp = (struct queues_fds *)mp_rep->param;
+ TAP_LOG(INFO, "Received fds for %d rx_queues and "
+ "%d tx_queues", resp->num_rxq_fds,
+ resp->num_txq_fds);
+ } else {
+ TAP_LOG(ERR, "Failed to request queues from primary");
+ return -1;
+ }

I thought passing a FD from process to process you had to have the kernel convert the FD to the local process value. At least that was the way it was done in mmap memory FD. Is this the same problem and needs to be send in a message using a control structure with the correct flags set?

+
+ struct pmd_internals *pmd = eth_dev->data->dev_private;
+ for (queue_id = 0; queue_id < resp->num_rxq_fds; queue_id++)
+ pmd->rxq[queue_id].fd = resp->rxq_fds[queue_id];
+
+ for (queue_id = 0; queue_id < resp->num_txq_fds; queue_id++)
+ pmd->txq[queue_id].fd = resp->txq_fds[queue_id];
+
+ TAP_LOG(NOTICE, "Initializing secondary process pmd_tap for %s",
+ name);
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1806,6 +1900,14 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
TAP_LOG(NOTICE, "Initializing pmd_tap for %s as %s",
name, tap_name);

+ /* register for mp communication between secondary and primary */
+ if (rte_mp_action_register(TAP_MP_KEY, tap_exchange_queues_fds) &&
+    rte_errno != EEXIST) {
+ TAP_LOG(ERR, "%s : %s fail to register MP action : %s",
+ tuntap_name, name, strerror(errno));
+ return  -1;
+ }
+
ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
ETH_TUNTAP_TYPE_TAP);

@@ -1813,6 +1915,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
if (ret == -1) {
TAP_LOG(ERR, "Failed to create pmd for %s as %s",
name, tap_name);
+ rte_mp_action_unregister(TAP_MP_KEY);
tap_unit--; /* Restore the unit number */
}
rte_kvargs_free(kvlist);
--
2.7.4


Regards,
Keith


Regards,
Keith
  
Wiles, Keith June 12, 2018, 12:46 p.m. UTC | #4
> On Jun 7, 2018, at 6:24 PM, Raslan Darawsheh <rasland@mellanox.com> wrote:
> 
> Hi,
> 
> As you know that currently TAP pmd support attaching a secondary process to a primary process. 
> But, it's still lacking the ability to do Rx/Tx burst since it's lacking the necessary fds for RX/TX queues,
> And the setting of Rx/Tx burst function.
> 
> This patch the main purpose is to exchange the fds between the processes throw the IPC massages
> And to set the Rx/Tx functions for the secondary.
> 
> I hope I explained it properly, please let me know if you still didn't get it. 


I see the code sending the FD’s of primary and secondary to each other and the code looks fine. The problem I see is what I asked before in the comments below, which is the FDs on one process can not be used on another process without the kernel converting the FD for the given process. Is this the case here or not?

https://stackoverflow.com/questions/8037746/is-there-an-easier-way-to-share-file-descriptors-between-unrelated-processes-on


Regards,
Keith
  
Raslan Darawsheh June 12, 2018, 1:21 p.m. UTC | #5
Hi Keith, 
Yes you are right about that,
This is exactly the case and I'll need to provide a V2 for this with the kernel mapping.

Kindest regards,
Raslan Darawsheh

-----Original Message-----
From: Wiles, Keith [mailto:keith.wiles@intel.com] 
Sent: Tuesday, June 12, 2018 3:46 PM
To: Raslan Darawsheh <rasland@mellanox.com>
Cc: Thomas Monjalon <thomas@monjalon.net>; dev@dpdk.org
Subject: Re: [dpdk-dev] [RFC] net/tap: add queues when attaching from secondary process



> On Jun 7, 2018, at 6:24 PM, Raslan Darawsheh <rasland@mellanox.com> wrote:
> 
> Hi,
> 
> As you know that currently TAP pmd support attaching a secondary process to a primary process. 
> But, it's still lacking the ability to do Rx/Tx burst since it's 
> lacking the necessary fds for RX/TX queues, And the setting of Rx/Tx burst function.
> 
> This patch the main purpose is to exchange the fds between the 
> processes throw the IPC massages And to set the Rx/Tx functions for the secondary.
> 
> I hope I explained it properly, please let me know if you still didn't get it. 


I see the code sending the FD’s of primary and secondary to each other and the code looks fine. The problem I see is what I asked before in the comments below, which is the FDs on one process can not be used on another process without the kernel converting the FD for the given process. Is this the case here or not?

https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fstackoverflow.com%2Fquestions%2F8037746%2Fis-there-an-easier-way-to-share-file-descriptors-between-unrelated-processes-on&data=02%7C01%7Crasland%40mellanox.com%7C6356c8b95d8042516c1108d5d0628740%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C636644043986930914&sdata=nX7%2FhXRDKrccz%2BVDUyZ%2BwB088r4R9KEQGeoZ0i2CJZk%3D&reserved=0


Regards,
Keith
  

Patch

diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
index ccc5c5f..913423c 100644
--- a/drivers/net/tap/Makefile
+++ b/drivers/net/tap/Makefile
@@ -27,6 +27,8 @@  LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
 LDLIBS += -lrte_bus_vdev
 
 CFLAGS += -DTAP_MAX_QUEUES=$(TAP_MAX_QUEUES)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
 
 #
 # all source are stored in SRCS-y
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 5531fe9..0f4c8d9 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -16,6 +16,8 @@ 
 #include <rte_debug.h>
 #include <rte_ip.h>
 #include <rte_string_fns.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -55,6 +57,9 @@ 
 #define ETH_TAP_CMP_MAC_FMT     "0123456789ABCDEFabcdef"
 #define ETH_TAP_MAC_ARG_FMT     ETH_TAP_MAC_FIXED "|" ETH_TAP_USR_MAC_FMT
 
+/* IPC key for communication of queue fds between processes. */
+#define TAP_MP_KEY     "tap_mp_exchange_fds"
+
 static struct rte_vdev_driver pmd_tap_drv;
 static struct rte_vdev_driver pmd_tun_drv;
 
@@ -93,6 +98,15 @@  enum ioctl_mode {
 	REMOTE_ONLY,
 };
 
+/* To communicate queue infos between processes */
+struct queues_fds {
+	char name[RTE_DEV_NAME_MAX_LEN];
+	int num_rxq_fds;
+	int num_txq_fds;
+	int rxq_fds[RTE_PMD_TAP_MAX_QUEUES];
+	int txq_fds[RTE_PMD_TAP_MAX_QUEUES];
+};
+
 static int tap_intr_handle_set(struct rte_eth_dev *dev, int set);
 
 /**
@@ -1731,6 +1745,47 @@  rte_pmd_tun_probe(struct rte_vdev_device *dev)
 	return ret;
 }
 
+/*
+ * Send the queues fds from the primary process to secondary.
+ */
+static int
+tap_exchange_queues_fds(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+	struct rte_eth_dev *eth_dev;
+	struct rte_mp_msg mp_resp;
+	struct queues_fds *out = (struct queues_fds *)&mp_resp.param;
+	const struct queues_fds *in = (const struct queues_fds *)mp_msg->param;
+	uint16_t port_id;
+	int i, ret;
+
+	TAP_LOG(DEBUG, "received request");
+	strlcpy(out->name, in->name, sizeof(out->name));
+	ret = rte_eth_dev_get_port_by_name(in->name, &port_id);
+	if (ret) {
+		TAP_LOG(ERR, "Failed to get dev %s", in->name);
+		return -1;
+	}
+	eth_dev = &rte_eth_devices[port_id];
+	struct pmd_internals *pmd = eth_dev->data->dev_private;
+
+	/* fill the queues fds data in the reply msg */
+	strlcpy(mp_resp.name, TAP_MP_KEY, sizeof(mp_resp.name));
+	out->num_rxq_fds = eth_dev->data->nb_rx_queues;
+	for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+		out->rxq_fds[i] = pmd->rxq[i].fd;
+	out->num_txq_fds = eth_dev->data->nb_tx_queues;
+	for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
+		out->txq_fds[i] = pmd->txq[i].fd;
+	mp_resp.len_param = sizeof(*out);
+	mp_resp.num_fds = 0;
+	if (rte_mp_reply(&mp_resp, peer) < 0) {
+		TAP_LOG(ERR, "Failed to reply a fds request");
+		return -1;
+	}
+
+	return 0;
+}
+
 /* Open a TAP interface device.
  */
 static int
@@ -1744,6 +1799,7 @@  rte_pmd_tap_probe(struct rte_vdev_device *dev)
 	char remote_iface[RTE_ETH_NAME_MAX_LEN];
 	struct ether_addr user_mac = { .addr_bytes = {0} };
 	struct rte_eth_dev *eth_dev;
+	int queue_id;
 
 	strcpy(tuntap_name, "TAP");
 
@@ -1757,8 +1813,46 @@  rte_pmd_tap_probe(struct rte_vdev_device *dev)
 			TAP_LOG(ERR, "Failed to probe %s", name);
 			return -1;
 		}
-		/* TODO: request info from primary to set up Rx and Tx */
 		eth_dev->dev_ops = &ops;
+		/* request a sync from the primary process to get queues fds */
+		eth_dev->rx_pkt_burst = pmd_rx_burst;
+		eth_dev->tx_pkt_burst = pmd_tx_burst;
+		if (!rte_eal_primary_proc_alive(NULL)) {
+			TAP_LOG(ERR, "cannot initialize secondary process "
+				"without a primary one");
+			return  -1;
+		}
+		struct rte_mp_msg mp_req, *mp_rep;
+		struct rte_mp_reply mp_reply;
+		struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+		struct queues_fds *req = (struct queues_fds *)mp_req.param;
+		struct queues_fds *resp;
+
+		strlcpy(req->name, name, sizeof(mp_req.name));
+		strlcpy(mp_req.name, TAP_MP_KEY, sizeof(mp_req.name));
+		mp_req.len_param = sizeof(*req);
+		/* request for sync from primary process to get queues fds. */
+		if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+		    mp_reply.nb_received == 1) {
+			mp_rep = &mp_reply.msgs[0];
+			resp = (struct queues_fds *)mp_rep->param;
+			TAP_LOG(INFO, "Received fds for %d rx_queues and "
+				"%d tx_queues", resp->num_rxq_fds,
+				resp->num_txq_fds);
+		} else {
+			TAP_LOG(ERR, "Failed to request queues from primary");
+			return -1;
+		}
+
+		struct pmd_internals *pmd = eth_dev->data->dev_private;
+		for (queue_id = 0; queue_id < resp->num_rxq_fds; queue_id++)
+			pmd->rxq[queue_id].fd = resp->rxq_fds[queue_id];
+
+		for (queue_id = 0; queue_id < resp->num_txq_fds; queue_id++)
+			pmd->txq[queue_id].fd = resp->txq_fds[queue_id];
+
+		TAP_LOG(NOTICE, "Initializing secondary process pmd_tap for %s",
+			name);
 		rte_eth_dev_probing_finish(eth_dev);
 		return 0;
 	}
@@ -1806,6 +1900,14 @@  rte_pmd_tap_probe(struct rte_vdev_device *dev)
 	TAP_LOG(NOTICE, "Initializing pmd_tap for %s as %s",
 		name, tap_name);
 
+	/* register for mp communication between secondary and primary */
+	if (rte_mp_action_register(TAP_MP_KEY, tap_exchange_queues_fds) &&
+	    rte_errno != EEXIST) {
+		TAP_LOG(ERR, "%s : %s fail to register MP action : %s",
+			tuntap_name, name, strerror(errno));
+		return  -1;
+	}
+
 	ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
 		ETH_TUNTAP_TYPE_TAP);
 
@@ -1813,6 +1915,7 @@  rte_pmd_tap_probe(struct rte_vdev_device *dev)
 	if (ret == -1) {
 		TAP_LOG(ERR, "Failed to create pmd for %s as %s",
 			name, tap_name);
+		rte_mp_action_unregister(TAP_MP_KEY);
 		tap_unit--;		/* Restore the unit number */
 	}
 	rte_kvargs_free(kvlist);