net/tap: Bug fix to populate fds in secondary process

Message ID 20211126041515.96259-1-kumaraparamesh92@gmail.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series net/tap: Bug fix to populate fds in secondary process |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/github-robot: build success github build: passed
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Performance fail Performance Testing issues
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS

Commit Message

Kumara Parameshwaran Nov. 26, 2021, 4:15 a.m. UTC
  From: Kumara Parameshwaran <kparameshwar@vmware.com>

When a tap device is hotplugged to primary process which in turn
adds the device to all secondary process, the secondary process
does a tap_mp_attach_queues, but the fds are not populated in
the primary during the probe they are populated during the queue_setup,
added a fix to sync the queues during rte_eth_dev_start

Signed-off-by: Kumara Parameshwaran <kparameshwar@vmware.com>
---
 drivers/net/tap/rte_eth_tap.c | 80 +++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
  

Comments

Ferruh Yigit Jan. 17, 2022, 6:22 p.m. UTC | #1
On 11/26/2021 4:15 AM, Kumara Parameshwaran wrote:
> From: Kumara Parameshwaran <kparameshwar@vmware.com>
> 
> When a tap device is hotplugged to primary process which in turn
> adds the device to all secondary process, the secondary process
> does a tap_mp_attach_queues, but the fds are not populated in
> the primary during the probe they are populated during the queue_setup,
> added a fix to sync the queues during rte_eth_dev_start
> 

Hi Kumara,

Original intention seems first running primary application, later secondary,
so yes it doesn't looks like covering the hotplug case.

I have a few questions below, can you please check?

> Signed-off-by: Kumara Parameshwaran <kparameshwar@vmware.com>
> ---
>   drivers/net/tap/rte_eth_tap.c | 80 +++++++++++++++++++++++++++++++++++
>   1 file changed, 80 insertions(+)
> 
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index f1b48cae82..8e7915656b 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -67,6 +67,7 @@
>   
>   /* IPC key for queue fds sync */
>   #define TAP_MP_KEY "tap_mp_sync_queues"
> +#define TAP_MP_REQ_START_RXTX "tap_mp_req_start_rxtx"
>   
>   #define TAP_IOV_DEFAULT_MAX 1024
>   
> @@ -880,11 +881,51 @@ tap_link_set_up(struct rte_eth_dev *dev)
>   	return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
>   }
>   
> +static int tap_mp_req_on_rxtx(struct rte_eth_dev *dev)
> +{
> +	struct rte_mp_msg msg;
> +	struct ipc_queues *request_param = (struct ipc_queues *)msg.param;
> +	int err;
> +	int fd_iterator = 0;
> +	struct pmd_process_private *process_private = dev->process_private;
> +	int i;
> +
> +	memset(&msg, 0, sizeof(msg));
> +	strcpy(msg.name, TAP_MP_REQ_START_RXTX);
> +	strlcpy(request_param->port_name, dev->data->name,
> +		sizeof(request_param->port_name));

Why one use 'strcpy' and other 'strlcpy', can you please unify both to 'strlcpy'?

> +	msg.len_param = sizeof(*request_param);
> +	for (i = 0; i < dev->data->nb_tx_queues; i++) {
> +		msg.fds[fd_iterator++] = process_private->txq_fds[i];
> +		msg.num_fds++;
> +		request_param->txq_count++;
> +	}
> +	for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +		msg.fds[fd_iterator++] = process_private->rxq_fds[i];
> +		msg.num_fds++;
> +		request_param->rxq_count++;
> +	}
> +
> +	RTE_ASSERT(request_param->txq_count == dev->data->nb_tx_queues);
> +	RTE_ASSERT(request_param->rxq_count == dev->data->nb_rx_queues);
> +

Are these assertions really useful?
Asserts are good to verify the external assumptions, but for this case
the values are all related to above logic already.

> +	err = rte_mp_sendmsg(&msg);
> +	if (err < 0) {
> +		TAP_LOG(ERR, "Failed to send start req to secondary %d",
> +			rte_errno);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
>   static int
>   tap_dev_start(struct rte_eth_dev *dev)
>   {
>   	int err, i;
>   
> +	tap_mp_req_on_rxtx(dev);
> +

As for as I understand your logic is primary sends the message to the secondar(y|ies),
so what happens first secondary is started?

What about secondary sends the message when they are started?

Also above functions is called by both primary and secondary, what happens when it is
called by secondary? And the logic is not clear, it can be good to add a process type
check to clarify.

>   	err = tap_intr_handle_set(dev, 1);
>   	if (err)
>   		return err;
> @@ -901,6 +942,39 @@ tap_dev_start(struct rte_eth_dev *dev)
>   	return err;
>   }
>   
> +static int
> +tap_mp_req_start_rxtx(const struct rte_mp_msg *request, __rte_unused const void *peer)
> +{
> +	struct rte_eth_dev *dev;
> +	int ret;
> +	uint16_t port_id;
> +	const struct ipc_queues *request_param =
> +		(const struct ipc_queues *)request->param;
> +	int fd_iterator;
> +	int queue;
> +	struct pmd_process_private *process_private;
> +
> +	ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
> +	if (ret) {
> +		TAP_LOG(ERR, "Failed to get port id for %s",
> +			request_param->port_name);
> +		return -1;
> +	}
> +	dev = &rte_eth_devices[port_id];> +	process_private = dev->process_private;
> +	dev->data->nb_rx_queues = request_param->rxq_count;
> +	dev->data->nb_tx_queues = request_param->txq_count;
> +	fd_iterator = 0;
> +	RTE_LOG(DEBUG, PMD, "tap_attach rx_q:%d tx_q:%d\n", request_param->rxq_count,
> +		request_param->txq_count);
> +	for (queue = 0; queue < request_param->txq_count; queue++)
> +		process_private->txq_fds[queue] = request->fds[fd_iterator++];
> +	for (queue = 0; queue < request_param->rxq_count; queue++)
> +		process_private->rxq_fds[queue] = request->fds[fd_iterator++];
> +
> +	return 0;
> +}
> +
>   /* This function gets called when the current port gets stopped.
>    */
>   static int
> @@ -2445,6 +2519,12 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
>   		ret = tap_mp_attach_queues(name, eth_dev);
>   		if (ret != 0)
>   			return -1;
> +
> +		ret = rte_mp_action_register(TAP_MP_REQ_START_RXTX, tap_mp_req_start_rxtx);
> +		if (ret < 0 && rte_errno != ENOTSUP)
> +			TAP_LOG(ERR, "tap: Failed to register IPC callback: %s",
> +				strerror(rte_errno));
> +

While this update fds in start logic is added, do we still need the 'tap_mp_attach_queues()'?
Can't we remove the 'tap_mp_sync_queues()' after this change?


And need to unregister the mp_action, possibly in the 'tap_dev_close()'?
  
Ferruh Yigit Jan. 17, 2022, 6:28 p.m. UTC | #2
On 11/26/2021 4:15 AM, Kumara Parameshwaran wrote:
> From: Kumara Parameshwaran <kparameshwar@vmware.com>
> 
> When a tap device is hotplugged to primary process which in turn
> adds the device to all secondary process, the secondary process
> does a tap_mp_attach_queues, but the fds are not populated in
> the primary during the probe they are populated during the queue_setup,
> added a fix to sync the queues during rte_eth_dev_start
> 
> Signed-off-by: Kumara Parameshwaran <kparameshwar@vmware.com>

<...>

>   
> +static int
> +tap_mp_req_start_rxtx(const struct rte_mp_msg *request, __rte_unused const void *peer)
> +{
> +	struct rte_eth_dev *dev;
> +	int ret;
> +	uint16_t port_id;
> +	const struct ipc_queues *request_param =
> +		(const struct ipc_queues *)request->param;
> +	int fd_iterator;
> +	int queue;
> +	struct pmd_process_private *process_private;
> +
> +	ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
> +	if (ret) {
> +		TAP_LOG(ERR, "Failed to get port id for %s",
> +			request_param->port_name);
> +		return -1;
> +	}
> +	dev = &rte_eth_devices[port_id];

Since this is not really related with your patch, I want to have a separate thread for it.

It is not good to access the 'rte_eth_devices' global variable directly from a driver, that
is error prone.

Btw, what 'peer' supposed to contain?

It can be solved by adding an internal API, only for drivers to get eth_dev from the name,
like: 'rte_eth_dev_get_by_name()'.
This way a few other usage can be converted to this API.

@Thomas and @Andrew what do you think about the new API proposal?
  
Thomas Monjalon Jan. 17, 2022, 6:33 p.m. UTC | #3
17/01/2022 19:28, Ferruh Yigit:
> > +	ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
> > +	if (ret) {
> > +		TAP_LOG(ERR, "Failed to get port id for %s",
> > +			request_param->port_name);
> > +		return -1;
> > +	}
> > +	dev = &rte_eth_devices[port_id];
> 
> Since this is not really related with your patch, I want to have a separate thread for it.
> 
> It is not good to access the 'rte_eth_devices' global variable directly from a driver, that
> is error prone.
> 
> Btw, what 'peer' supposed to contain?
> 
> It can be solved by adding an internal API, only for drivers to get eth_dev from the name,
> like: 'rte_eth_dev_get_by_name()'.
> This way a few other usage can be converted to this API.
> 
> @Thomas and @Andrew what do you think about the new API proposal?

It looks similar to rte_eth_dev_get_port_by_name() which returns a port_id.
It is a bit strange for an ethdev driver to not have access to its own ethdev struct.
Isn't there something broken in the logic?
  
Stephen Hemminger Jan. 17, 2022, 10:16 p.m. UTC | #4
On Fri, 26 Nov 2021 09:45:15 +0530
Kumara Parameshwaran <kumaraparamesh92@gmail.com> wrote:

> +	ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
> +	if (ret) {
> +		TAP_LOG(ERR, "Failed to get port id for %s",
> +			request_param->port_name);
> +		return -1;
> +	}
> +	dev = &rte_eth_devices[port_id];
> +	process_private = dev->process_private;
> +	dev->data->nb_rx_queues = request_param->rxq_count;
> +	dev->data->nb_tx_queues = request_param->txq_count;

Why is this necessary?  dev->data is already in memory shared between primary
and secondary process.
  
Kumara Parameshwaran Jan. 18, 2022, 5:22 a.m. UTC | #5
@Stephen Hemminger<mailto:stephen@networkplumber.org> This is process private as the tap fds are maintained in per process data structures. In existing scheme, the fds are opened by the primary during queue setup and exchanged to during secondary probe where the send_msg using SOL_SOCKET and SCM_RIGHTS would remap the corresponding fds to the secondary process. If the secondary process is coming up once the primary is initialised things would work fine, but it's a problem during hotplug of the tap device.

Thanks,
Param.
  
Ferruh Yigit Jan. 18, 2022, 9:10 a.m. UTC | #6
On 1/18/2022 4:39 AM, Kumara Parameshwaran wrote:
>>   static int
>>   tap_dev_start(struct rte_eth_dev *dev)
>>   {
>>        int err, i;
>>   
>> +     tap_mp_req_on_rxtx(dev);
>> +
> 
> As for as I understand your logic is primary sends the message to the secondar(y|ies),
> so what happens first secondary is started?
> ​In case of TAP PMD looks like there is an assumption where primary should be started first. There is an existing check below during the probe function call.
> if (!rte_eal_primary_proc_alive(NULL)) {
>       TAP_LOG(ERR, "Primary process is missing");
>        return -1;
> }
> 
> What about secondary sends the message when they are started?
> ​IMHO, since primary process setups the queue it should be sufficient for the primary processes to the send the message and secondary need not send anything.
> 
> Also above functions is called by both primary and secondary, what happens when it is
> called by secondary? And the logic is not clear, it can be good to add a process type
> check to clarify.
> ​Sure, these are for tap_intr_handle_set and tap_dev_start functions?

I was thinking within the 'tap_dev_start()' function, for 'tap_mp_req_on_rxtx()' call.

Not sure how 'tap_intr_handle_set()' is involved, am I missing something.
  
Ferruh Yigit Jan. 18, 2022, 9:47 a.m. UTC | #7
On 1/17/2022 6:33 PM, Thomas Monjalon wrote:
> 17/01/2022 19:28, Ferruh Yigit:
>>> +	ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
>>> +	if (ret) {
>>> +		TAP_LOG(ERR, "Failed to get port id for %s",
>>> +			request_param->port_name);
>>> +		return -1;
>>> +	}
>>> +	dev = &rte_eth_devices[port_id];
>>
>> Since this is not really related with your patch, I want to have a separate thread for it.
>>
>> It is not good to access the 'rte_eth_devices' global variable directly from a driver, that
>> is error prone.
>>
>> Btw, what 'peer' supposed to contain?
>>
>> It can be solved by adding an internal API, only for drivers to get eth_dev from the name,
>> like: 'rte_eth_dev_get_by_name()'.
>> This way a few other usage can be converted to this API.
>>
>> @Thomas and @Andrew what do you think about the new API proposal?
> 
> It looks similar to rte_eth_dev_get_port_by_name() which returns a port_id.

Exactly, but get eth_dev directly for drivers. For drivers no need to work with port_id
handler, they can use eth_dev directly.

Another solution can be an getter function for drivers, which gets port_id and returns
the eth_dev.

> It is a bit strange for an ethdev driver to not have access to its own ethdev struct.
> Isn't there something broken in the logic?
> 

This is callback function between primary and secondary applications sync. So port name
will be same for both, but eth_dev will be different and port_id may be different.
Driver finds its own eth_dev from the shared port name.
  
Kumara Parameshwaran Jan. 18, 2022, 10:52 a.m. UTC | #8
Yes, even I was confused if it  had been the tap_intr_handle_set function.

In general the tap_dev_start should not be invoked by the secondary and
only primary should do it. I referred it to a couple of PMDs and that was
the case. Please let me know if I am missing something in my understanding.


On Tue, Jan 18, 2022 at 2:40 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:

> On 1/18/2022 4:39 AM, Kumara Parameshwaran wrote:
> >>   static int
> >>   tap_dev_start(struct rte_eth_dev *dev)
> >>   {
> >>        int err, i;
> >>
> >> +     tap_mp_req_on_rxtx(dev);
> >> +
> >
> > As for as I understand your logic is primary sends the message to the
> secondar(y|ies),
> > so what happens first secondary is started?
> > ​In case of TAP PMD looks like there is an assumption where primary
> should be started first. There is an existing check below during the probe
> function call.
> > if (!rte_eal_primary_proc_alive(NULL)) {
> >       TAP_LOG(ERR, "Primary process is missing");
> >        return -1;
> > }
> >
> > What about secondary sends the message when they are started?
> > ​IMHO, since primary process setups the queue it should be sufficient
> for the primary processes to the send the message and secondary need not
> send anything.
> >
> > Also above functions is called by both primary and secondary, what
> happens when it is
> > called by secondary? And the logic is not clear, it can be good to add a
> process type
> > check to clarify.
> > ​Sure, these are for tap_intr_handle_set and tap_dev_start functions?
>
> I was thinking within the 'tap_dev_start()' function, for
> 'tap_mp_req_on_rxtx()' call.
>
> Not sure how 'tap_intr_handle_set()' is involved, am I missing something.
>
  
Kumara Parameshwaran Jan. 18, 2022, 11:21 a.m. UTC | #9
Just wanted to bring it to your attention,

In Mellanox driver there is a requirement to exchange fds between primary
and secondary and similar usage is seen, the primary sends the port_id and
the secondary refers to the rte_eth_devices in the driver,
The functions are
           - mlx5_mp_secondary_handle in secondary
           - mlx5_mp_req_start_rxtx in primary which is invoked from
mlx5_dev_start.

In my implementation I have used the name and invoked get_port_by_name, I
can also pass the port_id from the primary to make it uniform. So with
similar usage in Mellanox is there a problem there as well on referring to
the rte_eth_devices from the PMD ?


On Tue, Jan 18, 2022 at 3:17 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:

> On 1/17/2022 6:33 PM, Thomas Monjalon wrote:
> > 17/01/2022 19:28, Ferruh Yigit:
> >>> +   ret = rte_eth_dev_get_port_by_name(request_param->port_name,
> &port_id);
> >>> +   if (ret) {
> >>> +           TAP_LOG(ERR, "Failed to get port id for %s",
> >>> +                   request_param->port_name);
> >>> +           return -1;
> >>> +   }
> >>> +   dev = &rte_eth_devices[port_id];
> >>
> >> Since this is not really related with your patch, I want to have a
> separate thread for it.
> >>
> >> It is not good to access the 'rte_eth_devices' global variable directly
> from a driver, that
> >> is error prone.
> >>
> >> Btw, what 'peer' supposed to contain?
> >>
> >> It can be solved by adding an internal API, only for drivers to get
> eth_dev from the name,
> >> like: 'rte_eth_dev_get_by_name()'.
> >> This way a few other usage can be converted to this API.
> >>
> >> @Thomas and @Andrew what do you think about the new API proposal?
> >
> > It looks similar to rte_eth_dev_get_port_by_name() which returns a
> port_id.
>
> Exactly, but get eth_dev directly for drivers. For drivers no need to work
> with port_id
> handler, they can use eth_dev directly.
>
> Another solution can be an getter function for drivers, which gets port_id
> and returns
> the eth_dev.
>
> > It is a bit strange for an ethdev driver to not have access to its own
> ethdev struct.
> > Isn't there something broken in the logic?
> >
>
> This is callback function between primary and secondary applications sync.
> So port name
> will be same for both, but eth_dev will be different and port_id may be
> different.
> Driver finds its own eth_dev from the shared port name.
>
>
  
Ferruh Yigit Jan. 18, 2022, 12:12 p.m. UTC | #10
On 1/18/2022 11:21 AM, kumaraparameshwaran rathinavel wrote:

Comment moved down.

Please don't top post, it makes very hard to follow the discussion and bad
for archives to visit discussion later.

> 
> On Tue, Jan 18, 2022 at 3:17 PM Ferruh Yigit <ferruh.yigit@intel.com <mailto:ferruh.yigit@intel.com>> wrote:
> 
>     On 1/17/2022 6:33 PM, Thomas Monjalon wrote:
>      > 17/01/2022 19:28, Ferruh Yigit:
>      >>> +   ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
>      >>> +   if (ret) {
>      >>> +           TAP_LOG(ERR, "Failed to get port id for %s",
>      >>> +                   request_param->port_name);
>      >>> +           return -1;
>      >>> +   }
>      >>> +   dev = &rte_eth_devices[port_id];
>      >>
>      >> Since this is not really related with your patch, I want to have a separate thread for it.
>      >>
>      >> It is not good to access the 'rte_eth_devices' global variable directly from a driver, that
>      >> is error prone.
>      >>
>      >> Btw, what 'peer' supposed to contain?
>      >>
>      >> It can be solved by adding an internal API, only for drivers to get eth_dev from the name,
>      >> like: 'rte_eth_dev_get_by_name()'.
>      >> This way a few other usage can be converted to this API.
>      >>
>      >> @Thomas and @Andrew what do you think about the new API proposal?
>      >
>      > It looks similar to rte_eth_dev_get_port_by_name() which returns a port_id.
> 
>     Exactly, but get eth_dev directly for drivers. For drivers no need to work with port_id
>     handler, they can use eth_dev directly.
> 
>     Another solution can be an getter function for drivers, which gets port_id and returns
>     the eth_dev.
> 
>      > It is a bit strange for an ethdev driver to not have access to its own ethdev struct.
>      > Isn't there something broken in the logic?
>      >
> 
>     This is callback function between primary and secondary applications sync. So port name
>     will be same for both, but eth_dev will be different and port_id may be different.
>     Driver finds its own eth_dev from the shared port name.
> 

> Just wanted to bring it to your attention,
> 
> In Mellanox driver there is a requirement to exchange fds between primary and secondary and similar usage is seen, the primary sends the port_id and the secondary refers to the rte_eth_devices in the driver,
> The functions are
>             - mlx5_mp_secondary_handle in secondary
>             - mlx5_mp_req_start_rxtx in primary which is invoked from mlx5_dev_start.
> 
> In my implementation I have used the name and invoked get_port_by_name, I can also pass the port_id from the primary to make it uniform. So with similar usage in Mellanox is there a problem there as well on referring to the rte_eth_devices from the PMD ?
> 

It would be same, still will be accessing to the 'rte_eth_devices'.
That is why a new API for drivers may help.
  
Ferruh Yigit Jan. 18, 2022, 12:14 p.m. UTC | #11
On 1/18/2022 10:52 AM, kumaraparameshwaran rathinavel wrote:

Comment moved down, please avoid top posting.

> 
> On Tue, Jan 18, 2022 at 2:40 PM Ferruh Yigit <ferruh.yigit@intel.com <mailto:ferruh.yigit@intel.com>> wrote:
> 
>     On 1/18/2022 4:39 AM, Kumara Parameshwaran wrote:
>      >>   static int
>      >>   tap_dev_start(struct rte_eth_dev *dev)
>      >>   {
>      >>        int err, i;
>      >>
>      >> +     tap_mp_req_on_rxtx(dev);
>      >> +
>      >
>      > As for as I understand your logic is primary sends the message to the secondar(y|ies),
>      > so what happens first secondary is started?
>      > ​In case of TAP PMD looks like there is an assumption where primary should be started first. There is an existing check below during the probe function call.
>      > if (!rte_eal_primary_proc_alive(NULL)) {
>      >       TAP_LOG(ERR, "Primary process is missing");
>      >        return -1;
>      > }
>      >
>      > What about secondary sends the message when they are started?
>      > ​IMHO, since primary process setups the queue it should be sufficient for the primary processes to the send the message and secondary need not send anything.
>      >
>      > Also above functions is called by both primary and secondary, what happens when it is
>      > called by secondary? And the logic is not clear, it can be good to add a process type
>      > check to clarify.
>      > ​Sure, these are for tap_intr_handle_set and tap_dev_start functions?
> 
>     I was thinking within the 'tap_dev_start()' function, for 'tap_mp_req_on_rxtx()' call.
> 
>     Not sure how 'tap_intr_handle_set()' is involved, am I missing something.
>> Yes, even I was confused if it  had been the tap_intr_handle_set function.
> 
> In general the tap_dev_start should not be invoked by the secondary and only primary should do it. I referred it to a couple of PMDs and that was the case. Please let me know if I am missing something in my understanding.
> 


Yes, that is the intended usecase, that primary does the start.
But having the check can help documenting the intention, that it is only for primary.
  
Thomas Monjalon Jan. 18, 2022, 12:31 p.m. UTC | #12
18/01/2022 13:12, Ferruh Yigit:
> On 1/18/2022 11:21 AM, kumaraparameshwaran rathinavel wrote:
> 
> Comment moved down.
> 
> Please don't top post, it makes very hard to follow the discussion and bad
> for archives to visit discussion later.
> 
> > 
> > On Tue, Jan 18, 2022 at 3:17 PM Ferruh Yigit <ferruh.yigit@intel.com <mailto:ferruh.yigit@intel.com>> wrote:
> > 
> >     On 1/17/2022 6:33 PM, Thomas Monjalon wrote:
> >      > 17/01/2022 19:28, Ferruh Yigit:
> >      >>> +   ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
> >      >>> +   if (ret) {
> >      >>> +           TAP_LOG(ERR, "Failed to get port id for %s",
> >      >>> +                   request_param->port_name);
> >      >>> +           return -1;
> >      >>> +   }
> >      >>> +   dev = &rte_eth_devices[port_id];
> >      >>
> >      >> Since this is not really related with your patch, I want to have a separate thread for it.
> >      >>
> >      >> It is not good to access the 'rte_eth_devices' global variable directly from a driver, that
> >      >> is error prone.
> >      >>
> >      >> Btw, what 'peer' supposed to contain?
> >      >>
> >      >> It can be solved by adding an internal API, only for drivers to get eth_dev from the name,
> >      >> like: 'rte_eth_dev_get_by_name()'.
> >      >> This way a few other usage can be converted to this API.
> >      >>
> >      >> @Thomas and @Andrew what do you think about the new API proposal?
> >      >
> >      > It looks similar to rte_eth_dev_get_port_by_name() which returns a port_id.
> > 
> >     Exactly, but get eth_dev directly for drivers. For drivers no need to work with port_id
> >     handler, they can use eth_dev directly.
> > 
> >     Another solution can be an getter function for drivers, which gets port_id and returns
> >     the eth_dev.
> > 
> >      > It is a bit strange for an ethdev driver to not have access to its own ethdev struct.
> >      > Isn't there something broken in the logic?
> >      >
> > 
> >     This is callback function between primary and secondary applications sync. So port name
> >     will be same for both, but eth_dev will be different and port_id may be different.
> >     Driver finds its own eth_dev from the shared port name.
> > 
> 
> > Just wanted to bring it to your attention,
> > 
> > In Mellanox driver there is a requirement to exchange fds between primary and secondary and similar usage is seen, the primary sends the port_id and the secondary refers to the rte_eth_devices in the driver,
> > The functions are
> >             - mlx5_mp_secondary_handle in secondary
> >             - mlx5_mp_req_start_rxtx in primary which is invoked from mlx5_dev_start.
> > 
> > In my implementation I have used the name and invoked get_port_by_name, I can also pass the port_id from the primary to make it uniform. So with similar usage in Mellanox is there a problem there as well on referring to the rte_eth_devices from the PMD ?
> > 
> 
> It would be same, still will be accessing to the 'rte_eth_devices'.
> That is why a new API for drivers may help.

I agree to add a new API if needed to remove those direct access to rte_eth_devices.
  
Stephen Hemminger Jan. 18, 2022, 4:21 p.m. UTC | #13
On Tue, 18 Jan 2022 05:22:19 +0000
Kumara Parameshwaran <kparameshwar@vmware.com> wrote:

> @Stephen Hemminger<mailto:stephen@networkplumber.org> This is process private as the tap fds are maintained in per process data structures. In existing scheme, the fds are opened by the primary during queue setup and exchanged to during secondary probe where the send_msg using SOL_SOCKET and SCM_RIGHTS would remap the corresponding fds to the secondary process. If the secondary process is coming up once the primary is initialised things would work fine, but it's a problem during hotplug of the tap device.
> 
> Thanks,
> Param.
> ________________________________
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: 18 January 2022 03:46
> To: Kumara Parameshwaran <kumaraparamesh92@gmail.com>
> Cc: keith.wiles@intel.com <keith.wiles@intel.com>; dev@dpdk.org <dev@dpdk.org>; Kumara Parameshwaran <kparameshwar@vmware.com>
> Subject: Re: [PATCH] net/tap: Bug fix to populate fds in secondary process
> 
> On Fri, 26 Nov 2021 09:45:15 +0530
> Kumara Parameshwaran <kumaraparamesh92@gmail.com> wrote:
> 
> > +     ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
> > +     if (ret) {
> > +             TAP_LOG(ERR, "Failed to get port id for %s",
> > +                     request_param->port_name);
> > +             return -1;
> > +     }
> > +     dev = &rte_eth_devices[port_id];
> > +     process_private = dev->process_private;
> > +     dev->data->nb_rx_queues = request_param->rxq_count;
> > +     dev->data->nb_tx_queues = request_param->txq_count;  
> 
> Why is this necessary?  dev->data is already in memory shared between primary
> and secondary process.

The question is about the two assignments that happen in secondary proces
that change dev->data->nb_rx_queues and dev->data->nb_tx_queues.  These are
shared and should not need to be modified here.
  
Kumara Parameshwaran Jan. 19, 2022, 4:33 a.m. UTC | #14
On Tue, Jan 18, 2022 at 9:51 PM Stephen Hemminger <
stephen@networkplumber.org> wrote:

> On Tue, 18 Jan 2022 05:22:19 +0000
> Kumara Parameshwaran <kparameshwar@vmware.com> wrote:
>
> > @Stephen Hemminger<mailto:stephen@networkplumber.org> This is process
> private as the tap fds are maintained in per process data structures. In
> existing scheme, the fds are opened by the primary during queue setup and
> exchanged to during secondary probe where the send_msg using SOL_SOCKET and
> SCM_RIGHTS would remap the corresponding fds to the secondary process. If
> the secondary process is coming up once the primary is initialised things
> would work fine, but it's a problem during hotplug of the tap device.
> >
> > Thanks,
> > Param.
> > ________________________________
> > From: Stephen Hemminger <stephen@networkplumber.org>
> > Sent: 18 January 2022 03:46
> > To: Kumara Parameshwaran <kumaraparamesh92@gmail.com>
> > Cc: keith.wiles@intel.com <keith.wiles@intel.com>; dev@dpdk.org <
> dev@dpdk.org>; Kumara Parameshwaran <kparameshwar@vmware.com>
> > Subject: Re: [PATCH] net/tap: Bug fix to populate fds in secondary
> process
> >
> > On Fri, 26 Nov 2021 09:45:15 +0530
> > Kumara Parameshwaran <kumaraparamesh92@gmail.com> wrote:
> >
> > > +     ret = rte_eth_dev_get_port_by_name(request_param->port_name,
> &port_id);
> > > +     if (ret) {
> > > +             TAP_LOG(ERR, "Failed to get port id for %s",
> > > +                     request_param->port_name);
> > > +             return -1;
> > > +     }
> > > +     dev = &rte_eth_devices[port_id];
> > > +     process_private = dev->process_private;
> > > +     dev->data->nb_rx_queues = request_param->rxq_count;
> > > +     dev->data->nb_tx_queues = request_param->txq_count;
> >
> > Why is this necessary?  dev->data is already in memory shared between
> primary
> > and secondary process.
>
> > The question is about the two assignments that happen in secondary proces
> > that change dev->data->nb_rx_queues and dev->data->nb_tx_queues.  These
> are
> > shared and should not need to be modified here.
>
 Sure my bad. Misunderstood the comment. Yes, this should not be done, will
remove the assignments.
  
Stephen Hemminger Jan. 19, 2022, 4:51 a.m. UTC | #15
On Wed, 19 Jan 2022 10:03:49 +0530
kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com> wrote:

> > > Why is this necessary?  dev->data is already in memory shared between  
> > primary  
> > > and secondary process.  
> >  
> > > The question is about the two assignments that happen in secondary proces
> > > that change dev->data->nb_rx_queues and dev->data->nb_tx_queues.  These  
> > are  
> > > shared and should not need to be modified here.  
> >  
>  Sure my bad. Misunderstood the comment. Yes, this should not be done, will
> remove the assignments.
 
No problem, primary/secondary process support is a bug farm because it
is too easy to have pointer that is from primary process leak into secondary.

I just wanted to make sure there wasn't something wrong in the infrastructure.
  

Patch

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index f1b48cae82..8e7915656b 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -67,6 +67,7 @@ 
 
 /* IPC key for queue fds sync */
 #define TAP_MP_KEY "tap_mp_sync_queues"
+#define TAP_MP_REQ_START_RXTX "tap_mp_req_start_rxtx"
 
 #define TAP_IOV_DEFAULT_MAX 1024
 
@@ -880,11 +881,51 @@  tap_link_set_up(struct rte_eth_dev *dev)
 	return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
 }
 
+static int tap_mp_req_on_rxtx(struct rte_eth_dev *dev)
+{
+	struct rte_mp_msg msg;
+	struct ipc_queues *request_param = (struct ipc_queues *)msg.param;
+	int err;
+	int fd_iterator = 0;
+	struct pmd_process_private *process_private = dev->process_private;
+	int i;
+
+	memset(&msg, 0, sizeof(msg));
+	strcpy(msg.name, TAP_MP_REQ_START_RXTX);
+	strlcpy(request_param->port_name, dev->data->name,
+		sizeof(request_param->port_name));
+	msg.len_param = sizeof(*request_param);
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		msg.fds[fd_iterator++] = process_private->txq_fds[i];
+		msg.num_fds++;
+		request_param->txq_count++;
+	}
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		msg.fds[fd_iterator++] = process_private->rxq_fds[i];
+		msg.num_fds++;
+		request_param->rxq_count++;
+	}
+
+	RTE_ASSERT(request_param->txq_count == dev->data->nb_tx_queues);
+	RTE_ASSERT(request_param->rxq_count == dev->data->nb_rx_queues);
+
+	err = rte_mp_sendmsg(&msg);
+	if (err < 0) {
+		TAP_LOG(ERR, "Failed to send start req to secondary %d",
+			rte_errno);
+		return -1;
+	}
+
+	return 0;
+}
+
 static int
 tap_dev_start(struct rte_eth_dev *dev)
 {
 	int err, i;
 
+	tap_mp_req_on_rxtx(dev);
+
 	err = tap_intr_handle_set(dev, 1);
 	if (err)
 		return err;
@@ -901,6 +942,39 @@  tap_dev_start(struct rte_eth_dev *dev)
 	return err;
 }
 
+static int
+tap_mp_req_start_rxtx(const struct rte_mp_msg *request, __rte_unused const void *peer)
+{
+	struct rte_eth_dev *dev;
+	int ret;
+	uint16_t port_id;
+	const struct ipc_queues *request_param =
+		(const struct ipc_queues *)request->param;
+	int fd_iterator;
+	int queue;
+	struct pmd_process_private *process_private;
+
+	ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
+	if (ret) {
+		TAP_LOG(ERR, "Failed to get port id for %s",
+			request_param->port_name);
+		return -1;
+	}
+	dev = &rte_eth_devices[port_id];
+	process_private = dev->process_private;
+	dev->data->nb_rx_queues = request_param->rxq_count;
+	dev->data->nb_tx_queues = request_param->txq_count;
+	fd_iterator = 0;
+	RTE_LOG(DEBUG, PMD, "tap_attach rx_q:%d tx_q:%d\n", request_param->rxq_count,
+		request_param->txq_count);
+	for (queue = 0; queue < request_param->txq_count; queue++)
+		process_private->txq_fds[queue] = request->fds[fd_iterator++];
+	for (queue = 0; queue < request_param->rxq_count; queue++)
+		process_private->rxq_fds[queue] = request->fds[fd_iterator++];
+
+	return 0;
+}
+
 /* This function gets called when the current port gets stopped.
  */
 static int
@@ -2445,6 +2519,12 @@  rte_pmd_tap_probe(struct rte_vdev_device *dev)
 		ret = tap_mp_attach_queues(name, eth_dev);
 		if (ret != 0)
 			return -1;
+
+		ret = rte_mp_action_register(TAP_MP_REQ_START_RXTX, tap_mp_req_start_rxtx);
+		if (ret < 0 && rte_errno != ENOTSUP)
+			TAP_LOG(ERR, "tap: Failed to register IPC callback: %s",
+				strerror(rte_errno));
+
 		rte_eth_dev_probing_finish(eth_dev);
 		return 0;
 	}