ethdev: warn only once for badly behaving applications

Message ID 20211026145851.21944-1-david.marchand@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series ethdev: warn only once for badly behaving applications |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/iol-testing warning apply patch failure

Commit Message

David Marchand Oct. 26, 2021, 2:58 p.m. UTC
  Warning continuously is a pain when developing or if a unit test
is/gets broken.

It could also be a problem if an application behaves badly only in some
corner cases and a DoS results from those logs being continuously displayed.

Let's warn once per port and per rx/tx.

Getting such a log is scary, but let's make it more eye catching by
dumping a backtrace with it.

Tested by introducing a bug in testpmd:
  

Comments

Thomas Monjalon Oct. 26, 2021, 3:56 p.m. UTC | #1
26/10/2021 16:58, David Marchand:
> Warning continuously is a pain when developping or if a unit test
> is/gets broken.
> 
> It could also be a problem if application behaves badly only in some
> corner cases and a DoS results of those logs being continuously displayed.
> 
> Let's warn once per port and per rx/tx.
> 
> Getting such a log is scary, but let's make it more eye catching by
> dumping a backtrace with it.
[...]
> Fixes: c87d435a4d79 ("ethdev: copy fast-path API into separate structure")
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
[...]
> +static struct dummy_queue *dummy_queues_ref[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
> +static struct dummy_queue dummy_queues[RTE_MAX_ETHPORTS];

I feel we could better name those arrays, maybe adding a comment.
First one is really queues array while the second one is to share
the same value with all queues of a port. Right?

> +RTE_INIT(dummy_queue_init)
> +{
> +	uint16_t port_id;
> +
> +	for (port_id = 0; port_id < RTE_DIM(dummy_queues); port_id++) {
> +		unsigned int i;

q would be a better name than i

> +
> +		for (i = 0; i < RTE_DIM(dummy_queues_ref[port_id]); i++)
> +			dummy_queues_ref[port_id][i] = &dummy_queues[port_id];
> +	}
> +}
> +
>  static uint16_t
> -dummy_eth_rx_burst(__rte_unused void *rxq,
> +dummy_eth_rx_burst(void *rxq,
>  		__rte_unused struct rte_mbuf **rx_pkts,
>  		__rte_unused uint16_t nb_pkts)
>  {
> -	RTE_ETHDEV_LOG(ERR, "rx_pkt_burst for not ready port\n");
> +	struct dummy_queue *q = rxq;
> +
> +	if (!q->rx_warn_once) {
> +		uint16_t port_id = q - dummy_queues;
> +
> +		RTE_ETHDEV_LOG(ERR, "lcore %u called rx_pkt_burst for not ready port %"PRIu16"\n",
> +			rte_lcore_id(), port_id);
> +		rte_dump_stack();
> +		q->rx_warn_once = true;
> +	}
>  	rte_errno = ENOTSUP;
>  	return 0;
>  }

OK with this log.

[...]
>  eth_dev_fp_ops_reset(struct rte_eth_fp_ops *fpo)
>  {
>  	static void *dummy_data[RTE_MAX_QUEUES_PER_PORT];
> -	static const struct rte_eth_fp_ops dummy_ops = {
> +	uint16_t port_id = fpo - rte_eth_fp_ops;
> +
> +	dummy_queues[port_id].rx_warn_once = false;
> +	dummy_queues[port_id].tx_warn_once = false;
> +	*fpo = (struct rte_eth_fp_ops) {
>  		.rx_pkt_burst = dummy_eth_rx_burst,
>  		.tx_pkt_burst = dummy_eth_tx_burst,
> -		.rxq = {.data = dummy_data, .clbk = dummy_data,},
> -		.txq = {.data = dummy_data, .clbk = dummy_data,},
> +		.rxq = (struct rte_ethdev_qdata) {

Why this cast? rte_eth_fp_ops.rxq is of type rte_ethdev_qdata.

> +			.data = (void **)&dummy_queues_ref[port_id],
> +			.clbk = dummy_data,
> +		},
> +		.txq = (struct rte_ethdev_qdata) {
> +			.data = (void **)&dummy_queues_ref[port_id],
> +			.clbk = dummy_data,
> +		},
>  	};
> -
> -	*fpo = dummy_ops;
>  }
  
Ananyev, Konstantin Oct. 26, 2021, 5:10 p.m. UTC | #2
> 
> Warning continuously is a pain when developping or if a unit test
> is/gets broken.
> 
> It could also be a problem if application behaves badly only in some
> corner cases and a DoS results of those logs being continuously displayed.
> 
> Let's warn once per port and per rx/tx.
> 
> Getting such a log is scary, but let's make it more eye catching by
> dumping a backtrace with it.
> 
> Tested by introducing a bug in testpmd:
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -610,7 +610,7 @@ eth_dev_configure_mp(uint16_t port_id, uint16_t
>   nb_rx_q, uint16_t nb_tx_q,
>  static int
>  eth_dev_start_mp(uint16_t port_id)
>  {
> -       if (is_proc_primary())
> +       if (!is_proc_primary())
>                 return rte_eth_dev_start(port_id);
> 
>         return 0;
> 
> Then, running a basic null test:
> $ ./devtools/test-null.sh
> ...
> Start automatic packet forwarding
> io packet forwarding - ports=2 - cores=1 - streams=2 - NUMA support
>   enabled, MP allocation mode: native
> Logical Core 1 (socket 0) forwards packets on 2 streams:
>   RX P=0/Q=0 (socket 0) -> TX P=1/Q=0 (socket 0) peer=02:00:00:00:00:01
>   RX P=1/Q=0 (socket 0) -> TX P=0/Q=0 (socket 0) peer=02:00:00:00:00:00
> 
> lcore 0 called rx_pkt_burst for not ready port 0
> 8: [build/app/dpdk-testpmd() [0x59e839]]
> 7: [/lib64/libc.so.6(__libc_start_main+0xf5) [0x7ff481b69555]]
> 6: [build/app/dpdk-testpmd(main+0x54b) [0x662d24]]
> 5: [build/app/dpdk-testpmd(start_packet_forwarding+0x263) [0x65e795]]
> 4: [build/app/dpdk-testpmd() [0x65e1be]]
> 3: [build/app/dpdk-testpmd() [0x65a996]]
> 2: [build/app/dpdk-testpmd() [0xa6cbc7]]
> 1: [build/app/dpdk-testpmd(rte_dump_stack+0x27) [0xaee796]]
> lcore 0 called rx_pkt_burst for not ready port 1
> 8: [build/app/dpdk-testpmd() [0x59e839]]
> 7: [/lib64/libc.so.6(__libc_start_main+0xf5) [0x7ff481b69555]]
> 6: [build/app/dpdk-testpmd(main+0x54b) [0x662d24]]
> 5: [build/app/dpdk-testpmd(start_packet_forwarding+0x263) [0x65e795]]
> 4: [build/app/dpdk-testpmd() [0x65e1be]]
> 3: [build/app/dpdk-testpmd() [0x65a996]]
> 2: [build/app/dpdk-testpmd() [0xa6cbc7]]
> 1: [build/app/dpdk-testpmd(rte_dump_stack+0x27) [0xaee796]]
>   io packet forwarding packets/burst=32
>   nb forwarding cores=1 - nb forwarding ports=2
>   port 0: RX queue number: 1 Tx queue number: 1
>     Rx offloads=0x0 Tx offloads=0x0
> 
> Fixes: c87d435a4d79 ("ethdev: copy fast-path API into separate structure")
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
>  lib/ethdev/ethdev_private.c | 63 +++++++++++++++++++++++++++++++------
>  1 file changed, 54 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
> index c905c2df6f..7a5d05ff43 100644
> --- a/lib/ethdev/ethdev_private.c
> +++ b/lib/ethdev/ethdev_private.c
> @@ -2,6 +2,7 @@
>   * Copyright(c) 2018 Gaëtan Rivet
>   */
> 
> +#include <rte_debug.h>
>  #include "rte_ethdev.h"
>  #include "ethdev_driver.h"
>  #include "ethdev_private.h"
> @@ -175,22 +176,58 @@ rte_eth_devargs_parse_representor_ports(char *str, void *data)
>  	return str == NULL ? -1 : 0;
>  }
> 
> +struct dummy_queue {
> +	bool rx_warn_once;
> +	bool tx_warn_once;
> +};
> +static struct dummy_queue *dummy_queues_ref[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
> +static struct dummy_queue dummy_queues[RTE_MAX_ETHPORTS];
> +RTE_INIT(dummy_queue_init)
> +{
> +	uint16_t port_id;
> +
> +	for (port_id = 0; port_id < RTE_DIM(dummy_queues); port_id++) {
> +		unsigned int i;
> +
> +		for (i = 0; i < RTE_DIM(dummy_queues_ref[port_id]); i++)
> +			dummy_queues_ref[port_id][i] = &dummy_queues[port_id];
> +	}
> +}
> +
>  static uint16_t
> -dummy_eth_rx_burst(__rte_unused void *rxq,
> +dummy_eth_rx_burst(void *rxq,
>  		__rte_unused struct rte_mbuf **rx_pkts,
>  		__rte_unused uint16_t nb_pkts)
>  {
> -	RTE_ETHDEV_LOG(ERR, "rx_pkt_burst for not ready port\n");
> +	struct dummy_queue *q = rxq;
> +

LGTM in general, just one thing:
I think we'd better add extra check that rxq really points to dummy queues
before de-referencing it.
Something like:

uintptr_t port_id;
....
port_id =  q - dummy_queues;
if (port_id < RTE_DIM(dummy_queues) && !q->rx_warn_once) {
   ....
} 
 
Same for tx.

> +	if (!q->rx_warn_once) {
> +		uint16_t port_id = q - dummy_queues;
> +
> +		RTE_ETHDEV_LOG(ERR, "lcore %u called rx_pkt_burst for not ready port %"PRIu16"\n",
> +			rte_lcore_id(), port_id);
> +		rte_dump_stack();
> +		q->rx_warn_once = true;
> +	}
>  	rte_errno = ENOTSUP;
>  	return 0;
>  }
> 
>  static uint16_t
> -dummy_eth_tx_burst(__rte_unused void *txq,
> +dummy_eth_tx_burst(void *txq,
>  		__rte_unused struct rte_mbuf **tx_pkts,
>  		__rte_unused uint16_t nb_pkts)
>  {
> -	RTE_ETHDEV_LOG(ERR, "tx_pkt_burst for not ready port\n");
> +	struct dummy_queue *q = txq;
> +
> +	if (!q->tx_warn_once) {
> +		uint16_t port_id = q - dummy_queues;
> +
> +		RTE_ETHDEV_LOG(ERR, "lcore %u called tx_pkt_burst for not ready port %"PRIu16"\n",
> +			rte_lcore_id(), port_id);
> +		rte_dump_stack();
> +		q->tx_warn_once = true;
> +	}
>  	rte_errno = ENOTSUP;
>  	return 0;
>  }
> @@ -199,14 +236,22 @@ void
>  eth_dev_fp_ops_reset(struct rte_eth_fp_ops *fpo)
>  {
>  	static void *dummy_data[RTE_MAX_QUEUES_PER_PORT];
> -	static const struct rte_eth_fp_ops dummy_ops = {
> +	uint16_t port_id = fpo - rte_eth_fp_ops;
> +
> +	dummy_queues[port_id].rx_warn_once = false;
> +	dummy_queues[port_id].tx_warn_once = false;
> +	*fpo = (struct rte_eth_fp_ops) {
>  		.rx_pkt_burst = dummy_eth_rx_burst,
>  		.tx_pkt_burst = dummy_eth_tx_burst,
> -		.rxq = {.data = dummy_data, .clbk = dummy_data,},
> -		.txq = {.data = dummy_data, .clbk = dummy_data,},
> +		.rxq = (struct rte_ethdev_qdata) {

Here and for txq, do we need to explicitly specify type?
Wouldn't:
.rxq = {.data=..., .clbk=...,},
be enough here?

> +			.data = (void **)&dummy_queues_ref[port_id],
> +			.clbk = dummy_data,
> +		},
> +		.txq = (struct rte_ethdev_qdata) {
> +			.data = (void **)&dummy_queues_ref[port_id],
> +			.clbk = dummy_data,
> +		},
>  	};
> -
> -	*fpo = dummy_ops;
>  }
> 
>  void
> --
> 2.23.0
  
David Marchand Oct. 27, 2021, 7:20 a.m. UTC | #3
On Tue, Oct 26, 2021 at 5:57 PM Thomas Monjalon <thomas@monjalon.net> wrote:
>
> 26/10/2021 16:58, David Marchand:
> > Warning continuously is a pain when developping or if a unit test
> > is/gets broken.
> >
> > It could also be a problem if application behaves badly only in some
> > corner cases and a DoS results of those logs being continuously displayed.
> >
> > Let's warn once per port and per rx/tx.
> >
> > Getting such a log is scary, but let's make it more eye catching by
> > dumping a backtrace with it.
> [...]
> > Fixes: c87d435a4d79 ("ethdev: copy fast-path API into separate structure")
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> [...]
> > +static struct dummy_queue *dummy_queues_ref[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
> > +static struct dummy_queue dummy_queues[RTE_MAX_ETHPORTS];
>
> I feel we could better name those arrays, maybe adding a comment.
> First one is really queues array while the second one is to share
> the same value with all queues of a port. Right?

Yes, look fwd to v2 for better names.


>
> > +RTE_INIT(dummy_queue_init)
> > +{
> > +     uint16_t port_id;
> > +
> > +     for (port_id = 0; port_id < RTE_DIM(dummy_queues); port_id++) {
> > +             unsigned int i;
>
> q would be a better name than i

Ok, and I'll rename other variable q for actual queue objects later in
the patch.


> >  eth_dev_fp_ops_reset(struct rte_eth_fp_ops *fpo)
> >  {
> >       static void *dummy_data[RTE_MAX_QUEUES_PER_PORT];
> > -     static const struct rte_eth_fp_ops dummy_ops = {
> > +     uint16_t port_id = fpo - rte_eth_fp_ops;
> > +
> > +     dummy_queues[port_id].rx_warn_once = false;
> > +     dummy_queues[port_id].tx_warn_once = false;
> > +     *fpo = (struct rte_eth_fp_ops) {
> >               .rx_pkt_burst = dummy_eth_rx_burst,
> >               .tx_pkt_burst = dummy_eth_tx_burst,
> > -             .rxq = {.data = dummy_data, .clbk = dummy_data,},
> > -             .txq = {.data = dummy_data, .clbk = dummy_data,},
> > +             .rxq = (struct rte_ethdev_qdata) {
>
> Why this cast? rte_eth_fp_ops.rxq is of type rte_ethdev_qdata.

Funny how the compiler complains about:

../lib/ethdev/ethdev_private.c: In function ‘eth_dev_fp_ops_reset’:
../lib/ethdev/ethdev_private.c:243:9: error: expected expression
before ‘{’ token
  *fpo = {
         ^
if we don't explicitly tell the compiler that this anonymous struct is of
type struct rte_eth_fp_ops (note that *fpo is of type struct rte_eth_fp_ops).
But otoh, compiler silently understands that, in .rxq case, the
anonymous struct is of type rte_ethdev_qdata.

So indeed, it works without the cast on .rxq and .txq.
I applied the cast on all anonymous structs in my patch once I hit the
first compiler complaint.

Do you have the explanation or can you point me at some standard
explaining the difference in treatment?


>
> > +                     .data = (void **)&dummy_queues_ref[port_id],
> > +                     .clbk = dummy_data,
> > +             },
> > +             .txq = (struct rte_ethdev_qdata) {
> > +                     .data = (void **)&dummy_queues_ref[port_id],
> > +                     .clbk = dummy_data,
> > +             },
> >       };
> > -
> > -     *fpo = dummy_ops;
> >  }
  
David Marchand Oct. 27, 2021, 7:23 a.m. UTC | #4
On Tue, Oct 26, 2021 at 7:10 PM Ananyev, Konstantin
<konstantin.ananyev@intel.com> wrote:
> >  static uint16_t
> > -dummy_eth_rx_burst(__rte_unused void *rxq,
> > +dummy_eth_rx_burst(void *rxq,
> >               __rte_unused struct rte_mbuf **rx_pkts,
> >               __rte_unused uint16_t nb_pkts)
> >  {
> > -     RTE_ETHDEV_LOG(ERR, "rx_pkt_burst for not ready port\n");
> > +     struct dummy_queue *q = rxq;
> > +
>
> LGTM in general, just one thing:
> I think we'd better add extra check that rxq really points to dummy queues
> before de-referencing it.
> Something like:
>
> uintptr_t port_id;
> ....
> port_id =  q - dummy_queues;
> if (port_id < RTE_DIM(dummy_queues) && !q->rx_warn_once) {
>    ....
> }
>
> Same for tx.

Yep, will add.


>
> > +     if (!q->rx_warn_once) {
> > +             uint16_t port_id = q - dummy_queues;
> > +
> > +             RTE_ETHDEV_LOG(ERR, "lcore %u called rx_pkt_burst for not ready port %"PRIu16"\n",
> > +                     rte_lcore_id(), port_id);
> > +             rte_dump_stack();
> > +             q->rx_warn_once = true;
> > +     }
> >       rte_errno = ENOTSUP;
> >       return 0;
> >  }
> >
> >  static uint16_t
> > -dummy_eth_tx_burst(__rte_unused void *txq,
> > +dummy_eth_tx_burst(void *txq,
> >               __rte_unused struct rte_mbuf **tx_pkts,
> >               __rte_unused uint16_t nb_pkts)
> >  {
> > -     RTE_ETHDEV_LOG(ERR, "tx_pkt_burst for not ready port\n");
> > +     struct dummy_queue *q = txq;
> > +
> > +     if (!q->tx_warn_once) {
> > +             uint16_t port_id = q - dummy_queues;
> > +
> > +             RTE_ETHDEV_LOG(ERR, "lcore %u called tx_pkt_burst for not ready port %"PRIu16"\n",
> > +                     rte_lcore_id(), port_id);
> > +             rte_dump_stack();
> > +             q->tx_warn_once = true;
> > +     }
> >       rte_errno = ENOTSUP;
> >       return 0;
> >  }
> > @@ -199,14 +236,22 @@ void
> >  eth_dev_fp_ops_reset(struct rte_eth_fp_ops *fpo)
> >  {
> >       static void *dummy_data[RTE_MAX_QUEUES_PER_PORT];
> > -     static const struct rte_eth_fp_ops dummy_ops = {
> > +     uint16_t port_id = fpo - rte_eth_fp_ops;
> > +
> > +     dummy_queues[port_id].rx_warn_once = false;
> > +     dummy_queues[port_id].tx_warn_once = false;
> > +     *fpo = (struct rte_eth_fp_ops) {
> >               .rx_pkt_burst = dummy_eth_rx_burst,
> >               .tx_pkt_burst = dummy_eth_tx_burst,
> > -             .rxq = {.data = dummy_data, .clbk = dummy_data,},
> > -             .txq = {.data = dummy_data, .clbk = dummy_data,},
> > +             .rxq = (struct rte_ethdev_qdata) {
>
> Here and for txq, do we need to explicitly specify type?
> Wouldn't:
> .rxq = {.data=..., .clbk=...,},
> be enough here?

Well, same question from Thomas.
It seems to work without it.
  
Olivier Matz Oct. 27, 2021, 8:16 a.m. UTC | #5
Hi,

On Wed, Oct 27, 2021 at 09:20:52AM +0200, David Marchand wrote:
> On Tue, Oct 26, 2021 at 5:57 PM Thomas Monjalon <thomas@monjalon.net> wrote:
> >
> > 26/10/2021 16:58, David Marchand:
> > > Warning continuously is a pain when developping or if a unit test
> > > is/gets broken.
> > >
> > > It could also be a problem if application behaves badly only in some
> > > corner cases and a DoS results of those logs being continuously displayed.
> > >
> > > Let's warn once per port and per rx/tx.
> > >
> > > Getting such a log is scary, but let's make it more eye catching by
> > > dumping a backtrace with it.
> > [...]
> > > Fixes: c87d435a4d79 ("ethdev: copy fast-path API into separate structure")
> > >
> > > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > [...]
> > > +static struct dummy_queue *dummy_queues_ref[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
> > > +static struct dummy_queue dummy_queues[RTE_MAX_ETHPORTS];
> >
> > I feel we could better name those arrays, maybe adding a comment.
> > First one is really queues array while the second one is to share
> > the same value with all queues of a port. Right?
> 
> Yes, look fwd to v2 for better names.
> 
> 
> >
> > > +RTE_INIT(dummy_queue_init)
> > > +{
> > > +     uint16_t port_id;
> > > +
> > > +     for (port_id = 0; port_id < RTE_DIM(dummy_queues); port_id++) {
> > > +             unsigned int i;
> >
> > q would be a better name than i
> 
> Ok, and I'll rename other variable q for actual queue objects later in
> the patch.
> 
> 
> > >  eth_dev_fp_ops_reset(struct rte_eth_fp_ops *fpo)
> > >  {
> > >       static void *dummy_data[RTE_MAX_QUEUES_PER_PORT];
> > > -     static const struct rte_eth_fp_ops dummy_ops = {
> > > +     uint16_t port_id = fpo - rte_eth_fp_ops;
> > > +
> > > +     dummy_queues[port_id].rx_warn_once = false;
> > > +     dummy_queues[port_id].tx_warn_once = false;
> > > +     *fpo = (struct rte_eth_fp_ops) {
> > >               .rx_pkt_burst = dummy_eth_rx_burst,
> > >               .tx_pkt_burst = dummy_eth_tx_burst,
> > > -             .rxq = {.data = dummy_data, .clbk = dummy_data,},
> > > -             .txq = {.data = dummy_data, .clbk = dummy_data,},
> > > +             .rxq = (struct rte_ethdev_qdata) {
> >
> > Why this cast? rte_eth_fp_ops.rxq is of type rte_ethdev_qdata.
> 
> Funny how the compiler complains about:
> 
> ../lib/ethdev/ethdev_private.c: In function ‘eth_dev_fp_ops_reset’:
> ../lib/ethdev/ethdev_private.c:243:9: error: expected expression
> before ‘{’ token
>   *fpo = {
>          ^
> if we don't explicitely tell this anonymous struct is of type struct
> rte_eth_fp_ops (note that *fpo is of type struct rte_eth_fp_ops).
> But otoh, compiler silently understands that, in .rxq case, the
> anonymous struct is of type rte_ethdev_qdata.
> 
> So indeed, it works without the cast on .rxq and .txq.
> I applied the cast on all anonymous struct in my patch once I hit the
> first compiler complaint.
> 
> Do you have the explanation or can you point me at some standard
> explaining the difference in treatment?

Let me try an explanation, hope it is the correct one.

In the first case, this is an assignment as described in 6.5.16 of
the standard [1]:

  *fpo = (struct rte_eth_fp_ops) { .rx_pkt_burst = dummy_eth_rx_burst, ... };

The compiler expects the right side to be an expression. The expression
is a "compound literal", as described in 6.5.2.5:

 1. The type name shall specify a complete object type or an array of
    unknown size, but not a variable length array type.
 2. All the constraints for initializer lists in 6.7.9 also apply to
    compound literals

The second cast { ..., .rxq = (struct rte_ethdev_qdata) { ... } } is
inside a construction that behaves like an initialization (according to
the second point above). The compiler already knows the type of the
struct (and therefore the types of the fields), so the cast is not
required.

[1] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1548.pdf

 
> >
> > > +                     .data = (void **)&dummy_queues_ref[port_id],
> > > +                     .clbk = dummy_data,
> > > +             },
> > > +             .txq = (struct rte_ethdev_qdata) {
> > > +                     .data = (void **)&dummy_queues_ref[port_id],
> > > +                     .clbk = dummy_data,
> > > +             },
> > >       };
> > > -
> > > -     *fpo = dummy_ops;
> > >  }
> 
> 
> -- 
> David Marchand
>
  
David Marchand Oct. 27, 2021, 8:42 a.m. UTC | #6
On Wed, Oct 27, 2021 at 10:16 AM Olivier Matz <olivier.matz@6wind.com> wrote:
> > Do you have the explanation or can you point me at some standard
> > explaining the difference in treatment?
>
> Let me try an explanation, hope it is the correct one.
>
> In the first case, this is an assignment as described in 6.5.16 of
> the standard [1]:
>
>   *fpo = (struct rte_eth_fp_ops) { .rx_pkt_burst = dummy_eth_rx_burst, ... };
>
> The compiler expects the right side to be an expression. The expression
> is a "compound literal", as described in 6.5.2.5:
>
>  1. The type name shall specify a complete object type or an array of
>     unknown size, but not avariable length array type.
>  2. All the constraints for initializer lists in 6.7.9 also apply to
>     compound literals
>
> The second cast { ..., .rxq = (struct rte_ethdev_qdata) { ... } } is
> inside a construction that behaves like an initialization (according to
> the second point above). The compiler already knows the type of the
> struct (and therefore the types of the fields), so the cast is not
> required.
>
> [1] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1548.pdf

I read/understand it like this too.
Thanks a lot, reading standards is always illuminating.
  

Patch

--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -610,7 +610,7 @@  eth_dev_configure_mp(uint16_t port_id, uint16_t
  nb_rx_q, uint16_t nb_tx_q,
 static int
 eth_dev_start_mp(uint16_t port_id)
 {
-       if (is_proc_primary())
+       if (!is_proc_primary())
                return rte_eth_dev_start(port_id);

        return 0;

Then, running a basic null test:
$ ./devtools/test-null.sh
...
Start automatic packet forwarding
io packet forwarding - ports=2 - cores=1 - streams=2 - NUMA support
  enabled, MP allocation mode: native
Logical Core 1 (socket 0) forwards packets on 2 streams:
  RX P=0/Q=0 (socket 0) -> TX P=1/Q=0 (socket 0) peer=02:00:00:00:00:01
  RX P=1/Q=0 (socket 0) -> TX P=0/Q=0 (socket 0) peer=02:00:00:00:00:00

lcore 0 called rx_pkt_burst for not ready port 0
8: [build/app/dpdk-testpmd() [0x59e839]]
7: [/lib64/libc.so.6(__libc_start_main+0xf5) [0x7ff481b69555]]
6: [build/app/dpdk-testpmd(main+0x54b) [0x662d24]]
5: [build/app/dpdk-testpmd(start_packet_forwarding+0x263) [0x65e795]]
4: [build/app/dpdk-testpmd() [0x65e1be]]
3: [build/app/dpdk-testpmd() [0x65a996]]
2: [build/app/dpdk-testpmd() [0xa6cbc7]]
1: [build/app/dpdk-testpmd(rte_dump_stack+0x27) [0xaee796]]
lcore 0 called rx_pkt_burst for not ready port 1
8: [build/app/dpdk-testpmd() [0x59e839]]
7: [/lib64/libc.so.6(__libc_start_main+0xf5) [0x7ff481b69555]]
6: [build/app/dpdk-testpmd(main+0x54b) [0x662d24]]
5: [build/app/dpdk-testpmd(start_packet_forwarding+0x263) [0x65e795]]
4: [build/app/dpdk-testpmd() [0x65e1be]]
3: [build/app/dpdk-testpmd() [0x65a996]]
2: [build/app/dpdk-testpmd() [0xa6cbc7]]
1: [build/app/dpdk-testpmd(rte_dump_stack+0x27) [0xaee796]]
  io packet forwarding packets/burst=32
  nb forwarding cores=1 - nb forwarding ports=2
  port 0: RX queue number: 1 Tx queue number: 1
    Rx offloads=0x0 Tx offloads=0x0

Fixes: c87d435a4d79 ("ethdev: copy fast-path API into separate structure")

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/ethdev/ethdev_private.c | 63 +++++++++++++++++++++++++++++++------
 1 file changed, 54 insertions(+), 9 deletions(-)

diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
index c905c2df6f..7a5d05ff43 100644
--- a/lib/ethdev/ethdev_private.c
+++ b/lib/ethdev/ethdev_private.c
@@ -2,6 +2,7 @@ 
  * Copyright(c) 2018 Gaëtan Rivet
  */
 
+#include <rte_debug.h>
 #include "rte_ethdev.h"
 #include "ethdev_driver.h"
 #include "ethdev_private.h"
@@ -175,22 +176,58 @@  rte_eth_devargs_parse_representor_ports(char *str, void *data)
 	return str == NULL ? -1 : 0;
 }
 
+struct dummy_queue {
+	bool rx_warn_once;
+	bool tx_warn_once;
+};
+static struct dummy_queue *dummy_queues_ref[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
+static struct dummy_queue dummy_queues[RTE_MAX_ETHPORTS];
+RTE_INIT(dummy_queue_init)
+{
+	uint16_t port_id;
+
+	for (port_id = 0; port_id < RTE_DIM(dummy_queues); port_id++) {
+		unsigned int i;
+
+		for (i = 0; i < RTE_DIM(dummy_queues_ref[port_id]); i++)
+			dummy_queues_ref[port_id][i] = &dummy_queues[port_id];
+	}
+}
+
 static uint16_t
-dummy_eth_rx_burst(__rte_unused void *rxq,
+dummy_eth_rx_burst(void *rxq,
 		__rte_unused struct rte_mbuf **rx_pkts,
 		__rte_unused uint16_t nb_pkts)
 {
-	RTE_ETHDEV_LOG(ERR, "rx_pkt_burst for not ready port\n");
+	struct dummy_queue *q = rxq;
+
+	if (!q->rx_warn_once) {
+		uint16_t port_id = q - dummy_queues;
+
+		RTE_ETHDEV_LOG(ERR, "lcore %u called rx_pkt_burst for not ready port %"PRIu16"\n",
+			rte_lcore_id(), port_id);
+		rte_dump_stack();
+		q->rx_warn_once = true;
+	}
 	rte_errno = ENOTSUP;
 	return 0;
 }
 
 static uint16_t
-dummy_eth_tx_burst(__rte_unused void *txq,
+dummy_eth_tx_burst(void *txq,
 		__rte_unused struct rte_mbuf **tx_pkts,
 		__rte_unused uint16_t nb_pkts)
 {
-	RTE_ETHDEV_LOG(ERR, "tx_pkt_burst for not ready port\n");
+	struct dummy_queue *q = txq;
+
+	if (!q->tx_warn_once) {
+		uint16_t port_id = q - dummy_queues;
+
+		RTE_ETHDEV_LOG(ERR, "lcore %u called tx_pkt_burst for not ready port %"PRIu16"\n",
+			rte_lcore_id(), port_id);
+		rte_dump_stack();
+		q->tx_warn_once = true;
+	}
 	rte_errno = ENOTSUP;
 	return 0;
 }
@@ -199,14 +236,22 @@  void
 eth_dev_fp_ops_reset(struct rte_eth_fp_ops *fpo)
 {
 	static void *dummy_data[RTE_MAX_QUEUES_PER_PORT];
-	static const struct rte_eth_fp_ops dummy_ops = {
+	uint16_t port_id = fpo - rte_eth_fp_ops;
+
+	dummy_queues[port_id].rx_warn_once = false;
+	dummy_queues[port_id].tx_warn_once = false;
+	*fpo = (struct rte_eth_fp_ops) {
 		.rx_pkt_burst = dummy_eth_rx_burst,
 		.tx_pkt_burst = dummy_eth_tx_burst,
-		.rxq = {.data = dummy_data, .clbk = dummy_data,},
-		.txq = {.data = dummy_data, .clbk = dummy_data,},
+		.rxq = (struct rte_ethdev_qdata) {
+			.data = (void **)&dummy_queues_ref[port_id],
+			.clbk = dummy_data,
+		},
+		.txq = (struct rte_ethdev_qdata) {
+			.data = (void **)&dummy_queues_ref[port_id],
+			.clbk = dummy_data,
+		},
 	};
-
-	*fpo = dummy_ops;
 }
 
 void