[v7,2/2] net/af_xdp: Refactor af_xdp_tx_zc

Message ID 20250201100300.2194018-3-ariel.otilibili@6wind.com (mailing list archive)
State Superseded
Delegated to: Stephen Hemminger
Headers
Series Fix use after free, and refactor af_xdp_tx_zc |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/github-robot: build success github build: passed
ci/iol-marvell-Functional success Functional Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-sample-apps-testing warning Testing issues
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS

Commit Message

Ariel Otilibili Feb. 1, 2025, 10:03 a.m. UTC
Both legs of the loop share the same logic: the common parts are about
reserving and filling both address and length into the description.

This is moved into reserve_and_fill().

Bugzilla ID: 1440
Suggested-by: Maryam Tahhan <mtahhan@redhat.com>
Signed-off-by: Ariel Otilibili <ariel.otilibili@6wind.com>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 62 ++++++++++++++++-------------
 1 file changed, 34 insertions(+), 28 deletions(-)
  

Comments

Maryam Tahhan Feb. 4, 2025, 1:39 p.m. UTC | #1
Hi Ariel

Have you had a chance to test this? I'm currently getting a segfault 
with testpmd

testpmd>
Thread 4 "dpdk-worker0" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7ffff4e00400 (LWP 3768882)]
0x0000000000b57b44 in eth_af_xdp_tx ()

Thanks
Maryam

On 01/02/2025 10:03, Ariel Otilibili wrote:
> Both legs of the loop share the same logic: the common parts are about
> reserving and filling both address and length into the description.
>
> This is moved into reserve_and_fill().
>
> Bugzilla ID: 1440
> Suggested-by: Maryam Tahhan <mtahhan@redhat.com>
> Signed-off-by: Ariel Otilibili <ariel.otilibili@6wind.com>
> ---
>   drivers/net/af_xdp/rte_eth_af_xdp.c | 62 ++++++++++++++++-------------
>   1 file changed, 34 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index 092bcb73aa0a..840a12dbf508 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -536,6 +536,31 @@ kick_tx(struct pkt_tx_queue *txq, struct xsk_ring_cons *cq)
>   		}
>   }
>   
> +static inline struct xdp_desc *
> +reserve_and_fill(struct pkt_tx_queue *txq, struct rte_mbuf *mbuf,
> +		 struct xsk_umem_info *umem)
> +{
> +	struct xdp_desc *desc = NULL;
> +	uint32_t *idx_tx = NULL;
> +	uint64_t addr, offset;
> +
> +	if (!xsk_ring_prod__reserve(&txq->tx, 1, idx_tx))
> +		goto out;
> +
> +	desc = xsk_ring_prod__tx_desc(&txq->tx, *idx_tx);
> +	desc->len = mbuf->pkt_len;
> +
> +	addr = (uint64_t)mbuf - (uint64_t)umem->buffer
> +		- umem->mb_pool->header_size;
> +	offset = rte_pktmbuf_mtod(mbuf, uint64_t) - (uint64_t)mbuf
> +		+ umem->mb_pool->header_size;
> +	offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
> +	desc->addr = addr | offset;
> +
> +out:
> +	return desc;
> +}
> +
>   #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
>   static uint16_t
>   af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> @@ -545,10 +570,8 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   	struct rte_mbuf *mbuf;
>   	unsigned long tx_bytes = 0;
>   	int i;
> -	uint32_t idx_tx;
>   	uint16_t count = 0;
>   	struct xdp_desc *desc;
> -	uint64_t addr, offset;
>   	struct xsk_ring_cons *cq = &txq->pair->cq;
>   	uint32_t free_thresh = cq->size >> 1;
>   
> @@ -559,21 +582,12 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   		mbuf = bufs[i];
>   
>   		if (mbuf->pool == umem->mb_pool) {
> -			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
> +			if (!(desc = reserve_and_fill(txq, mbuf, umem))) {
>   				kick_tx(txq, cq);
> -				if (!xsk_ring_prod__reserve(&txq->tx, 1,
> -							    &idx_tx))
> +				if (!(desc = reserve_and_fill(txq, mbuf, umem)))
>   					goto out;
>   			}
> -			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
> -			desc->len = mbuf->pkt_len;
> -			addr = (uint64_t)mbuf - (uint64_t)umem->buffer -
> -					umem->mb_pool->header_size;
> -			offset = rte_pktmbuf_mtod(mbuf, uint64_t) -
> -					(uint64_t)mbuf +
> -					umem->mb_pool->header_size;
> -			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
> -			desc->addr = addr | offset;
> +
>   			tx_bytes += desc->len;
>   			count++;
>   		} else {
> @@ -584,26 +598,18 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   			if (local_mbuf == NULL)
>   				goto out;
>   
> -			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
> +			if (!(desc = reserve_and_fill(txq, local_mbuf, umem))) {
>   				rte_pktmbuf_free(local_mbuf);
>   				goto out;
>   			}
>   
> -			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
> -			desc->len = mbuf->pkt_len;
> -
> -			addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer -
> -					umem->mb_pool->header_size;
> -			offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) -
> -					(uint64_t)local_mbuf +
> -					umem->mb_pool->header_size;
> -			pkt = xsk_umem__get_data(umem->buffer, addr + offset);
> -			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
> -			desc->addr = addr | offset;
> +			pkt = xsk_umem__get_data(umem->buffer,
> +						 (desc->addr & ~0xFFF)
> +						 + (desc->addr & 0xFFF));
>   			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
> -					desc->len);
> -			tx_bytes += desc->len;
> +				   desc->len);
>   			rte_pktmbuf_free(mbuf);
> +			tx_bytes += desc->len;
>   			count++;
>   		}
>   	}
  
Maryam Tahhan Feb. 6, 2025, 1:09 a.m. UTC | #2
On Sat 1 Feb 2025, 10:03 Ariel Otilibili, <ariel.otilibili@6wind.com> wrote:

> Both legs of the loop share the same logic: the common parts are about
> reserving and filling both address and length into the description.
>
> This is moved into reserve_and_fill().
>
> Bugzilla ID: 1440
> Suggested-by: Maryam Tahhan <mtahhan@redhat.com>
> Signed-off-by: Ariel Otilibili <ariel.otilibili@6wind.com>
> ---
>  drivers/net/af_xdp/rte_eth_af_xdp.c | 62 ++++++++++++++++-------------
>  1 file changed, 34 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c
> b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index 092bcb73aa0a..840a12dbf508 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -536,6 +536,31 @@ kick_tx(struct pkt_tx_queue *txq, struct
> xsk_ring_cons *cq)
>                 }
>  }
>
> +static inline struct xdp_desc *
> +reserve_and_fill(struct pkt_tx_queue *txq, struct rte_mbuf *mbuf,
> +                struct xsk_umem_info *umem)
> +{
> +       struct xdp_desc *desc = NULL;
> +       uint32_t *idx_tx = NULL;
> +       uint64_t addr, offset;
> +
> +       if (!xsk_ring_prod__reserve(&txq->tx, 1, idx_tx))
> +               goto out;
> +
> +       desc = xsk_ring_prod__tx_desc(&txq->tx, *idx_tx);
> +       desc->len = mbuf->pkt_len;
> +
> +       addr = (uint64_t)mbuf - (uint64_t)umem->buffer
> +               - umem->mb_pool->header_size;
> +       offset = rte_pktmbuf_mtod(mbuf, uint64_t) - (uint64_t)mbuf
> +               + umem->mb_pool->header_size;
> +       offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
> +       desc->addr = addr | offset;
> +
> +out:
> +       return desc;
> +}
> +
>  #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
>  static uint16_t
>  af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> @@ -545,10 +570,8 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs,
> uint16_t nb_pkts)
>         struct rte_mbuf *mbuf;
>         unsigned long tx_bytes = 0;
>         int i;
> -       uint32_t idx_tx;
>         uint16_t count = 0;
>         struct xdp_desc *desc;
> -       uint64_t addr, offset;
>         struct xsk_ring_cons *cq = &txq->pair->cq;
>         uint32_t free_thresh = cq->size >> 1;
>
> @@ -559,21 +582,12 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs,
> uint16_t nb_pkts)
>                 mbuf = bufs[i];
>
>                 if (mbuf->pool == umem->mb_pool) {
> -                       if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx))
> {
> +                       if (!(desc = reserve_and_fill(txq, mbuf, umem))) {
>                                 kick_tx(txq, cq);
> -                               if (!xsk_ring_prod__reserve(&txq->tx, 1,
> -                                                           &idx_tx))
> +                               if (!(desc = reserve_and_fill(txq, mbuf,
> umem)))
>                                         goto out;
>                         }
> -                       desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
> -                       desc->len = mbuf->pkt_len;
> -                       addr = (uint64_t)mbuf - (uint64_t)umem->buffer -
> -                                       umem->mb_pool->header_size;
> -                       offset = rte_pktmbuf_mtod(mbuf, uint64_t) -
> -                                       (uint64_t)mbuf +
> -                                       umem->mb_pool->header_size;
> -                       offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
> -                       desc->addr = addr | offset;
> +
>                         tx_bytes += desc->len;
>                         count++;
>                 } else {
> @@ -584,26 +598,18 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs,
> uint16_t nb_pkts)
>                         if (local_mbuf == NULL)
>                                 goto out;
>
> -                       if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx))
> {
> +                       if (!(desc = reserve_and_fill(txq, local_mbuf,
> umem))) {
>                                 rte_pktmbuf_free(local_mbuf);
>                                 goto out;
>                         }
>
> -                       desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
> -                       desc->len = mbuf->pkt_len;
> -
>

So I think i spotted one issue, you might need override desc->len after the
call to the reserve_and_fill function so as it's not taken supposed to be
from local_mbuf here.

-                       addr = (uint64_t)local_mbuf -
> (uint64_t)umem->buffer -
> -                                       umem->mb_pool->header_size;
> -                       offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) -
> -                                       (uint64_t)local_mbuf +
> -                                       umem->mb_pool->header_size;
> -                       pkt = xsk_umem__get_data(umem->buffer, addr +
> offset);
> -                       offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
> -                       desc->addr = addr | offset;
> +                       pkt = xsk_umem__get_data(umem->buffer,
> +                                                (desc->addr & ~0xFFF)
> +                                                + (desc->addr & 0xFFF));
>

Would prefer to move this pkt assignment to reserve_and_fill()
What if we passed a void **ppkt to reserve_and_fill()  then kept the
original logic just wrapped in a NULL check?

 if (ppkt) {
      *ppkt = xsk_umem__get_data(umem->buffer, addr + offset);
    }


Before the offset and desc->addr
    offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
    desc->addr = addr | offset;

WDYT?

                        rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
> -                                       desc->len);
> -                       tx_bytes += desc->len;
> +                                  desc->len);
>                         rte_pktmbuf_free(mbuf);
> +                       tx_bytes += desc->len;
>                         count++;
>                 }
>         }
> --
> 2.30.2
>
>
  
Maryam Tahhan Feb. 6, 2025, 1:17 a.m. UTC | #3
>
> Sorry, just fixing my comment here:

>
> So I think i spotted one issue, you might need override desc->len after
> the call to the reserve_and_fill function so that it's *not taken* from
> local_mbuf here.
>
  
Maryam Tahhan Feb. 6, 2025, 2:08 a.m. UTC | #4
On Sat 1 Feb 2025, 10:03 Ariel Otilibili, <ariel.otilibili@6wind.com> wrote:

> Both legs of the loop share the same logic: the common parts are about
> reserving and filling both address and length into the description.
>
> This is moved into reserve_and_fill().
>
> Bugzilla ID: 1440
> Suggested-by: Maryam Tahhan <mtahhan@redhat.com>
> Signed-off-by: Ariel Otilibili <ariel.otilibili@6wind.com>
> ---
>  drivers/net/af_xdp/rte_eth_af_xdp.c | 62 ++++++++++++++++-------------
>  1 file changed, 34 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c
> b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index 092bcb73aa0a..840a12dbf508 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -536,6 +536,31 @@ kick_tx(struct pkt_tx_queue *txq, struct
> xsk_ring_cons *cq)
>                 }
>  }
>
> +static inline struct xdp_desc *
> +reserve_and_fill(struct pkt_tx_queue *txq, struct rte_mbuf *mbuf,
> +                struct xsk_umem_info *umem)
> +{
> +       struct xdp_desc *desc = NULL;
> +       uint32_t *idx_tx = NULL;
>

Took me a while to spot this but this needs to be a uint32_t not a pointer
to uint32_t, because xsk_ring_prod__reserve() does not allocate memory for
idx_tx it just expects to dereference a pointer to a uint32_t to store the
index...

+       uint64_t addr, offset;
> +
> +       if (!xsk_ring_prod__reserve(&txq->tx, 1, idx_tx))
> +               goto out;
> +
> +       desc = xsk_ring_prod__tx_desc(&txq->tx, *idx_tx);
> +       desc->len = mbuf->pkt_len;
> +
> +       addr = (uint64_t)mbuf - (uint64_t)umem->buffer
> +               - umem->mb_pool->header_size;
> +       offset = rte_pktmbuf_mtod(mbuf, uint64_t) - (uint64_t)mbuf
> +               + umem->mb_pool->header_size;
> +       offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
> +       desc->addr = addr | offset;
> +
> +out:
> +       return desc;
> +}
> +
> <snip>
  
Ariel Otilibili Feb. 6, 2025, 5:56 p.m. UTC | #5
Hi Maryam,

On Thu, Feb 6, 2025 at 3:09 AM Maryam Tahhan <mtahhan@redhat.com> wrote:

>
>
> On Sat 1 Feb 2025, 10:03 Ariel Otilibili, <ariel.otilibili@6wind.com>
> wrote:
>
>> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c
>> b/drivers/net/af_xdp/rte_eth_af_xdp.c
>> index 092bcb73aa0a..840a12dbf508 100644
>> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
>> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
>> @@ -536,6 +536,31 @@ kick_tx(struct pkt_tx_queue *txq, struct
>> xsk_ring_cons *cq)
>>                 }
>>  }
>>
>> +static inline struct xdp_desc *
>> +reserve_and_fill(struct pkt_tx_queue *txq, struct rte_mbuf *mbuf,
>> +                struct xsk_umem_info *umem)
>> +{
>> +       struct xdp_desc *desc = NULL;
>> +       uint32_t *idx_tx = NULL;
>>
>
> Took me a while to spot this but this needs to be a uint32_t not a pointer
> to uint32_t, because xsk_ring_prod__reserve() does not allocate memory for
> idx_tx it just expects to dereference a pointer to a uint32_t to store the
> index...
>

Only now could I answer back; sorry the late response. And thanks for your
thorough investigation.

I didn't get the chance to use *testpmd;* which arguments did you pass?

I ran a scan-build while compiling, as well as a compilation with sanitized
flags. And I saw no error in af_xdp source files.

meson --wipe build -Db_sanitize=address
ninja -C build 2>&1 san_`date -Iseconds`.log
ninja -C build 2>&1 | tee --append san_`date -Iseconds`.log
ninja -C build scan-build 2>&1 | tee --append san_`date -Iseconds`.log

How did you spot the error?

>
> +       uint64_t addr, offset;
>> +
>> +       if (!xsk_ring_prod__reserve(&txq->tx, 1, idx_tx))
>> +               goto out;
>> +
>> +       desc = xsk_ring_prod__tx_desc(&txq->tx, *idx_tx);
>> +       desc->len = mbuf->pkt_len;
>> +
>> +       addr = (uint64_t)mbuf - (uint64_t)umem->buffer
>> +               - umem->mb_pool->header_size;
>> +       offset = rte_pktmbuf_mtod(mbuf, uint64_t) - (uint64_t)mbuf
>> +               + umem->mb_pool->header_size;
>> +       offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
>> +       desc->addr = addr | offset;
>> +
>> +out:
>> +       return desc;
>> +}
>> +
>> <snip>
>
>
  
Ariel Otilibili Feb. 6, 2025, 6:06 p.m. UTC | #6
Hi Maryam,

On Thu, Feb 6, 2025 at 2:09 AM Maryam Tahhan <mtahhan@redhat.com> wrote:

>
>
> On Sat 1 Feb 2025, 10:03 Ariel Otilibili, <ariel.otilibili@6wind.com>
> wrote:
>
>> Both legs of the loop share the same logic: the common parts are about
>> reserving and filling both address and length into the description.
>>
>> This is moved into reserve_and_fill().
>>
>> Bugzilla ID: 1440
>> Suggested-by: Maryam Tahhan <mtahhan@redhat.com>
>> Signed-off-by: Ariel Otilibili <ariel.otilibili@6wind.com>
>> ---
>>  drivers/net/af_xdp/rte_eth_af_xdp.c | 62 ++++++++++++++++-------------
>>  1 file changed, 34 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c
>> b/drivers/net/af_xdp/rte_eth_af_xdp.c
>> index 092bcb73aa0a..840a12dbf508 100644
>> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
>> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
>> @@ -536,6 +536,31 @@ kick_tx(struct pkt_tx_queue *txq, struct
>> xsk_ring_cons *cq)
>>                 }
>>  }
>>
>> +static inline struct xdp_desc *
>> +reserve_and_fill(struct pkt_tx_queue *txq, struct rte_mbuf *mbuf,
>> +                struct xsk_umem_info *umem)
>> +{
>> +       struct xdp_desc *desc = NULL;
>> +       uint32_t *idx_tx = NULL;
>> +       uint64_t addr, offset;
>> +
>> +       if (!xsk_ring_prod__reserve(&txq->tx, 1, idx_tx))
>> +               goto out;
>> +
>> +       desc = xsk_ring_prod__tx_desc(&txq->tx, *idx_tx);
>> +       desc->len = mbuf->pkt_len;
>> +
>> +       addr = (uint64_t)mbuf - (uint64_t)umem->buffer
>> +               - umem->mb_pool->header_size;
>> +       offset = rte_pktmbuf_mtod(mbuf, uint64_t) - (uint64_t)mbuf
>> +               + umem->mb_pool->header_size;
>> +       offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
>> +       desc->addr = addr | offset;
>> +
>> +out:
>> +       return desc;
>> +}
>> +
>>  #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
>>  static uint16_t
>>  af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>> @@ -545,10 +570,8 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs,
>> uint16_t nb_pkts)
>>         struct rte_mbuf *mbuf;
>>         unsigned long tx_bytes = 0;
>>         int i;
>> -       uint32_t idx_tx;
>>         uint16_t count = 0;
>>         struct xdp_desc *desc;
>> -       uint64_t addr, offset;
>>         struct xsk_ring_cons *cq = &txq->pair->cq;
>>         uint32_t free_thresh = cq->size >> 1;
>>
>> @@ -559,21 +582,12 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs,
>> uint16_t nb_pkts)
>>                 mbuf = bufs[i];
>>
>>                 if (mbuf->pool == umem->mb_pool) {
>> -                       if (!xsk_ring_prod__reserve(&txq->tx, 1,
>> &idx_tx)) {
>> +                       if (!(desc = reserve_and_fill(txq, mbuf, umem))) {
>>                                 kick_tx(txq, cq);
>> -                               if (!xsk_ring_prod__reserve(&txq->tx, 1,
>> -                                                           &idx_tx))
>> +                               if (!(desc = reserve_and_fill(txq, mbuf,
>> umem)))
>>                                         goto out;
>>                         }
>> -                       desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
>> -                       desc->len = mbuf->pkt_len;
>> -                       addr = (uint64_t)mbuf - (uint64_t)umem->buffer -
>> -                                       umem->mb_pool->header_size;
>> -                       offset = rte_pktmbuf_mtod(mbuf, uint64_t) -
>> -                                       (uint64_t)mbuf +
>> -                                       umem->mb_pool->header_size;
>> -                       offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
>> -                       desc->addr = addr | offset;
>> +
>>                         tx_bytes += desc->len;
>>                         count++;
>>                 } else {
>> @@ -584,26 +598,18 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs,
>> uint16_t nb_pkts)
>>                         if (local_mbuf == NULL)
>>                                 goto out;
>>
>> -                       if (!xsk_ring_prod__reserve(&txq->tx, 1,
>> &idx_tx)) {
>> +                       if (!(desc = reserve_and_fill(txq, local_mbuf,
>> umem))) {
>>                                 rte_pktmbuf_free(local_mbuf);
>>                                 goto out;
>>                         }
>>
>> -                       desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
>> -                       desc->len = mbuf->pkt_len;
>> -
>>
>
> So I think i spotted one issue, you might need override desc->len after
> the call to the reserve_and_fill function so as it's not taken supposed to
> be from local_mbuf here.
>

You are right; I will do that.

>
> -                       addr = (uint64_t)local_mbuf -
>> (uint64_t)umem->buffer -
>> -                                       umem->mb_pool->header_size;
>> -                       offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) -
>> -                                       (uint64_t)local_mbuf +
>> -                                       umem->mb_pool->header_size;
>> -                       pkt = xsk_umem__get_data(umem->buffer, addr +
>> offset);
>> -                       offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
>> -                       desc->addr = addr | offset;
>> +                       pkt = xsk_umem__get_data(umem->buffer,
>> +                                                (desc->addr & ~0xFFF)
>> +                                                + (desc->addr & 0xFFF));
>>
>
> Would prefer to move this pkt assignment to reserve_and_fill()
> What if we passed a void **ppkt to reserve_and_fill()  then kept the
> original logic just wrapped in a NULL check?
>

It would look neater, indeed.

>
>  if (ppkt) {
>       *ppkt = xsk_umem__get_data(umem->buffer, addr + offset);
>     }
>
>
> Before the offset and desc->addr
>     offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
>     desc->addr = addr | offset;
>
> WDYT?
>

I will; thanks for the proposal.

For me to sum up which changes are needed:

* change desc->len to local_buf
* move pkt initialization into reserve_and_fill
*  change idx_tx from uint32_t* to uint32_t (
https://inbox.dpdk.org/dev/CAF1zDgZgJZgWOeG=RAjicmGVt24PTtB9B0ukWH_RdE3ooeYPzA@mail.gmail.com/
)

Thanks again,
Ariel

>
>                         rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
>> -                                       desc->len);
>> -                       tx_bytes += desc->len;
>> +                                  desc->len);
>>                         rte_pktmbuf_free(mbuf);
>> +                       tx_bytes += desc->len;
>>                         count++;
>>                 }
>>         }
>> --
>> 2.30.2
>>
>>
  

Patch

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 092bcb73aa0a..840a12dbf508 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -536,6 +536,31 @@  kick_tx(struct pkt_tx_queue *txq, struct xsk_ring_cons *cq)
 		}
 }
 
+static inline struct xdp_desc *
+reserve_and_fill(struct pkt_tx_queue *txq, struct rte_mbuf *mbuf,
+		 struct xsk_umem_info *umem)
+{
+	struct xdp_desc *desc = NULL;
+	uint32_t *idx_tx = NULL;
+	uint64_t addr, offset;
+
+	if (!xsk_ring_prod__reserve(&txq->tx, 1, idx_tx))
+		goto out;
+
+	desc = xsk_ring_prod__tx_desc(&txq->tx, *idx_tx);
+	desc->len = mbuf->pkt_len;
+
+	addr = (uint64_t)mbuf - (uint64_t)umem->buffer
+		- umem->mb_pool->header_size;
+	offset = rte_pktmbuf_mtod(mbuf, uint64_t) - (uint64_t)mbuf
+		+ umem->mb_pool->header_size;
+	offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+	desc->addr = addr | offset;
+
+out:
+	return desc;
+}
+
 #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 static uint16_t
 af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
@@ -545,10 +570,8 @@  af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	unsigned long tx_bytes = 0;
 	int i;
-	uint32_t idx_tx;
 	uint16_t count = 0;
 	struct xdp_desc *desc;
-	uint64_t addr, offset;
 	struct xsk_ring_cons *cq = &txq->pair->cq;
 	uint32_t free_thresh = cq->size >> 1;
 
@@ -559,21 +582,12 @@  af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		mbuf = bufs[i];
 
 		if (mbuf->pool == umem->mb_pool) {
-			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
+			if (!(desc = reserve_and_fill(txq, mbuf, umem))) {
 				kick_tx(txq, cq);
-				if (!xsk_ring_prod__reserve(&txq->tx, 1,
-							    &idx_tx))
+				if (!(desc = reserve_and_fill(txq, mbuf, umem)))
 					goto out;
 			}
-			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
-			desc->len = mbuf->pkt_len;
-			addr = (uint64_t)mbuf - (uint64_t)umem->buffer -
-					umem->mb_pool->header_size;
-			offset = rte_pktmbuf_mtod(mbuf, uint64_t) -
-					(uint64_t)mbuf +
-					umem->mb_pool->header_size;
-			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
-			desc->addr = addr | offset;
+
 			tx_bytes += desc->len;
 			count++;
 		} else {
@@ -584,26 +598,18 @@  af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			if (local_mbuf == NULL)
 				goto out;
 
-			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
+			if (!(desc = reserve_and_fill(txq, local_mbuf, umem))) {
 				rte_pktmbuf_free(local_mbuf);
 				goto out;
 			}
 
-			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
-			desc->len = mbuf->pkt_len;
-
-			addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer -
-					umem->mb_pool->header_size;
-			offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) -
-					(uint64_t)local_mbuf +
-					umem->mb_pool->header_size;
-			pkt = xsk_umem__get_data(umem->buffer, addr + offset);
-			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
-			desc->addr = addr | offset;
+			pkt = xsk_umem__get_data(umem->buffer,
+						 (desc->addr & ~0xFFF)
+						 + (desc->addr & 0xFFF));
 			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
-					desc->len);
-			tx_bytes += desc->len;
+				   desc->len);
 			rte_pktmbuf_free(mbuf);
+			tx_bytes += desc->len;
 			count++;
 		}
 	}