[v3] mbuf: fix reset on mbuf free
Checks
Commit Message
m->nb_segs must be reset on mbuf free whatever the value of m->next,
because it can happen that m->nb_segs is != 1. For instance in this
case:
m1 = rte_pktmbuf_alloc(mp);
rte_pktmbuf_append(m1, 500);
m2 = rte_pktmbuf_alloc(mp);
rte_pktmbuf_append(m2, 500);
rte_pktmbuf_chain(m1, m2);
m0 = rte_pktmbuf_alloc(mp);
rte_pktmbuf_append(m0, 500);
rte_pktmbuf_chain(m0, m1);
As rte_pktmbuf_chain() does not reset nb_segs in the initial m1
segment (this is not required), after this code the mbuf chain
has 3 segments:
- m0: next=m1, nb_segs=3
- m1: next=m2, nb_segs=2
- m2: next=NULL, nb_segs=1
If this chain is then split between m1 and m2, the result is 2 packets:
- first packet
  - m0: next=m1, nb_segs=2
  - m1: next=NULL, nb_segs=2
- second packet
  - m2: next=NULL, nb_segs=1
Freeing the first packet will not restore nb_segs=1 in its second
segment (m1). This is an issue because mbufs stored in a pool are
expected to have their nb_segs field set to 1.
Fixes: 8f094a9ac5d7 ("mbuf: set mbuf fields while in pool")
Cc: stable@dpdk.org
Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
v3
* fix commit log again (thanks Morten for spotting it)
v2
* avoid write access if unneeded (suggested by Konstantin)
* enhance comments in mbuf header file (suggested by Morten)
* fix commit log
lib/librte_mbuf/rte_mbuf.c | 4 ++--
lib/librte_mbuf/rte_mbuf.h | 8 ++++----
lib/librte_mbuf/rte_mbuf_core.h | 13 +++++++++++--
3 files changed, 17 insertions(+), 8 deletions(-)
Comments
Hi Olivier,
> -----Original Message-----
> From: Olivier Matz <olivier.matz@6wind.com>
> Sent: Wednesday, January 6, 2021 3:34 PM
> To: dev@dpdk.org
> Cc: andrew.rybchenko@oktetlabs.ru; konstantin.ananyev@intel.com;
> mb@smartsharesystems.com; Ali Alnubani <alialnu@nvidia.com>;
> ajitkhaparde@gmail.com; stable@dpdk.org; Ajit Khaparde
> <ajit.khaparde@broadcom.com>
> Subject: [PATCH v3] mbuf: fix reset on mbuf free
>
Even though the performance tests on Mellanox NICs are passing, I see a performance drop of up to 0.5 Mpps with 64B frames (the tests only fail for a drop of 1 Mpps or more):
https://mails.dpdk.org/archives/test-report/2021-January/172759.html
I'll verify this on local hardware and reply back.
Regards,
Ali
> m->nb_seg must be reset on mbuf free whatever the value of m->next,
> because it can happen that m->nb_seg is != 1. For instance in this
> case:
>
> m1 = rte_pktmbuf_alloc(mp);
> rte_pktmbuf_append(m1, 500);
> m2 = rte_pktmbuf_alloc(mp);
> rte_pktmbuf_append(m2, 500);
> rte_pktmbuf_chain(m1, m2);
> m0 = rte_pktmbuf_alloc(mp);
> rte_pktmbuf_append(m0, 500);
> rte_pktmbuf_chain(m0, m1);
>
> As rte_pktmbuf_chain() does not reset nb_seg in the initial m1
> segment (this is not required), after this code the mbuf chain
> have 3 segments:
> - m0: next=m1, nb_seg=3
> - m1: next=m2, nb_seg=2
> - m2: next=NULL, nb_seg=1
>
> Then split this chain between m1 and m2, it would result in 2 packets:
> - first packet
> - m0: next=m1, nb_seg=2
> - m1: next=NULL, nb_seg=2
> - second packet
> - m2: next=NULL, nb_seg=1
>
> Freeing the first packet will not restore nb_seg=1 in the second
> segment. This is an issue because it is expected that mbufs stored
> in pool have their nb_seg field set to 1.
>
> Fixes: 8f094a9ac5d7 ("mbuf: set mbuf fields while in pool")
> Cc: stable@dpdk.org
>
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> Acked-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
> ---
>
> v3
> * fix commit log again (thanks Morten for spotting it)
>
> v2
> * avoid write access if unneeded (suggested by Konstantin)
> * enhance comments in mbuf header file (suggested by Morten)
> * fix commit log
>
>
> lib/librte_mbuf/rte_mbuf.c | 4 ++--
> lib/librte_mbuf/rte_mbuf.h | 8 ++++----
> lib/librte_mbuf/rte_mbuf_core.h | 13 +++++++++++--
> 3 files changed, 17 insertions(+), 8 deletions(-)
>
> diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
> index 7d09ee2939..5f77840557 100644
> --- a/lib/librte_mbuf/rte_mbuf.c
> +++ b/lib/librte_mbuf/rte_mbuf.c
> @@ -129,10 +129,10 @@ rte_pktmbuf_free_pinned_extmem(void *addr, void *opaque)
>
> rte_mbuf_ext_refcnt_set(m->shinfo, 1);
> m->ol_flags = EXT_ATTACHED_MBUF;
> - if (m->next != NULL) {
> + if (m->next != NULL)
> m->next = NULL;
> + if (m->nb_segs != 1)
> m->nb_segs = 1;
> - }
> rte_mbuf_raw_free(m);
> }
>
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index c4c9ebfaa0..8c1097ed76 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -1340,10 +1340,10 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
> return NULL;
> }
>
> - if (m->next != NULL) {
> + if (m->next != NULL)
> m->next = NULL;
> + if (m->nb_segs != 1)
> m->nb_segs = 1;
> - }
>
> return m;
>
> @@ -1357,10 +1357,10 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
> return NULL;
> }
>
> - if (m->next != NULL) {
> + if (m->next != NULL)
> m->next = NULL;
> + if (m->nb_segs != 1)
> m->nb_segs = 1;
> - }
> rte_mbuf_refcnt_set(m, 1);
>
> return m;
> diff --git a/lib/librte_mbuf/rte_mbuf_core.h b/lib/librte_mbuf/rte_mbuf_core.h
> index 567551deab..78a1fcc8ff 100644
> --- a/lib/librte_mbuf/rte_mbuf_core.h
> +++ b/lib/librte_mbuf/rte_mbuf_core.h
> @@ -495,7 +495,12 @@ struct rte_mbuf {
> * or non-atomic) is controlled by the RTE_MBUF_REFCNT_ATOMIC flag.
> */
> uint16_t refcnt;
> - uint16_t nb_segs; /**< Number of segments. */
> +
> + /**
> + * Number of segments. Only valid for the first segment of an mbuf
> + * chain.
> + */
> + uint16_t nb_segs;
>
> /** Input port (16 bits to support more than 256 virtual ports).
> * The event eth Tx adapter uses this field to specify the output port.
> @@ -591,7 +596,11 @@ struct rte_mbuf {
> /* second cache line - fields only used in slow path or on TX */
> RTE_MARKER cacheline1 __rte_cache_min_aligned;
>
> - struct rte_mbuf *next; /**< Next segment of scattered packet. */
> + /**
> + * Next segment of scattered packet. Must be NULL in the last segment or
> + * in case of non-segmented packet.
> + */
> + struct rte_mbuf *next;
>
> /* fields to support TX offloads */
> RTE_STD_C11
> --
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> 2.29.2
@@ -129,10 +129,10 @@ rte_pktmbuf_free_pinned_extmem(void *addr, void *opaque)
rte_mbuf_ext_refcnt_set(m->shinfo, 1);
m->ol_flags = EXT_ATTACHED_MBUF;
- if (m->next != NULL) {
+ if (m->next != NULL)
m->next = NULL;
+ if (m->nb_segs != 1)
m->nb_segs = 1;
- }
rte_mbuf_raw_free(m);
}
@@ -1340,10 +1340,10 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
return NULL;
}
- if (m->next != NULL) {
+ if (m->next != NULL)
m->next = NULL;
+ if (m->nb_segs != 1)
m->nb_segs = 1;
- }
return m;
@@ -1357,10 +1357,10 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
return NULL;
}
- if (m->next != NULL) {
+ if (m->next != NULL)
m->next = NULL;
+ if (m->nb_segs != 1)
m->nb_segs = 1;
- }
rte_mbuf_refcnt_set(m, 1);
return m;
@@ -495,7 +495,12 @@ struct rte_mbuf {
* or non-atomic) is controlled by the RTE_MBUF_REFCNT_ATOMIC flag.
*/
uint16_t refcnt;
- uint16_t nb_segs; /**< Number of segments. */
+
+ /**
+ * Number of segments. Only valid for the first segment of an mbuf
+ * chain.
+ */
+ uint16_t nb_segs;
/** Input port (16 bits to support more than 256 virtual ports).
* The event eth Tx adapter uses this field to specify the output port.
@@ -591,7 +596,11 @@ struct rte_mbuf {
/* second cache line - fields only used in slow path or on TX */
RTE_MARKER cacheline1 __rte_cache_min_aligned;
- struct rte_mbuf *next; /**< Next segment of scattered packet. */
+ /**
+ * Next segment of scattered packet. Must be NULL in the last segment or
+ * in case of non-segmented packet.
+ */
+ struct rte_mbuf *next;
/* fields to support TX offloads */
RTE_STD_C11