eal: fix modify data area after memset
Checks
Commit Message
Let's look at this path:
malloc_elem_free
->malloc_elem_join_adjacent_free
->join_elem(elem, elem->next)
0. The current elem's pad > 0.
1. The data area is memset in malloc_elem_free first.
2. The next elem is free, so we try to join the current elem and the next one.
3. join_elem then modifies inner->size; this address was already
memset in step 1, so the write causes the content at that address to become non-zero.
If the user then calls rte_zmalloc and picks this elem, the memory it gets
is not all zeroed.
Fixes: 2808a12cc053 (malloc: fix memory element size in case of padding)
Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
---
lib/eal/common/malloc_elem.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
Comments
ping
Fengnan Chang <changfengnan@bytedance.com> 于2023年9月12日周二 17:05写道:
>
> Let's look at this path:
> malloc_elem_free
> ->malloc_elem_join_adjacent_free
> ->join_elem(elem, elem->next)
>
> 0. cur elem's pad > 0
> 1. data area memset in malloc_elem_free first.
> 2. next elem is free, try to join cur elem and next.
> 3. in join_elem, try to modify inner->size, this address had
> memset in step 1, it casue the content of addrees become non-zero.
>
> If user call rte_zmalloc, and pick this elem, it can't get all
> zero'd memory.
>
> Fixes: 2808a12cc053 (malloc: fix memory element size in case of padding)
> Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
> ---
> lib/eal/common/malloc_elem.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/lib/eal/common/malloc_elem.c b/lib/eal/common/malloc_elem.c
> index 619c040aa3..93a23fa8d4 100644
> --- a/lib/eal/common/malloc_elem.c
> +++ b/lib/eal/common/malloc_elem.c
> @@ -492,7 +492,7 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
> * be contiguous in memory.
> */
> static inline void
> -join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
> +join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2, bool update_inner)
> {
> struct malloc_elem *next = elem2->next;
> elem1->size += elem2->size;
> @@ -502,7 +502,7 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
> elem1->heap->last = elem1;
> elem1->next = next;
> elem1->dirty |= elem2->dirty;
> - if (elem1->pad) {
> + if (elem1->pad && update_inner) {
> struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
> inner->size = elem1->size - elem1->pad;
> }
> @@ -526,7 +526,7 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
>
> /* remove from free list, join to this one */
> malloc_elem_free_list_remove(elem->next);
> - join_elem(elem, elem->next);
> + join_elem(elem, elem->next, false);
>
> /* erase header, trailer and pad */
> memset(erase, MALLOC_POISON, erase_len);
> @@ -550,7 +550,7 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
> malloc_elem_free_list_remove(elem->prev);
>
> new_elem = elem->prev;
> - join_elem(new_elem, elem);
> + join_elem(new_elem, elem, false);
>
> /* erase header, trailer and pad */
> memset(erase, MALLOC_POISON, erase_len);
> @@ -683,7 +683,7 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size)
> * join the two
> */
> malloc_elem_free_list_remove(elem->next);
> - join_elem(elem, elem->next);
> + join_elem(elem, elem->next, true);
>
> if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
> /* now we have a big block together. Lets cut it down a bit, by splitting */
> --
> 2.20.1
>
We need a careful review here, please.
12/09/2023 11:04, Fengnan Chang:
> Let's look at this path:
> malloc_elem_free
> ->malloc_elem_join_adjacent_free
> ->join_elem(elem, elem->next)
>
> 0. cur elem's pad > 0
> 1. data area memset in malloc_elem_free first.
> 2. next elem is free, try to join cur elem and next.
> 3. in join_elem, try to modify inner->size, this address had
> memset in step 1, it casue the content of addrees become non-zero.
>
> If user call rte_zmalloc, and pick this elem, it can't get all
> zero'd memory.
>
> Fixes: 2808a12cc053 (malloc: fix memory element size in case of padding)
> Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
> ---
> lib/eal/common/malloc_elem.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/lib/eal/common/malloc_elem.c b/lib/eal/common/malloc_elem.c
> index 619c040aa3..93a23fa8d4 100644
> --- a/lib/eal/common/malloc_elem.c
> +++ b/lib/eal/common/malloc_elem.c
> @@ -492,7 +492,7 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
> * be contiguous in memory.
> */
> static inline void
> -join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
> +join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2, bool update_inner)
> {
> struct malloc_elem *next = elem2->next;
> elem1->size += elem2->size;
> @@ -502,7 +502,7 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
> elem1->heap->last = elem1;
> elem1->next = next;
> elem1->dirty |= elem2->dirty;
> - if (elem1->pad) {
> + if (elem1->pad && update_inner) {
> struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
> inner->size = elem1->size - elem1->pad;
> }
> @@ -526,7 +526,7 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
>
> /* remove from free list, join to this one */
> malloc_elem_free_list_remove(elem->next);
> - join_elem(elem, elem->next);
> + join_elem(elem, elem->next, false);
>
> /* erase header, trailer and pad */
> memset(erase, MALLOC_POISON, erase_len);
> @@ -550,7 +550,7 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
> malloc_elem_free_list_remove(elem->prev);
>
> new_elem = elem->prev;
> - join_elem(new_elem, elem);
> + join_elem(new_elem, elem, false);
>
> /* erase header, trailer and pad */
> memset(erase, MALLOC_POISON, erase_len);
> @@ -683,7 +683,7 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size)
> * join the two
> */
> malloc_elem_free_list_remove(elem->next);
> - join_elem(elem, elem->next);
> + join_elem(elem, elem->next, true);
>
> if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
> /* now we have a big block together. Lets cut it down a bit, by splitting */
>
2023-09-22 16:12 (UTC+0800), Fengnan Chang:
> ping
>
> Fengnan Chang <changfengnan@bytedance.com> 于2023年9月12日周二 17:05写道:
> >
> > Let's look at this path:
> > malloc_elem_free
> > ->malloc_elem_join_adjacent_free
> > ->join_elem(elem, elem->next)
> >
> > 0. cur elem's pad > 0
> > 1. data area memset in malloc_elem_free first.
> > 2. next elem is free, try to join cur elem and next.
> > 3. in join_elem, try to modify inner->size, this address had
> > memset in step 1, it casue the content of addrees become non-zero.
> >
> > If user call rte_zmalloc, and pick this elem, it can't get all
> > zero'd memory.
malloc_elem_join_adjacent_free() always calls memset() after join_elem(),
for the next and the previous element respectively.
How can this bug be reproduced?
Dmitry Kozlyuk <dmitry.kozliuk@gmail.com> 于2023年10月23日周一 04:22写道:
>
> 2023-09-22 16:12 (UTC+0800), Fengnan Chang:
> > ping
> >
> > Fengnan Chang <changfengnan@bytedance.com> 于2023年9月12日周二 17:05写道:
> > >
> > > Let's look at this path:
> > > malloc_elem_free
> > > ->malloc_elem_join_adjacent_free
> > > ->join_elem(elem, elem->next)
> > >
> > > 0. cur elem's pad > 0
> > > 1. data area memset in malloc_elem_free first.
> > > 2. next elem is free, try to join cur elem and next.
> > > 3. in join_elem, try to modify inner->size, this address had
> > > memset in step 1, it casue the content of addrees become non-zero.
> > >
> > > If user call rte_zmalloc, and pick this elem, it can't get all
> > > zero'd memory.
>
> malloc_elem_join_adjacent_free() always calls memset() after join_elem(),
> for the next and the previous element respectively.
When join_elem() is called for the next element in
malloc_elem_join_adjacent_free(),
the memset() that follows erases the *next* element, but join_elem() updates
the *current* element's
content, which shouldn't happen; they are two different elements.
> How to reproduce this bug?
I hit it while testing this patch:
https://patches.dpdk.org/project/dpdk/patch/20230831111937.60975-1-changfengnan@bytedance.com/
My test case allocates and frees objects of size 64/128/192 from 16 threads;
after every
allocation I check whether the whole content is zero.
It's not easy to reproduce, but you can give it a try; the problem is easier to
find by reading the code.
On Mon, 23 Oct 2023 17:07:21 +0800
Fengnan Chang <changfengnan@bytedance.com> wrote:
> Dmitry Kozlyuk <dmitry.kozliuk@gmail.com> 于2023年10月23日周一 04:22写道:
> >
> > 2023-09-22 16:12 (UTC+0800), Fengnan Chang:
> > > ping
> > >
> > > Fengnan Chang <changfengnan@bytedance.com> 于2023年9月12日周二 17:05写道:
> > > >
> > > > Let's look at this path:
> > > > malloc_elem_free
> > > > ->malloc_elem_join_adjacent_free
> > > > ->join_elem(elem, elem->next)
> > > >
> > > > 0. cur elem's pad > 0
> > > > 1. data area memset in malloc_elem_free first.
> > > > 2. next elem is free, try to join cur elem and next.
> > > > 3. in join_elem, try to modify inner->size, this address had
> > > > memset in step 1, it casue the content of addrees become non-zero.
> > > >
> > > > If user call rte_zmalloc, and pick this elem, it can't get all
> > > > zero'd memory.
> >
> > malloc_elem_join_adjacent_free() always calls memset() after join_elem(),
> > for the next and the previous element respectively.
> when try to call join_elem() for the next element in
> malloc_elem_join_adjacent_free(),
> the memset is try to memset *next* element, but join_elem() is update
> *current* element's
> content, which shoudn't happen, it's two different element.
>
> > How to reproduce this bug?
> when I test this patch,
> https://patches.dpdk.org/project/dpdk/patch/20230831111937.60975-1-changfengnan@bytedance.com/
> I have a case try to alloc 64/128/192 size object and free with 16 threads,
> after every
> alloc I'll check wheather all content is 0 or not.
> It's not easy to reproduce, you can have a try, it's easier to find
> this problem in code level.
I tried to make a test that would reproduce the problem but it did not.
diff --git a/app/test/test_malloc.c b/app/test/test_malloc.c
index cd579c503cf5..cfd45d6a28eb 100644
--- a/app/test/test_malloc.c
+++ b/app/test/test_malloc.c
@@ -28,6 +28,7 @@
#include <rte_string_fns.h>
#define N 10000
+#define BINS 100
static int
is_mem_on_socket(int32_t socket);
@@ -69,13 +70,24 @@ is_aligned(void *p, int align)
return 1;
}
+static bool is_all_zero(uint8_t *mem, size_t sz)
+{
+ size_t i;
+
+ for (i = 0; i < sz; i++)
+ if (mem[i] != 0)
+ return false;
+
+ return true;
+}
+
static int
test_align_overlap_per_lcore(__rte_unused void *arg)
{
const unsigned align1 = 8,
align2 = 64,
align3 = 2048;
- unsigned i,j;
+ unsigned int i;
void *p1 = NULL, *p2 = NULL, *p3 = NULL;
int ret = 0;
@@ -86,11 +98,12 @@ test_align_overlap_per_lcore(__rte_unused void *arg)
ret = -1;
break;
}
- for(j = 0; j < 1000 ; j++) {
- if( *(char *)p1 != 0) {
- printf("rte_zmalloc didn't zero the allocated memory\n");
- ret = -1;
- }
+
+ if (!is_all_zero(p1, 1000)) {
+ printf("rte_zmalloc didn't zero the allocated memory\n");
+ ret = -1;
+ rte_free(p1);
+ break;
}
p2 = rte_malloc("dummy", 1000, align2);
if (!p2){
@@ -140,6 +153,66 @@ test_align_overlap_per_lcore(__rte_unused void *arg)
return ret;
}
+/*
+ * Allocate random size chunks and make sure that they are
+ * always zero.
+ */
+static int
+test_zmalloc(__rte_unused void *arg)
+{
+ unsigned int i, n;
+ void *slots[BINS] = { };
+ void *p1;
+ size_t sz;
+
+ /* Allocate many variable size chunks */
+ for (i = 0; i < BINS; i++) {
+ sz = rte_rand_max(1024) + 1;
+ p1 = rte_zmalloc("slots", sz, 0);
+ if (p1 == NULL) {
+ printf("rte_zmalloc(%zu) returned NULL (i=%u)\n", sz, i);
+ goto fail;
+ }
+ slots[i] = p1;
+ if (!is_all_zero(p1, sz))
+ goto fail;
+ }
+
+ /* Drop one chunk per iteration */
+ for (n = BINS; n > 0; n--) {
+ /* Swap in a new block into a slot */
+ for (i = 0; i < N; i++) {
+ unsigned int bin = rte_rand_max(n);
+
+ sz = rte_rand_max(1024) + 1;
+ p1 = rte_zmalloc("swap", sz, 0);
+ if (!p1){
+ printf("rte_zmalloc(%zu) returned NULL (i=%u)\n", sz, i);
+ goto fail;
+ }
+
+ if (!is_all_zero(p1, sz)) {
+ printf("rte_zmalloc didn't zero the allocated memory\n");
+ goto fail;
+ }
+
+ rte_free(slots[bin]);
+ slots[bin] = p1;
+ }
+
+ /* Drop last bin */
+ rte_free(slots[n]);
+ slots[n] = NULL;
+ }
+
+ return 0;
+fail:
+ for (i = 0; i < BINS; i++)
+ rte_free(slots[i]);
+
+ return -1;
+}
+
static int
test_reordered_free_per_lcore(__rte_unused void *arg)
{
@@ -1020,6 +1091,21 @@ test_malloc(void)
}
else printf("test_realloc() passed\n");
+ /*----------------------------*/
+ RTE_LCORE_FOREACH_WORKER(lcore_id) {
+ rte_eal_remote_launch(test_zmalloc, NULL, lcore_id);
+ }
+
+ RTE_LCORE_FOREACH_WORKER(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ ret = -1;
+ }
+ if (ret < 0){
+ printf("test_zmalloc() failed\n");
+ return ret;
+ }
+ else printf("test_zmalloc() passed\n");
+
/*----------------------------*/
RTE_LCORE_FOREACH_WORKER(lcore_id) {
rte_eal_remote_launch(test_align_overlap_per_lcore, NULL, lcore_id);
Thanks for your response. It's my fault, I got mixed up:
this problem can only be reproduced after applying
https://patches.dpdk.org/project/dpdk/patch/20230831111937.60975-1-changfengnan@bytedance.com/
.
I'll fold this change into that previous patch.
Sorry for wasting your time.
Stephen Hemminger <stephen@networkplumber.org> 于2023年10月26日周四 00:04写道:
> On Mon, 23 Oct 2023 17:07:21 +0800
> Fengnan Chang <changfengnan@bytedance.com> wrote:
>
> > Dmitry Kozlyuk <dmitry.kozliuk@gmail.com> 于2023年10月23日周一 04:22写道:
> > >
> > > 2023-09-22 16:12 (UTC+0800), Fengnan Chang:
> > > > ping
> > > >
> > > > Fengnan Chang <changfengnan@bytedance.com> 于2023年9月12日周二 17:05写道:
> > > > >
> > > > > Let's look at this path:
> > > > > malloc_elem_free
> > > > > ->malloc_elem_join_adjacent_free
> > > > > ->join_elem(elem, elem->next)
> > > > >
> > > > > 0. cur elem's pad > 0
> > > > > 1. data area memset in malloc_elem_free first.
> > > > > 2. next elem is free, try to join cur elem and next.
> > > > > 3. in join_elem, try to modify inner->size, this address had
> > > > > memset in step 1, it casue the content of addrees become non-zero.
> > > > >
> > > > > If user call rte_zmalloc, and pick this elem, it can't get all
> > > > > zero'd memory.
> > >
> > > malloc_elem_join_adjacent_free() always calls memset() after
> join_elem(),
> > > for the next and the previous element respectively.
> > when try to call join_elem() for the next element in
> > malloc_elem_join_adjacent_free(),
> > the memset is try to memset *next* element, but join_elem() is update
> > *current* element's
> > content, which shoudn't happen, it's two different element.
> >
> > > How to reproduce this bug?
> > when I test this patch,
> >
> https://patches.dpdk.org/project/dpdk/patch/20230831111937.60975-1-changfengnan@bytedance.com/
> > I have a case try to alloc 64/128/192 size object and free with 16
> threads,
> > after every
> > alloc I'll check wheather all content is 0 or not.
> > It's not easy to reproduce, you can have a try, it's easier to find
> > this problem in code level.
>
> I tried to make a test that would reproduce the problem but it did not.
>
> diff --git a/app/test/test_malloc.c b/app/test/test_malloc.c
> index cd579c503cf5..cfd45d6a28eb 100644
> --- a/app/test/test_malloc.c
> +++ b/app/test/test_malloc.c
> @@ -28,6 +28,7 @@
> #include <rte_string_fns.h>
>
> #define N 10000
> +#define BINS 100
>
> static int
> is_mem_on_socket(int32_t socket);
> @@ -69,13 +70,24 @@ is_aligned(void *p, int align)
> return 1;
> }
>
> +static bool is_all_zero(uint8_t *mem, size_t sz)
> +{
> + size_t i;
> +
> + for (i = 0; i < sz; i++)
> + if (mem[i] != 0)
> + return false;
> +
> + return true;
> +}
> +
> static int
> test_align_overlap_per_lcore(__rte_unused void *arg)
> {
> const unsigned align1 = 8,
> align2 = 64,
> align3 = 2048;
> - unsigned i,j;
> + unsigned int i;
> void *p1 = NULL, *p2 = NULL, *p3 = NULL;
> int ret = 0;
>
> @@ -86,11 +98,12 @@ test_align_overlap_per_lcore(__rte_unused void *arg)
> ret = -1;
> break;
> }
> - for(j = 0; j < 1000 ; j++) {
> - if( *(char *)p1 != 0) {
> - printf("rte_zmalloc didn't zero the
> allocated memory\n");
> - ret = -1;
> - }
> +
> + if (!is_all_zero(p1, 1000)) {
> + printf("rte_zmalloc didn't zero the allocated
> memory\n");
> + ret = -1;
> + rte_free(p1);
> + break;
> }
> p2 = rte_malloc("dummy", 1000, align2);
> if (!p2){
> @@ -140,6 +153,66 @@ test_align_overlap_per_lcore(__rte_unused void *arg)
> return ret;
> }
>
> +/*
> + * Allocate random size chunks and make sure that they are
> + * always zero.
> + */
> +static int
> +test_zmalloc(__rte_unused void *arg)
> +{
> + unsigned int i, n;
> + void *slots[BINS] = { };
> + void *p1;
> + size_t sz;
> +
> + /* Allocate many variable size chunks */
> + for (i = 0; i < BINS; i++) {
> + sz = rte_rand_max(1024) + 1;
> + p1 = rte_zmalloc("slots", sz, 0);
> + if (p1 == NULL) {
> + printf("rte_zmalloc(%zu) returned NULL (i=%u)\n",
> sz, i);
> + goto fail;
> + }
> + slots[i] = p1;
> + if (!is_all_zero(p1, sz))
> + goto fail;
> + }
> +
> + /* Drop one chunk per iteration */
> + for (n = BINS; n > 0; n--) {
> + /* Swap in a new block into a slot */
> + for (i = 0; i < N; i++) {
> + unsigned int bin = rte_rand_max(n);
> +
> + sz = rte_rand_max(1024) + 1;
> + p1 = rte_zmalloc("swap", sz, 0);
> + if (!p1){
> + printf("rte_zmalloc(%zu) returned NULL
> (i=%u)\n", sz, i);
> + goto fail;
> + }
> +
> + if (!is_all_zero(p1, sz)) {
> + printf("rte_zmalloc didn't zero the
> allocated memory\n");
> + goto fail;
> + }
> +
> + rte_free(slots[bin]);
> + slots[bin] = p1;
> + }
> +
> + /* Drop last bin */
> + rte_free(slots[n]);
> + slots[n] = NULL;
> + }
> +
> + return 0;
> +fail:
> + for (i = 0; i < BINS; i++)
> + rte_free(slots[i]);
> +
> + return -1;
> +}
> +
> static int
> test_reordered_free_per_lcore(__rte_unused void *arg)
> {
> @@ -1020,6 +1091,21 @@ test_malloc(void)
> }
> else printf("test_realloc() passed\n");
>
> + /*----------------------------*/
> + RTE_LCORE_FOREACH_WORKER(lcore_id) {
> + rte_eal_remote_launch(test_zmalloc, NULL, lcore_id);
> + }
> +
> + RTE_LCORE_FOREACH_WORKER(lcore_id) {
> + if (rte_eal_wait_lcore(lcore_id) < 0)
> + ret = -1;
> + }
> + if (ret < 0){
> + printf("test_zmalloc() failed\n");
> + return ret;
> + }
> + else printf("test_zmalloc() passed\n");
> +
> /*----------------------------*/
> RTE_LCORE_FOREACH_WORKER(lcore_id) {
> rte_eal_remote_launch(test_align_overlap_per_lcore, NULL,
> lcore_id);
>
@@ -492,7 +492,7 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
* be contiguous in memory.
*/
static inline void
-join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
+join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2, bool update_inner)
{
struct malloc_elem *next = elem2->next;
elem1->size += elem2->size;
@@ -502,7 +502,7 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
elem1->heap->last = elem1;
elem1->next = next;
elem1->dirty |= elem2->dirty;
- if (elem1->pad) {
+ if (elem1->pad && update_inner) {
struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
inner->size = elem1->size - elem1->pad;
}
@@ -526,7 +526,7 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
/* remove from free list, join to this one */
malloc_elem_free_list_remove(elem->next);
- join_elem(elem, elem->next);
+ join_elem(elem, elem->next, false);
/* erase header, trailer and pad */
memset(erase, MALLOC_POISON, erase_len);
@@ -550,7 +550,7 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
malloc_elem_free_list_remove(elem->prev);
new_elem = elem->prev;
- join_elem(new_elem, elem);
+ join_elem(new_elem, elem, false);
/* erase header, trailer and pad */
memset(erase, MALLOC_POISON, erase_len);
@@ -683,7 +683,7 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size)
* join the two
*/
malloc_elem_free_list_remove(elem->next);
- join_elem(elem, elem->next);
+ join_elem(elem, elem->next, true);
if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
/* now we have a big block together. Lets cut it down a bit, by splitting */