[v2] timer: fix resource leak in finalize
Checks
Commit Message
The finalize function should free the memzone created in the init
function, rather than freeing the allocation the memzone references,
otherwise a memzone descriptor can be leaked.
Fixes: c0749f7096c7 ("timer: allow management in shared memory")
Signed-off-by: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>
---
changes in v2:
- Handle scenario where primary process exits before secondaries such
that memzone is not freed early (Anatoly)
lib/librte_timer/rte_timer.c | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
Comments
On 03-May-19 11:54 PM, Erik Gabriel Carrillo wrote:
> The finalize function should free the memzone created in the init
> function, rather than freeing the allocation the memzone references,
> otherwise a memzone descriptor can be leaked.
>
> Fixes: c0749f7096c7 ("timer: allow management in shared memory")
>
> Signed-off-by: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>
> ---
> changes in v2:
> - Handle scenario where primary process exits before secondaries such
> that memzone is not freed early (Anatoly)
>
> lib/librte_timer/rte_timer.c | 20 +++++++++++++++++---
> 1 file changed, 17 insertions(+), 3 deletions(-)
>
> diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
> index eb46009..4771287 100644
> --- a/lib/librte_timer/rte_timer.c
> +++ b/lib/librte_timer/rte_timer.c
> @@ -60,6 +60,8 @@ struct rte_timer_data {
> };
>
> #define RTE_MAX_DATA_ELS 64
> +static const struct rte_memzone *rte_timer_data_mz;
> +static rte_atomic16_t *rte_timer_mz_refcnt;
> static struct rte_timer_data *rte_timer_data_arr;
> static const uint32_t default_data_id;
> static uint32_t rte_timer_subsystem_initialized;
> @@ -155,6 +157,7 @@ rte_timer_subsystem_init_v1905(void)
> struct rte_timer_data *data;
> int i, lcore_id;
> static const char *mz_name = "rte_timer_mz";
> + size_t data_arr_size = RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
nitpicking, but... const?
>
> if (rte_timer_subsystem_initialized)
> return -EALREADY;
> @@ -164,10 +167,14 @@ rte_timer_subsystem_init_v1905(void)
> if (mz == NULL)
> return -EEXIST;
>
> + rte_timer_data_mz = mz;
> rte_timer_data_arr = mz->addr;
> + rte_timer_mz_refcnt =
> + (void *)((char *)mz->addr + data_arr_size);
>
> rte_timer_data_arr[default_data_id].internal_flags |=
> FL_ALLOCATED;
> + rte_atomic16_inc(rte_timer_mz_refcnt);
>
> rte_timer_subsystem_initialized = 1;
>
> @@ -175,12 +182,15 @@ rte_timer_subsystem_init_v1905(void)
> }
>
> mz = rte_memzone_reserve_aligned(mz_name,
> - RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr),
> + data_arr_size + sizeof(*rte_timer_mz_refcnt),
> SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
> if (mz == NULL)
> return -ENOMEM;
>
> + rte_timer_data_mz = mz;
> rte_timer_data_arr = mz->addr;
> + rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);
> + rte_atomic16_init(rte_timer_mz_refcnt);
>
> for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
> data = &rte_timer_data_arr[i];
> @@ -193,6 +203,7 @@ rte_timer_subsystem_init_v1905(void)
> }
>
> rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
> + rte_atomic16_inc(rte_timer_mz_refcnt);
>
> rte_timer_subsystem_initialized = 1;
>
> @@ -205,8 +216,11 @@ BIND_DEFAULT_SYMBOL(rte_timer_subsystem_init, _v1905, 19.05);
> void __rte_experimental
> rte_timer_subsystem_finalize(void)
> {
> - if (rte_timer_data_arr)
> - rte_free(rte_timer_data_arr);
> + if (!rte_timer_subsystem_initialized)
> + return;
> +
> + if (rte_atomic16_dec_and_test(rte_timer_mz_refcnt))
> + rte_memzone_free(rte_timer_data_mz);
I think there's a race here. You may get preempted after test but before
free, where another secondary could initialize. As far as i know, we
also support a case when secondary initializes after primary stops running.
Let's even suppose that we allow secondary processes to initialize the
timer subsystem by reserving memzone and checking rte_errno. You would
still have a chance of two init/deinit conflicting, because there's a
hole between memzone allocation and atomic increment.
I don't think this race can be resolved in a safe way, so we might just
have to settle for a memory leak.
>
> rte_timer_subsystem_initialized = 0;
> }
>
Hi Anatoly,
Thanks for the review. Comments in-line:
<...snipped...>
> > #define RTE_MAX_DATA_ELS 64
> > +static const struct rte_memzone *rte_timer_data_mz; static
> > +rte_atomic16_t *rte_timer_mz_refcnt;
> > static struct rte_timer_data *rte_timer_data_arr;
> > static const uint32_t default_data_id;
> > static uint32_t rte_timer_subsystem_initialized; @@ -155,6 +157,7 @@
> > rte_timer_subsystem_init_v1905(void)
> > struct rte_timer_data *data;
> > int i, lcore_id;
> > static const char *mz_name = "rte_timer_mz";
> > + size_t data_arr_size = RTE_MAX_DATA_ELS *
> > +sizeof(*rte_timer_data_arr);
>
> nitpicking, but... const?
>
No problem - I'll make this change if this line persists into the next version.
<...snipped...>
> >
> > @@ -205,8 +216,11 @@
> BIND_DEFAULT_SYMBOL(rte_timer_subsystem_init, _v1905, 19.05);
> > void __rte_experimental
> > rte_timer_subsystem_finalize(void)
> > {
> > - if (rte_timer_data_arr)
> > - rte_free(rte_timer_data_arr);
> > + if (!rte_timer_subsystem_initialized)
> > + return;
> > +
> > + if (rte_atomic16_dec_and_test(rte_timer_mz_refcnt))
> > + rte_memzone_free(rte_timer_data_mz);
>
> I think there's a race here. You may get preempted after test but before
> free, where another secondary could initialize. As far as i know, we also
Indeed, thanks for catching this.
> support a case when secondary initializes after primary stops running.
>
> Let's even suppose that we allow secondary processes to initialize the timer
> subsystem by reserving memzone and checking rte_errno. You would still
> have a chance of two init/deinit conflicting, because there's a hole between
> memzone allocation and atomic increment.
>
> I don't think this race can be resolved in a safe way, so we might just have to
> settle for a memory leak.
>
I don't see a solution here currently either. I'll look at removing the memzone_free()
call and possibly the rte_timer_subsystem_finalize() API, since it seems like
there's no reason for it to exist if it can't free the allocations.
Regards,
Erik
> >
> > rte_timer_subsystem_initialized = 0;
> > }
> >
>
>
> --
> Thanks,
> Anatoly
On 07-May-19 11:04 PM, Carrillo, Erik G wrote:
> Hi Anatoly,
>
> Thanks for the review. Comments in-line:
>
> <...snipped...>
>
>>> #define RTE_MAX_DATA_ELS 64
>>> +static const struct rte_memzone *rte_timer_data_mz; static
>>> +rte_atomic16_t *rte_timer_mz_refcnt;
>>> static struct rte_timer_data *rte_timer_data_arr;
>>> static const uint32_t default_data_id;
>>> static uint32_t rte_timer_subsystem_initialized; @@ -155,6 +157,7 @@
>>> rte_timer_subsystem_init_v1905(void)
>>> struct rte_timer_data *data;
>>> int i, lcore_id;
>>> static const char *mz_name = "rte_timer_mz";
>>> + size_t data_arr_size = RTE_MAX_DATA_ELS *
>>> +sizeof(*rte_timer_data_arr);
>>
>> nitpicking, but... const?
>>
>
> No problem - I'll make this change if this line persists into the next version.
>
> <...snipped...>
>
>>>
>>> @@ -205,8 +216,11 @@
>> BIND_DEFAULT_SYMBOL(rte_timer_subsystem_init, _v1905, 19.05);
>>> void __rte_experimental
>>> rte_timer_subsystem_finalize(void)
>>> {
>>> - if (rte_timer_data_arr)
>>> - rte_free(rte_timer_data_arr);
>>> + if (!rte_timer_subsystem_initialized)
>>> + return;
>>> +
>>> + if (rte_atomic16_dec_and_test(rte_timer_mz_refcnt))
>>> + rte_memzone_free(rte_timer_data_mz);
>>
>> I think there's a race here. You may get preempted after test but before
>> free, where another secondary could initialize. As far as i know, we also
>
> Indeed, thanks for catching this.
>
>> support a case when secondary initializes after primary stops running.
>>
>> Let's even suppose that we allow secondary processes to initialize the timer
>> subsystem by reserving memzone and checking rte_errno. You would still
>> have a chance of two init/deinit conflicting, because there's a hole between
>> memzone allocation and atomic increment.
>>
>> I don't think this race can be resolved in a safe way, so we might just have to
>> settle for a memory leak.
>>
>
> I don't see a solution here currently either. I'll look at removing the memzone_free()
> call and possibly the rte_timer_subsystem_finalize() API, since it seems like
> there's no reason for it to exist if it can't free the allocations.
I wonder if there are other places in DPDK where this pattern is used.
Technically, this kind of thing /could/ be resolved by having something
in our multiprocess shared memory outside of DPDK heap. I.e. store
something in rte_eal_memconfig like some other things do. This change,
however, would require an ABI break, so while changing this particular
API won't need a deprecation notice, the change itself would.
>
> Regards,
> Erik
>
>>>
>>> rte_timer_subsystem_initialized = 0;
>>> }
>>>
>>
>>
>> --
>> Thanks,
>> Anatoly
> -----Original Message-----
> From: Burakov, Anatoly
> Sent: Wednesday, May 8, 2019 3:50 AM
> To: Carrillo, Erik G <erik.g.carrillo@intel.com>; rsanford@akamai.com;
> thomas@monjalon.net
> Cc: dev@dpdk.org
> Subject: Re: [PATCH v2] timer: fix resource leak in finalize
>
> On 07-May-19 11:04 PM, Carrillo, Erik G wrote:
> > Hi Anatoly,
> >
> > Thanks for the review. Comments in-line:
> >
> > <...snipped...>
> >
> >>> #define RTE_MAX_DATA_ELS 64
> >>> +static const struct rte_memzone *rte_timer_data_mz; static
> >>> +rte_atomic16_t *rte_timer_mz_refcnt;
> >>> static struct rte_timer_data *rte_timer_data_arr;
> >>> static const uint32_t default_data_id;
> >>> static uint32_t rte_timer_subsystem_initialized; @@ -155,6 +157,7
> >>> @@
> >>> rte_timer_subsystem_init_v1905(void)
> >>> struct rte_timer_data *data;
> >>> int i, lcore_id;
> >>> static const char *mz_name = "rte_timer_mz";
> >>> + size_t data_arr_size = RTE_MAX_DATA_ELS *
> >>> +sizeof(*rte_timer_data_arr);
> >>
> >> nitpicking, but... const?
> >>
> >
> > No problem - I'll make this change if this line persists into the next version.
> >
> > <...snipped...>
> >
> >>>
> >>> @@ -205,8 +216,11 @@
> >> BIND_DEFAULT_SYMBOL(rte_timer_subsystem_init, _v1905, 19.05);
> >>> void __rte_experimental
> >>> rte_timer_subsystem_finalize(void)
> >>> {
> >>> - if (rte_timer_data_arr)
> >>> - rte_free(rte_timer_data_arr);
> >>> + if (!rte_timer_subsystem_initialized)
> >>> + return;
> >>> +
> >>> + if (rte_atomic16_dec_and_test(rte_timer_mz_refcnt))
> >>> + rte_memzone_free(rte_timer_data_mz);
> >>
> >> I think there's a race here. You may get preempted after test but
> >> before free, where another secondary could initialize. As far as i
> >> know, we also
> >
> > Indeed, thanks for catching this.
> >
> >> support a case when secondary initializes after primary stops running.
> >>
> >> Let's even suppose that we allow secondary processes to initialize
> >> the timer subsystem by reserving memzone and checking rte_errno. You
> >> would still have a chance of two init/deinit conflicting, because
> >> there's a hole between memzone allocation and atomic increment.
> >>
> >> I don't think this race can be resolved in a safe way, so we might
> >> just have to settle for a memory leak.
> >>
> >
> > I don't see a solution here currently either. I'll look at removing
> > the memzone_free() call and possibly the
> > rte_timer_subsystem_finalize() API, since it seems like there's no reason
> for it to exist if it can't free the allocations.
>
> I wonder if there are other places in DPDK where this pattern is used.
>
> Technically, this kind of thing /could/ be resolved by having something in our
> multiprocess shared memory outside of DPDK heap. I.e. store something in
> rte_eal_memconfig like some other things do. This change, however, would
> require an ABI break, so while changing this particular API won't need a
> deprecation notice, the change itself would.
I like this idea; thanks for the suggestion. I went ahead and submitted a new version
of this patch with this approach. It's dependent on one other new patch that allows
secondary processes to reserve the memzone. I also submitted a deprecation
notice for the ABI break for the change to rte_mem_config.
Thomas, I suspect that I should mark v3 of this patch as "deferred", so I've gone ahead and
done that, but let me know if that's wrong.
Thanks,
Erik
>
> >
> > Regards,
> > Erik
> >
> >>>
> >>> rte_timer_subsystem_initialized = 0;
> >>> }
> >>>
> >>
> >>
> >> --
> >> Thanks,
> >> Anatoly
>
>
> --
> Thanks,
> Anatoly
09/05/2019 01:01, Carrillo, Erik G:
> I like this idea; thanks for the suggestion. I went ahead and submitted a new version
> of this patch with this approach. It's dependent on one other new patch that allows
> secondary processes to reserve the memzone. I also submitted a deprecation
> notice for the ABI break for the change to rte_mem_config.
>
> Thomas, I suspect that I should mark v3 of this patch as "deferred", so I've gone ahead and
> done that, but let me know if that's wrong.
Any patch targetting 19.08 should be marked as deferred, thanks.
@@ -60,6 +60,8 @@ struct rte_timer_data {
};
#define RTE_MAX_DATA_ELS 64
+static const struct rte_memzone *rte_timer_data_mz;
+static rte_atomic16_t *rte_timer_mz_refcnt;
static struct rte_timer_data *rte_timer_data_arr;
static const uint32_t default_data_id;
static uint32_t rte_timer_subsystem_initialized;
@@ -155,6 +157,7 @@ rte_timer_subsystem_init_v1905(void)
struct rte_timer_data *data;
int i, lcore_id;
static const char *mz_name = "rte_timer_mz";
+ size_t data_arr_size = RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
if (rte_timer_subsystem_initialized)
return -EALREADY;
@@ -164,10 +167,14 @@ rte_timer_subsystem_init_v1905(void)
if (mz == NULL)
return -EEXIST;
+ rte_timer_data_mz = mz;
rte_timer_data_arr = mz->addr;
+ rte_timer_mz_refcnt =
+ (void *)((char *)mz->addr + data_arr_size);
rte_timer_data_arr[default_data_id].internal_flags |=
FL_ALLOCATED;
+ rte_atomic16_inc(rte_timer_mz_refcnt);
rte_timer_subsystem_initialized = 1;
@@ -175,12 +182,15 @@ rte_timer_subsystem_init_v1905(void)
}
mz = rte_memzone_reserve_aligned(mz_name,
- RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr),
+ data_arr_size + sizeof(*rte_timer_mz_refcnt),
SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
if (mz == NULL)
return -ENOMEM;
+ rte_timer_data_mz = mz;
rte_timer_data_arr = mz->addr;
+ rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);
+ rte_atomic16_init(rte_timer_mz_refcnt);
for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
data = &rte_timer_data_arr[i];
@@ -193,6 +203,7 @@ rte_timer_subsystem_init_v1905(void)
}
rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
+ rte_atomic16_inc(rte_timer_mz_refcnt);
rte_timer_subsystem_initialized = 1;
@@ -205,8 +216,11 @@ BIND_DEFAULT_SYMBOL(rte_timer_subsystem_init, _v1905, 19.05);
void __rte_experimental
rte_timer_subsystem_finalize(void)
{
- if (rte_timer_data_arr)
- rte_free(rte_timer_data_arr);
+ if (!rte_timer_subsystem_initialized)
+ return;
+
+ if (rte_atomic16_dec_and_test(rte_timer_mz_refcnt))
+ rte_memzone_free(rte_timer_data_mz);
rte_timer_subsystem_initialized = 0;
}