[RFC] malloc: add malloc and free log function

Message ID 1585900482-5234-2-git-send-email-xuemingl@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series [RFC] malloc: add malloc and free log function |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Xueming Li April 3, 2020, 7:54 a.m. UTC
  This patch introduces new feature to track rte_malloc leakage by logging
malloc and free function.

Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
 app/test-pmd/cmdline.c                      |  61 ++++++++++-
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  15 +++
 lib/librte_eal/common/eal_memcfg.h          |  18 ++++
 lib/librte_eal/common/include/rte_malloc.h  |  30 +++++-
 lib/librte_eal/common/malloc_elem.h         |   4 +-
 lib/librte_eal/common/rte_malloc.c          | 154 +++++++++++++++++++++++++++-
 lib/librte_eal/rte_eal_version.map          |   2 +
 7 files changed, 280 insertions(+), 4 deletions(-)
  

Comments

Burakov, Anatoly April 3, 2020, 9:50 a.m. UTC | #1
On 03-Apr-20 8:54 AM, Xueming Li wrote:
> This patch introduces new feature to track rte_malloc leakage by logging
> malloc and free function.

Hi,

Thanks for the patch.

A general comment - i would avoid mixing testpmd code with adding a new 
API to malloc. I understand this is an RFC so it's OK for now, but by 
the time V1 comes i think it is better to add malloc API as a separate 
patch.

> 
> Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> ---
>   app/test-pmd/cmdline.c                      |  61 ++++++++++-
>   doc/guides/testpmd_app_ug/testpmd_funcs.rst |  15 +++
>   lib/librte_eal/common/eal_memcfg.h          |  18 ++++
>   lib/librte_eal/common/include/rte_malloc.h  |  30 +++++-
>   lib/librte_eal/common/malloc_elem.h         |   4 +-
>   lib/librte_eal/common/rte_malloc.c          | 154 +++++++++++++++++++++++++++-
>   lib/librte_eal/rte_eal_version.map          |   2 +
>   7 files changed, 280 insertions(+), 4 deletions(-)
> 
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> index a037a55..274e391 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -9554,6 +9554,8 @@ struct cmd_rm_mirror_rule_result {
>   
>   struct cmd_dump_result {
>   	cmdline_fixed_string_t dump;
> +	cmdline_fixed_string_t cmd;
> +	uint32_t val;
>   };
>   
>   static void
> @@ -9576,6 +9578,16 @@ static void cmd_dump_parsed(void *parsed_result,
>   		rte_dump_physmem_layout(stdout);
>   	else if (!strcmp(res->dump, "dump_memzone"))
>   		rte_memzone_dump(stdout);
> +	else if (!strcmp(res->dump, "dump_malloc")) {
> +		if (!strcmp(res->cmd, "start")) {
> +			if (rte_malloc_log_init(res->val))
> +				fprintf(stdout, "Failed to start logging\n");
> +		} else if (!strcmp(res->cmd, "dump")) {
> +			rte_malloc_log_dump(stdout, res->val);
> +		} else if (!strcmp(res->cmd, "stop")) {
> +			rte_malloc_log_init(0);

This looks odd and confusing. I think an explicit "stop" API is needed 
here, even though technically you could get away with just one API.

> +		}
> +	}
>   	else if (!strcmp(res->dump, "dump_struct_sizes"))
>   		dump_struct_sizes();
>   	else if (!strcmp(res->dump, "dump_ring"))
> @@ -9595,9 +9607,53 @@ static void cmd_dump_parsed(void *parsed_result,
>   		"dump_struct_sizes#"
>   		"dump_ring#"
>   		"dump_mempool#"
> +		"dump_malloc#"
>   		"dump_devargs#"
>   		"dump_log_types");
> -
> +cmdline_parse_token_string_t cmd_dump_malloc =
> +	TOKEN_STRING_INITIALIZER(struct cmd_dump_result, dump,
> +		"dump_malloc");
> +cmdline_parse_token_string_t cmd_dump_malloc_cmd_start =
> +	TOKEN_STRING_INITIALIZER(struct cmd_dump_result, cmd,
> +		"start");
> +cmdline_parse_token_string_t cmd_dump_malloc_cmd_dump =
> +	TOKEN_STRING_INITIALIZER(struct cmd_dump_result, cmd,
> +		"dump");
> +cmdline_parse_token_string_t cmd_dump_malloc_cmd_stop =
> +	TOKEN_STRING_INITIALIZER(struct cmd_dump_result, cmd,
> +		"stop");
> +cmdline_parse_token_num_t cmd_dump_val =
> +	TOKEN_NUM_INITIALIZER(struct cmd_dump_result, val, UINT32);
> +
> +cmdline_parse_inst_t cmd_dump_malloc_start = {
> +	.f = cmd_dump_parsed,  /* function to call */
> +	.help_str = "Start rte_malloc tracking log with <count> buffer entries",
> +	.tokens = {        /* token list, NULL terminated */
> +		(void *)&cmd_dump_malloc,
> +		(void *)&cmd_dump_malloc_cmd_start,
> +		(void *)&cmd_dump_val,
> +		NULL,
> +	},
> +};
> +cmdline_parse_inst_t cmd_dump_malloc_dump = {
> +	.f = cmd_dump_parsed,  /* function to call */
> +	.help_str = "Dump rte_malloc tracking log with <level>, 0:summary, 1:leaks, 2: all",
> +	.tokens = {        /* token list, NULL terminated */
> +		(void *)&cmd_dump_malloc,
> +		(void *)&cmd_dump_malloc_cmd_dump,
> +		(void *)&cmd_dump_val,
> +		NULL,
> +	},
> +};
> +cmdline_parse_inst_t cmd_dump_malloc_stop = {
> +	.f = cmd_dump_parsed,  /* function to call */
> +	.help_str = "Stop rte_malloc tracking log",
> +	.tokens = {        /* token list, NULL terminated */
> +		(void *)&cmd_dump_malloc,
> +		(void *)&cmd_dump_malloc_cmd_stop,
> +		NULL,
> +	},
> +};
>   cmdline_parse_inst_t cmd_dump = {
>   	.f = cmd_dump_parsed,  /* function to call */
>   	.data = NULL,      /* 2nd arg of func */
> @@ -19452,6 +19508,9 @@ struct cmd_showport_macs_result {
>   	(cmdline_parse_inst_t *)&cmd_showport_rss_hash_key,
>   	(cmdline_parse_inst_t *)&cmd_config_rss_hash_key,
>   	(cmdline_parse_inst_t *)&cmd_dump,
> +	(cmdline_parse_inst_t *)&cmd_dump_malloc_start,
> +	(cmdline_parse_inst_t *)&cmd_dump_malloc_dump,
> +	(cmdline_parse_inst_t *)&cmd_dump_malloc_stop,
>   	(cmdline_parse_inst_t *)&cmd_dump_one,
>   	(cmdline_parse_inst_t *)&cmd_ethertype_filter,
>   	(cmdline_parse_inst_t *)&cmd_syn_filter,
> diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> index 5bb12a5..1a9879f 100644
> --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> @@ -561,6 +561,21 @@ Dumps the statistics of all or specific memory pool::
>   
>      testpmd> dump_mempool [mempool_name]
>   
> +dump malloc
> +~~~~~~~~~~~~
> +
> +Start rte_malloc and rte_free tracking with number of buffers::
> +
> +   testpmd> dump_malloc start <count>
> +
> +Dump tracking result with different level, 0:summary, 1:leaks, 2: all::
> +
> +   testpmd> dump_malloc dump <level>
> +
> +Stop tracking::
> +
> +   testpmd> dump_malloc stop
> +
>   dump devargs
>   ~~~~~~~~~~~~
>   
> diff --git a/lib/librte_eal/common/eal_memcfg.h b/lib/librte_eal/common/eal_memcfg.h
> index 583fcb5..38055b4 100644
> --- a/lib/librte_eal/common/eal_memcfg.h
> +++ b/lib/librte_eal/common/eal_memcfg.h
> @@ -15,6 +15,20 @@
>   #include "malloc_heap.h"
>   
>   /**
> + * Memory allocation and free tracking log
> + */
> +struct rte_malloc_log {
> +	int socket;
> +	void *addr;
> +	size_t size; /* requested size */
> +	size_t real_size;

I'm not sure of the terminology here. If you're referring to outer 
element size and pad, then you should really use the same terms here as 
in the other malloc code. No need to introduce new ones.

> +	size_t align;
> +	int8_t free; /* allocate or free. */
> +	int8_t alloc_free; /* alloc and free both tracked. */

Why not just store a flag? E.g.

#define RTE_MALLOC_LOG_ALLOC (1 << 0)
#define RTE_MALLOC_LOG_FREE (1 << 1)
log->flags |= RTE_MALLOC_LOG_ALLOC;

Also, i don't see you handling rte_realloc() anywhere. While it could 
cause deallocation/new allocation, it can also resize existing allocated 
memory.

> +	char name[64]; /* name may come from stack, copy. */
> +};
> +
> +/**
>    * Memory configuration shared across multiple processes.
>    */
>   struct rte_mem_config {
> @@ -73,6 +87,10 @@ struct rte_mem_config {
>   	/**< TSC rate */
>   
>   	uint8_t dma_maskbits; /**< Keeps the more restricted dma mask. */
> +
> +	struct rte_malloc_log *malloc_logs; /**< Log entries. */
> +	int malloc_log_max; /**< Max number of logs to write. */
> +	int malloc_log_index; /**< Next log index to write to. */

I'm not 100% sure of this approach. I understand why you're doing it, 
but i have this nagging thought that there probably is a better way :D

>   };
>   
>   /* update internal config from shared mem config */
> diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
> index 42ca051..fb116fd 100644
> --- a/lib/librte_eal/common/include/rte_malloc.h
> +++ b/lib/librte_eal/common/include/rte_malloc.h
> @@ -508,7 +508,35 @@ struct rte_malloc_socket_stats {
>    *   to dump all objects.
>    */
>   void
> -rte_malloc_dump_stats(FILE *f, const char *type);
> +rte_malloc_dump_stats(FILE *out, const char *type);
> +
> +/**
> + * Initialize malloc tracking log buffer.
> + *
> + * @param count
> + *   Max count of tracking log entries, 0 to stop tracking
> + * @return
> + *   0 on success, negative errno otherwise
> + */
> +__rte_experimental
> +int
> +rte_malloc_log_init(int count);
> +
> +
> +/**
> + * Dump malloc tracking log to output.
> + *
> + * @param out
> + *   output file handle
> + * @param detail
> + *   detail level of output
> + *   0: summary
> + *   1: potential leaks - allocated without free
> + *  -1: all entries
> + */
> +__rte_experimental
> +void
> +rte_malloc_log_dump(FILE *out, int detail);
>   
>   /**
>    * Dump contents of all malloc heaps to a file.
> diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
> index a1e5f7f..6c6dba8 100644
> --- a/lib/librte_eal/common/malloc_elem.h
> +++ b/lib/librte_eal/common/malloc_elem.h
> @@ -27,7 +27,9 @@ struct malloc_elem {
>   	LIST_ENTRY(malloc_elem) free_list;
>   	/**< list of free elements in heap */
>   	struct rte_memseg_list *msl;
> -	volatile enum elem_state state;
> +	volatile uint32_t state:2;
> +	volatile uint32_t log:1;		/* Element logged. */
> +	volatile uint32_t log_index:29;		/* Element log index. */
>   	uint32_t pad;
>   	size_t size;
>   	struct malloc_elem *orig_elem;
> diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
> index d6026a2..9926518 100644
> --- a/lib/librte_eal/common/rte_malloc.c
> +++ b/lib/librte_eal/common/rte_malloc.c
> @@ -6,6 +6,7 @@
>   #include <stddef.h>
>   #include <stdio.h>
>   #include <string.h>
> +#include <assert.h>
>   #include <sys/queue.h>
>   
>   #include <rte_errno.h>
> @@ -29,10 +30,154 @@
>   #include "eal_private.h"
>   
>   
> +/*
> + * Track and log memory allocation information.
> + */
> +static void
> +rte_malloc_log(const char *type, size_t size, unsigned int align,
> +	       void *addr)
> +{
> +	struct rte_malloc_log *log;
> +	struct malloc_elem *elem = malloc_elem_from_data(addr);
> +	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
> +
> +	if (!addr || !elem || !mcfg->malloc_log_max ||
> +	    mcfg->malloc_log_max == mcfg->malloc_log_index)
> +		return;
> +
> +	log = &mcfg->malloc_logs[mcfg->malloc_log_index];
> +	if (type)
> +		strncpy(log->name, type, sizeof(log->name) - 1);
> +	log->size = size;
> +	log->real_size = elem->size;
> +	align = align ? align : 1;
> +	log->align = RTE_CACHE_LINE_ROUNDUP(align);
> +	log->addr = addr;
> +	log->socket = elem->heap->socket_id;
> +	elem->log = 1;
> +	elem->log_index = mcfg->malloc_log_index++;
> +}
> +
> +/*
> + * track and log memory free
> + */
> +static void
> +rte_free_log(void *addr)
> +{
> +	struct rte_malloc_log *alloc_log;
> +	struct rte_malloc_log *log;
> +	struct malloc_elem *elem = malloc_elem_from_data(addr);
> +	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
> +
> +	if (!addr || !elem || !mcfg->malloc_log_max ||
> +	    mcfg->malloc_log_max == mcfg->malloc_log_index)
> +		return;

This would still succeed when addr is invalid, because you don't do all 
of the checks that malloc_heap_free() does (you don't chec malloc 
cookies and ELEM_BUSY).

> +	log = &mcfg->malloc_logs[mcfg->malloc_log_index++];
> +	log->addr = addr;
> +	log->free = 1;
> +	log->real_size = elem->size;
> +	if (elem->log) {
> +		alloc_log = &mcfg->malloc_logs[elem->log_index];
> +		strncpy(log->name, alloc_log->name, sizeof(log->name));
> +		log->size = alloc_log->size;
> +		log->align = alloc_log->align;
> +		log->socket = alloc_log->socket;
> +		log->alloc_free = 1;
> +		alloc_log->alloc_free = 1;
> +	}
> +}
> +
> +/*
> + * Initialize malloc tracking with max count of log entries
> + */
> +int
> +rte_malloc_log_init(int count)
> +{
> +	struct malloc_elem *elem;
> +	int i;
> +	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
> +	struct rte_malloc_log *log;

Since your malloc elements can only hold 29 bit indices, you should 
check the count against that value.

> +
> +	if (mcfg->malloc_log_max) {
> +		/* release all logs. */
> +		for (i = 0; i < mcfg->malloc_log_index; i++) {
> +			log = &mcfg->malloc_logs[i];
> +			/* clear log info from malloc elem. */
> +			if (log->free || log->alloc_free)
> +				continue;
> +			elem = malloc_elem_from_data(log->addr);
> +			if (elem) {
> +				elem->log = 0;
> +				elem->log_index = 0;
> +			}
> +		}
> +		rte_free(mcfg->malloc_logs);
> +		mcfg->malloc_logs = NULL;
> +		mcfg->malloc_log_max = 0;
> +		mcfg->malloc_log_index = 0;
> +	}

I don't think immediately deallocating everything on a log_init call is 
a good idea. That's why you need stop() API :)

> +	if (count) {
> +		/* allocate logs. */
> +		mcfg->malloc_logs = rte_calloc("malloc_logs", count,
> +					       sizeof(*log), 0);
> +		if (!mcfg->malloc_logs)
> +			return -ENOMEM;
> +		mcfg->malloc_log_max = count;
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Dump malloc tracking
> + */
> +void
> +rte_malloc_log_dump(FILE *out, int detail)
> +{
> +	struct rte_malloc_log *log;
> +	int i;
> +	int n_alloc_free = 0;
> +	int n_alloc = 0;
> +	int n_free_alloc = 0;
> +	int n_free = 0;
> +	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
> +
> +	if (!mcfg->malloc_log_max || !mcfg->malloc_logs) {
> +		fprintf(out, "warning: malloc tracking log not enabled\n");
> +		return;
> +	}
> +	if (detail)
> +		fprintf(out, "Socket Action Paired            Address    Size Align Mgmt   Total Type\n");
> +	for (i = 0; i < mcfg->malloc_log_index; i++) {
> +		log = &mcfg->malloc_logs[i];
> +		if (detail == 2 ||
> +		    (detail == 1 && !log->alloc_free && !log->free))
> +			fprintf(out, "%6d %6s %6s %18p %7lu %5lu %4d %7lu %s\n",
> +				log->socket,
> +				log->free ? "free" : "alloc",
> +				log->alloc_free ? "y" : "n",
> +				log->addr, log->size, log->align,
> +				MALLOC_ELEM_OVERHEAD,
> +				log->real_size,
> +				log->name);
> +		if (log->free) {
> +			n_free++;
> +			if (log->alloc_free)
> +				n_free_alloc++;
> +		} else {
> +			n_alloc++;
> +			if (log->alloc_free)
> +				n_alloc_free++;
> +		}
> +	}
> +	fprintf(out, "Total rte_malloc: %d/%d, rte_free:%d/%d\n",
> +		n_alloc_free, n_alloc, n_free_alloc, n_free);
> +}
> +
>   /* Free the memory space back to heap */
>   void rte_free(void *addr)
>   {
>   	if (addr == NULL) return;
> +	rte_free_log(addr);
>   	if (malloc_heap_free(malloc_elem_from_data(addr)) < 0)
>   		RTE_LOG(ERR, EAL, "Error: Invalid memory\n");
>   }
> @@ -44,6 +189,8 @@ void rte_free(void *addr)
>   rte_malloc_socket(const char *type, size_t size, unsigned int align,
>   		int socket_arg)
>   {
> +	void *ret;
> +
>   	/* return NULL if size is 0 or alignment is not power-of-2 */
>   	if (size == 0 || (align && !rte_is_power_of_2(align)))
>   		return NULL;
> @@ -57,8 +204,13 @@ void rte_free(void *addr)
>   				!rte_eal_has_hugepages())
>   		socket_arg = SOCKET_ID_ANY;
>   
> -	return malloc_heap_alloc(type, size, socket_arg, 0,
> +	ret = malloc_heap_alloc(type, size, socket_arg, 0,
>   			align == 0 ? 1 : align, 0, false);
> +
> +	if (ret)
> +		rte_malloc_log(type, size, align == 0 ? 1 : align, ret);
> +
> +	return ret;
>   }
>   
>   /*
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index f9ede5b..3db892d 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -338,4 +338,6 @@ EXPERIMENTAL {
>   
>   	# added in 20.05
>   	rte_log_can_log;
> +    rte_malloc_log_init;
> +    rte_malloc_log_dump;
>   };
>
  

Patch

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index a037a55..274e391 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -9554,6 +9554,8 @@  struct cmd_rm_mirror_rule_result {
 
 struct cmd_dump_result {
 	cmdline_fixed_string_t dump;
+	cmdline_fixed_string_t cmd;
+	uint32_t val;
 };
 
 static void
@@ -9576,6 +9578,16 @@  static void cmd_dump_parsed(void *parsed_result,
 		rte_dump_physmem_layout(stdout);
 	else if (!strcmp(res->dump, "dump_memzone"))
 		rte_memzone_dump(stdout);
+	else if (!strcmp(res->dump, "dump_malloc")) {
+		if (!strcmp(res->cmd, "start")) {
+			if (rte_malloc_log_init(res->val))
+				fprintf(stdout, "Failed to start logging\n");
+		} else if (!strcmp(res->cmd, "dump")) {
+			rte_malloc_log_dump(stdout, res->val);
+		} else if (!strcmp(res->cmd, "stop")) {
+			rte_malloc_log_init(0);
+		}
+	}
 	else if (!strcmp(res->dump, "dump_struct_sizes"))
 		dump_struct_sizes();
 	else if (!strcmp(res->dump, "dump_ring"))
@@ -9595,9 +9607,53 @@  static void cmd_dump_parsed(void *parsed_result,
 		"dump_struct_sizes#"
 		"dump_ring#"
 		"dump_mempool#"
+		"dump_malloc#"
 		"dump_devargs#"
 		"dump_log_types");
-
+cmdline_parse_token_string_t cmd_dump_malloc =
+	TOKEN_STRING_INITIALIZER(struct cmd_dump_result, dump,
+		"dump_malloc");
+cmdline_parse_token_string_t cmd_dump_malloc_cmd_start =
+	TOKEN_STRING_INITIALIZER(struct cmd_dump_result, cmd,
+		"start");
+cmdline_parse_token_string_t cmd_dump_malloc_cmd_dump =
+	TOKEN_STRING_INITIALIZER(struct cmd_dump_result, cmd,
+		"dump");
+cmdline_parse_token_string_t cmd_dump_malloc_cmd_stop =
+	TOKEN_STRING_INITIALIZER(struct cmd_dump_result, cmd,
+		"stop");
+cmdline_parse_token_num_t cmd_dump_val =
+	TOKEN_NUM_INITIALIZER(struct cmd_dump_result, val, UINT32);
+
+cmdline_parse_inst_t cmd_dump_malloc_start = {
+	.f = cmd_dump_parsed,  /* function to call */
+	.help_str = "Start rte_malloc tracking log with <count> buffer entries",
+	.tokens = {        /* token list, NULL terminated */
+		(void *)&cmd_dump_malloc,
+		(void *)&cmd_dump_malloc_cmd_start,
+		(void *)&cmd_dump_val,
+		NULL,
+	},
+};
+cmdline_parse_inst_t cmd_dump_malloc_dump = {
+	.f = cmd_dump_parsed,  /* function to call */
+	.help_str = "Dump rte_malloc tracking log with <level>, 0:summary, 1:leaks, 2: all",
+	.tokens = {        /* token list, NULL terminated */
+		(void *)&cmd_dump_malloc,
+		(void *)&cmd_dump_malloc_cmd_dump,
+		(void *)&cmd_dump_val,
+		NULL,
+	},
+};
+cmdline_parse_inst_t cmd_dump_malloc_stop = {
+	.f = cmd_dump_parsed,  /* function to call */
+	.help_str = "Stop rte_malloc tracking log",
+	.tokens = {        /* token list, NULL terminated */
+		(void *)&cmd_dump_malloc,
+		(void *)&cmd_dump_malloc_cmd_stop,
+		NULL,
+	},
+};
 cmdline_parse_inst_t cmd_dump = {
 	.f = cmd_dump_parsed,  /* function to call */
 	.data = NULL,      /* 2nd arg of func */
@@ -19452,6 +19508,9 @@  struct cmd_showport_macs_result {
 	(cmdline_parse_inst_t *)&cmd_showport_rss_hash_key,
 	(cmdline_parse_inst_t *)&cmd_config_rss_hash_key,
 	(cmdline_parse_inst_t *)&cmd_dump,
+	(cmdline_parse_inst_t *)&cmd_dump_malloc_start,
+	(cmdline_parse_inst_t *)&cmd_dump_malloc_dump,
+	(cmdline_parse_inst_t *)&cmd_dump_malloc_stop,
 	(cmdline_parse_inst_t *)&cmd_dump_one,
 	(cmdline_parse_inst_t *)&cmd_ethertype_filter,
 	(cmdline_parse_inst_t *)&cmd_syn_filter,
diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index 5bb12a5..1a9879f 100644
--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
+++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
@@ -561,6 +561,21 @@  Dumps the statistics of all or specific memory pool::
 
    testpmd> dump_mempool [mempool_name]
 
+dump malloc
+~~~~~~~~~~~~
+
+Start rte_malloc and rte_free tracking with number of buffers::
+
+   testpmd> dump_malloc start <count>
+
+Dump tracking result with different level, 0:summary, 1:leaks, 2: all::
+
+   testpmd> dump_malloc dump <level>
+
+Stop tracking::
+
+   testpmd> dump_malloc stop
+
 dump devargs
 ~~~~~~~~~~~~
 
diff --git a/lib/librte_eal/common/eal_memcfg.h b/lib/librte_eal/common/eal_memcfg.h
index 583fcb5..38055b4 100644
--- a/lib/librte_eal/common/eal_memcfg.h
+++ b/lib/librte_eal/common/eal_memcfg.h
@@ -15,6 +15,20 @@ 
 #include "malloc_heap.h"
 
 /**
+ * Memory allocation and free tracking log
+ */
+struct rte_malloc_log {
+	int socket;
+	void *addr;
+	size_t size; /* requested size */
+	size_t real_size;
+	size_t align;
+	int8_t free; /* allocate or free. */
+	int8_t alloc_free; /* alloc and free both tracked. */
+	char name[64]; /* name may come from stack, copy. */
+};
+
+/**
  * Memory configuration shared across multiple processes.
  */
 struct rte_mem_config {
@@ -73,6 +87,10 @@  struct rte_mem_config {
 	/**< TSC rate */
 
 	uint8_t dma_maskbits; /**< Keeps the more restricted dma mask. */
+
+	struct rte_malloc_log *malloc_logs; /**< Log entries. */
+	int malloc_log_max; /**< Max number of logs to write. */
+	int malloc_log_index; /**< Next log index to write to. */
 };
 
 /* update internal config from shared mem config */
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
index 42ca051..fb116fd 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -508,7 +508,35 @@  struct rte_malloc_socket_stats {
  *   to dump all objects.
  */
 void
-rte_malloc_dump_stats(FILE *f, const char *type);
+rte_malloc_dump_stats(FILE *out, const char *type);
+
+/**
+ * Initialize malloc tracking log buffer.
+ *
+ * @param count
+ *   Max count of tracking log entries, 0 to stop tracking
+ * @return
+ *   0 on success, negative errno otherwise
+ */
+__rte_experimental
+int
+rte_malloc_log_init(int count);
+
+
+/**
+ * Dump malloc tracking log to output.
+ *
+ * @param out
+ *   output file handle
+ * @param detail
+ *   detail level of output
+ *   0: summary
+ *   1: potential leaks - allocated without free
+ *  -1: all entries
+ */
+__rte_experimental
+void
+rte_malloc_log_dump(FILE *out, int detail);
 
 /**
  * Dump contents of all malloc heaps to a file.
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index a1e5f7f..6c6dba8 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -27,7 +27,9 @@  struct malloc_elem {
 	LIST_ENTRY(malloc_elem) free_list;
 	/**< list of free elements in heap */
 	struct rte_memseg_list *msl;
-	volatile enum elem_state state;
+	volatile uint32_t state:2;
+	volatile uint32_t log:1;		/* Element logged. */
+	volatile uint32_t log_index:29;		/* Element log index. */
 	uint32_t pad;
 	size_t size;
 	struct malloc_elem *orig_elem;
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index d6026a2..9926518 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -6,6 +6,7 @@ 
 #include <stddef.h>
 #include <stdio.h>
 #include <string.h>
+#include <assert.h>
 #include <sys/queue.h>
 
 #include <rte_errno.h>
@@ -29,10 +30,154 @@ 
 #include "eal_private.h"
 
 
+/*
+ * Track and log memory allocation information.
+ */
+static void
+rte_malloc_log(const char *type, size_t size, unsigned int align,
+	       void *addr)
+{
+	struct rte_malloc_log *log;
+	struct malloc_elem *elem = malloc_elem_from_data(addr);
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+	if (!addr || !elem || !mcfg->malloc_log_max ||
+	    mcfg->malloc_log_max == mcfg->malloc_log_index)
+		return;
+
+	log = &mcfg->malloc_logs[mcfg->malloc_log_index];
+	if (type)
+		strncpy(log->name, type, sizeof(log->name) - 1);
+	log->size = size;
+	log->real_size = elem->size;
+	align = align ? align : 1;
+	log->align = RTE_CACHE_LINE_ROUNDUP(align);
+	log->addr = addr;
+	log->socket = elem->heap->socket_id;
+	elem->log = 1;
+	elem->log_index = mcfg->malloc_log_index++;
+}
+
+/*
+ * track and log memory free
+ */
+static void
+rte_free_log(void *addr)
+{
+	struct rte_malloc_log *alloc_log;
+	struct rte_malloc_log *log;
+	struct malloc_elem *elem = malloc_elem_from_data(addr);
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+	if (!addr || !elem || !mcfg->malloc_log_max ||
+	    mcfg->malloc_log_max == mcfg->malloc_log_index)
+		return;
+	log = &mcfg->malloc_logs[mcfg->malloc_log_index++];
+	log->addr = addr;
+	log->free = 1;
+	log->real_size = elem->size;
+	if (elem->log) {
+		alloc_log = &mcfg->malloc_logs[elem->log_index];
+		strncpy(log->name, alloc_log->name, sizeof(log->name));
+		log->size = alloc_log->size;
+		log->align = alloc_log->align;
+		log->socket = alloc_log->socket;
+		log->alloc_free = 1;
+		alloc_log->alloc_free = 1;
+	}
+}
+
+/*
+ * Initialize malloc tracking with max count of log entries
+ */
+int
+rte_malloc_log_init(int count)
+{
+	struct malloc_elem *elem;
+	int i;
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_malloc_log *log;
+
+	if (mcfg->malloc_log_max) {
+		/* release all logs. */
+		for (i = 0; i < mcfg->malloc_log_index; i++) {
+			log = &mcfg->malloc_logs[i];
+			/* clear log info from malloc elem. */
+			if (log->free || log->alloc_free)
+				continue;
+			elem = malloc_elem_from_data(log->addr);
+			if (elem) {
+				elem->log = 0;
+				elem->log_index = 0;
+			}
+		}
+		rte_free(mcfg->malloc_logs);
+		mcfg->malloc_logs = NULL;
+		mcfg->malloc_log_max = 0;
+		mcfg->malloc_log_index = 0;
+	}
+	if (count) {
+		/* allocate logs. */
+		mcfg->malloc_logs = rte_calloc("malloc_logs", count,
+					       sizeof(*log), 0);
+		if (!mcfg->malloc_logs)
+			return -ENOMEM;
+		mcfg->malloc_log_max = count;
+	}
+	return 0;
+}
+
+/*
+ * Dump malloc tracking
+ */
+void
+rte_malloc_log_dump(FILE *out, int detail)
+{
+	struct rte_malloc_log *log;
+	int i;
+	int n_alloc_free = 0;
+	int n_alloc = 0;
+	int n_free_alloc = 0;
+	int n_free = 0;
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+	if (!mcfg->malloc_log_max || !mcfg->malloc_logs) {
+		fprintf(out, "warning: malloc tracking log not enabled\n");
+		return;
+	}
+	if (detail)
+		fprintf(out, "Socket Action Paired            Address    Size Align Mgmt   Total Type\n");
+	for (i = 0; i < mcfg->malloc_log_index; i++) {
+		log = &mcfg->malloc_logs[i];
+		if (detail == 2 ||
+		    (detail == 1 && !log->alloc_free && !log->free))
+			fprintf(out, "%6d %6s %6s %18p %7lu %5lu %4d %7lu %s\n",
+				log->socket,
+				log->free ? "free" : "alloc",
+				log->alloc_free ? "y" : "n",
+				log->addr, log->size, log->align,
+				MALLOC_ELEM_OVERHEAD,
+				log->real_size,
+				log->name);
+		if (log->free) {
+			n_free++;
+			if (log->alloc_free)
+				n_free_alloc++;
+		} else {
+			n_alloc++;
+			if (log->alloc_free)
+				n_alloc_free++;
+		}
+	}
+	fprintf(out, "Total rte_malloc: %d/%d, rte_free:%d/%d\n",
+		n_alloc_free, n_alloc, n_free_alloc, n_free);
+}
+
 /* Free the memory space back to heap */
 void rte_free(void *addr)
 {
 	if (addr == NULL) return;
+	rte_free_log(addr);
 	if (malloc_heap_free(malloc_elem_from_data(addr)) < 0)
 		RTE_LOG(ERR, EAL, "Error: Invalid memory\n");
 }
@@ -44,6 +189,8 @@  void rte_free(void *addr)
 rte_malloc_socket(const char *type, size_t size, unsigned int align,
 		int socket_arg)
 {
+	void *ret;
+
 	/* return NULL if size is 0 or alignment is not power-of-2 */
 	if (size == 0 || (align && !rte_is_power_of_2(align)))
 		return NULL;
@@ -57,8 +204,13 @@  void rte_free(void *addr)
 				!rte_eal_has_hugepages())
 		socket_arg = SOCKET_ID_ANY;
 
-	return malloc_heap_alloc(type, size, socket_arg, 0,
+	ret = malloc_heap_alloc(type, size, socket_arg, 0,
 			align == 0 ? 1 : align, 0, false);
+
+	if (ret)
+		rte_malloc_log(type, size, align == 0 ? 1 : align, ret);
+
+	return ret;
 }
 
 /*
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index f9ede5b..3db892d 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -338,4 +338,6 @@  EXPERIMENTAL {
 
 	# added in 20.05
 	rte_log_can_log;
+    rte_malloc_log_init;
+    rte_malloc_log_dump;
 };