diff mbox series

mempool: fix rte primary program coredump

Message ID 1636559839-6553-1-git-send-email-laitianli@tom.com (mailing list archive)
State New
Delegated to: Thomas Monjalon
Headers show
Series mempool: fix rte primary program coredump | expand

Checks

Context Check Description
ci/github-robot: build fail github build: failed
ci/iol-testing warning apply patch failure
ci/Intel-compilation warning apply issues
ci/checkpatch warning coding style issues

Commit Message

Tianli Lai Nov. 10, 2021, 3:57 p.m. UTC
When the primary program (such as the ofp app) is started first and a
secondary program (such as dpdk-pdump) is started afterwards, the primary
program receives SIGSEGV. The call stack is as follows:

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffee60e700 (LWP 112613)]
0x00007ffff5f2cc0b in bucket_stack_pop (stack=0xffff00010000) at
/ofp/dpdk/drivers/mempool/bucket/rte_mempool_bucket.c:95
95      if (stack->top == 0)
Missing separate debuginfos, use: debuginfo-install
glibc-2.17-196.el7.x86_64 libatomic-4.8.5-16.el7.x86_64
libconfig-1.4.9-5.el7.x86_64 libgcc-4.8.5-16.el7.x86_64
libpcap-1.5.3-12.el7.x86_64 numactl-libs-2.0.9-6.el7_2.x86_64
openssl-libs-1.0.2k-8.el7.x86_64 zlib-1.2.7-17.el7.x86_64
(gdb) bt
 #0  0x00007ffff5f2cc0b in bucket_stack_pop (stack=0xffff00010000) at /ofp/dpdk/drivers/mempool/bucket/rte_mempool_bucket.c:95
 #1  0x00007ffff5f2e5dc in bucket_dequeue_orphans (bd=0x2209e5fac0,obj_table=0x220b083710, n_orphans=251) at /ofp/dpdk/drivers/mempool/bucket/rte_mempool_bucket.c:190
 #2  0x00007ffff5f30192 in bucket_dequeue (mp=0x220b07d5c0,obj_table=0x220b083710, n=251) at /ofp/dpdk/drivers/mempool/bucket/rte_mempool_bucket.c:288
 #3  0x00007ffff5f47e18 in rte_mempool_ops_dequeue_bulk (mp=0x220b07d5c0,obj_table=0x220b083710, n=251) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mempool.h:739
 #4  0x00007ffff5f4819d in __mempool_generic_get (cache=0x220b083700, n=1, obj_table=0x7fffee5deb18, mp=0x220b07d5c0) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mempool.h:1443
 #5  rte_mempool_generic_get (cache=0x220b083700, n=1, obj_table=0x7fffee5deb18, mp=0x220b07d5c0) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mempool.h:1506
 #6  rte_mempool_get_bulk (n=1, obj_table=0x7fffee5deb18, mp=0x220b07d5c0) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mempool.h:1539
 #7  rte_mempool_get (obj_p=0x7fffee5deb18, mp=0x220b07d5c0) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mempool.h:1565
 #8  rte_mbuf_raw_alloc (mp=0x220b07d5c0) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mbuf.h:551
 #9  0x00007ffff5f483a4 in rte_pktmbuf_alloc (mp=0x220b07d5c0) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mbuf.h:804
 #10 0x00007ffff5f4c9d9 in pdump_pktmbuf_copy (m=0x220746ad80, mp=0x220b07d5c0) at /ofp/dpdk/lib/librte_pdump/rte_pdump.c:99
 #11 0x00007ffff5f4e42e in pdump_copy (pkts=0x7fffee5dfdf0, nb_pkts=1, user_params=0x7ffff76d7cc0 <rx_cbs>) at /ofp/dpdk/lib/librte_pdump/rte_pdump.c:151
 #12 0x00007ffff5f4eadd in pdump_rx (port=0, qidx=0, pkts=0x7fffee5dfdf0, nb_pkts=1, max_pkts=16, user_params=0x7ffff76d7cc0 <rx_cbs>) at /ofp/dpdk/lib/librte_pdump/rte_pdump.c:172
 #13 0x00007ffff5d0e9e8 in rte_eth_rx_burst (port_id=0, queue_id=0, rx_pkts=0x7fffee5dfdf0, nb_pkts=16) at /ofp/dpdk/x86_64-native-linuxapp-gcc/usr/local/include/dpdk/rte_ethdev.h:4396
 #14 0x00007ffff5d114c3 in recv_pkt_dpdk (pktio_entry=0x22005436c0, index=0, pkt_table=0x7fffee5dfdf0, num=16) at odp_packet_dpdk.c:1081
 #15 0x00007ffff5d2f931 in odp_pktin_recv (queue=...,packets=0x7fffee5dfdf0, num=16) at ../linux-generic/odp_packet_io.c:1896
 #16 0x000000000040a344 in rx_burst (pktin=...) at app_main.c:223
 #17 0x000000000040aca4 in run_server_single (arg=0x7fffffffe2b0) at app_main.c:417
 #18 0x00007ffff7bd6883 in run_thread (arg=0x7fffffffe3b8) at threads.c:67
 #19 0x00007ffff53c8e25 in start_thread () from /lib64/libpthread.so.0
 #20 0x00007ffff433e34d in clone () from /lib64/libc.so.6

The reason for the crash is:

The global array rte_mempool_ops_table.ops[] in the primary and the secondary program:
        primary name            secondary name
 [0]:   "bucket"                "ring_mp_mc"
 [1]:   "dpaa"                  "ring_sp_sc"
 [2]:   "dpaa2"                 "ring_mp_sc"
 [3]:   "octeontx_fpavf"        "ring_sp_mc"
 [4]:   "octeontx2_npa"         "octeontx2_npa"
 [5]:   "ring_mp_mc"            "bucket"
 [6]:   "ring_sp_sc"            "stack"
 [7]:   "ring_mp_sc"            "lf_stack"
 [8]:   "ring_sp_mc"            "dpaa"
 [9]:   "stack"                 "dpaa2"
 [10]:  "lf_stack"              "octeontx_fpavf"
 [11]:  NULL                    NULL

 The order of this array in the primary program differs from the order in
 the secondary program. So when the secondary program calls
 rte_pktmbuf_pool_create_by_ops() with the mempool ops name "ring_mp_mc",
 the primary program uses the "bucket" ops to allocate rte_mbufs from it.

 So sort this array in both the primary and the secondary program when the
 memzone subsystem is initialised.

Signed-off-by: Tianli Lai <laitianli@tom.com>
---
 lib/librte_eal/common/eal_common_memzone.c |  2 +-
 lib/librte_mempool/rte_mempool.h           |  6 ++++++
 lib/librte_mempool/rte_mempool_ops.c       | 31 ++++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 1 deletion(-)
 mode change 100644 => 100755 lib/librte_mempool/rte_mempool_ops.c

Comments

David Marchand Nov. 10, 2021, 4 p.m. UTC | #1
On Wed, Nov 10, 2021 at 4:57 PM Tianli Lai <laitianli@tom.com> wrote:
>
> the primary program(such as ofp app) run first, then run the secondary
> program(such as dpdk-pdump), the primary program would receive signal
> SIGSEGV. the function stack as follow:

Is OpenFastPath linked against the same dpdk binary as your dpdk-pdump tool?
Jerin Jacob Nov. 10, 2021, 5:15 p.m. UTC | #2
On Wed, Nov 10, 2021 at 9:38 PM Tianli Lai <laitianli@tom.com> wrote:
>
> the primary program(such as ofp app) run first, then run the secondary
> program(such as dpdk-pdump), the primary program would receive signal
> SIGSEGV. the function stack as follow:
>
> aived signal SIGSEGV, Segmentation fault.
> [Switching to Thread 0x7fffee60e700 (LWP 112613)]
> 0x00007ffff5f2cc0b in bucket_stack_pop (stack=0xffff00010000) at
> /ofp/dpdk/drivers/mempool/bucket/rte_mempool_bucket.c:95
> 95      if (stack->top == 0)
> Missing separate debuginfos, use: debuginfo-install
> glibc-2.17-196.el7.x86_64 libatomic-4.8.5-16.el7.x86_64
> libconfig-1.4.9-5.el7.x86_64 libgcc-4.8.5-16.el7.x86_64
> libpcap-1.5.3-12.el7.x86_64 numactl-libs-2.0.9-6.el7_2.x86_64
> openssl-libs-1.0.2k-8.el7.x86_64 zlib-1.2.7-17.el7.x86_64
> (gdb) bt
>  #0  0x00007ffff5f2cc0b in bucket_stack_pop (stack=0xffff00010000) at /ofp/dpdk/drivers/mempool/bucket/rte_mempool_bucket.c:95
>  #1  0x00007ffff5f2e5dc in bucket_dequeue_orphans (bd=0x2209e5fac0,obj_table=0x220b083710, n_orphans=251) at /ofp/dpdk/drivers/mempool/bucket/rte_mempool_bucket.c:190
>  #2  0x00007ffff5f30192 in bucket_dequeue (mp=0x220b07d5c0,obj_table=0x220b083710, n=251) at /ofp/dpdk/drivers/mempool/bucket/rte_mempool_bucket.c:288
>  #3  0x00007ffff5f47e18 in rte_mempool_ops_dequeue_bulk (mp=0x220b07d5c0,obj_table=0x220b083710, n=251) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mempool.h:739
>  #4  0x00007ffff5f4819d in __mempool_generic_get (cache=0x220b083700, n=1, obj_table=0x7fffee5deb18, mp=0x220b07d5c0) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mempool.h:1443
>  #5  rte_mempool_generic_get (cache=0x220b083700, n=1, obj_table=0x7fffee5deb18, mp=0x220b07d5c0) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mempool.h:1506
>  #6  rte_mempool_get_bulk (n=1, obj_table=0x7fffee5deb18, mp=0x220b07d5c0) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mempool.h:1539
>  #7  rte_mempool_get (obj_p=0x7fffee5deb18, mp=0x220b07d5c0) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mempool.h:1565
>  #8  rte_mbuf_raw_alloc (mp=0x220b07d5c0) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mbuf.h:551
>  #9  0x00007ffff5f483a4 in rte_pktmbuf_alloc (mp=0x220b07d5c0) at /ofp/dpdk/x86_64-native-linuxapp-gcc/include/rte_mbuf.h:804
>  #10 0x00007ffff5f4c9d9 in pdump_pktmbuf_copy (m=0x220746ad80, mp=0x220b07d5c0) at /ofp/dpdk/lib/librte_pdump/rte_pdump.c:99
>  #11 0x00007ffff5f4e42e in pdump_copy (pkts=0x7fffee5dfdf0, nb_pkts=1, user_params=0x7ffff76d7cc0 <rx_cbs>) at /ofp/dpdk/lib/librte_pdump/rte_pdump.c:151
>  #12 0x00007ffff5f4eadd in pdump_rx (port=0, qidx=0, pkts=0x7fffee5dfdf0, nb_pkts=1, max_pkts=16, user_params=0x7ffff76d7cc0 <rx_cbs>) at /ofp/dpdk/lib/librte_pdump/rte_pdump.c:172
>  #13 0x00007ffff5d0e9e8 in rte_eth_rx_burst (port_id=0, queue_id=0, rx_pkts=0x7fffee5dfdf0, nb_pkts=16) at /ofp/dpdk/x86_64-native-linuxapp-gcc/usr/local/include/dpdk/rte_ethdev.h:4396
>  #14 0x00007ffff5d114c3 in recv_pkt_dpdk (pktio_entry=0x22005436c0, index=0, pkt_table=0x7fffee5dfdf0, num=16) at odp_packet_dpdk.c:1081
>  #15 0x00007ffff5d2f931 in odp_pktin_recv (queue=...,packets=0x7fffee5dfdf0, num=16) at ../linux-generic/odp_packet_io.c:1896
>  #16 0x000000000040a344 in rx_burst (pktin=...) at app_main.c:223
>  #17 0x000000000040aca4 in run_server_single (arg=0x7fffffffe2b0) at app_main.c:417
>  #18 0x00007ffff7bd6883 in run_thread (arg=0x7fffffffe3b8) at threads.c:67
>  #19 0x00007ffff53c8e25 in start_thread () from /lib64/libpthread.so.0
>  #20 0x00007ffff433e34d in clone () from /lib64/libc.so.6.c:67
>
> The program crash down reason is:
>
> In primary program and secondary program , the global array rte_mempool_ops.ops[]:
>         primary name            secondary name
>  [0]:   "bucket"                "ring_mp_mc"
>  [1]:   "dpaa"                  "ring_sp_sc"
>  [2]:   "dpaa2"                 "ring_mp_sc"
>  [3]:   "octeontx_fpavf"        "ring_sp_mc"
>  [4]:   "octeontx2_npa"         "octeontx2_npa"
>  [5]:   "ring_mp_mc"            "bucket"
>  [6]:   "ring_sp_sc"            "stack"
>  [7]:   "ring_mp_sc"            "if_stack"
>  [8]:   "ring_sp_mc"            "dpaa"
>  [9]:   "stack"                 "dpaa2"
>  [10]:  "if_stack"              "octeontx_fpavf"
>  [11]:  NULL                    NULL
>
>  this array in primary program is different with secondary program.
>  so when secondary program call rte_pktmbuf_pool_create_by_ops() with
>  mempool name “ring_mp_mc”, but the primary program use "bucket" type
>  to alloc rte_mbuf.
>
>  so sort this array both primary program and secondary program when init
>  memzone.
>
> Signed-off-by: Tianli Lai <laitianli@tom.com>
> ---
>  lib/librte_eal/common/eal_common_memzone.c |  2 +-
>  lib/librte_mempool/rte_mempool.h           |  6 ++++++
>  lib/librte_mempool/rte_mempool_ops.c       | 31 ++++++++++++++++++++++++++++++
>  3 files changed, 38 insertions(+), 1 deletion(-)
>  mode change 100644 => 100755 lib/librte_mempool/rte_mempool_ops.c
>
> diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
> index 99b8d65..b59f3f5 100644
> --- a/lib/librte_eal/common/eal_common_memzone.c
> +++ b/lib/librte_eal/common/eal_common_memzone.c
> @@ -384,7 +384,7 @@
>         }
>
>         rte_rwlock_write_unlock(&mcfg->mlock);
> -
> +       rte_sort_mempool_ops();
>         return ret;
>  }
>
> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
> index f81152a..a22850b 100644
> --- a/lib/librte_mempool/rte_mempool.h
> +++ b/lib/librte_mempool/rte_mempool.h
> @@ -910,6 +910,12 @@ int rte_mempool_ops_get_info(const struct rte_mempool *mp,
>  int rte_mempool_register_ops(const struct rte_mempool_ops *ops);
>
>  /**
> + * Sort global array rte_mempool_ops_table.ops[] .
> + * Used by rte_eal_memzone_init()
> + */
> +int rte_sort_mempool_ops(void);

Since it is an internal API, there is no need for the rte_ prefix.


> +
> +/**
>   * Macro to statically register the ops of a mempool handler.
>   * Note that the rte_mempool_register_ops fails silently here when
>   * more than RTE_MEMPOOL_MAX_OPS_IDX is registered.
> diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
> old mode 100644
> new mode 100755
> index 22c5251..8e10488
> --- a/lib/librte_mempool/rte_mempool_ops.c
> +++ b/lib/librte_mempool/rte_mempool_ops.c
> @@ -68,6 +68,37 @@ struct rte_mempool_ops_table rte_mempool_ops_table = {
>         return ops_index;
>  }
>
> +
> +int rte_sort_mempool_ops(void)
> +{
> +       /* same with rte_mempool_ops.name */
> +       static const char *memops_name[RTE_MEMPOOL_MAX_OPS_IDX] = {
> +               "ring_mp_mc", "ring_sp_sc", "ring_mp_sc", "ring_sp_mc",
> +               "stack", "lf_stack", "octeontx2_npa", "octeontx_fpavf",
> +                "dpaa2", "dpaa", "bucket",

I think, it is not foolproof. I think, either

1) you can use primary - secondary communication
mechanism to get the library order from the primary.

OR

2) At end of primary rte_eal_init or so, copy the array to memzone and then
lookup the memzone by name(string) in secondary to fill this array.


> +        };
> +       struct rte_mempool_ops_table tmp_mempool_ops_table = {
> +               .sl =  rte_mempool_ops_table.sl,
> +               .num_ops = rte_mempool_ops_table.num_ops
> +       };
> +       uint32_t i = 0, j= 0;
> +       struct rte_mempool_ops *ops = NULL;
> +       for (i = 0; i < 16; i++) {
> +               const char* name = memops_name[i];
> +               if(name && strlen(name)) {
> +                       for(j = 0; j < rte_mempool_ops_table.num_ops; j++) {
> +                               if(strcmp(name, rte_mempool_ops_table.ops[j].name))
> +                                       continue;
> +                               ops = &rte_mempool_ops_table.ops[j];
> +                               memcpy(&tmp_mempool_ops_table.ops[i], ops, sizeof(*ops));
> +                               break;
> +                       }
> +               }
> +       }
> +       memcpy(&rte_mempool_ops_table, &tmp_mempool_ops_table, sizeof(tmp_mempool_ops_table));
> +       return 0;
> +}
> +
>  /* wrapper to allocate an external mempool's private (pool) data. */
>  int
>  rte_mempool_ops_alloc(struct rte_mempool *mp)
> --
> 1.8.3.1
>
diff mbox series

Patch

diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 99b8d65..b59f3f5 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -384,7 +384,7 @@ 
 	}
 
 	rte_rwlock_write_unlock(&mcfg->mlock);
-
+	rte_sort_mempool_ops();
 	return ret;
 }
 
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index f81152a..a22850b 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -910,6 +910,12 @@  int rte_mempool_ops_get_info(const struct rte_mempool *mp,
 int rte_mempool_register_ops(const struct rte_mempool_ops *ops);
 
 /**
+ * Sort global array rte_mempool_ops_table.ops[] .
+ * Used by rte_eal_memzone_init()
+ */
+int rte_sort_mempool_ops(void);
+
+/**
  * Macro to statically register the ops of a mempool handler.
  * Note that the rte_mempool_register_ops fails silently here when
  * more than RTE_MEMPOOL_MAX_OPS_IDX is registered.
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
old mode 100644
new mode 100755
index 22c5251..8e10488
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -68,6 +68,37 @@  struct rte_mempool_ops_table rte_mempool_ops_table = {
 	return ops_index;
 }
 
+
+/*
+ * Sort rte_mempool_ops_table.ops[] by ops name.
+ *
+ * The table order can differ between the primary and a secondary process,
+ * so an ops index taken in one process may select a different handler in
+ * the other. Ordering by name gives every process that registered the same
+ * set of ops the same table, with no hard-coded list of driver names to
+ * keep in sync and no registered entry dropped or moved past num_ops.
+ * It cannot help if the two processes registered different sets of ops.
+ *
+ * Called from rte_eal_memzone_init(). Always returns 0.
+ */
+int rte_sort_mempool_ops(void)
+{
+	struct rte_mempool_ops_table *tbl = &rte_mempool_ops_table;
+	struct rte_mempool_ops key;
+	uint32_t i, j;
+
+	rte_spinlock_lock(&tbl->sl);
+	/* Insertion sort: the table is small and this is not a fast path. */
+	for (i = 1; i < tbl->num_ops; i++) {
+		key = tbl->ops[i];
+		for (j = i; j > 0 && strcmp(tbl->ops[j - 1].name, key.name) > 0; j--)
+			tbl->ops[j] = tbl->ops[j - 1];
+		tbl->ops[j] = key;
+	}
+	rte_spinlock_unlock(&tbl->sl);
+	return 0;
+}
+
 /* wrapper to allocate an external mempool's private (pool) data. */
 int
 rte_mempool_ops_alloc(struct rte_mempool *mp)