app/testpmd: reduce memory consumption
Checks
Commit Message
Following [1], testpmd memory consumption has skyrocketed.
The rte_port structure has gotten quite fat.
struct rte_port {
[...]
struct rte_eth_rxconf rx_conf[65536]; /* 266280 3145728 */
/* --- cacheline 53312 boundary (3411968 bytes) was 40 bytes ago --- */
struct rte_eth_txconf tx_conf[65536]; /* 3412008 3670016 */
/* --- cacheline 110656 boundary (7081984 bytes) was 40 bytes ago --- */
[...]
/* size: 8654936, cachelines: 135234, members: 31 */
[...]
testpmd handles RTE_MAX_ETHPORTS ports (32 by default) which means that it
needs ~256MB just for this internal representation.
The reason is that a testpmd rte_port (the name is quite confusing, as
it is a local type) maintains configurations for all queues of a port.
But where you would expect testpmd to use RTE_MAX_QUEUES_PER_PORT as the
maximum queue count, the rte_port uses MAX_QUEUE_ID set to 64k.
Prefer the ethdev maximum value.
After this patch:
struct rte_port {
[...]
struct rte_eth_rxconf rx_conf[1025]; /* 8240 49200 */
/* --- cacheline 897 boundary (57408 bytes) was 32 bytes ago --- */
struct rte_eth_txconf tx_conf[1025]; /* 57440 57400 */
/* --- cacheline 1794 boundary (114816 bytes) was 24 bytes ago --- */
[...]
/* size: 139488, cachelines: 2180, members: 31 */
[...]
[1]: https://git.dpdk.org/dpdk/commit/?id=436b3a6b6e62
Signed-off-by: David Marchand <david.marchand@redhat.com>
---
app/test-pmd/testpmd.c | 6 +++---
app/test-pmd/testpmd.h | 16 +++++++---------
2 files changed, 10 insertions(+), 12 deletions(-)
Comments
On 11/21/2019 3:12 PM, David Marchand wrote:
> Following [1], testpmd memory consumption has skyrocketted.
> The rte_port structure has gotten quite fat.
>
> struct rte_port {
> [...]
> struct rte_eth_rxconf rx_conf[65536]; /* 266280 3145728 */
> /* --- cacheline 53312 boundary (3411968 bytes) was 40 bytes ago --- */
> struct rte_eth_txconf tx_conf[65536]; /* 3412008 3670016 */
> /* --- cacheline 110656 boundary (7081984 bytes) was 40 bytes ago --- */
> [...]
> /* size: 8654936, cachelines: 135234, members: 31 */
> [...]
>
> testpmd handles RTE_MAX_ETHPORTS ports (32 by default) which means that it
> needs ~256MB just for this internal representation.
>
> The reason is that a testpmd rte_port (the name is quite confusing, as
> it is a local type) maintains configurations for all queues of a port.
> But where you would expect testpmd to use RTE_MAX_QUEUES_PER_PORT as the
> maximum queue count, the rte_port uses MAX_QUEUE_ID set to 64k.
>
> Prefer the ethdev maximum value.
>
> After this patch:
> struct rte_port {
> [...]
> struct rte_eth_rxconf rx_conf[1025]; /* 8240 49200 */
> /* --- cacheline 897 boundary (57408 bytes) was 32 bytes ago --- */
> struct rte_eth_txconf tx_conf[1025]; /* 57440 57400 */
> /* --- cacheline 1794 boundary (114816 bytes) was 24 bytes ago --- */
> [...]
> /* size: 139488, cachelines: 2180, members: 31 */
> [...]
>
> [1]: https://git.dpdk.org/dpdk/commit/?id=436b3a6b6e62
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
Thanks for figuring this out,
Acked-by: Ferruh Yigit <ferruh.yigit@intel.com>
<...>
> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> index 90694a3309..217d577018 100644
> --- a/app/test-pmd/testpmd.h
> +++ b/app/test-pmd/testpmd.h
> @@ -58,8 +58,6 @@ typedef uint16_t portid_t;
> typedef uint16_t queueid_t;
> typedef uint16_t streamid_t;
>
> -#define MAX_QUEUE_ID ((1 << (sizeof(queueid_t) * 8)) - 1)
No strong opinion, but would it be simpler to assign 'MAX_QUEUE_ID' to
'RTE_MAX_QUEUES_PER_PORT' instead?
#define MAX_QUEUE_ID RTE_MAX_QUEUES_PER_PORT
On Thu, Nov 21, 2019 at 4:36 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> > diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> > index 90694a3309..217d577018 100644
> > --- a/app/test-pmd/testpmd.h
> > +++ b/app/test-pmd/testpmd.h
> > @@ -58,8 +58,6 @@ typedef uint16_t portid_t;
> > typedef uint16_t queueid_t;
> > typedef uint16_t streamid_t;
> >
> > -#define MAX_QUEUE_ID ((1 << (sizeof(queueid_t) * 8)) - 1)
>
> No strong opinion, but would it be simpler if assign 'MAX_QUEUE_ID' to
> 'RTE_MAX_QUEUES_PER_PORT' instead?
> #define MAX_QUEUE_ID RTE_MAX_QUEUES_PER_PORT
This was my first solution once I spotted this.
But I prefer to globally replace: when reading this code, using
MAX_QUEUE_ID leaves the impression that testpmd has its own
restriction on max queue count.
On Thu, Nov 21, 2019 at 5:17 PM David Marchand
<david.marchand@redhat.com> wrote:
>
> On Thu, Nov 21, 2019 at 4:36 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> > > diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> > > index 90694a3309..217d577018 100644
> > > --- a/app/test-pmd/testpmd.h
> > > +++ b/app/test-pmd/testpmd.h
> > > @@ -58,8 +58,6 @@ typedef uint16_t portid_t;
> > > typedef uint16_t queueid_t;
> > > typedef uint16_t streamid_t;
> > >
> > > -#define MAX_QUEUE_ID ((1 << (sizeof(queueid_t) * 8)) - 1)
> >
> > No strong opinion, but would it be simpler if assign 'MAX_QUEUE_ID' to
> > 'RTE_MAX_QUEUES_PER_PORT' instead?
> > #define MAX_QUEUE_ID RTE_MAX_QUEUES_PER_PORT
>
> This was my first solution once I spotted this.
> But I prefer to globally replace: when reading this code, using
> MAX_QUEUE_ID leaves the impression that testpmd has its own
> restriction on max queue count.
Btw, not sure we want to backport this, or maybe up to branches
containing d44f8a485f5d ("app/testpmd: enable per queue configure")
Opinions?
On Thu, 21 Nov 2019 16:12:55 +0100
David Marchand <david.marchand@redhat.com> wrote:
> - uint16_t nb_rx_desc[MAX_QUEUE_ID+1]; /**< per queue rx desc number */
> - uint16_t nb_tx_desc[MAX_QUEUE_ID+1]; /**< per queue tx desc number */
> - struct rte_eth_rxconf rx_conf[MAX_QUEUE_ID+1]; /**< per queue rx configuration */
> - struct rte_eth_txconf tx_conf[MAX_QUEUE_ID+1]; /**< per queue tx configuration */
> + uint16_t nb_rx_desc[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue rx desc number */
> + uint16_t nb_tx_desc[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue tx desc number */
> + struct rte_eth_rxconf rx_conf[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue rx configuration */
> + struct rte_eth_txconf tx_conf[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue tx configuration */
Why not put all the per-queue stuff together in one structure
and put it at the end. Then dynamically size based on number of queues?
On Thu, Nov 21, 2019 at 5:45 PM Stephen Hemminger
<stephen@networkplumber.org> wrote:
>
> On Thu, 21 Nov 2019 16:12:55 +0100
> David Marchand <david.marchand@redhat.com> wrote:
>
> > - uint16_t nb_rx_desc[MAX_QUEUE_ID+1]; /**< per queue rx desc number */
> > - uint16_t nb_tx_desc[MAX_QUEUE_ID+1]; /**< per queue tx desc number */
> > - struct rte_eth_rxconf rx_conf[MAX_QUEUE_ID+1]; /**< per queue rx configuration */
> > - struct rte_eth_txconf tx_conf[MAX_QUEUE_ID+1]; /**< per queue tx configuration */
> > + uint16_t nb_rx_desc[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue rx desc number */
> > + uint16_t nb_tx_desc[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue tx desc number */
> > + struct rte_eth_rxconf rx_conf[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue rx configuration */
> > + struct rte_eth_txconf tx_conf[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue tx configuration */
>
> Why not put all the per-queue stuff together in one structure
> and put it at the end. Then dynamically size based on number of queues?
This is something that could be done.
At first glance, the code is relying on those arrays being contiguous,
but it should not be a problem.
The reason for the size '+1' is not obvious to me.
Not saying that would be difficult to investigate and fix/rework all this.
My approach seems the quickest and least risky after rc3.
I can look at this post 19.11 (but volunteers are welcome, testpmd
needs some love).
21/11/2019 17:23, David Marchand:
> On Thu, Nov 21, 2019 at 5:17 PM David Marchand
> <david.marchand@redhat.com> wrote:
> >
> > On Thu, Nov 21, 2019 at 4:36 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> > > > diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> > > > index 90694a3309..217d577018 100644
> > > > --- a/app/test-pmd/testpmd.h
> > > > +++ b/app/test-pmd/testpmd.h
> > > > @@ -58,8 +58,6 @@ typedef uint16_t portid_t;
> > > > typedef uint16_t queueid_t;
> > > > typedef uint16_t streamid_t;
> > > >
> > > > -#define MAX_QUEUE_ID ((1 << (sizeof(queueid_t) * 8)) - 1)
> > >
> > > No strong opinion, but would it be simpler if assign 'MAX_QUEUE_ID' to
> > > 'RTE_MAX_QUEUES_PER_PORT' instead?
> > > #define MAX_QUEUE_ID RTE_MAX_QUEUES_PER_PORT
> >
> > This was my first solution once I spotted this.
> > But I prefer to globally replace: when reading this code, using
> > MAX_QUEUE_ID leaves the impression that testpmd has its own
> > restriction on max queue count.
>
> Btw, not sure we want to backport this, or maybe up to branches
> containing d44f8a485f5d ("app/testpmd: enable per queue configure")
> Opinions?
I am for not backporting.
It is an optimization (stop wasting some memory).
21/11/2019 16:12, David Marchand:
> Following [1], testpmd memory consumption has skyrocketted.
> The rte_port structure has gotten quite fat.
>
> struct rte_port {
> [...]
> struct rte_eth_rxconf rx_conf[65536]; /* 266280 3145728 */
> /* --- cacheline 53312 boundary (3411968 bytes) was 40 bytes ago --- */
> struct rte_eth_txconf tx_conf[65536]; /* 3412008 3670016 */
> /* --- cacheline 110656 boundary (7081984 bytes) was 40 bytes ago --- */
> [...]
> /* size: 8654936, cachelines: 135234, members: 31 */
> [...]
>
> testpmd handles RTE_MAX_ETHPORTS ports (32 by default) which means that it
> needs ~256MB just for this internal representation.
>
> The reason is that a testpmd rte_port (the name is quite confusing, as
> it is a local type) maintains configurations for all queues of a port.
> But where you would expect testpmd to use RTE_MAX_QUEUES_PER_PORT as the
> maximum queue count, the rte_port uses MAX_QUEUE_ID set to 64k.
>
> Prefer the ethdev maximum value.
>
> After this patch:
> struct rte_port {
> [...]
> struct rte_eth_rxconf rx_conf[1025]; /* 8240 49200 */
> /* --- cacheline 897 boundary (57408 bytes) was 32 bytes ago --- */
> struct rte_eth_txconf tx_conf[1025]; /* 57440 57400 */
> /* --- cacheline 1794 boundary (114816 bytes) was 24 bytes ago --- */
> [...]
> /* size: 139488, cachelines: 2180, members: 31 */
> [...]
>
> [1]: https://git.dpdk.org/dpdk/commit/?id=436b3a6b6e62
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Thomas Monjalon <thomas@monjalon.net>
I was really concerned by the memory requirement increase
due to my patch on ethdev structs.
Thank you for finding these giant arrays.
21/11/2019 22:25, Thomas Monjalon:
> 21/11/2019 16:12, David Marchand:
> > Following [1], testpmd memory consumption has skyrocketted.
> > The rte_port structure has gotten quite fat.
> >
> > struct rte_port {
> > [...]
> > struct rte_eth_rxconf rx_conf[65536]; /* 266280 3145728 */
> > /* --- cacheline 53312 boundary (3411968 bytes) was 40 bytes ago --- */
> > struct rte_eth_txconf tx_conf[65536]; /* 3412008 3670016 */
> > /* --- cacheline 110656 boundary (7081984 bytes) was 40 bytes ago --- */
> > [...]
> > /* size: 8654936, cachelines: 135234, members: 31 */
> > [...]
> >
> > testpmd handles RTE_MAX_ETHPORTS ports (32 by default) which means that it
> > needs ~256MB just for this internal representation.
> >
> > The reason is that a testpmd rte_port (the name is quite confusing, as
> > it is a local type) maintains configurations for all queues of a port.
> > But where you would expect testpmd to use RTE_MAX_QUEUES_PER_PORT as the
> > maximum queue count, the rte_port uses MAX_QUEUE_ID set to 64k.
> >
> > Prefer the ethdev maximum value.
> >
> > After this patch:
> > struct rte_port {
> > [...]
> > struct rte_eth_rxconf rx_conf[1025]; /* 8240 49200 */
> > /* --- cacheline 897 boundary (57408 bytes) was 32 bytes ago --- */
> > struct rte_eth_txconf tx_conf[1025]; /* 57440 57400 */
> > /* --- cacheline 1794 boundary (114816 bytes) was 24 bytes ago --- */
> > [...]
> > /* size: 139488, cachelines: 2180, members: 31 */
> > [...]
> >
> > [1]: https://git.dpdk.org/dpdk/commit/?id=436b3a6b6e62
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
>
> Acked-by: Thomas Monjalon <thomas@monjalon.net>
>
> I was really concerned by the memory requirement increase
> due to my patch on ethdev structs.
> Thank you for finding these giant arrays.
After testing this patch, I realized that you can decrease
the memory requirement of test-null.sh from 150 (300 in patch [1])
to only 20 MB.
The following patch [1] was working around the big memory requirement
by increasing the allocated memory to 300 MB.
[1] https://patches.dpdk.org/patch/63151/
@@ -979,7 +979,7 @@ check_socket_id(const unsigned int socket_id)
queueid_t
get_allowed_max_nb_rxq(portid_t *pid)
{
- queueid_t allowed_max_rxq = MAX_QUEUE_ID;
+ queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
bool max_rxq_valid = false;
portid_t pi;
struct rte_eth_dev_info dev_info;
@@ -1029,7 +1029,7 @@ check_nb_rxq(queueid_t rxq)
queueid_t
get_allowed_max_nb_txq(portid_t *pid)
{
- queueid_t allowed_max_txq = MAX_QUEUE_ID;
+ queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
bool max_txq_valid = false;
portid_t pi;
struct rte_eth_dev_info dev_info;
@@ -1079,7 +1079,7 @@ check_nb_txq(queueid_t txq)
queueid_t
get_allowed_max_nb_hairpinq(portid_t *pid)
{
- queueid_t allowed_max_hairpinq = MAX_QUEUE_ID;
+ queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
portid_t pi;
struct rte_eth_hairpin_cap cap;
@@ -58,8 +58,6 @@ typedef uint16_t portid_t;
typedef uint16_t queueid_t;
typedef uint16_t streamid_t;
-#define MAX_QUEUE_ID ((1 << (sizeof(queueid_t) * 8)) - 1)
-
#if defined RTE_LIBRTE_PMD_SOFTNIC
#define SOFTNIC 1
#else
@@ -179,22 +177,22 @@ struct rte_port {
uint8_t need_reconfig_queues; /**< need reconfiguring queues or not */
uint8_t rss_flag; /**< enable rss or not */
uint8_t dcb_flag; /**< enable dcb */
- uint16_t nb_rx_desc[MAX_QUEUE_ID+1]; /**< per queue rx desc number */
- uint16_t nb_tx_desc[MAX_QUEUE_ID+1]; /**< per queue tx desc number */
- struct rte_eth_rxconf rx_conf[MAX_QUEUE_ID+1]; /**< per queue rx configuration */
- struct rte_eth_txconf tx_conf[MAX_QUEUE_ID+1]; /**< per queue tx configuration */
+ uint16_t nb_rx_desc[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue rx desc number */
+ uint16_t nb_tx_desc[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue tx desc number */
+ struct rte_eth_rxconf rx_conf[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue rx configuration */
+ struct rte_eth_txconf tx_conf[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue tx configuration */
struct rte_ether_addr *mc_addr_pool; /**< pool of multicast addrs */
uint32_t mc_addr_nb; /**< nb. of addr. in mc_addr_pool */
uint8_t slave_flag; /**< bonding slave port */
struct port_flow *flow_list; /**< Associated flows. */
- const struct rte_eth_rxtx_callback *rx_dump_cb[MAX_QUEUE_ID+1];
- const struct rte_eth_rxtx_callback *tx_dump_cb[MAX_QUEUE_ID+1];
+ const struct rte_eth_rxtx_callback *rx_dump_cb[RTE_MAX_QUEUES_PER_PORT+1];
+ const struct rte_eth_rxtx_callback *tx_dump_cb[RTE_MAX_QUEUES_PER_PORT+1];
#ifdef SOFTNIC
struct softnic_port softport; /**< softnic params */
#endif
/**< metadata value to insert in Tx packets. */
uint32_t tx_metadata;
- const struct rte_eth_rxtx_callback *tx_set_md_cb[MAX_QUEUE_ID+1];
+ const struct rte_eth_rxtx_callback *tx_set_md_cb[RTE_MAX_QUEUES_PER_PORT+1];
};
/**