examples/l3fwd: support separate buffer pool per port

Message ID 20190103112932.4415-1-shreyansh.jain@nxp.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series examples/l3fwd: support separate buffer pool per port |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/intel-Performance-Testing success Performance Testing PASS
ci/mellanox-Performance-Testing success Performance Testing PASS
ci/Intel-compilation success Compilation OK

Commit Message

Shreyansh Jain Jan. 3, 2019, 11:30 a.m. UTC
  Traditionally, only a single buffer pool per port
(or, per-port-per-socket) is created in l3fwd application.

If separate pools are created per-port, it might lead to gain in
performance as packet alloc/dealloc requests would be isolated
across ports (and their corresponding lcores).

This patch adds an argument '--per-port-pool' to the l3fwd application.
By default, old mode of single pool per port (split on sockets) is
active.

Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
---

RFC: https://mails.dpdk.org/archives/dev/2018-November/120002.html

 examples/l3fwd/main.c | 74 +++++++++++++++++++++++++++++++------------
 1 file changed, 53 insertions(+), 21 deletions(-)
  

Comments

Hemant Agrawal April 4, 2019, 11:54 a.m. UTC | #1
ping


On 03-Jan-19 5:00 PM, Shreyansh Jain wrote:
> Traditionally, only a single buffer pool per port
> (or, per-port-per-socket) is created in l3fwd application.
>
> If separate pools are created per-port, it might lead to gain in
> performance as packet alloc/dealloc requests would be isolated
> across ports (and their corresponding lcores).
>
> This patch adds an argument '--per-port-pool' to the l3fwd application.
> By default, old mode of single pool per port (split on sockets) is
> active.
>
> Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
> ---
>
> RFC: https://mails.dpdk.org/archives/dev/2018-November/120002.html
>
>   examples/l3fwd/main.c | 74 +++++++++++++++++++++++++++++++------------
>   1 file changed, 53 insertions(+), 21 deletions(-)
>
> diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
> index e4b99efe0..7b9683187 100644
> --- a/examples/l3fwd/main.c
> +++ b/examples/l3fwd/main.c
> @@ -69,11 +69,13 @@ static int promiscuous_on;
>   static int l3fwd_lpm_on;
>   static int l3fwd_em_on;
>   
> +/* Global variables. */
> +
>   static int numa_on = 1; /**< NUMA is enabled by default. */
>   static int parse_ptype; /**< Parse packet type using rx callback, and */
>   			/**< disabled by default */
> -
> -/* Global variables. */
> +static int per_port_pool; /**< Use separate buffer pools per port; disabled */
> +			  /**< by default */
>   
>   volatile bool force_quit;
>   
> @@ -133,7 +135,8 @@ static struct rte_eth_conf port_conf = {
>   	},
>   };
>   
> -static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
> +static struct rte_mempool *pktmbuf_pool[RTE_MAX_ETHPORTS][NB_SOCKETS];
> +static uint8_t lkp_per_socket[NB_SOCKETS];
>   
>   struct l3fwd_lkp_mode {
>   	void  (*setup)(int);
> @@ -285,7 +288,8 @@ print_usage(const char *prgname)
>   		" [--no-numa]"
>   		" [--hash-entry-num]"
>   		" [--ipv6]"
> -		" [--parse-ptype]\n\n"
> +		" [--parse-ptype]"
> +		" [--per-port-pool]\n\n"
>   
>   		"  -p PORTMASK: Hexadecimal bitmask of ports to configure\n"
>   		"  -P : Enable promiscuous mode\n"
> @@ -299,7 +303,8 @@ print_usage(const char *prgname)
>   		"  --no-numa: Disable numa awareness\n"
>   		"  --hash-entry-num: Specify the hash entry number in hexadecimal to be setup\n"
>   		"  --ipv6: Set if running ipv6 packets\n"
> -		"  --parse-ptype: Set to use software to analyze packet type\n\n",
> +		"  --parse-ptype: Set to use software to analyze packet type\n"
> +		"  --per-port-pool: Use separate buffer pool per port\n\n",
>   		prgname);
>   }
>   
> @@ -452,6 +457,7 @@ static const char short_options[] =
>   #define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
>   #define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
>   #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
> +#define CMD_LINE_OPT_PER_PORT_POOL "per-port-pool"
>   enum {
>   	/* long options mapped to a short option */
>   
> @@ -465,6 +471,7 @@ enum {
>   	CMD_LINE_OPT_ENABLE_JUMBO_NUM,
>   	CMD_LINE_OPT_HASH_ENTRY_NUM_NUM,
>   	CMD_LINE_OPT_PARSE_PTYPE_NUM,
> +	CMD_LINE_OPT_PARSE_PER_PORT_POOL,
>   };
>   
>   static const struct option lgopts[] = {
> @@ -475,6 +482,7 @@ static const struct option lgopts[] = {
>   	{CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, CMD_LINE_OPT_ENABLE_JUMBO_NUM},
>   	{CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, CMD_LINE_OPT_HASH_ENTRY_NUM_NUM},
>   	{CMD_LINE_OPT_PARSE_PTYPE, 0, 0, CMD_LINE_OPT_PARSE_PTYPE_NUM},
> +	{CMD_LINE_OPT_PER_PORT_POOL, 0, 0, CMD_LINE_OPT_PARSE_PER_PORT_POOL},
>   	{NULL, 0, 0, 0}
>   };
>   
> @@ -485,10 +493,10 @@ static const struct option lgopts[] = {
>    * RTE_MAX is used to ensure that NB_MBUF never goes below a minimum
>    * value of 8192
>    */
> -#define NB_MBUF RTE_MAX(	\
> -	(nb_ports*nb_rx_queue*nb_rxd +		\
> -	nb_ports*nb_lcores*MAX_PKT_BURST +	\
> -	nb_ports*n_tx_queue*nb_txd +		\
> +#define NB_MBUF(nports) RTE_MAX(	\
> +	(nports*nb_rx_queue*nb_rxd +		\
> +	nports*nb_lcores*MAX_PKT_BURST +	\
> +	nports*n_tx_queue*nb_txd +		\
>   	nb_lcores*MEMPOOL_CACHE_SIZE),		\
>   	(unsigned)8192)
>   
> @@ -594,6 +602,11 @@ parse_args(int argc, char **argv)
>   			parse_ptype = 1;
>   			break;
>   
> +		case CMD_LINE_OPT_PARSE_PER_PORT_POOL:
> +			printf("per port buffer pool is enabled\n");
> +			per_port_pool = 1;
> +			break;
> +
>   		default:
>   			print_usage(prgname);
>   			return -1;
> @@ -642,7 +655,7 @@ print_ethaddr(const char *name, const struct ether_addr *eth_addr)
>   }
>   
>   static int
> -init_mem(unsigned nb_mbuf)
> +init_mem(uint16_t portid, unsigned int nb_mbuf)
>   {
>   	struct lcore_conf *qconf;
>   	int socketid;
> @@ -664,13 +677,14 @@ init_mem(unsigned nb_mbuf)
>   				socketid, lcore_id, NB_SOCKETS);
>   		}
>   
> -		if (pktmbuf_pool[socketid] == NULL) {
> -			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
> -			pktmbuf_pool[socketid] =
> +		if (pktmbuf_pool[portid][socketid] == NULL) {
> +			snprintf(s, sizeof(s), "mbuf_pool_%d:%d",
> +				 portid, socketid);
> +			pktmbuf_pool[portid][socketid] =
>   				rte_pktmbuf_pool_create(s, nb_mbuf,
>   					MEMPOOL_CACHE_SIZE, 0,
>   					RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
> -			if (pktmbuf_pool[socketid] == NULL)
> +			if (pktmbuf_pool[portid][socketid] == NULL)
>   				rte_exit(EXIT_FAILURE,
>   					"Cannot init mbuf pool on socket %d\n",
>   					socketid);
> @@ -678,8 +692,13 @@ init_mem(unsigned nb_mbuf)
>   				printf("Allocated mbuf pool on socket %d\n",
>   					socketid);
>   
> -			/* Setup either LPM or EM(f.e Hash).  */
> -			l3fwd_lkp.setup(socketid);
> +			/* Setup either LPM or EM(f.e Hash). But, only once per
> +			 * available socket.
> +			 */
> +			if (!lkp_per_socket[socketid]) {
> +				l3fwd_lkp.setup(socketid);
> +				lkp_per_socket[socketid] = 1;
> +			}
>   		}
>   		qconf = &lcore_conf[lcore_id];
>   		qconf->ipv4_lookup_struct =
> @@ -899,7 +918,14 @@ main(int argc, char **argv)
>   			(struct ether_addr *)(val_eth + portid) + 1);
>   
>   		/* init memory */
> -		ret = init_mem(NB_MBUF);
> +		if (!per_port_pool) {
> +			/* portid = 0; this is *not* signifying the first port,
> +			 * rather, it signifies that portid is ignored.
> +			 */
> +			ret = init_mem(0, NB_MBUF(nb_ports));
> +		} else {
> +			ret = init_mem(portid, NB_MBUF(1));
> +		}
>   		if (ret < 0)
>   			rte_exit(EXIT_FAILURE, "init_mem failed\n");
>   
> @@ -966,10 +992,16 @@ main(int argc, char **argv)
>   			rte_eth_dev_info_get(portid, &dev_info);
>   			rxq_conf = dev_info.default_rxconf;
>   			rxq_conf.offloads = conf->rxmode.offloads;
> -			ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
> -					socketid,
> -					&rxq_conf,
> -					pktmbuf_pool[socketid]);
> +			if (!per_port_pool)
> +				ret = rte_eth_rx_queue_setup(portid, queueid,
> +						nb_rxd, socketid,
> +						&rxq_conf,
> +						pktmbuf_pool[0][socketid]);
> +			else
> +				ret = rte_eth_rx_queue_setup(portid, queueid,
> +						nb_rxd, socketid,
> +						&rxq_conf,
> +						pktmbuf_pool[portid][socketid]);
>   			if (ret < 0)
>   				rte_exit(EXIT_FAILURE,
>   				"rte_eth_rx_queue_setup: err=%d, port=%d\n",
  
Ruifeng Wang April 8, 2019, 6:10 a.m. UTC | #2
Hi Shreyansh,

I tried this patch on MacchiatoBin + 82599 NIC.
Compared with global-pool mode, per-port-pool mode showed slightly lower performance in single core test.
In dual core test, both modes had nearly same performance. 

My setup only has two ports which is limited. 
Just want to know the per-port-pool mode has more performance gain when many ports are bound to  different cores?

Used commands:
sudo ./examples/l3fwd/build/l3fwd -c 0x4 -w 0000:01:00.0 -w 0000:01:00.1 -- -P -p 3 --config='(0,0,2),(1,0,2)' --per-port-pool
sudo ./examples/l3fwd/build/l3fwd -c 0xc -w 0000:01:00.0 -w 0000:01:00.1 -- -P -p 3 --config='(0,0,2),(1,0,3)' --per-port-pool

Regards,
/Ruifeng

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Shreyansh Jain
> Sent: 2019年1月3日 19:30
> To: dev@dpdk.org
> Cc: Shreyansh Jain <shreyansh.jain@nxp.com>
> Subject: [dpdk-dev] [PATCH] examples/l3fwd: support separate buffer pool
> per port
> 
> Traditionally, only a single buffer pool per port (or, per-port-per-socket) is
> created in l3fwd application.
> 
> If separate pools are created per-port, it might lead to gain in performance as
> packet alloc/dealloc requests would be isolated across ports (and their
> corresponding lcores).
> 
> This patch adds an argument '--per-port-pool' to the l3fwd application.
> By default, old mode of single pool per port (split on sockets) is active.
> 
> Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
> ---
> 
> RFC: https://mails.dpdk.org/archives/dev/2018-November/120002.html
> 
>  examples/l3fwd/main.c | 74 +++++++++++++++++++++++++++++++--------
> ----
>  1 file changed, 53 insertions(+), 21 deletions(-)
> 
> diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c index
> e4b99efe0..7b9683187 100644
> --- a/examples/l3fwd/main.c
> +++ b/examples/l3fwd/main.c
> @@ -69,11 +69,13 @@ static int promiscuous_on;  static int l3fwd_lpm_on;
> static int l3fwd_em_on;
> 
> +/* Global variables. */
> +
>  static int numa_on = 1; /**< NUMA is enabled by default. */  static int
> parse_ptype; /**< Parse packet type using rx callback, and */
>  			/**< disabled by default */
> -
> -/* Global variables. */
> +static int per_port_pool; /**< Use separate buffer pools per port; disabled
> */
> +			  /**< by default */
> 
>  volatile bool force_quit;
> 
> @@ -133,7 +135,8 @@ static struct rte_eth_conf port_conf = {
>  	},
>  };
> 
> -static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
> +static struct rte_mempool
> *pktmbuf_pool[RTE_MAX_ETHPORTS][NB_SOCKETS];
> +static uint8_t lkp_per_socket[NB_SOCKETS];
> 
>  struct l3fwd_lkp_mode {
>  	void  (*setup)(int);
> @@ -285,7 +288,8 @@ print_usage(const char *prgname)
>  		" [--no-numa]"
>  		" [--hash-entry-num]"
>  		" [--ipv6]"
> -		" [--parse-ptype]\n\n"
> +		" [--parse-ptype]"
> +		" [--per-port-pool]\n\n"
> 
>  		"  -p PORTMASK: Hexadecimal bitmask of ports to
> configure\n"
>  		"  -P : Enable promiscuous mode\n"
> @@ -299,7 +303,8 @@ print_usage(const char *prgname)
>  		"  --no-numa: Disable numa awareness\n"
>  		"  --hash-entry-num: Specify the hash entry number in
> hexadecimal to be setup\n"
>  		"  --ipv6: Set if running ipv6 packets\n"
> -		"  --parse-ptype: Set to use software to analyze packet
> type\n\n",
> +		"  --parse-ptype: Set to use software to analyze packet
> type\n"
> +		"  --per-port-pool: Use separate buffer pool per port\n\n",
>  		prgname);
>  }
> 
> @@ -452,6 +457,7 @@ static const char short_options[] =  #define
> CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
>  #define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
>  #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
> +#define CMD_LINE_OPT_PER_PORT_POOL "per-port-pool"
>  enum {
>  	/* long options mapped to a short option */
> 
> @@ -465,6 +471,7 @@ enum {
>  	CMD_LINE_OPT_ENABLE_JUMBO_NUM,
>  	CMD_LINE_OPT_HASH_ENTRY_NUM_NUM,
>  	CMD_LINE_OPT_PARSE_PTYPE_NUM,
> +	CMD_LINE_OPT_PARSE_PER_PORT_POOL,
>  };
> 
>  static const struct option lgopts[] = { @@ -475,6 +482,7 @@ static const
> struct option lgopts[] = {
>  	{CMD_LINE_OPT_ENABLE_JUMBO, 0, 0,
> CMD_LINE_OPT_ENABLE_JUMBO_NUM},
>  	{CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0,
> CMD_LINE_OPT_HASH_ENTRY_NUM_NUM},
>  	{CMD_LINE_OPT_PARSE_PTYPE, 0, 0,
> CMD_LINE_OPT_PARSE_PTYPE_NUM},
> +	{CMD_LINE_OPT_PER_PORT_POOL, 0, 0,
> CMD_LINE_OPT_PARSE_PER_PORT_POOL},
>  	{NULL, 0, 0, 0}
>  };
> 
> @@ -485,10 +493,10 @@ static const struct option lgopts[] = {
>   * RTE_MAX is used to ensure that NB_MBUF never goes below a minimum
>   * value of 8192
>   */
> -#define NB_MBUF RTE_MAX(	\
> -	(nb_ports*nb_rx_queue*nb_rxd +		\
> -	nb_ports*nb_lcores*MAX_PKT_BURST +	\
> -	nb_ports*n_tx_queue*nb_txd +		\
> +#define NB_MBUF(nports) RTE_MAX(	\
> +	(nports*nb_rx_queue*nb_rxd +		\
> +	nports*nb_lcores*MAX_PKT_BURST +	\
> +	nports*n_tx_queue*nb_txd +		\
>  	nb_lcores*MEMPOOL_CACHE_SIZE),		\
>  	(unsigned)8192)
> 
> @@ -594,6 +602,11 @@ parse_args(int argc, char **argv)
>  			parse_ptype = 1;
>  			break;
> 
> +		case CMD_LINE_OPT_PARSE_PER_PORT_POOL:
> +			printf("per port buffer pool is enabled\n");
> +			per_port_pool = 1;
> +			break;
> +
>  		default:
>  			print_usage(prgname);
>  			return -1;
> @@ -642,7 +655,7 @@ print_ethaddr(const char *name, const struct
> ether_addr *eth_addr)  }
> 
>  static int
> -init_mem(unsigned nb_mbuf)
> +init_mem(uint16_t portid, unsigned int nb_mbuf)
>  {
>  	struct lcore_conf *qconf;
>  	int socketid;
> @@ -664,13 +677,14 @@ init_mem(unsigned nb_mbuf)
>  				socketid, lcore_id, NB_SOCKETS);
>  		}
> 
> -		if (pktmbuf_pool[socketid] == NULL) {
> -			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
> -			pktmbuf_pool[socketid] =
> +		if (pktmbuf_pool[portid][socketid] == NULL) {
> +			snprintf(s, sizeof(s), "mbuf_pool_%d:%d",
> +				 portid, socketid);
> +			pktmbuf_pool[portid][socketid] =
>  				rte_pktmbuf_pool_create(s, nb_mbuf,
>  					MEMPOOL_CACHE_SIZE, 0,
>  					RTE_MBUF_DEFAULT_BUF_SIZE,
> socketid);
> -			if (pktmbuf_pool[socketid] == NULL)
> +			if (pktmbuf_pool[portid][socketid] == NULL)
>  				rte_exit(EXIT_FAILURE,
>  					"Cannot init mbuf pool on
> socket %d\n",
>  					socketid);
> @@ -678,8 +692,13 @@ init_mem(unsigned nb_mbuf)
>  				printf("Allocated mbuf pool on socket %d\n",
>  					socketid);
> 
> -			/* Setup either LPM or EM(f.e Hash).  */
> -			l3fwd_lkp.setup(socketid);
> +			/* Setup either LPM or EM(f.e Hash). But, only once
> per
> +			 * available socket.
> +			 */
> +			if (!lkp_per_socket[socketid]) {
> +				l3fwd_lkp.setup(socketid);
> +				lkp_per_socket[socketid] = 1;
> +			}
>  		}
>  		qconf = &lcore_conf[lcore_id];
>  		qconf->ipv4_lookup_struct =
> @@ -899,7 +918,14 @@ main(int argc, char **argv)
>  			(struct ether_addr *)(val_eth + portid) + 1);
> 
>  		/* init memory */
> -		ret = init_mem(NB_MBUF);
> +		if (!per_port_pool) {
> +			/* portid = 0; this is *not* signifying the first port,
> +			 * rather, it signifies that portid is ignored.
> +			 */
> +			ret = init_mem(0, NB_MBUF(nb_ports));
> +		} else {
> +			ret = init_mem(portid, NB_MBUF(1));
> +		}
>  		if (ret < 0)
>  			rte_exit(EXIT_FAILURE, "init_mem failed\n");
> 
> @@ -966,10 +992,16 @@ main(int argc, char **argv)
>  			rte_eth_dev_info_get(portid, &dev_info);
>  			rxq_conf = dev_info.default_rxconf;
>  			rxq_conf.offloads = conf->rxmode.offloads;
> -			ret = rte_eth_rx_queue_setup(portid, queueid,
> nb_rxd,
> -					socketid,
> -					&rxq_conf,
> -					pktmbuf_pool[socketid]);
> +			if (!per_port_pool)
> +				ret = rte_eth_rx_queue_setup(portid,
> queueid,
> +						nb_rxd, socketid,
> +						&rxq_conf,
> +						pktmbuf_pool[0][socketid]);
> +			else
> +				ret = rte_eth_rx_queue_setup(portid,
> queueid,
> +						nb_rxd, socketid,
> +						&rxq_conf,
> +
> 	pktmbuf_pool[portid][socketid]);
>  			if (ret < 0)
>  				rte_exit(EXIT_FAILURE,
>  				"rte_eth_rx_queue_setup: err=%d,
> port=%d\n",
> --
> 2.17.1
  
Ananyev, Konstantin April 8, 2019, 9:29 a.m. UTC | #3
> 
> Hi Shreyansh,
> 
> I tried this patch on MacchiatoBin + 82599 NIC.
> Compared with global-pool mode, per-port-pool mode showed slightly lower performance in single core test.

That was my thought too - for the case when queues from multiple ports are handled by the same core
it probably would only slowdown things.
Wonder what is the use case for the patch and what is the performance gain you observed?
Konstantin 

> In dual core test, both modes had nearly same performance.
> 
> My setup only has two ports which is limited.
> Just want to know the per-port-pool mode has more performance gain when many ports are bound to  different cores?
> 
> Used commands:
> sudo ./examples/l3fwd/build/l3fwd -c 0x4 -w 0000:01:00.0 -w 0000:01:00.1 -- -P -p 3 --config='(0,0,2),(1,0,2)' --per-port-pool
> sudo ./examples/l3fwd/build/l3fwd -c 0xc -w 0000:01:00.0 -w 0000:01:00.1 -- -P -p 3 --config='(0,0,2),(1,0,3)' --per-port-pool
> 
> Regards,
> /Ruifeng
> 
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Shreyansh Jain
> > Sent: 2019年1月3日 19:30
> > To: dev@dpdk.org
> > Cc: Shreyansh Jain <shreyansh.jain@nxp.com>
> > Subject: [dpdk-dev] [PATCH] examples/l3fwd: support separate buffer pool
> > per port
> >
> > Traditionally, only a single buffer pool per port (or, per-port-per-socket) is
> > created in l3fwd application.
> >
> > If separate pools are created per-port, it might lead to gain in performance as
> > packet alloc/dealloc requests would be isolated across ports (and their
> > corresponding lcores).
> >
> > This patch adds an argument '--per-port-pool' to the l3fwd application.
> > By default, old mode of single pool per port (split on sockets) is active.
> >
> > Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
> > ---
> >
> > RFC: https://mails.dpdk.org/archives/dev/2018-November/120002.html
> >
> >  examples/l3fwd/main.c | 74 +++++++++++++++++++++++++++++++--------
> > ----
> >  1 file changed, 53 insertions(+), 21 deletions(-)
> >
> > diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c index
> > e4b99efe0..7b9683187 100644
> > --- a/examples/l3fwd/main.c
> > +++ b/examples/l3fwd/main.c
> > @@ -69,11 +69,13 @@ static int promiscuous_on;  static int l3fwd_lpm_on;
> > static int l3fwd_em_on;
> >
> > +/* Global variables. */
> > +
> >  static int numa_on = 1; /**< NUMA is enabled by default. */  static int
> > parse_ptype; /**< Parse packet type using rx callback, and */
> >  			/**< disabled by default */
> > -
> > -/* Global variables. */
> > +static int per_port_pool; /**< Use separate buffer pools per port; disabled
> > */
> > +			  /**< by default */
> >
> >  volatile bool force_quit;
> >
> > @@ -133,7 +135,8 @@ static struct rte_eth_conf port_conf = {
> >  	},
> >  };
> >
> > -static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
> > +static struct rte_mempool
> > *pktmbuf_pool[RTE_MAX_ETHPORTS][NB_SOCKETS];
> > +static uint8_t lkp_per_socket[NB_SOCKETS];
> >
> >  struct l3fwd_lkp_mode {
> >  	void  (*setup)(int);
> > @@ -285,7 +288,8 @@ print_usage(const char *prgname)
> >  		" [--no-numa]"
> >  		" [--hash-entry-num]"
> >  		" [--ipv6]"
> > -		" [--parse-ptype]\n\n"
> > +		" [--parse-ptype]"
> > +		" [--per-port-pool]\n\n"
> >
> >  		"  -p PORTMASK: Hexadecimal bitmask of ports to
> > configure\n"
> >  		"  -P : Enable promiscuous mode\n"
> > @@ -299,7 +303,8 @@ print_usage(const char *prgname)
> >  		"  --no-numa: Disable numa awareness\n"
> >  		"  --hash-entry-num: Specify the hash entry number in
> > hexadecimal to be setup\n"
> >  		"  --ipv6: Set if running ipv6 packets\n"
> > -		"  --parse-ptype: Set to use software to analyze packet
> > type\n\n",
> > +		"  --parse-ptype: Set to use software to analyze packet
> > type\n"
> > +		"  --per-port-pool: Use separate buffer pool per port\n\n",
> >  		prgname);
> >  }
> >
> > @@ -452,6 +457,7 @@ static const char short_options[] =  #define
> > CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
> >  #define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
> >  #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
> > +#define CMD_LINE_OPT_PER_PORT_POOL "per-port-pool"
> >  enum {
> >  	/* long options mapped to a short option */
> >
> > @@ -465,6 +471,7 @@ enum {
> >  	CMD_LINE_OPT_ENABLE_JUMBO_NUM,
> >  	CMD_LINE_OPT_HASH_ENTRY_NUM_NUM,
> >  	CMD_LINE_OPT_PARSE_PTYPE_NUM,
> > +	CMD_LINE_OPT_PARSE_PER_PORT_POOL,
> >  };
> >
> >  static const struct option lgopts[] = { @@ -475,6 +482,7 @@ static const
> > struct option lgopts[] = {
> >  	{CMD_LINE_OPT_ENABLE_JUMBO, 0, 0,
> > CMD_LINE_OPT_ENABLE_JUMBO_NUM},
> >  	{CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0,
> > CMD_LINE_OPT_HASH_ENTRY_NUM_NUM},
> >  	{CMD_LINE_OPT_PARSE_PTYPE, 0, 0,
> > CMD_LINE_OPT_PARSE_PTYPE_NUM},
> > +	{CMD_LINE_OPT_PER_PORT_POOL, 0, 0,
> > CMD_LINE_OPT_PARSE_PER_PORT_POOL},
> >  	{NULL, 0, 0, 0}
> >  };
> >
> > @@ -485,10 +493,10 @@ static const struct option lgopts[] = {
> >   * RTE_MAX is used to ensure that NB_MBUF never goes below a minimum
> >   * value of 8192
> >   */
> > -#define NB_MBUF RTE_MAX(	\
> > -	(nb_ports*nb_rx_queue*nb_rxd +		\
> > -	nb_ports*nb_lcores*MAX_PKT_BURST +	\
> > -	nb_ports*n_tx_queue*nb_txd +		\
> > +#define NB_MBUF(nports) RTE_MAX(	\
> > +	(nports*nb_rx_queue*nb_rxd +		\
> > +	nports*nb_lcores*MAX_PKT_BURST +	\
> > +	nports*n_tx_queue*nb_txd +		\
> >  	nb_lcores*MEMPOOL_CACHE_SIZE),		\
> >  	(unsigned)8192)
> >
> > @@ -594,6 +602,11 @@ parse_args(int argc, char **argv)
> >  			parse_ptype = 1;
> >  			break;
> >
> > +		case CMD_LINE_OPT_PARSE_PER_PORT_POOL:
> > +			printf("per port buffer pool is enabled\n");
> > +			per_port_pool = 1;
> > +			break;
> > +
> >  		default:
> >  			print_usage(prgname);
> >  			return -1;
> > @@ -642,7 +655,7 @@ print_ethaddr(const char *name, const struct
> > ether_addr *eth_addr)  }
> >
> >  static int
> > -init_mem(unsigned nb_mbuf)
> > +init_mem(uint16_t portid, unsigned int nb_mbuf)
> >  {
> >  	struct lcore_conf *qconf;
> >  	int socketid;
> > @@ -664,13 +677,14 @@ init_mem(unsigned nb_mbuf)
> >  				socketid, lcore_id, NB_SOCKETS);
> >  		}
> >
> > -		if (pktmbuf_pool[socketid] == NULL) {
> > -			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
> > -			pktmbuf_pool[socketid] =
> > +		if (pktmbuf_pool[portid][socketid] == NULL) {
> > +			snprintf(s, sizeof(s), "mbuf_pool_%d:%d",
> > +				 portid, socketid);
> > +			pktmbuf_pool[portid][socketid] =
> >  				rte_pktmbuf_pool_create(s, nb_mbuf,
> >  					MEMPOOL_CACHE_SIZE, 0,
> >  					RTE_MBUF_DEFAULT_BUF_SIZE,
> > socketid);
> > -			if (pktmbuf_pool[socketid] == NULL)
> > +			if (pktmbuf_pool[portid][socketid] == NULL)
> >  				rte_exit(EXIT_FAILURE,
> >  					"Cannot init mbuf pool on
> > socket %d\n",
> >  					socketid);
> > @@ -678,8 +692,13 @@ init_mem(unsigned nb_mbuf)
> >  				printf("Allocated mbuf pool on socket %d\n",
> >  					socketid);
> >
> > -			/* Setup either LPM or EM(f.e Hash).  */
> > -			l3fwd_lkp.setup(socketid);
> > +			/* Setup either LPM or EM(f.e Hash). But, only once
> > per
> > +			 * available socket.
> > +			 */
> > +			if (!lkp_per_socket[socketid]) {
> > +				l3fwd_lkp.setup(socketid);
> > +				lkp_per_socket[socketid] = 1;
> > +			}
> >  		}
> >  		qconf = &lcore_conf[lcore_id];
> >  		qconf->ipv4_lookup_struct =
> > @@ -899,7 +918,14 @@ main(int argc, char **argv)
> >  			(struct ether_addr *)(val_eth + portid) + 1);
> >
> >  		/* init memory */
> > -		ret = init_mem(NB_MBUF);
> > +		if (!per_port_pool) {
> > +			/* portid = 0; this is *not* signifying the first port,
> > +			 * rather, it signifies that portid is ignored.
> > +			 */
> > +			ret = init_mem(0, NB_MBUF(nb_ports));
> > +		} else {
> > +			ret = init_mem(portid, NB_MBUF(1));
> > +		}
> >  		if (ret < 0)
> >  			rte_exit(EXIT_FAILURE, "init_mem failed\n");
> >
> > @@ -966,10 +992,16 @@ main(int argc, char **argv)
> >  			rte_eth_dev_info_get(portid, &dev_info);
> >  			rxq_conf = dev_info.default_rxconf;
> >  			rxq_conf.offloads = conf->rxmode.offloads;
> > -			ret = rte_eth_rx_queue_setup(portid, queueid,
> > nb_rxd,
> > -					socketid,
> > -					&rxq_conf,
> > -					pktmbuf_pool[socketid]);
> > +			if (!per_port_pool)
> > +				ret = rte_eth_rx_queue_setup(portid,
> > queueid,
> > +						nb_rxd, socketid,
> > +						&rxq_conf,
> > +						pktmbuf_pool[0][socketid]);
> > +			else
> > +				ret = rte_eth_rx_queue_setup(portid,
> > queueid,
> > +						nb_rxd, socketid,
> > +						&rxq_conf,
> > +
> > 	pktmbuf_pool[portid][socketid]);
> >  			if (ret < 0)
> >  				rte_exit(EXIT_FAILURE,
> >  				"rte_eth_rx_queue_setup: err=%d,
> > port=%d\n",
> > --
> > 2.17.1
  
Shreyansh Jain April 12, 2019, 9:24 a.m. UTC | #4
Hi Konstantin, Ruifeng,

> -----Original Message-----
> From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Sent: Monday, April 8, 2019 3:00 PM
> To: Ruifeng Wang (Arm Technology China) <Ruifeng.Wang@arm.com>;
> Shreyansh Jain <shreyansh.jain@nxp.com>; dev@dpdk.org
> Cc: nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH] examples/l3fwd: support separate buffer
> pool per port
> 
> 
> 
> >
> > Hi Shreyansh,
> >
> > I tried this patch on MacchiatoBin + 82599 NIC.
> > Compared with global-pool mode, per-port-pool mode showed slightly
> lower performance in single core test.
> 
> That was my thought too - for the case when queues from multiple ports
> are handled by the same core
> it probably would only slowdown things.

Thanks for your comments.

This is applicable for cases where separate cores can handle separate ports - each with their pools. (somehow I felt that message in commit was adequate - I can rephrase if that is misleading)

In case there is enough number of cores available for datapath, such segregation can result in better performance - possibly because of drop in pool and cache conflicts.
At least on some of NXP SoC, this resulted in over 15% improvement.
And, in other cases it didn't lead to any drop/negative-impact.

> Wonder what is the use case for the patch and what is the performance
> gain you observed?

For hardware backed pools, hardware access and exclusion are expensive. By segregating pool/port/lcores it is possible to attain a conflict free path. This is the use-case this patch targets. 
And anyways, this is an optional feature.

> Konstantin
> 
> > In dual core test, both modes had nearly same performance.

OK

> >
> > My setup only has two ports which is limited.
> > Just want to know the per-port-pool mode has more performance gain
> when many ports are bound to  different cores?

Yes, though not necessarily *many* - in my case, I had 4 ports and even then about ~10% improvement was directly visible. I increased the port count and I was able to touch about ~15%. I did pin each port to a separate core, though.
But again, important point is that without this feature enabled, I didn't see any drop in performance. Did you observe any drop?

> >
> > Used commands:
> > sudo ./examples/l3fwd/build/l3fwd -c 0x4 -w 0000:01:00.0 -w
> 0000:01:00.1 -- -P -p 3 --config='(0,0,2),(1,0,2)' --per-port-pool
> > sudo ./examples/l3fwd/build/l3fwd -c 0xc -w 0000:01:00.0 -w
> 0000:01:00.1 -- -P -p 3 --config='(0,0,2),(1,0,3)' --per-port-pool
> >
> > Regards,
> > /Ruifeng
> >

[...]
  
Ruifeng Wang April 14, 2019, 9:13 a.m. UTC | #5
Hi Shreyansh,

> -----Original Message-----
> From: Shreyansh Jain <shreyansh.jain@nxp.com>
> Sent: Friday, April 12, 2019 17:25
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Ruifeng Wang
> (Arm Technology China) <Ruifeng.Wang@arm.com>; dev@dpdk.org
> Cc: nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH] examples/l3fwd: support separate buffer
> pool per port
> 
> Hi Konstantin, Ruifeng,
> 
> > -----Original Message-----
> > From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> > Sent: Monday, April 8, 2019 3:00 PM
> > To: Ruifeng Wang (Arm Technology China) <Ruifeng.Wang@arm.com>;
> > Shreyansh Jain <shreyansh.jain@nxp.com>; dev@dpdk.org
> > Cc: nd <nd@arm.com>
> > Subject: RE: [dpdk-dev] [PATCH] examples/l3fwd: support separate
> > buffer pool per port
> >
> >
> >
> > >
> > > Hi Shreyansh,
> > >
> > > I tried this patch on MacchiatoBin + 82599 NIC.
> > > Compared with global-pool mode, per-port-pool mode showed slightly
> > lower performance in single core test.
> >
> > That was my thought too - for the case when queues from multiple ports
> > are handled by the same core it probably would only slowdown things.
> 
> Thanks for your comments.
> 
> This is applicable for cases where separate cores can handle separate ports -
> each with their pools. (somehow I felt that message in commit was adequate
> - I can rephrase if that is misleading)
> 
> In case there is enough number of cores available for datapath, such
> segregation can result in better performance - possibly because of drop in
> pool and cache conflicts.
> At least on some of NXP SoC, this resulted in over 15% improvement.
> And, in other cases it didn't lead to any drop/negative-impact.
> 
> > Wonder what is the use case for the patch and what is the performance
> > gain you observed?
> 
> For hardware backed pools, hardware access and exclusion are expensive. By
> segregating pool/port/lcores it is possible to attain a conflict free path. This is
> the use-case this patch targets.
> And anyways, this is an optional feature.
> 
> > Konstantin
> >
> > > In dual core test, both modes had nearly same performance.
> 
> OK
> 
> > >
> > > My setup only has two ports which is limited.
> > > Just want to know the per-port-pool mode has more performance gain
> > when many ports are bound to  different cores?
> 
> Yes, though not necessarily *many* - in my case, I had 4 ports and even then
> about ~10% improvement was directly visible. I increased the port count and
> I was able to touch about ~15%. I did pin each port to a separate core, though.
> But again, important point is that without this feature enabled, I didn't see
> any drop in performance. Did you observe any drop?
> 

No, no drop without the feature enabled in my test.

> > >
> > > Used commands:
> > > sudo ./examples/l3fwd/build/l3fwd -c 0x4 -w 0000:01:00.0 -w
> > 0000:01:00.1 -- -P -p 3 --config='(0,0,2),(1,0,2)' --per-port-pool
> > > sudo ./examples/l3fwd/build/l3fwd -c 0xc -w 0000:01:00.0 -w
> > 0000:01:00.1 -- -P -p 3 --config='(0,0,2),(1,0,3)' --per-port-pool
> > >
> > > Regards,
> > > /Ruifeng
> > >
> 
> [...]
  
Ananyev, Konstantin April 15, 2019, 12:05 p.m. UTC | #6
Hi Shreyansh,

> > > I tried this patch on MacchiatoBin + 82599 NIC.
> > > Compared with global-pool mode, per-port-pool mode showed slightly
> > lower performance in single core test.
> >
> > That was my thought too - for the case when queues from multiple ports
> > are handled by the same core
> > it probably would only slowdown things.
> 
> Thanks for your comments.
> 
> This is applicable for cases where separate cores can handle separate ports - each with their pools. (somehow I felt that message in commit
> was adequate - I can rephrase if that is misleading)
> 
> In case there is enough number of cores available for datapath, such segregation can result in better performance - possibly because of
> drop in pool and cache conflicts.
> At least on some of NXP SoC, this resulted in over 15% improvement.
> And, in other cases it didn't lead to any drop/negative-impact.

If each core manages just one port, then yes definitely performance increase is expected.
If that's the case you'd like enable, then can I suggest to have mempool per lcore not per port?
I think it would be plausible for both cases:
- one port per core (your case).
- multiple ports per core.   
Konstantin

> 
> > Wonder what is the use case for the patch and what is the performance
> > gain you observed?
> 
> For hardware backed pools, hardware access and exclusion are expensive. By segregating pool/port/lcores it is possible to attain a conflict
> free path. This is the use-case this patch targets.
> And anyways, this is an optional feature.
> 
> > Konstantin
> >
> > > In dual core test, both modes had nearly same performance.
> 
> OK
> 
> > >
> > > My setup only has two ports which is limited.
> > > Just want to know the per-port-pool mode has more performance gain
> > when many ports are bound to  different cores?
> 
> Yes, though not necessarily *many* - in my case, I had 4 ports and even then about ~10% improvement was directly visible. I increased the
> port count and I was able to touch about ~15%. I did pin each port to a separate core, though.
> But again, important point is that without this feature enabled, I didn't see any drop in performance. Did you observe any drop?
> 
> > >
> > > Used commands:
> > > sudo ./examples/l3fwd/build/l3fwd -c 0x4 -w 0000:01:00.0 -w
> > 0000:01:00.1 -- -P -p 3 --config='(0,0,2),(1,0,2)' --per-port-pool
> > > sudo ./examples/l3fwd/build/l3fwd -c 0xc -w 0000:01:00.0 -w
> > 0000:01:00.1 -- -P -p 3 --config='(0,0,2),(1,0,3)' --per-port-pool
> > >
> > > Regards,
> > > /Ruifeng
> > >
> 
> [...]
  

Patch

diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
index e4b99efe0..7b9683187 100644
--- a/examples/l3fwd/main.c
+++ b/examples/l3fwd/main.c
@@ -69,11 +69,13 @@  static int promiscuous_on;
 static int l3fwd_lpm_on;
 static int l3fwd_em_on;
 
+/* Global variables. */
+
 static int numa_on = 1; /**< NUMA is enabled by default. */
 static int parse_ptype; /**< Parse packet type using rx callback, and */
 			/**< disabled by default */
-
-/* Global variables. */
+static int per_port_pool; /**< Use separate buffer pools per port; disabled */
+			  /**< by default */
 
 volatile bool force_quit;
 
@@ -133,7 +135,8 @@  static struct rte_eth_conf port_conf = {
 	},
 };
 
-static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
+static struct rte_mempool *pktmbuf_pool[RTE_MAX_ETHPORTS][NB_SOCKETS];
+static uint8_t lkp_per_socket[NB_SOCKETS];
 
 struct l3fwd_lkp_mode {
 	void  (*setup)(int);
@@ -285,7 +288,8 @@  print_usage(const char *prgname)
 		" [--no-numa]"
 		" [--hash-entry-num]"
 		" [--ipv6]"
-		" [--parse-ptype]\n\n"
+		" [--parse-ptype]"
+		" [--per-port-pool]\n\n"
 
 		"  -p PORTMASK: Hexadecimal bitmask of ports to configure\n"
 		"  -P : Enable promiscuous mode\n"
@@ -299,7 +303,8 @@  print_usage(const char *prgname)
 		"  --no-numa: Disable numa awareness\n"
 		"  --hash-entry-num: Specify the hash entry number in hexadecimal to be setup\n"
 		"  --ipv6: Set if running ipv6 packets\n"
-		"  --parse-ptype: Set to use software to analyze packet type\n\n",
+		"  --parse-ptype: Set to use software to analyze packet type\n"
+		"  --per-port-pool: Use separate buffer pool per port\n\n",
 		prgname);
 }
 
@@ -452,6 +457,7 @@  static const char short_options[] =
 #define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
 #define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
+#define CMD_LINE_OPT_PER_PORT_POOL "per-port-pool"
 enum {
 	/* long options mapped to a short option */
 
@@ -465,6 +471,7 @@  enum {
 	CMD_LINE_OPT_ENABLE_JUMBO_NUM,
 	CMD_LINE_OPT_HASH_ENTRY_NUM_NUM,
 	CMD_LINE_OPT_PARSE_PTYPE_NUM,
+	CMD_LINE_OPT_PARSE_PER_PORT_POOL,
 };
 
 static const struct option lgopts[] = {
@@ -475,6 +482,7 @@  static const struct option lgopts[] = {
 	{CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, CMD_LINE_OPT_ENABLE_JUMBO_NUM},
 	{CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, CMD_LINE_OPT_HASH_ENTRY_NUM_NUM},
 	{CMD_LINE_OPT_PARSE_PTYPE, 0, 0, CMD_LINE_OPT_PARSE_PTYPE_NUM},
+	{CMD_LINE_OPT_PER_PORT_POOL, 0, 0, CMD_LINE_OPT_PARSE_PER_PORT_POOL},
 	{NULL, 0, 0, 0}
 };
 
@@ -485,10 +493,10 @@  static const struct option lgopts[] = {
  * RTE_MAX is used to ensure that NB_MBUF never goes below a minimum
  * value of 8192
  */
-#define NB_MBUF RTE_MAX(	\
-	(nb_ports*nb_rx_queue*nb_rxd +		\
-	nb_ports*nb_lcores*MAX_PKT_BURST +	\
-	nb_ports*n_tx_queue*nb_txd +		\
+#define NB_MBUF(nports) RTE_MAX(	\
+	(nports*nb_rx_queue*nb_rxd +		\
+	nports*nb_lcores*MAX_PKT_BURST +	\
+	nports*n_tx_queue*nb_txd +		\
 	nb_lcores*MEMPOOL_CACHE_SIZE),		\
 	(unsigned)8192)
 
@@ -594,6 +602,11 @@  parse_args(int argc, char **argv)
 			parse_ptype = 1;
 			break;
 
+		case CMD_LINE_OPT_PARSE_PER_PORT_POOL:
+			printf("per port buffer pool is enabled\n");
+			per_port_pool = 1;
+			break;
+
 		default:
 			print_usage(prgname);
 			return -1;
@@ -642,7 +655,7 @@  print_ethaddr(const char *name, const struct ether_addr *eth_addr)
 }
 
 static int
-init_mem(unsigned nb_mbuf)
+init_mem(uint16_t portid, unsigned int nb_mbuf)
 {
 	struct lcore_conf *qconf;
 	int socketid;
@@ -664,13 +677,14 @@  init_mem(unsigned nb_mbuf)
 				socketid, lcore_id, NB_SOCKETS);
 		}
 
-		if (pktmbuf_pool[socketid] == NULL) {
-			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
-			pktmbuf_pool[socketid] =
+		if (pktmbuf_pool[portid][socketid] == NULL) {
+			snprintf(s, sizeof(s), "mbuf_pool_%d:%d",
+				 portid, socketid);
+			pktmbuf_pool[portid][socketid] =
 				rte_pktmbuf_pool_create(s, nb_mbuf,
 					MEMPOOL_CACHE_SIZE, 0,
 					RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
-			if (pktmbuf_pool[socketid] == NULL)
+			if (pktmbuf_pool[portid][socketid] == NULL)
 				rte_exit(EXIT_FAILURE,
 					"Cannot init mbuf pool on socket %d\n",
 					socketid);
@@ -678,8 +692,13 @@  init_mem(unsigned nb_mbuf)
 				printf("Allocated mbuf pool on socket %d\n",
 					socketid);
 
-			/* Setup either LPM or EM(f.e Hash).  */
-			l3fwd_lkp.setup(socketid);
+			/* Setup either LPM or EM(f.e Hash). But, only once per
+			 * available socket.
+			 */
+			if (!lkp_per_socket[socketid]) {
+				l3fwd_lkp.setup(socketid);
+				lkp_per_socket[socketid] = 1;
+			}
 		}
 		qconf = &lcore_conf[lcore_id];
 		qconf->ipv4_lookup_struct =
@@ -899,7 +918,14 @@  main(int argc, char **argv)
 			(struct ether_addr *)(val_eth + portid) + 1);
 
 		/* init memory */
-		ret = init_mem(NB_MBUF);
+		if (!per_port_pool) {
+			/* portid = 0; this is *not* signifying the first port,
+			 * rather, it signifies that portid is ignored.
+			 */
+			ret = init_mem(0, NB_MBUF(nb_ports));
+		} else {
+			ret = init_mem(portid, NB_MBUF(1));
+		}
 		if (ret < 0)
 			rte_exit(EXIT_FAILURE, "init_mem failed\n");
 
@@ -966,10 +992,16 @@  main(int argc, char **argv)
 			rte_eth_dev_info_get(portid, &dev_info);
 			rxq_conf = dev_info.default_rxconf;
 			rxq_conf.offloads = conf->rxmode.offloads;
-			ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
-					socketid,
-					&rxq_conf,
-					pktmbuf_pool[socketid]);
+			if (!per_port_pool)
+				ret = rte_eth_rx_queue_setup(portid, queueid,
+						nb_rxd, socketid,
+						&rxq_conf,
+						pktmbuf_pool[0][socketid]);
+			else
+				ret = rte_eth_rx_queue_setup(portid, queueid,
+						nb_rxd, socketid,
+						&rxq_conf,
+						pktmbuf_pool[portid][socketid]);
 			if (ret < 0)
 				rte_exit(EXIT_FAILURE,
 				"rte_eth_rx_queue_setup: err=%d, port=%d\n",