[v2,3/3] net/ark: support chunk DMA transfers

Message ID 20220211113935.303366-3-john.miller@atomicrules.com (mailing list archive)
State Changes Requested, archived
Delegated to: Ferruh Yigit
Series [v2,1/3] net/ark: add device capabilities record

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/iol-broadcom-Functional success Functional Testing PASS
ci/intel-Testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/github-robot: build success github build: passed
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS

Commit Message

John Miller Feb. 11, 2022, 11:39 a.m. UTC
Add support for chunk DMA transfers, along with various performance
optimizations and behavior fixes:
- Chunk MPU transfers use 64 objects (512 bytes) to maintain memory
  read alignment.
- Align MPU memory allocations to 512-byte boundaries.
- Reduce the force-close allocation from 100000 objects to 64 objects.
- Add memory write barriers to the read and wait status functions
  in the DDM, UDM and MPU.
- Update configuration status for the internal packet checker and
  generator.

Signed-off-by: John Miller <john.miller@atomicrules.com>

---
v2:
- Added more details to the git log.
---
 drivers/net/ark/ark_ddm.c       |  1 +
 drivers/net/ark/ark_ethdev_rx.c | 16 +++++++++-------
 drivers/net/ark/ark_mpu.c       |  1 +
 drivers/net/ark/ark_pktchkr.c   |  2 +-
 drivers/net/ark/ark_pktgen.c    |  2 +-
 drivers/net/ark/ark_udm.c       |  3 +++
 6 files changed, 16 insertions(+), 9 deletions(-)
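
To make the chunk arithmetic easier to follow, here is a small standalone
sketch. It is not part of the patch: the queue context is heavily
simplified, struct rx_queue_sketch and the helper names are illustrative,
and only the ARK_RX_MPU_CHUNK constant and field names mirror the diff.
It shows how the 64-object chunk (64 pointers x 8 bytes = 512 bytes)
drives both the seeding threshold and the rounding of the consumer index
down to a chunk boundary.

    #include <stdint.h>
    #include <stdio.h>

    #define ARK_RX_MPU_CHUNK (64U)  /* 64 objects of 8 bytes = one 512-byte aligned MPU read */

    /* Simplified stand-ins for the ark_rx_queue fields the patch touches. */
    struct rx_queue_sketch {
    	uint32_t queue_size;    /* ring size, a power of two */
    	uint32_t seed_index;    /* producer side: next slot to seed with an mbuf */
    	uint32_t cons_index;    /* consumer side: last completed descriptor */
    };

    /* Seed only when at least one full chunk of free slots is available,
     * mirroring the check in eth_ark_rx_update_cons_index() after the patch. */
    static int chunk_ready(const struct rx_queue_sketch *q)
    {
    	return (q->cons_index + q->queue_size - q->seed_index) >= ARK_RX_MPU_CHUNK;
    }

    /* Round the consumer index down to a chunk boundary before computing the
     * seeding limit, as eth_ark_rx_seed_mbufs() now does, so the MPU is never
     * handed a partially filled 512-byte chunk. */
    static uint32_t chunk_limit(const struct rx_queue_sketch *q)
    {
    	return (q->cons_index & ~(ARK_RX_MPU_CHUNK - 1)) + q->queue_size;
    }

    int main(void)
    {
    	struct rx_queue_sketch q = { .queue_size = 1024, .seed_index = 900, .cons_index = 70 };

    	/* cons_index 70 rounds down to 64, so the limit becomes 64 + 1024 = 1088. */
    	printf("chunk ready = %d, seed limit = %u\n", chunk_ready(&q), chunk_limit(&q));
    	return 0;
    }

The write barriers added in ark_ddm.c, ark_mpu.c and ark_udm.c all follow
the same store-then-poll pattern. A minimal sketch of that pattern is
below; the register struct is an assumption reduced to the two fields the
diff touches, and while the real driver calls rte_wmb(), a compiler
builtin full barrier stands in here so the sketch builds without DPDK
headers.

    #include <stdint.h>

    /* rte_wmb() in the real driver; a builtin full barrier stands in here
     * so this sketch compiles without DPDK. */
    #define sketch_wmb()	__sync_synchronize()

    /* Assumed, reduced view of a DDM/UDM-style command block: a command
     * register the host writes and a status flag the hardware raises. */
    struct engine_regs {
    	volatile uint32_t command;
    	volatile uint32_t stop_flushed;
    };

    /* Issue the stop command (2), make the store visible before polling the
     * status flag, and bound the wait, mirroring ark_ddm_stop() and
     * ark_udm_stop() after the patch. */
    static int engine_stop(struct engine_regs *regs)
    {
    	int cnt = 0;

    	regs->command = 2;
    	sketch_wmb();	/* order the command write ahead of the status reads */

    	while ((regs->stop_flushed & 0x01) == 0) {
    		if (cnt++ > 1000)
    			return 1;	/* give up: hardware never reported flushed */
    	}
    	return 0;
    }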
  

Comments

Ferruh Yigit Feb. 14, 2022, 1:59 p.m. UTC | #1
On 2/11/2022 11:39 AM, John Miller wrote:
> Add support for chunk DMA transfers, along with various performance
> optimizations and behavior fixes:
> - Chunk MPU transfers use 64 objects (512 bytes) to maintain memory
>   read alignment.
> - Align MPU memory allocations to 512-byte boundaries.
> - Reduce the force-close allocation from 100000 objects to 64 objects.
> - Add memory write barriers to the read and wait status functions
>   in the DDM, UDM and MPU.
> - Update configuration status for the internal packet checker and
>   generator.
> 

Hi John,

When all of the above is done in the same patch, it is harder to
tell which change below corresponds to which item listed above.
That will be a challenge for whoever needs to debug the code and
reads this commit to figure it out.

As far as I understand, these 'various performance optimizations'
are independent updates; can you split them into their own
patches?
Each independent and logically separate optimization can go
into a separate patch.

Thanks,
ferruh

  

Patch

diff --git a/drivers/net/ark/ark_ddm.c b/drivers/net/ark/ark_ddm.c
index 2321371572..b16c739d50 100644
--- a/drivers/net/ark/ark_ddm.c
+++ b/drivers/net/ark/ark_ddm.c
@@ -55,6 +55,7 @@  ark_ddm_stop(struct ark_ddm_t *ddm, const int wait)
 	int cnt = 0;
 
 	ddm->cfg.command = 2;
+	rte_wmb();
 	while (wait && (ddm->cfg.stop_flushed & 0x01) == 0) {
 		if (cnt++ > 1000)
 			return 1;
diff --git a/drivers/net/ark/ark_ethdev_rx.c b/drivers/net/ark/ark_ethdev_rx.c
index 1000f50be0..49134ea08f 100644
--- a/drivers/net/ark/ark_ethdev_rx.c
+++ b/drivers/net/ark/ark_ethdev_rx.c
@@ -12,6 +12,7 @@ 
 
 #define ARK_RX_META_SIZE 32
 #define ARK_RX_META_OFFSET (RTE_PKTMBUF_HEADROOM - ARK_RX_META_SIZE)
+#define ARK_RX_MPU_CHUNK (64U)
 
 /* Forward declarations */
 struct ark_rx_queue;
@@ -104,7 +105,7 @@  static inline void
 eth_ark_rx_update_cons_index(struct ark_rx_queue *queue, uint32_t cons_index)
 {
 	queue->cons_index = cons_index;
-	if ((cons_index + queue->queue_size - queue->seed_index) >= 64U) {
+	if ((cons_index + queue->queue_size - queue->seed_index) >= ARK_RX_MPU_CHUNK) {
 		eth_ark_rx_seed_mbufs(queue);
 		ark_mpu_set_producer(queue->mpu, queue->seed_index);
 	}
@@ -179,12 +180,12 @@  eth_ark_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	queue->reserve_q =
 		rte_zmalloc_socket("Ark_rx_queue mbuf",
 				   nb_desc * sizeof(struct rte_mbuf *),
-				   64,
+				   512,
 				   socket_id);
 	queue->paddress_q =
 		rte_zmalloc_socket("Ark_rx_queue paddr",
 				   nb_desc * sizeof(rte_iova_t),
-				   64,
+				   512,
 				   socket_id);
 
 	if (queue->reserve_q == 0 || queue->paddress_q == 0) {
@@ -455,7 +456,8 @@  eth_ark_rx_stop_queue(struct rte_eth_dev *dev, uint16_t queue_id)
 static inline int
 eth_ark_rx_seed_mbufs(struct ark_rx_queue *queue)
 {
-	uint32_t limit = queue->cons_index + queue->queue_size;
+	uint32_t limit = (queue->cons_index & ~(ARK_RX_MPU_CHUNK - 1)) +
+		queue->queue_size;
 	uint32_t seed_index = queue->seed_index;
 
 	uint32_t count = 0;
@@ -618,14 +620,14 @@  eth_ark_udm_force_close(struct rte_eth_dev *dev)
 
 			ark_mpu_start(queue->mpu);
 			/* Add some buffers */
-			index = 100000 + queue->seed_index;
+			index = ARK_RX_MPU_CHUNK + queue->seed_index;
 			ark_mpu_set_producer(queue->mpu, index);
 		}
 		/* Wait to allow data to pass */
 		usleep(100);
 
-		ARK_PMD_LOG(DEBUG, "UDM forced flush attempt, stopped = %d\n",
-				ark_udm_is_flushed(ark->udm.v));
+		ARK_PMD_LOG(NOTICE, "UDM forced flush attempt, stopped = %d\n",
+			    ark_udm_is_flushed(ark->udm.v));
 	}
 	ark_udm_reset(ark->udm.v);
 }
diff --git a/drivers/net/ark/ark_mpu.c b/drivers/net/ark/ark_mpu.c
index 8160c1de7b..b8e94b6ed3 100644
--- a/drivers/net/ark/ark_mpu.c
+++ b/drivers/net/ark/ark_mpu.c
@@ -68,6 +68,7 @@  ark_mpu_reset(struct ark_mpu_t *mpu)
 	int cnt = 0;
 
 	mpu->cfg.command = MPU_CMD_RESET;
+	rte_wmb();
 
 	while (mpu->cfg.command != MPU_CMD_IDLE) {
 		if (cnt++ > 1000)
diff --git a/drivers/net/ark/ark_pktchkr.c b/drivers/net/ark/ark_pktchkr.c
index 84bb567a41..12a5abb2f7 100644
--- a/drivers/net/ark/ark_pktchkr.c
+++ b/drivers/net/ark/ark_pktchkr.c
@@ -113,7 +113,7 @@  ark_pktchkr_stopped(ark_pkt_chkr_t handle)
 	struct ark_pkt_chkr_inst *inst = (struct ark_pkt_chkr_inst *)handle;
 	uint32_t r = inst->sregs->pkt_start_stop;
 
-	return (((r >> 16) & 1) == 1);
+	return (((r >> 16) & 1) == 1) || (r == 0);
 }
 
 void
diff --git a/drivers/net/ark/ark_pktgen.c b/drivers/net/ark/ark_pktgen.c
index 515bfe461c..6195ef997f 100644
--- a/drivers/net/ark/ark_pktgen.c
+++ b/drivers/net/ark/ark_pktgen.c
@@ -107,7 +107,7 @@  ark_pktgen_paused(ark_pkt_gen_t handle)
 	struct ark_pkt_gen_inst *inst = (struct ark_pkt_gen_inst *)handle;
 	uint32_t r = inst->regs->pkt_start_stop;
 
-	return (((r >> 16) & 1) == 1);
+	return (((r >> 24) & 1) == 1) || (((r >> 16) & 1) == 1)  || (r == 0);
 }
 
 void
diff --git a/drivers/net/ark/ark_udm.c b/drivers/net/ark/ark_udm.c
index 28c4500a2c..9ebed89627 100644
--- a/drivers/net/ark/ark_udm.c
+++ b/drivers/net/ark/ark_udm.c
@@ -33,7 +33,9 @@  ark_udm_stop(struct ark_udm_t *udm, const int wait)
 {
 	int cnt = 0;
 
+	udm->setup.r0 = 0;
 	udm->cfg.command = 2;
+	rte_wmb();
 
 	while (wait && (udm->cfg.stop_flushed & 0x01) == 0) {
 		if (cnt++ > 1000)
@@ -70,6 +72,7 @@  ark_udm_reset(struct ark_udm_t *udm)
 void
 ark_udm_start(struct ark_udm_t *udm)
 {
+	udm->setup.r0 = 0x100;
 	udm->cfg.command = 1;
 }