From patchwork Tue Feb 7 06:38:58 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wenzhuo Lu X-Patchwork-Id: 123205 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 54FF741C2C; Tue, 7 Feb 2023 08:15:13 +0100 (CET) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id D3DB242B7E; Tue, 7 Feb 2023 08:15:08 +0100 (CET) Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by mails.dpdk.org (Postfix) with ESMTP id A8DEB40EF0 for ; Tue, 7 Feb 2023 08:15:05 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1675754105; x=1707290105; h=from:to:cc:subject:date:message-id; bh=Y0158YonYsCW49KIs8nv1lrz9PEu7HSTqY2mTSbFDR4=; b=DaEPXJxEzmQUhiqnljZmR4aM/0CiMQufm2fKvDV/E4ncJlY+yL+pNr8V ZQK3U4jJ3uEtY5MAAAZn2dmtfi2PO/mVNugm+/z8b9gmE4GIXzO0Yd4ZB xiTUSfUTur4G8DONVtaJ7JCK6bm5CByDSeSUsQavtyqITzoFlI1v1cjGn mF7cuLIJCx2kbyJIwaHYYTGT3OtuiWNx467+nijOg6m5KFwaFzbffKDjj ro0zMNJMMc2azUTsdzPhcyMm7DR/DxNwsnCKvdNgEI3gcZY3DYFW7bOc7 API+cpy7xY0ADTpA45lS8tjqjqm0ptn+clkY0Scp26BZWb1TXs/Exv9kp Q==; X-IronPort-AV: E=McAfee;i="6500,9779,10613"; a="356797019" X-IronPort-AV: E=Sophos;i="5.97,278,1669104000"; d="scan'208";a="356797019" Received: from fmsmga007.fm.intel.com ([10.253.24.52]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 06 Feb 2023 23:14:59 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6500,9779,10613"; a="668688595" X-IronPort-AV: E=Sophos;i="5.97,278,1669104000"; d="scan'208";a="668688595" Received: from dpdk-wenzhuo-cascadelake.sh.intel.com ([10.67.110.255]) by fmsmga007.fm.intel.com with ESMTP; 06 Feb 2023 23:14:57 -0800 From: Wenzhuo Lu To: dev@dpdk.org Cc: Wenzhuo Lu Subject: [PATCH] net/i40e: remove avx512 specific Rx queue rearm code Date: Tue, 7 Feb 2023 14:38:58 +0800 Message-Id: <1675751941-25662-1-git-send-email-wenzhuo.lu@intel.com> X-Mailer: git-send-email 1.8.3.1 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org 'i40e_rxq_rearm' in avx512 path is optimized to improve the performance. But after the commit a2833ecc5ea4 ("mempool: fix get objects from mempool with cache"), this avx512 specific optimization is not necessary. This patch remove the unnecessary PMD specific optimization to make the code easier to maintain and get the benefit from the enhancement of common lib. Reported-by: Haijun Chu Signed-off-by: Wenzhuo Lu --- drivers/net/i40e/i40e_rxtx_vec_avx512.c | 125 +----------------------- 1 file changed, 1 insertion(+), 124 deletions(-) diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c index 60c97d5331..d3c7bfd121 100644 --- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c +++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c @@ -24,130 +24,7 @@ static __rte_always_inline void i40e_rxq_rearm(struct i40e_rx_queue *rxq) { - int i; - uint16_t rx_id; - volatile union i40e_rx_desc *rxdp; - struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start]; - struct rte_mempool_cache *cache = rte_mempool_default_cache(rxq->mp, - rte_lcore_id()); - - rxdp = rxq->rx_ring + rxq->rxrearm_start; - - if (unlikely(!cache)) - return i40e_rxq_rearm_common(rxq, true); - - /* We need to pull 'n' more MBUFs into the software ring from mempool - * We inline the mempool function here, so we can vectorize the copy - * from the cache into the shadow ring. - */ - - if (cache->len < RTE_I40E_RXQ_REARM_THRESH) { - /* No. Backfill the cache first, and then fill from it */ - uint32_t req = RTE_I40E_RXQ_REARM_THRESH + (cache->size - - cache->len); - - /* How many do we require - * i.e. number to fill the cache + the request - */ - int ret = rte_mempool_ops_dequeue_bulk(rxq->mp, - &cache->objs[cache->len], req); - if (ret == 0) { - cache->len += req; - } else { - if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >= - rxq->nb_rx_desc) { - __m128i dma_addr0; - - dma_addr0 = _mm_setzero_si128(); - for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) { - rxep[i].mbuf = &rxq->fake_mbuf; - _mm_store_si128 - ((__m128i *)&rxdp[i].read, - dma_addr0); - } - } - rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += - RTE_I40E_RXQ_REARM_THRESH; - return; - } - } - - const __m512i iova_offsets = _mm512_set1_epi64 - (offsetof(struct rte_mbuf, buf_iova)); - const __m512i headroom = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM); - -#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC - /* to shuffle the addresses to correct slots. Values 4-7 will contain - * zeros, so use 7 for a zero-value. - */ - const __m512i permute_idx = _mm512_set_epi64(7, 7, 3, 1, 7, 7, 2, 0); -#else - const __m512i permute_idx = _mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0); -#endif - - /* Initialize the mbufs in vector, process 8 mbufs in one loop, taking - * from mempool cache and populating both shadow and HW rings - */ - for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH / 8; i++) { - const __m512i mbuf_ptrs = _mm512_loadu_si512 - (&cache->objs[cache->len - 8]); - _mm512_store_si512(rxep, mbuf_ptrs); - - /* gather iova of mbuf0-7 into one zmm reg */ - const __m512i iova_base_addrs = _mm512_i64gather_epi64 - (_mm512_add_epi64(mbuf_ptrs, iova_offsets), - 0, /* base */ - 1 /* scale */); - const __m512i iova_addrs = _mm512_add_epi64(iova_base_addrs, - headroom); -#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC - const __m512i iovas0 = _mm512_castsi256_si512 - (_mm512_extracti64x4_epi64(iova_addrs, 0)); - const __m512i iovas1 = _mm512_castsi256_si512 - (_mm512_extracti64x4_epi64(iova_addrs, 1)); - - /* permute leaves desc 2-3 addresses in header address slots 0-1 - * but these are ignored by driver since header split not - * enabled. Similarly for desc 4 & 5. - */ - const __m512i desc_rd_0_1 = _mm512_permutexvar_epi64 - (permute_idx, iovas0); - const __m512i desc_rd_2_3 = _mm512_bsrli_epi128(desc_rd_0_1, 8); - - const __m512i desc_rd_4_5 = _mm512_permutexvar_epi64 - (permute_idx, iovas1); - const __m512i desc_rd_6_7 = _mm512_bsrli_epi128(desc_rd_4_5, 8); - - _mm512_store_si512((void *)rxdp, desc_rd_0_1); - _mm512_store_si512((void *)(rxdp + 2), desc_rd_2_3); - _mm512_store_si512((void *)(rxdp + 4), desc_rd_4_5); - _mm512_store_si512((void *)(rxdp + 6), desc_rd_6_7); -#else - /* permute leaves desc 4-7 addresses in header address slots 0-3 - * but these are ignored by driver since header split not - * enabled. - */ - const __m512i desc_rd_0_3 = _mm512_permutexvar_epi64 - (permute_idx, iova_addrs); - const __m512i desc_rd_4_7 = _mm512_bsrli_epi128(desc_rd_0_3, 8); - - _mm512_store_si512((void *)rxdp, desc_rd_0_3); - _mm512_store_si512((void *)(rxdp + 4), desc_rd_4_7); -#endif - rxep += 8, rxdp += 8, cache->len -= 8; - } - - rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH; - if (rxq->rxrearm_start >= rxq->nb_rx_desc) - rxq->rxrearm_start = 0; - - rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH; - - rx_id = (uint16_t)((rxq->rxrearm_start == 0) ? - (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); - - /* Update the tail pointer on the NIC */ - I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id); + return i40e_rxq_rearm_common(rxq, true); } #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC