From patchwork Fri Feb 3 09:43:34 2023
X-Patchwork-Submitter: "Xing, Beilei"
X-Patchwork-Id: 123029
X-Patchwork-Delegate: qi.z.zhang@intel.com
From: beilei.xing@intel.com
To: jingjing.wu@intel.com
Cc: dev@dpdk.org, qi.z.zhang@intel.com, Beilei Xing , Mingxia Liu
Subject: [PATCH v6 13/19] common/idpf: add Rx and Tx data path
Date: Fri, 3 Feb 2023 09:43:34 +0000
Message-Id: <20230203094340.8103-14-beilei.xing@intel.com>
In-Reply-To: <20230203094340.8103-1-beilei.xing@intel.com>
References: <20230202095357.37929-1-beilei.xing@intel.com> <20230203094340.8103-1-beilei.xing@intel.com>

From: Beilei Xing

Add a timestamp field to the idpf_adapter structure. Move the scalar Rx/Tx data paths for both the single queue and split queue models to the common module.
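
For illustration only (not part of this patch), a PMD built on the common
module could hook the exported scalar burst functions into its ethdev
callbacks based on the adapter's queue model; the helper name and include
paths below are hypothetical:

#include <ethdev_driver.h>

#include "idpf_common_device.h"
#include "idpf_common_rxtx.h"

/* Hypothetical helper, shown only as a usage sketch: per idpf_adapter,
 * a queue model of 0 means split queue and non-zero means single queue.
 */
static void
example_select_scalar_data_path(struct rte_eth_dev *dev,
				struct idpf_adapter *ad)
{
	dev->rx_pkt_burst = (ad->rxq_model == 0) ?
			    idpf_splitq_recv_pkts : idpf_singleq_recv_pkts;
	dev->tx_pkt_burst = (ad->txq_model == 0) ?
			    idpf_splitq_xmit_pkts : idpf_singleq_xmit_pkts;
	/* Tx prepare (segment count, TSO and offload sanity checks) is
	 * shared by both queue models.
	 */
	dev->tx_pkt_prepare = idpf_prep_pkts;
}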
Signed-off-by: Mingxia Liu Signed-off-by: Beilei Xing --- drivers/common/idpf/idpf_common_device.h | 5 + drivers/common/idpf/idpf_common_logs.h | 24 + drivers/common/idpf/idpf_common_rxtx.c | 987 +++++++++++++++++++++++ drivers/common/idpf/idpf_common_rxtx.h | 89 +- drivers/common/idpf/version.map | 6 + drivers/net/idpf/idpf_ethdev.c | 2 - drivers/net/idpf/idpf_ethdev.h | 4 - drivers/net/idpf/idpf_logs.h | 24 - drivers/net/idpf/idpf_rxtx.c | 937 +-------------------- drivers/net/idpf/idpf_rxtx.h | 132 --- drivers/net/idpf/idpf_rxtx_vec_avx512.c | 8 +- 11 files changed, 1115 insertions(+), 1103 deletions(-) diff --git a/drivers/common/idpf/idpf_common_device.h b/drivers/common/idpf/idpf_common_device.h index 4895f5f360..573852ff75 100644 --- a/drivers/common/idpf/idpf_common_device.h +++ b/drivers/common/idpf/idpf_common_device.h @@ -23,6 +23,8 @@ #define IDPF_TX_COMPLQ_PER_GRP 1 #define IDPF_TXQ_PER_GRP 1 +#define IDPF_MIN_FRAME_SIZE 14 + #define IDPF_MAX_PKT_TYPE 1024 #define IDPF_DFLT_INTERVAL 16 @@ -43,6 +45,9 @@ struct idpf_adapter { uint32_t txq_model; /* 0 - split queue model, non-0 - single queue model */ uint32_t rxq_model; /* 0 - split queue model, non-0 - single queue model */ + + /* For timestamp */ + uint64_t time_hw; }; struct idpf_chunks_info { diff --git a/drivers/common/idpf/idpf_common_logs.h b/drivers/common/idpf/idpf_common_logs.h index fe36562769..63ad2195be 100644 --- a/drivers/common/idpf/idpf_common_logs.h +++ b/drivers/common/idpf/idpf_common_logs.h @@ -20,4 +20,28 @@ extern int idpf_common_logtype; #define DRV_LOG(level, fmt, args...) \ DRV_LOG_RAW(level, fmt "\n", ## args) +#ifdef RTE_LIBRTE_IDPF_DEBUG_RX +#define RX_LOG(level, ...) \ + RTE_LOG(level, \ + PMD, \ + RTE_FMT("%s(): " \ + RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ + __func__, \ + RTE_FMT_TAIL(__VA_ARGS__,))) +#else +#define RX_LOG(level, fmt, args...) do { } while (0) +#endif + +#ifdef RTE_LIBRTE_IDPF_DEBUG_TX +#define TX_LOG(level, ...) \ + RTE_LOG(level, \ + PMD, \ + RTE_FMT("%s(): " \ + RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ + __func__, \ + RTE_FMT_TAIL(__VA_ARGS__,))) +#else +#define TX_LOG(level, fmt, args...) do { } while (0) +#endif + #endif /* _IDPF_COMMON_LOGS_H_ */ diff --git a/drivers/common/idpf/idpf_common_rxtx.c b/drivers/common/idpf/idpf_common_rxtx.c index eeeeedca88..459057f20e 100644 --- a/drivers/common/idpf/idpf_common_rxtx.c +++ b/drivers/common/idpf/idpf_common_rxtx.c @@ -3,8 +3,13 @@ */ #include +#include + #include "idpf_common_rxtx.h" +int idpf_timestamp_dynfield_offset = -1; +uint64_t idpf_timestamp_dynflag; + int idpf_check_rx_thresh(uint16_t nb_desc, uint16_t thresh) { @@ -337,6 +342,23 @@ idpf_tx_queue_release(void *txq) rte_free(q); } +int +idpf_register_ts_mbuf(struct idpf_rx_queue *rxq) +{ + int err; + if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) { + /* Register mbuf field and flag for Rx timestamp */ + err = rte_mbuf_dyn_rx_timestamp_register(&idpf_timestamp_dynfield_offset, + &idpf_timestamp_dynflag); + if (err != 0) { + DRV_LOG(ERR, + "Cannot register mbuf field/flag for timestamp"); + return -EINVAL; + } + } + return 0; +} + int idpf_alloc_single_rxq_mbufs(struct idpf_rx_queue *rxq) { @@ -412,3 +434,968 @@ idpf_alloc_split_rxq_mbufs(struct idpf_rx_queue *rxq) return 0; } + +#define IDPF_TIMESYNC_REG_WRAP_GUARD_BAND 10000 +/* Helper function to convert a 32b nanoseconds timestamp to 64b. 
*/ +static inline uint64_t +idpf_tstamp_convert_32b_64b(struct idpf_adapter *ad, uint32_t flag, + uint32_t in_timestamp) +{ +#ifdef RTE_ARCH_X86_64 + struct idpf_hw *hw = &ad->hw; + const uint64_t mask = 0xFFFFFFFF; + uint32_t hi, lo, lo2, delta; + uint64_t ns; + + if (flag != 0) { + IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M); + IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M | + PF_GLTSYN_CMD_SYNC_SHTIME_EN_M); + lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0); + hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0); + /* + * On typical system, the delta between lo and lo2 is ~1000ns, + * so 10000 seems a large-enough but not overly-big guard band. + */ + if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND)) + lo2 = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0); + else + lo2 = lo; + + if (lo2 < lo) { + lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0); + hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0); + } + + ad->time_hw = ((uint64_t)hi << 32) | lo; + } + + delta = (in_timestamp - (uint32_t)(ad->time_hw & mask)); + if (delta > (mask / 2)) { + delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp); + ns = ad->time_hw - delta; + } else { + ns = ad->time_hw + delta; + } + + return ns; +#else /* !RTE_ARCH_X86_64 */ + RTE_SET_USED(ad); + RTE_SET_USED(flag); + RTE_SET_USED(in_timestamp); + return 0; +#endif /* RTE_ARCH_X86_64 */ +} + +#define IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S \ + (RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S) | \ + RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S) | \ + RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S) | \ + RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S)) + +static inline uint64_t +idpf_splitq_rx_csum_offload(uint8_t err) +{ + uint64_t flags = 0; + + if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_S)) == 0)) + return flags; + + if (likely((err & IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S) == 0)) { + flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD | + RTE_MBUF_F_RX_L4_CKSUM_GOOD); + return flags; + } + + if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S)) != 0)) + flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; + else + flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; + + if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S)) != 0)) + flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; + else + flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; + + if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S)) != 0)) + flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD; + + if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S)) != 0)) + flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD; + else + flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD; + + return flags; +} + +#define IDPF_RX_FLEX_DESC_ADV_HASH1_S 0 +#define IDPF_RX_FLEX_DESC_ADV_HASH2_S 16 +#define IDPF_RX_FLEX_DESC_ADV_HASH3_S 24 + +static inline uint64_t +idpf_splitq_rx_rss_offload(struct rte_mbuf *mb, + volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) +{ + uint8_t status_err0_qw0; + uint64_t flags = 0; + + status_err0_qw0 = rx_desc->status_err0_qw0; + + if ((status_err0_qw0 & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RSS_VALID_S)) != 0) { + flags |= RTE_MBUF_F_RX_RSS_HASH; + mb->hash.rss = (rte_le_to_cpu_16(rx_desc->hash1) << + IDPF_RX_FLEX_DESC_ADV_HASH1_S) | + ((uint32_t)(rx_desc->ff2_mirrid_hash2.hash2) << + IDPF_RX_FLEX_DESC_ADV_HASH2_S) | + ((uint32_t)(rx_desc->hash3) << + IDPF_RX_FLEX_DESC_ADV_HASH3_S); + } + + return flags; +} + +static void +idpf_split_rx_bufq_refill(struct 
idpf_rx_queue *rx_bufq) +{ + volatile struct virtchnl2_splitq_rx_buf_desc *rx_buf_ring; + volatile struct virtchnl2_splitq_rx_buf_desc *rx_buf_desc; + uint16_t nb_refill = rx_bufq->rx_free_thresh; + uint16_t nb_desc = rx_bufq->nb_rx_desc; + uint16_t next_avail = rx_bufq->rx_tail; + struct rte_mbuf *nmb[rx_bufq->rx_free_thresh]; + uint64_t dma_addr; + uint16_t delta; + int i; + + if (rx_bufq->nb_rx_hold < rx_bufq->rx_free_thresh) + return; + + rx_buf_ring = rx_bufq->rx_ring; + delta = nb_desc - next_avail; + if (unlikely(delta < nb_refill)) { + if (likely(rte_pktmbuf_alloc_bulk(rx_bufq->mp, nmb, delta) == 0)) { + for (i = 0; i < delta; i++) { + rx_buf_desc = &rx_buf_ring[next_avail + i]; + rx_bufq->sw_ring[next_avail + i] = nmb[i]; + dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i])); + rx_buf_desc->hdr_addr = 0; + rx_buf_desc->pkt_addr = dma_addr; + } + nb_refill -= delta; + next_avail = 0; + rx_bufq->nb_rx_hold -= delta; + } else { + rte_atomic64_add(&rx_bufq->rx_stats.mbuf_alloc_failed, + nb_desc - next_avail); + RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u", + rx_bufq->port_id, rx_bufq->queue_id); + return; + } + } + + if (nb_desc - next_avail >= nb_refill) { + if (likely(rte_pktmbuf_alloc_bulk(rx_bufq->mp, nmb, nb_refill) == 0)) { + for (i = 0; i < nb_refill; i++) { + rx_buf_desc = &rx_buf_ring[next_avail + i]; + rx_bufq->sw_ring[next_avail + i] = nmb[i]; + dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i])); + rx_buf_desc->hdr_addr = 0; + rx_buf_desc->pkt_addr = dma_addr; + } + next_avail += nb_refill; + rx_bufq->nb_rx_hold -= nb_refill; + } else { + rte_atomic64_add(&rx_bufq->rx_stats.mbuf_alloc_failed, + nb_desc - next_avail); + RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u", + rx_bufq->port_id, rx_bufq->queue_id); + } + } + + IDPF_PCI_REG_WRITE(rx_bufq->qrx_tail, next_avail); + + rx_bufq->rx_tail = next_avail; +} + +uint16_t +idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc_ring; + volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc; + uint16_t pktlen_gen_bufq_id; + struct idpf_rx_queue *rxq; + const uint32_t *ptype_tbl; + uint8_t status_err0_qw1; + struct idpf_adapter *ad; + struct rte_mbuf *rxm; + uint16_t rx_id_bufq1; + uint16_t rx_id_bufq2; + uint64_t pkt_flags; + uint16_t pkt_len; + uint16_t bufq_id; + uint16_t gen_id; + uint16_t rx_id; + uint16_t nb_rx; + uint64_t ts_ns; + + nb_rx = 0; + rxq = rx_queue; + ad = rxq->adapter; + + if (unlikely(rxq == NULL) || unlikely(!rxq->q_started)) + return nb_rx; + + rx_id = rxq->rx_tail; + rx_id_bufq1 = rxq->bufq1->rx_next_avail; + rx_id_bufq2 = rxq->bufq2->rx_next_avail; + rx_desc_ring = rxq->rx_ring; + ptype_tbl = rxq->adapter->ptype_tbl; + + if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) + rxq->hw_register_set = 1; + + while (nb_rx < nb_pkts) { + rx_desc = &rx_desc_ring[rx_id]; + + pktlen_gen_bufq_id = + rte_le_to_cpu_16(rx_desc->pktlen_gen_bufq_id); + gen_id = (pktlen_gen_bufq_id & + VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M) >> + VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S; + if (gen_id != rxq->expected_gen_id) + break; + + pkt_len = (pktlen_gen_bufq_id & + VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M) >> + VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_S; + if (pkt_len == 0) + RX_LOG(ERR, "Packet length is 0"); + + rx_id++; + if (unlikely(rx_id == rxq->nb_rx_desc)) { + rx_id = 0; + rxq->expected_gen_id ^= 1; + } + + bufq_id = (pktlen_gen_bufq_id & + VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M) >> + 
VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S; + if (bufq_id == 0) { + rxm = rxq->bufq1->sw_ring[rx_id_bufq1]; + rx_id_bufq1++; + if (unlikely(rx_id_bufq1 == rxq->bufq1->nb_rx_desc)) + rx_id_bufq1 = 0; + rxq->bufq1->nb_rx_hold++; + } else { + rxm = rxq->bufq2->sw_ring[rx_id_bufq2]; + rx_id_bufq2++; + if (unlikely(rx_id_bufq2 == rxq->bufq2->nb_rx_desc)) + rx_id_bufq2 = 0; + rxq->bufq2->nb_rx_hold++; + } + + rxm->pkt_len = pkt_len; + rxm->data_len = pkt_len; + rxm->data_off = RTE_PKTMBUF_HEADROOM; + rxm->next = NULL; + rxm->nb_segs = 1; + rxm->port = rxq->port_id; + rxm->ol_flags = 0; + rxm->packet_type = + ptype_tbl[(rte_le_to_cpu_16(rx_desc->ptype_err_fflags0) & + VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M) >> + VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_S]; + + status_err0_qw1 = rx_desc->status_err0_qw1; + pkt_flags = idpf_splitq_rx_csum_offload(status_err0_qw1); + pkt_flags |= idpf_splitq_rx_rss_offload(rxm, rx_desc); + if (idpf_timestamp_dynflag > 0 && + (rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP)) { + /* timestamp */ + ts_ns = idpf_tstamp_convert_32b_64b(ad, + rxq->hw_register_set, + rte_le_to_cpu_32(rx_desc->ts_high)); + rxq->hw_register_set = 0; + *RTE_MBUF_DYNFIELD(rxm, + idpf_timestamp_dynfield_offset, + rte_mbuf_timestamp_t *) = ts_ns; + rxm->ol_flags |= idpf_timestamp_dynflag; + } + + rxm->ol_flags |= pkt_flags; + + rx_pkts[nb_rx++] = rxm; + } + + if (nb_rx > 0) { + rxq->rx_tail = rx_id; + if (rx_id_bufq1 != rxq->bufq1->rx_next_avail) + rxq->bufq1->rx_next_avail = rx_id_bufq1; + if (rx_id_bufq2 != rxq->bufq2->rx_next_avail) + rxq->bufq2->rx_next_avail = rx_id_bufq2; + + idpf_split_rx_bufq_refill(rxq->bufq1); + idpf_split_rx_bufq_refill(rxq->bufq2); + } + + return nb_rx; +} + +static inline void +idpf_split_tx_free(struct idpf_tx_queue *cq) +{ + volatile struct idpf_splitq_tx_compl_desc *compl_ring = cq->compl_ring; + volatile struct idpf_splitq_tx_compl_desc *txd; + uint16_t next = cq->tx_tail; + struct idpf_tx_entry *txe; + struct idpf_tx_queue *txq; + uint16_t gen, qid, q_head; + uint16_t nb_desc_clean; + uint8_t ctype; + + txd = &compl_ring[next]; + gen = (rte_le_to_cpu_16(txd->qid_comptype_gen) & + IDPF_TXD_COMPLQ_GEN_M) >> IDPF_TXD_COMPLQ_GEN_S; + if (gen != cq->expected_gen_id) + return; + + ctype = (rte_le_to_cpu_16(txd->qid_comptype_gen) & + IDPF_TXD_COMPLQ_COMPL_TYPE_M) >> IDPF_TXD_COMPLQ_COMPL_TYPE_S; + qid = (rte_le_to_cpu_16(txd->qid_comptype_gen) & + IDPF_TXD_COMPLQ_QID_M) >> IDPF_TXD_COMPLQ_QID_S; + q_head = rte_le_to_cpu_16(txd->q_head_compl_tag.compl_tag); + txq = cq->txqs[qid - cq->tx_start_qid]; + + switch (ctype) { + case IDPF_TXD_COMPLT_RE: + /* clean to q_head which indicates be fetched txq desc id + 1. + * TODO: need to refine and remove the if condition. 
+ */ + if (unlikely(q_head % 32)) { + TX_LOG(ERR, "unexpected desc (head = %u) completion.", + q_head); + return; + } + if (txq->last_desc_cleaned > q_head) + nb_desc_clean = (txq->nb_tx_desc - txq->last_desc_cleaned) + + q_head; + else + nb_desc_clean = q_head - txq->last_desc_cleaned; + txq->nb_free += nb_desc_clean; + txq->last_desc_cleaned = q_head; + break; + case IDPF_TXD_COMPLT_RS: + /* q_head indicates sw_id when ctype is 2 */ + txe = &txq->sw_ring[q_head]; + if (txe->mbuf != NULL) { + rte_pktmbuf_free_seg(txe->mbuf); + txe->mbuf = NULL; + } + break; + default: + TX_LOG(ERR, "unknown completion type."); + return; + } + + if (++next == cq->nb_tx_desc) { + next = 0; + cq->expected_gen_id ^= 1; + } + + cq->tx_tail = next; +} + +/* Check if the context descriptor is needed for TX offloading */ +static inline uint16_t +idpf_calc_context_desc(uint64_t flags) +{ + if ((flags & RTE_MBUF_F_TX_TCP_SEG) != 0) + return 1; + + return 0; +} + +/* set TSO context descriptor + */ +static inline void +idpf_set_splitq_tso_ctx(struct rte_mbuf *mbuf, + union idpf_tx_offload tx_offload, + volatile union idpf_flex_tx_ctx_desc *ctx_desc) +{ + uint16_t cmd_dtype; + uint32_t tso_len; + uint8_t hdr_len; + + if (tx_offload.l4_len == 0) { + TX_LOG(DEBUG, "L4 length set to 0"); + return; + } + + hdr_len = tx_offload.l2_len + + tx_offload.l3_len + + tx_offload.l4_len; + cmd_dtype = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX | + IDPF_TX_FLEX_CTX_DESC_CMD_TSO; + tso_len = mbuf->pkt_len - hdr_len; + + ctx_desc->tso.qw1.cmd_dtype = rte_cpu_to_le_16(cmd_dtype); + ctx_desc->tso.qw0.hdr_len = hdr_len; + ctx_desc->tso.qw0.mss_rt = + rte_cpu_to_le_16((uint16_t)mbuf->tso_segsz & + IDPF_TXD_FLEX_CTX_MSS_RT_M); + ctx_desc->tso.qw0.flex_tlen = + rte_cpu_to_le_32(tso_len & + IDPF_TXD_FLEX_CTX_MSS_RT_M); +} + +uint16_t +idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct idpf_tx_queue *txq = (struct idpf_tx_queue *)tx_queue; + volatile struct idpf_flex_tx_sched_desc *txr; + volatile struct idpf_flex_tx_sched_desc *txd; + struct idpf_tx_entry *sw_ring; + union idpf_tx_offload tx_offload = {0}; + struct idpf_tx_entry *txe, *txn; + uint16_t nb_used, tx_id, sw_id; + struct rte_mbuf *tx_pkt; + uint16_t nb_to_clean; + uint16_t nb_tx = 0; + uint64_t ol_flags; + uint16_t nb_ctx; + + if (unlikely(txq == NULL) || unlikely(!txq->q_started)) + return nb_tx; + + txr = txq->desc_ring; + sw_ring = txq->sw_ring; + tx_id = txq->tx_tail; + sw_id = txq->sw_tail; + txe = &sw_ring[sw_id]; + + for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { + tx_pkt = tx_pkts[nb_tx]; + + if (txq->nb_free <= txq->free_thresh) { + /* TODO: Need to refine + * 1. free and clean: Better to decide a clean destination instead of + * loop times. And don't free mbuf when RS got immediately, free when + * transmit or according to the clean destination. + * Now, just ignore the RE write back, free mbuf when get RS + * 2. out-of-order rewrite back haven't be supported, SW head and HW head + * need to be separated. + **/ + nb_to_clean = 2 * txq->rs_thresh; + while (nb_to_clean--) + idpf_split_tx_free(txq->complq); + } + + if (txq->nb_free < tx_pkt->nb_segs) + break; + + ol_flags = tx_pkt->ol_flags; + tx_offload.l2_len = tx_pkt->l2_len; + tx_offload.l3_len = tx_pkt->l3_len; + tx_offload.l4_len = tx_pkt->l4_len; + tx_offload.tso_segsz = tx_pkt->tso_segsz; + /* Calculate the number of context descriptors needed. 
*/ + nb_ctx = idpf_calc_context_desc(ol_flags); + nb_used = tx_pkt->nb_segs + nb_ctx; + + /* context descriptor */ + if (nb_ctx != 0) { + volatile union idpf_flex_tx_ctx_desc *ctx_desc = + (volatile union idpf_flex_tx_ctx_desc *)&txr[tx_id]; + + if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0) + idpf_set_splitq_tso_ctx(tx_pkt, tx_offload, + ctx_desc); + + tx_id++; + if (tx_id == txq->nb_tx_desc) + tx_id = 0; + } + + do { + txd = &txr[tx_id]; + txn = &sw_ring[txe->next_id]; + txe->mbuf = tx_pkt; + + /* Setup TX descriptor */ + txd->buf_addr = + rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt)); + txd->qw1.cmd_dtype = + rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE); + txd->qw1.rxr_bufsize = tx_pkt->data_len; + txd->qw1.compl_tag = sw_id; + tx_id++; + if (tx_id == txq->nb_tx_desc) + tx_id = 0; + sw_id = txe->next_id; + txe = txn; + tx_pkt = tx_pkt->next; + } while (tx_pkt); + + /* fill the last descriptor with End of Packet (EOP) bit */ + txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_EOP; + + if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK) + txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN; + txq->nb_free = (uint16_t)(txq->nb_free - nb_used); + txq->nb_used = (uint16_t)(txq->nb_used + nb_used); + + if (txq->nb_used >= 32) { + txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_RE; + /* Update txq RE bit counters */ + txq->nb_used = 0; + } + } + + /* update the tail pointer if any packets were processed */ + if (likely(nb_tx > 0)) { + IDPF_PCI_REG_WRITE(txq->qtx_tail, tx_id); + txq->tx_tail = tx_id; + txq->sw_tail = sw_id; + } + + return nb_tx; +} + +#define IDPF_RX_FLEX_DESC_STATUS0_XSUM_S \ + (RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | \ + RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) | \ + RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) | \ + RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)) + +/* Translate the rx descriptor status and error fields to pkt flags */ +static inline uint64_t +idpf_rxd_to_pkt_flags(uint16_t status_error) +{ + uint64_t flags = 0; + + if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_S)) == 0)) + return flags; + + if (likely((status_error & IDPF_RX_FLEX_DESC_STATUS0_XSUM_S) == 0)) { + flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD | + RTE_MBUF_F_RX_L4_CKSUM_GOOD); + return flags; + } + + if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)) != 0)) + flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; + else + flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; + + if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) != 0)) + flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; + else + flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; + + if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)) != 0)) + flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD; + + if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)) != 0)) + flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD; + else + flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD; + + return flags; +} + +static inline void +idpf_update_rx_tail(struct idpf_rx_queue *rxq, uint16_t nb_hold, + uint16_t rx_id) +{ + nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold); + + if (nb_hold > rxq->rx_free_thresh) { + RX_LOG(DEBUG, + "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u", + rxq->port_id, rxq->queue_id, rx_id, nb_hold); + rx_id = (uint16_t)((rx_id == 0) ? 
+ (rxq->nb_rx_desc - 1) : (rx_id - 1)); + IDPF_PCI_REG_WRITE(rxq->qrx_tail, rx_id); + nb_hold = 0; + } + rxq->nb_rx_hold = nb_hold; +} + +uint16_t +idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + volatile union virtchnl2_rx_desc *rx_ring; + volatile union virtchnl2_rx_desc *rxdp; + union virtchnl2_rx_desc rxd; + struct idpf_rx_queue *rxq; + const uint32_t *ptype_tbl; + uint16_t rx_id, nb_hold; + struct idpf_adapter *ad; + uint16_t rx_packet_len; + struct rte_mbuf *rxm; + struct rte_mbuf *nmb; + uint16_t rx_status0; + uint64_t pkt_flags; + uint64_t dma_addr; + uint64_t ts_ns; + uint16_t nb_rx; + + nb_rx = 0; + nb_hold = 0; + rxq = rx_queue; + + ad = rxq->adapter; + + if (unlikely(rxq == NULL) || unlikely(!rxq->q_started)) + return nb_rx; + + rx_id = rxq->rx_tail; + rx_ring = rxq->rx_ring; + ptype_tbl = rxq->adapter->ptype_tbl; + + if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) + rxq->hw_register_set = 1; + + while (nb_rx < nb_pkts) { + rxdp = &rx_ring[rx_id]; + rx_status0 = rte_le_to_cpu_16(rxdp->flex_nic_wb.status_error0); + + /* Check the DD bit first */ + if ((rx_status0 & (1 << VIRTCHNL2_RX_FLEX_DESC_STATUS0_DD_S)) == 0) + break; + + nmb = rte_mbuf_raw_alloc(rxq->mp); + if (unlikely(nmb == NULL)) { + rte_atomic64_inc(&rxq->rx_stats.mbuf_alloc_failed); + RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u " + "queue_id=%u", rxq->port_id, rxq->queue_id); + break; + } + rxd = *rxdp; /* copy descriptor in ring to temp variable*/ + + nb_hold++; + rxm = rxq->sw_ring[rx_id]; + rxq->sw_ring[rx_id] = nmb; + rx_id++; + if (unlikely(rx_id == rxq->nb_rx_desc)) + rx_id = 0; + + /* Prefetch next mbuf */ + rte_prefetch0(rxq->sw_ring[rx_id]); + + /* When next RX descriptor is on a cache line boundary, + * prefetch the next 4 RX descriptors and next 8 pointers + * to mbufs. 
+ */ + if ((rx_id & 0x3) == 0) { + rte_prefetch0(&rx_ring[rx_id]); + rte_prefetch0(rxq->sw_ring[rx_id]); + } + dma_addr = + rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb)); + rxdp->read.hdr_addr = 0; + rxdp->read.pkt_addr = dma_addr; + + rx_packet_len = (rte_cpu_to_le_16(rxd.flex_nic_wb.pkt_len) & + VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M); + + rxm->data_off = RTE_PKTMBUF_HEADROOM; + rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM)); + rxm->nb_segs = 1; + rxm->next = NULL; + rxm->pkt_len = rx_packet_len; + rxm->data_len = rx_packet_len; + rxm->port = rxq->port_id; + rxm->ol_flags = 0; + pkt_flags = idpf_rxd_to_pkt_flags(rx_status0); + rxm->packet_type = + ptype_tbl[(uint8_t)(rte_cpu_to_le_16(rxd.flex_nic_wb.ptype_flex_flags0) & + VIRTCHNL2_RX_FLEX_DESC_PTYPE_M)]; + + rxm->ol_flags |= pkt_flags; + + if (idpf_timestamp_dynflag > 0 && + (rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) { + /* timestamp */ + ts_ns = idpf_tstamp_convert_32b_64b(ad, + rxq->hw_register_set, + rte_le_to_cpu_32(rxd.flex_nic_wb.flex_ts.ts_high)); + rxq->hw_register_set = 0; + *RTE_MBUF_DYNFIELD(rxm, + idpf_timestamp_dynfield_offset, + rte_mbuf_timestamp_t *) = ts_ns; + rxm->ol_flags |= idpf_timestamp_dynflag; + } + + rx_pkts[nb_rx++] = rxm; + } + rxq->rx_tail = rx_id; + + idpf_update_rx_tail(rxq, nb_hold, rx_id); + + return nb_rx; +} + +static inline int +idpf_xmit_cleanup(struct idpf_tx_queue *txq) +{ + uint16_t last_desc_cleaned = txq->last_desc_cleaned; + struct idpf_tx_entry *sw_ring = txq->sw_ring; + uint16_t nb_tx_desc = txq->nb_tx_desc; + uint16_t desc_to_clean_to; + uint16_t nb_tx_to_clean; + uint16_t i; + + volatile struct idpf_flex_tx_desc *txd = txq->tx_ring; + + desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh); + if (desc_to_clean_to >= nb_tx_desc) + desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc); + + desc_to_clean_to = sw_ring[desc_to_clean_to].last_id; + /* In the writeback Tx desccriptor, the only significant fields are the 4-bit DTYPE */ + if ((txd[desc_to_clean_to].qw1.cmd_dtype & + rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) != + rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) { + TX_LOG(DEBUG, "TX descriptor %4u is not done " + "(port=%d queue=%d)", desc_to_clean_to, + txq->port_id, txq->queue_id); + return -1; + } + + if (last_desc_cleaned > desc_to_clean_to) + nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) + + desc_to_clean_to); + else + nb_tx_to_clean = (uint16_t)(desc_to_clean_to - + last_desc_cleaned); + + txd[desc_to_clean_to].qw1.cmd_dtype = 0; + txd[desc_to_clean_to].qw1.buf_size = 0; + for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++) + txd[desc_to_clean_to].qw1.flex.raw[i] = 0; + + txq->last_desc_cleaned = desc_to_clean_to; + txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean); + + return 0; +} + +/* TX function */ +uint16_t +idpf_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + volatile struct idpf_flex_tx_desc *txd; + volatile struct idpf_flex_tx_desc *txr; + union idpf_tx_offload tx_offload = {0}; + struct idpf_tx_entry *txe, *txn; + struct idpf_tx_entry *sw_ring; + struct idpf_tx_queue *txq; + struct rte_mbuf *tx_pkt; + struct rte_mbuf *m_seg; + uint64_t buf_dma_addr; + uint64_t ol_flags; + uint16_t tx_last; + uint16_t nb_used; + uint16_t nb_ctx; + uint16_t td_cmd; + uint16_t tx_id; + uint16_t nb_tx; + uint16_t slen; + + nb_tx = 0; + txq = tx_queue; + + if (unlikely(txq == NULL) || unlikely(!txq->q_started)) + return nb_tx; + + sw_ring = txq->sw_ring; + txr = txq->tx_ring; + 
tx_id = txq->tx_tail; + txe = &sw_ring[tx_id]; + + /* Check if the descriptor ring needs to be cleaned. */ + if (txq->nb_free < txq->free_thresh) + (void)idpf_xmit_cleanup(txq); + + for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { + td_cmd = 0; + + tx_pkt = *tx_pkts++; + RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf); + + ol_flags = tx_pkt->ol_flags; + tx_offload.l2_len = tx_pkt->l2_len; + tx_offload.l3_len = tx_pkt->l3_len; + tx_offload.l4_len = tx_pkt->l4_len; + tx_offload.tso_segsz = tx_pkt->tso_segsz; + /* Calculate the number of context descriptors needed. */ + nb_ctx = idpf_calc_context_desc(ol_flags); + + /* The number of descriptors that must be allocated for + * a packet equals to the number of the segments of that + * packet plus 1 context descriptor if needed. + */ + nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx); + tx_last = (uint16_t)(tx_id + nb_used - 1); + + /* Circular ring */ + if (tx_last >= txq->nb_tx_desc) + tx_last = (uint16_t)(tx_last - txq->nb_tx_desc); + + TX_LOG(DEBUG, "port_id=%u queue_id=%u" + " tx_first=%u tx_last=%u", + txq->port_id, txq->queue_id, tx_id, tx_last); + + if (nb_used > txq->nb_free) { + if (idpf_xmit_cleanup(txq) != 0) { + if (nb_tx == 0) + return 0; + goto end_of_tx; + } + if (unlikely(nb_used > txq->rs_thresh)) { + while (nb_used > txq->nb_free) { + if (idpf_xmit_cleanup(txq) != 0) { + if (nb_tx == 0) + return 0; + goto end_of_tx; + } + } + } + } + + if (nb_ctx != 0) { + /* Setup TX context descriptor if required */ + volatile union idpf_flex_tx_ctx_desc *ctx_txd = + (volatile union idpf_flex_tx_ctx_desc *) + &txr[tx_id]; + + txn = &sw_ring[txe->next_id]; + RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf); + if (txe->mbuf != NULL) { + rte_pktmbuf_free_seg(txe->mbuf); + txe->mbuf = NULL; + } + + /* TSO enabled */ + if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0) + idpf_set_splitq_tso_ctx(tx_pkt, tx_offload, + ctx_txd); + + txe->last_id = tx_last; + tx_id = txe->next_id; + txe = txn; + } + + m_seg = tx_pkt; + do { + txd = &txr[tx_id]; + txn = &sw_ring[txe->next_id]; + + if (txe->mbuf != NULL) + rte_pktmbuf_free_seg(txe->mbuf); + txe->mbuf = m_seg; + + /* Setup TX Descriptor */ + slen = m_seg->data_len; + buf_dma_addr = rte_mbuf_data_iova(m_seg); + txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr); + txd->qw1.buf_size = slen; + txd->qw1.cmd_dtype = rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA << + IDPF_FLEX_TXD_QW1_DTYPE_S); + + txe->last_id = tx_last; + tx_id = txe->next_id; + txe = txn; + m_seg = m_seg->next; + } while (m_seg); + + /* The last packet data descriptor needs End Of Packet (EOP) */ + td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP; + txq->nb_used = (uint16_t)(txq->nb_used + nb_used); + txq->nb_free = (uint16_t)(txq->nb_free - nb_used); + + if (txq->nb_used >= txq->rs_thresh) { + TX_LOG(DEBUG, "Setting RS bit on TXD id=" + "%4u (port=%d queue=%d)", + tx_last, txq->port_id, txq->queue_id); + + td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS; + + /* Update txq RS bit counters */ + txq->nb_used = 0; + } + + if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK) + td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN; + + txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd << IDPF_FLEX_TXD_QW1_CMD_S); + } + +end_of_tx: + rte_wmb(); + + TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u", + txq->port_id, txq->queue_id, tx_id, nb_tx); + + IDPF_PCI_REG_WRITE(txq->qtx_tail, tx_id); + txq->tx_tail = tx_id; + + return nb_tx; +} + +/* TX prep functions */ +uint16_t +idpf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + int ret; +#endif + int i; + uint64_t 
ol_flags; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /* Check condition for nb_segs > IDPF_TX_MAX_MTU_SEG. */ + if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0) { + if (m->nb_segs > IDPF_TX_MAX_MTU_SEG) { + rte_errno = EINVAL; + return i; + } + } else if ((m->tso_segsz < IDPF_MIN_TSO_MSS) || + (m->tso_segsz > IDPF_MAX_TSO_MSS) || + (m->pkt_len > IDPF_MAX_TSO_FRAME_SIZE)) { + /* MSS outside the range are considered malicious */ + rte_errno = EINVAL; + return i; + } + + if ((ol_flags & IDPF_TX_OFFLOAD_NOTSUP_MASK) != 0) { + rte_errno = ENOTSUP; + return i; + } + + if (m->pkt_len < IDPF_MIN_FRAME_SIZE) { + rte_errno = EINVAL; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = -ret; + return i; + } +#endif + } + + return i; +} diff --git a/drivers/common/idpf/idpf_common_rxtx.h b/drivers/common/idpf/idpf_common_rxtx.h index c5bb7d48af..827f791505 100644 --- a/drivers/common/idpf/idpf_common_rxtx.h +++ b/drivers/common/idpf/idpf_common_rxtx.h @@ -27,8 +27,63 @@ #define IDPF_TX_OFFLOAD_MULTI_SEGS RTE_BIT64(15) #define IDPF_TX_OFFLOAD_MBUF_FAST_FREE RTE_BIT64(16) +#define IDPF_TX_MAX_MTU_SEG 10 + +#define IDPF_MIN_TSO_MSS 88 +#define IDPF_MAX_TSO_MSS 9728 +#define IDPF_MAX_TSO_FRAME_SIZE 262143 +#define IDPF_TX_MAX_MTU_SEG 10 + +#define IDPF_TX_CKSUM_OFFLOAD_MASK ( \ + RTE_MBUF_F_TX_IP_CKSUM | \ + RTE_MBUF_F_TX_L4_MASK | \ + RTE_MBUF_F_TX_TCP_SEG) + +#define IDPF_TX_OFFLOAD_MASK ( \ + IDPF_TX_CKSUM_OFFLOAD_MASK | \ + RTE_MBUF_F_TX_IPV4 | \ + RTE_MBUF_F_TX_IPV6) + +#define IDPF_TX_OFFLOAD_NOTSUP_MASK \ + (RTE_MBUF_F_TX_OFFLOAD_MASK ^ IDPF_TX_OFFLOAD_MASK) + +/* MTS */ +#define GLTSYN_CMD_SYNC_0_0 (PF_TIMESYNC_BASE + 0x0) +#define PF_GLTSYN_SHTIME_0_0 (PF_TIMESYNC_BASE + 0x4) +#define PF_GLTSYN_SHTIME_L_0 (PF_TIMESYNC_BASE + 0x8) +#define PF_GLTSYN_SHTIME_H_0 (PF_TIMESYNC_BASE + 0xC) +#define GLTSYN_ART_L_0 (PF_TIMESYNC_BASE + 0x10) +#define GLTSYN_ART_H_0 (PF_TIMESYNC_BASE + 0x14) +#define PF_GLTSYN_SHTIME_0_1 (PF_TIMESYNC_BASE + 0x24) +#define PF_GLTSYN_SHTIME_L_1 (PF_TIMESYNC_BASE + 0x28) +#define PF_GLTSYN_SHTIME_H_1 (PF_TIMESYNC_BASE + 0x2C) +#define PF_GLTSYN_SHTIME_0_2 (PF_TIMESYNC_BASE + 0x44) +#define PF_GLTSYN_SHTIME_L_2 (PF_TIMESYNC_BASE + 0x48) +#define PF_GLTSYN_SHTIME_H_2 (PF_TIMESYNC_BASE + 0x4C) +#define PF_GLTSYN_SHTIME_0_3 (PF_TIMESYNC_BASE + 0x64) +#define PF_GLTSYN_SHTIME_L_3 (PF_TIMESYNC_BASE + 0x68) +#define PF_GLTSYN_SHTIME_H_3 (PF_TIMESYNC_BASE + 0x6C) + +#define PF_TIMESYNC_BAR4_BASE 0x0E400000 +#define GLTSYN_ENA (PF_TIMESYNC_BAR4_BASE + 0x90) +#define GLTSYN_CMD (PF_TIMESYNC_BAR4_BASE + 0x94) +#define GLTSYC_TIME_L (PF_TIMESYNC_BAR4_BASE + 0x104) +#define GLTSYC_TIME_H (PF_TIMESYNC_BAR4_BASE + 0x108) + +#define GLTSYN_CMD_SYNC_0_4 (PF_TIMESYNC_BAR4_BASE + 0x110) +#define PF_GLTSYN_SHTIME_L_4 (PF_TIMESYNC_BAR4_BASE + 0x118) +#define PF_GLTSYN_SHTIME_H_4 (PF_TIMESYNC_BAR4_BASE + 0x11C) +#define GLTSYN_INCVAL_L (PF_TIMESYNC_BAR4_BASE + 0x150) +#define GLTSYN_INCVAL_H (PF_TIMESYNC_BAR4_BASE + 0x154) +#define GLTSYN_SHADJ_L (PF_TIMESYNC_BAR4_BASE + 0x158) +#define GLTSYN_SHADJ_H (PF_TIMESYNC_BAR4_BASE + 0x15C) + +#define GLTSYN_CMD_SYNC_0_5 (PF_TIMESYNC_BAR4_BASE + 0x130) +#define PF_GLTSYN_SHTIME_L_5 (PF_TIMESYNC_BAR4_BASE + 0x138) +#define PF_GLTSYN_SHTIME_H_5 (PF_TIMESYNC_BAR4_BASE + 0x13C) + struct idpf_rx_stats { - uint64_t mbuf_alloc_failed; + rte_atomic64_t mbuf_alloc_failed; }; struct idpf_rx_queue { @@ -126,6 +181,18 @@ struct idpf_tx_queue { 
struct idpf_tx_queue *complq; }; +/* Offload features */ +union idpf_tx_offload { + uint64_t data; + struct { + uint64_t l2_len:7; /* L2 (MAC) Header Length. */ + uint64_t l3_len:9; /* L3 (IP) Header Length. */ + uint64_t l4_len:8; /* L4 Header Length. */ + uint64_t tso_segsz:16; /* TCP TSO segment size */ + /* uint64_t unused : 24; */ + }; +}; + struct idpf_rxq_ops { void (*release_mbufs)(struct idpf_rx_queue *rxq); }; @@ -134,6 +201,9 @@ struct idpf_txq_ops { void (*release_mbufs)(struct idpf_tx_queue *txq); }; +extern int idpf_timestamp_dynfield_offset; +extern uint64_t idpf_timestamp_dynflag; + __rte_internal int idpf_check_rx_thresh(uint16_t nb_desc, uint16_t thresh); __rte_internal @@ -162,8 +232,25 @@ void idpf_rx_queue_release(void *rxq); __rte_internal void idpf_tx_queue_release(void *txq); __rte_internal +int idpf_register_ts_mbuf(struct idpf_rx_queue *rxq); +__rte_internal int idpf_alloc_single_rxq_mbufs(struct idpf_rx_queue *rxq); __rte_internal int idpf_alloc_split_rxq_mbufs(struct idpf_rx_queue *rxq); +__rte_internal +uint16_t idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); +__rte_internal +uint16_t idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +__rte_internal +uint16_t idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); +__rte_internal +uint16_t idpf_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +__rte_internal +uint16_t idpf_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); #endif /* _IDPF_COMMON_RXTX_H_ */ diff --git a/drivers/common/idpf/version.map b/drivers/common/idpf/version.map index aa6ebd7c6c..03aab598b4 100644 --- a/drivers/common/idpf/version.map +++ b/drivers/common/idpf/version.map @@ -12,6 +12,8 @@ INTERNAL { idpf_config_rss; idpf_create_vport_info_init; idpf_execute_vc_cmd; + idpf_prep_pkts; + idpf_register_ts_mbuf; idpf_release_rxq_mbufs; idpf_release_txq_mbufs; idpf_reset_single_rx_queue; @@ -22,6 +24,10 @@ INTERNAL { idpf_reset_split_tx_complq; idpf_reset_split_tx_descq; idpf_rx_queue_release; + idpf_singleq_recv_pkts; + idpf_singleq_xmit_pkts; + idpf_splitq_recv_pkts; + idpf_splitq_xmit_pkts; idpf_tx_queue_release; idpf_vc_alloc_vectors; idpf_vc_check_api_version; diff --git a/drivers/net/idpf/idpf_ethdev.c b/drivers/net/idpf/idpf_ethdev.c index 734e97ffc2..ee2dec7c7c 100644 --- a/drivers/net/idpf/idpf_ethdev.c +++ b/drivers/net/idpf/idpf_ethdev.c @@ -22,8 +22,6 @@ rte_spinlock_t idpf_adapter_lock; struct idpf_adapter_list idpf_adapter_list; bool idpf_adapter_list_init; -uint64_t idpf_timestamp_dynflag; - static const char * const idpf_valid_args[] = { IDPF_TX_SINGLE_Q, IDPF_RX_SINGLE_Q, diff --git a/drivers/net/idpf/idpf_ethdev.h b/drivers/net/idpf/idpf_ethdev.h index 9b40aa4e56..d791d402fb 100644 --- a/drivers/net/idpf/idpf_ethdev.h +++ b/drivers/net/idpf/idpf_ethdev.h @@ -28,7 +28,6 @@ #define IDPF_MIN_BUF_SIZE 1024 #define IDPF_MAX_FRAME_SIZE 9728 -#define IDPF_MIN_FRAME_SIZE 14 #define IDPF_DEFAULT_MTU RTE_ETHER_MTU #define IDPF_NUM_MACADDR_MAX 64 @@ -78,9 +77,6 @@ struct idpf_adapter_ext { uint16_t cur_vport_nb; uint16_t used_vecs_num; - - /* For PTP */ - uint64_t time_hw; }; TAILQ_HEAD(idpf_adapter_list, idpf_adapter_ext); diff --git a/drivers/net/idpf/idpf_logs.h b/drivers/net/idpf/idpf_logs.h index d5f778fefe..bf0774b8e4 100644 --- a/drivers/net/idpf/idpf_logs.h +++ b/drivers/net/idpf/idpf_logs.h @@ -29,28 +29,4 @@ extern int idpf_logtype_driver; #define PMD_DRV_LOG(level, 
fmt, args...) \ PMD_DRV_LOG_RAW(level, fmt "\n", ## args) -#ifdef RTE_LIBRTE_IDPF_DEBUG_RX -#define PMD_RX_LOG(level, ...) \ - RTE_LOG(level, \ - PMD, \ - RTE_FMT("%s(): " \ - RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ - __func__, \ - RTE_FMT_TAIL(__VA_ARGS__,))) -#else -#define PMD_RX_LOG(level, fmt, args...) do { } while (0) -#endif - -#ifdef RTE_LIBRTE_IDPF_DEBUG_TX -#define PMD_TX_LOG(level, ...) \ - RTE_LOG(level, \ - PMD, \ - RTE_FMT("%s(): " \ - RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ - __func__, \ - RTE_FMT_TAIL(__VA_ARGS__,))) -#else -#define PMD_TX_LOG(level, fmt, args...) do { } while (0) -#endif - #endif /* _IDPF_LOGS_H_ */ diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c index fb1814d893..1066789386 100644 --- a/drivers/net/idpf/idpf_rxtx.c +++ b/drivers/net/idpf/idpf_rxtx.c @@ -10,8 +10,6 @@ #include "idpf_rxtx.h" #include "idpf_rxtx_vec_common.h" -static int idpf_timestamp_dynfield_offset = -1; - static uint64_t idpf_rx_offload_convert(uint64_t offload) { @@ -501,23 +499,6 @@ idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, return ret; } -static int -idpf_register_ts_mbuf(struct idpf_rx_queue *rxq) -{ - int err; - if ((rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) != 0) { - /* Register mbuf field and flag for Rx timestamp */ - err = rte_mbuf_dyn_rx_timestamp_register(&idpf_timestamp_dynfield_offset, - &idpf_timestamp_dynflag); - if (err != 0) { - PMD_DRV_LOG(ERR, - "Cannot register mbuf field/flag for timestamp"); - return -EINVAL; - } - } - return 0; -} - int idpf_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id) { @@ -537,7 +518,7 @@ idpf_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id) err = idpf_register_ts_mbuf(rxq); if (err != 0) { - PMD_DRV_LOG(ERR, "fail to regidter timestamp mbuf %u", + PMD_DRV_LOG(ERR, "fail to residter timestamp mbuf %u", rx_queue_id); return -EIO; } @@ -762,922 +743,6 @@ idpf_stop_queues(struct rte_eth_dev *dev) } } -#define IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S \ - (RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S) | \ - RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S) | \ - RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S) | \ - RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S)) - -static inline uint64_t -idpf_splitq_rx_csum_offload(uint8_t err) -{ - uint64_t flags = 0; - - if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_S)) == 0)) - return flags; - - if (likely((err & IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S) == 0)) { - flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD | - RTE_MBUF_F_RX_L4_CKSUM_GOOD); - return flags; - } - - if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S)) != 0)) - flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; - else - flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; - - if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S)) != 0)) - flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; - else - flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; - - if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S)) != 0)) - flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD; - - if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S)) != 0)) - flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD; - else - flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD; - - return flags; -} - -#define IDPF_RX_FLEX_DESC_ADV_HASH1_S 0 -#define IDPF_RX_FLEX_DESC_ADV_HASH2_S 16 -#define IDPF_RX_FLEX_DESC_ADV_HASH3_S 24 - -static inline uint64_t -idpf_splitq_rx_rss_offload(struct rte_mbuf *mb, - volatile struct virtchnl2_rx_flex_desc_adv_nic_3 
*rx_desc) -{ - uint8_t status_err0_qw0; - uint64_t flags = 0; - - status_err0_qw0 = rx_desc->status_err0_qw0; - - if ((status_err0_qw0 & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RSS_VALID_S)) != 0) { - flags |= RTE_MBUF_F_RX_RSS_HASH; - mb->hash.rss = (rte_le_to_cpu_16(rx_desc->hash1) << - IDPF_RX_FLEX_DESC_ADV_HASH1_S) | - ((uint32_t)(rx_desc->ff2_mirrid_hash2.hash2) << - IDPF_RX_FLEX_DESC_ADV_HASH2_S) | - ((uint32_t)(rx_desc->hash3) << - IDPF_RX_FLEX_DESC_ADV_HASH3_S); - } - - return flags; -} - -static void -idpf_split_rx_bufq_refill(struct idpf_rx_queue *rx_bufq) -{ - volatile struct virtchnl2_splitq_rx_buf_desc *rx_buf_ring; - volatile struct virtchnl2_splitq_rx_buf_desc *rx_buf_desc; - uint16_t nb_refill = rx_bufq->rx_free_thresh; - uint16_t nb_desc = rx_bufq->nb_rx_desc; - uint16_t next_avail = rx_bufq->rx_tail; - struct rte_mbuf *nmb[rx_bufq->rx_free_thresh]; - struct rte_eth_dev *dev; - uint64_t dma_addr; - uint16_t delta; - int i; - - if (rx_bufq->nb_rx_hold < rx_bufq->rx_free_thresh) - return; - - rx_buf_ring = rx_bufq->rx_ring; - delta = nb_desc - next_avail; - if (unlikely(delta < nb_refill)) { - if (likely(rte_pktmbuf_alloc_bulk(rx_bufq->mp, nmb, delta) == 0)) { - for (i = 0; i < delta; i++) { - rx_buf_desc = &rx_buf_ring[next_avail + i]; - rx_bufq->sw_ring[next_avail + i] = nmb[i]; - dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i])); - rx_buf_desc->hdr_addr = 0; - rx_buf_desc->pkt_addr = dma_addr; - } - nb_refill -= delta; - next_avail = 0; - rx_bufq->nb_rx_hold -= delta; - } else { - dev = &rte_eth_devices[rx_bufq->port_id]; - dev->data->rx_mbuf_alloc_failed += nb_desc - next_avail; - PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u", - rx_bufq->port_id, rx_bufq->queue_id); - return; - } - } - - if (nb_desc - next_avail >= nb_refill) { - if (likely(rte_pktmbuf_alloc_bulk(rx_bufq->mp, nmb, nb_refill) == 0)) { - for (i = 0; i < nb_refill; i++) { - rx_buf_desc = &rx_buf_ring[next_avail + i]; - rx_bufq->sw_ring[next_avail + i] = nmb[i]; - dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i])); - rx_buf_desc->hdr_addr = 0; - rx_buf_desc->pkt_addr = dma_addr; - } - next_avail += nb_refill; - rx_bufq->nb_rx_hold -= nb_refill; - } else { - dev = &rte_eth_devices[rx_bufq->port_id]; - dev->data->rx_mbuf_alloc_failed += nb_desc - next_avail; - PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u", - rx_bufq->port_id, rx_bufq->queue_id); - } - } - - IDPF_PCI_REG_WRITE(rx_bufq->qrx_tail, next_avail); - - rx_bufq->rx_tail = next_avail; -} - -uint16_t -idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts) -{ - volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc_ring; - volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc; - uint16_t pktlen_gen_bufq_id; - struct idpf_rx_queue *rxq; - const uint32_t *ptype_tbl; - uint8_t status_err0_qw1; - struct idpf_adapter_ext *ad; - struct rte_mbuf *rxm; - uint16_t rx_id_bufq1; - uint16_t rx_id_bufq2; - uint64_t pkt_flags; - uint16_t pkt_len; - uint16_t bufq_id; - uint16_t gen_id; - uint16_t rx_id; - uint16_t nb_rx; - uint64_t ts_ns; - - nb_rx = 0; - rxq = rx_queue; - ad = IDPF_ADAPTER_TO_EXT(rxq->adapter); - - if (unlikely(rxq == NULL) || unlikely(!rxq->q_started)) - return nb_rx; - - rx_id = rxq->rx_tail; - rx_id_bufq1 = rxq->bufq1->rx_next_avail; - rx_id_bufq2 = rxq->bufq2->rx_next_avail; - rx_desc_ring = rxq->rx_ring; - ptype_tbl = rxq->adapter->ptype_tbl; - - if ((rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) != 0) - rxq->hw_register_set = 1; - - while 
(nb_rx < nb_pkts) { - rx_desc = &rx_desc_ring[rx_id]; - - pktlen_gen_bufq_id = - rte_le_to_cpu_16(rx_desc->pktlen_gen_bufq_id); - gen_id = (pktlen_gen_bufq_id & - VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M) >> - VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S; - if (gen_id != rxq->expected_gen_id) - break; - - pkt_len = (pktlen_gen_bufq_id & - VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M) >> - VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_S; - if (pkt_len == 0) - PMD_RX_LOG(ERR, "Packet length is 0"); - - rx_id++; - if (unlikely(rx_id == rxq->nb_rx_desc)) { - rx_id = 0; - rxq->expected_gen_id ^= 1; - } - - bufq_id = (pktlen_gen_bufq_id & - VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M) >> - VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S; - if (bufq_id == 0) { - rxm = rxq->bufq1->sw_ring[rx_id_bufq1]; - rx_id_bufq1++; - if (unlikely(rx_id_bufq1 == rxq->bufq1->nb_rx_desc)) - rx_id_bufq1 = 0; - rxq->bufq1->nb_rx_hold++; - } else { - rxm = rxq->bufq2->sw_ring[rx_id_bufq2]; - rx_id_bufq2++; - if (unlikely(rx_id_bufq2 == rxq->bufq2->nb_rx_desc)) - rx_id_bufq2 = 0; - rxq->bufq2->nb_rx_hold++; - } - - rxm->pkt_len = pkt_len; - rxm->data_len = pkt_len; - rxm->data_off = RTE_PKTMBUF_HEADROOM; - rxm->next = NULL; - rxm->nb_segs = 1; - rxm->port = rxq->port_id; - rxm->ol_flags = 0; - rxm->packet_type = - ptype_tbl[(rte_le_to_cpu_16(rx_desc->ptype_err_fflags0) & - VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M) >> - VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_S]; - - status_err0_qw1 = rx_desc->status_err0_qw1; - pkt_flags = idpf_splitq_rx_csum_offload(status_err0_qw1); - pkt_flags |= idpf_splitq_rx_rss_offload(rxm, rx_desc); - if (idpf_timestamp_dynflag > 0 && - (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)) { - /* timestamp */ - ts_ns = idpf_tstamp_convert_32b_64b(ad, - rxq->hw_register_set, - rte_le_to_cpu_32(rx_desc->ts_high)); - rxq->hw_register_set = 0; - *RTE_MBUF_DYNFIELD(rxm, - idpf_timestamp_dynfield_offset, - rte_mbuf_timestamp_t *) = ts_ns; - rxm->ol_flags |= idpf_timestamp_dynflag; - } - - rxm->ol_flags |= pkt_flags; - - rx_pkts[nb_rx++] = rxm; - } - - if (nb_rx > 0) { - rxq->rx_tail = rx_id; - if (rx_id_bufq1 != rxq->bufq1->rx_next_avail) - rxq->bufq1->rx_next_avail = rx_id_bufq1; - if (rx_id_bufq2 != rxq->bufq2->rx_next_avail) - rxq->bufq2->rx_next_avail = rx_id_bufq2; - - idpf_split_rx_bufq_refill(rxq->bufq1); - idpf_split_rx_bufq_refill(rxq->bufq2); - } - - return nb_rx; -} - -static inline void -idpf_split_tx_free(struct idpf_tx_queue *cq) -{ - volatile struct idpf_splitq_tx_compl_desc *compl_ring = cq->compl_ring; - volatile struct idpf_splitq_tx_compl_desc *txd; - uint16_t next = cq->tx_tail; - struct idpf_tx_entry *txe; - struct idpf_tx_queue *txq; - uint16_t gen, qid, q_head; - uint16_t nb_desc_clean; - uint8_t ctype; - - txd = &compl_ring[next]; - gen = (rte_le_to_cpu_16(txd->qid_comptype_gen) & - IDPF_TXD_COMPLQ_GEN_M) >> IDPF_TXD_COMPLQ_GEN_S; - if (gen != cq->expected_gen_id) - return; - - ctype = (rte_le_to_cpu_16(txd->qid_comptype_gen) & - IDPF_TXD_COMPLQ_COMPL_TYPE_M) >> IDPF_TXD_COMPLQ_COMPL_TYPE_S; - qid = (rte_le_to_cpu_16(txd->qid_comptype_gen) & - IDPF_TXD_COMPLQ_QID_M) >> IDPF_TXD_COMPLQ_QID_S; - q_head = rte_le_to_cpu_16(txd->q_head_compl_tag.compl_tag); - txq = cq->txqs[qid - cq->tx_start_qid]; - - switch (ctype) { - case IDPF_TXD_COMPLT_RE: - /* clean to q_head which indicates be fetched txq desc id + 1. - * TODO: need to refine and remove the if condition. 
- */ - if (unlikely(q_head % 32)) { - PMD_DRV_LOG(ERR, "unexpected desc (head = %u) completion.", - q_head); - return; - } - if (txq->last_desc_cleaned > q_head) - nb_desc_clean = (txq->nb_tx_desc - txq->last_desc_cleaned) + - q_head; - else - nb_desc_clean = q_head - txq->last_desc_cleaned; - txq->nb_free += nb_desc_clean; - txq->last_desc_cleaned = q_head; - break; - case IDPF_TXD_COMPLT_RS: - /* q_head indicates sw_id when ctype is 2 */ - txe = &txq->sw_ring[q_head]; - if (txe->mbuf != NULL) { - rte_pktmbuf_free_seg(txe->mbuf); - txe->mbuf = NULL; - } - break; - default: - PMD_DRV_LOG(ERR, "unknown completion type."); - return; - } - - if (++next == cq->nb_tx_desc) { - next = 0; - cq->expected_gen_id ^= 1; - } - - cq->tx_tail = next; -} - -/* Check if the context descriptor is needed for TX offloading */ -static inline uint16_t -idpf_calc_context_desc(uint64_t flags) -{ - if ((flags & RTE_MBUF_F_TX_TCP_SEG) != 0) - return 1; - - return 0; -} - -/* set TSO context descriptor - */ -static inline void -idpf_set_splitq_tso_ctx(struct rte_mbuf *mbuf, - union idpf_tx_offload tx_offload, - volatile union idpf_flex_tx_ctx_desc *ctx_desc) -{ - uint16_t cmd_dtype; - uint32_t tso_len; - uint8_t hdr_len; - - if (tx_offload.l4_len == 0) { - PMD_TX_LOG(DEBUG, "L4 length set to 0"); - return; - } - - hdr_len = tx_offload.l2_len + - tx_offload.l3_len + - tx_offload.l4_len; - cmd_dtype = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX | - IDPF_TX_FLEX_CTX_DESC_CMD_TSO; - tso_len = mbuf->pkt_len - hdr_len; - - ctx_desc->tso.qw1.cmd_dtype = rte_cpu_to_le_16(cmd_dtype); - ctx_desc->tso.qw0.hdr_len = hdr_len; - ctx_desc->tso.qw0.mss_rt = - rte_cpu_to_le_16((uint16_t)mbuf->tso_segsz & - IDPF_TXD_FLEX_CTX_MSS_RT_M); - ctx_desc->tso.qw0.flex_tlen = - rte_cpu_to_le_32(tso_len & - IDPF_TXD_FLEX_CTX_MSS_RT_M); -} - -uint16_t -idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t nb_pkts) -{ - struct idpf_tx_queue *txq = (struct idpf_tx_queue *)tx_queue; - volatile struct idpf_flex_tx_sched_desc *txr; - volatile struct idpf_flex_tx_sched_desc *txd; - struct idpf_tx_entry *sw_ring; - union idpf_tx_offload tx_offload = {0}; - struct idpf_tx_entry *txe, *txn; - uint16_t nb_used, tx_id, sw_id; - struct rte_mbuf *tx_pkt; - uint16_t nb_to_clean; - uint16_t nb_tx = 0; - uint64_t ol_flags; - uint16_t nb_ctx; - - if (unlikely(txq == NULL) || unlikely(!txq->q_started)) - return nb_tx; - - txr = txq->desc_ring; - sw_ring = txq->sw_ring; - tx_id = txq->tx_tail; - sw_id = txq->sw_tail; - txe = &sw_ring[sw_id]; - - for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { - tx_pkt = tx_pkts[nb_tx]; - - if (txq->nb_free <= txq->free_thresh) { - /* TODO: Need to refine - * 1. free and clean: Better to decide a clean destination instead of - * loop times. And don't free mbuf when RS got immediately, free when - * transmit or according to the clean destination. - * Now, just ignore the RE write back, free mbuf when get RS - * 2. out-of-order rewrite back haven't be supported, SW head and HW head - * need to be separated. - **/ - nb_to_clean = 2 * txq->rs_thresh; - while (nb_to_clean--) - idpf_split_tx_free(txq->complq); - } - - if (txq->nb_free < tx_pkt->nb_segs) - break; - - ol_flags = tx_pkt->ol_flags; - tx_offload.l2_len = tx_pkt->l2_len; - tx_offload.l3_len = tx_pkt->l3_len; - tx_offload.l4_len = tx_pkt->l4_len; - tx_offload.tso_segsz = tx_pkt->tso_segsz; - /* Calculate the number of context descriptors needed. 
*/ - nb_ctx = idpf_calc_context_desc(ol_flags); - nb_used = tx_pkt->nb_segs + nb_ctx; - - /* context descriptor */ - if (nb_ctx != 0) { - volatile union idpf_flex_tx_ctx_desc *ctx_desc = - (volatile union idpf_flex_tx_ctx_desc *)&txr[tx_id]; - - if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0) - idpf_set_splitq_tso_ctx(tx_pkt, tx_offload, - ctx_desc); - - tx_id++; - if (tx_id == txq->nb_tx_desc) - tx_id = 0; - } - - do { - txd = &txr[tx_id]; - txn = &sw_ring[txe->next_id]; - txe->mbuf = tx_pkt; - - /* Setup TX descriptor */ - txd->buf_addr = - rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt)); - txd->qw1.cmd_dtype = - rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE); - txd->qw1.rxr_bufsize = tx_pkt->data_len; - txd->qw1.compl_tag = sw_id; - tx_id++; - if (tx_id == txq->nb_tx_desc) - tx_id = 0; - sw_id = txe->next_id; - txe = txn; - tx_pkt = tx_pkt->next; - } while (tx_pkt); - - /* fill the last descriptor with End of Packet (EOP) bit */ - txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_EOP; - - if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK) - txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN; - txq->nb_free = (uint16_t)(txq->nb_free - nb_used); - txq->nb_used = (uint16_t)(txq->nb_used + nb_used); - - if (txq->nb_used >= 32) { - txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_RE; - /* Update txq RE bit counters */ - txq->nb_used = 0; - } - } - - /* update the tail pointer if any packets were processed */ - if (likely(nb_tx > 0)) { - IDPF_PCI_REG_WRITE(txq->qtx_tail, tx_id); - txq->tx_tail = tx_id; - txq->sw_tail = sw_id; - } - - return nb_tx; -} - -#define IDPF_RX_FLEX_DESC_STATUS0_XSUM_S \ - (RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | \ - RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) | \ - RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) | \ - RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)) - -/* Translate the rx descriptor status and error fields to pkt flags */ -static inline uint64_t -idpf_rxd_to_pkt_flags(uint16_t status_error) -{ - uint64_t flags = 0; - - if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_S)) == 0)) - return flags; - - if (likely((status_error & IDPF_RX_FLEX_DESC_STATUS0_XSUM_S) == 0)) { - flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD | - RTE_MBUF_F_RX_L4_CKSUM_GOOD); - return flags; - } - - if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)) != 0)) - flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; - else - flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; - - if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) != 0)) - flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; - else - flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; - - if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)) != 0)) - flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD; - - if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)) != 0)) - flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD; - else - flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD; - - return flags; -} - -static inline void -idpf_update_rx_tail(struct idpf_rx_queue *rxq, uint16_t nb_hold, - uint16_t rx_id) -{ - nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold); - - if (nb_hold > rxq->rx_free_thresh) { - PMD_RX_LOG(DEBUG, - "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u", - rxq->port_id, rxq->queue_id, rx_id, nb_hold); - rx_id = (uint16_t)((rx_id == 0) ? 
- (rxq->nb_rx_desc - 1) : (rx_id - 1)); - IDPF_PCI_REG_WRITE(rxq->qrx_tail, rx_id); - nb_hold = 0; - } - rxq->nb_rx_hold = nb_hold; -} - -uint16_t -idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts) -{ - volatile union virtchnl2_rx_desc *rx_ring; - volatile union virtchnl2_rx_desc *rxdp; - union virtchnl2_rx_desc rxd; - struct idpf_rx_queue *rxq; - const uint32_t *ptype_tbl; - uint16_t rx_id, nb_hold; - struct rte_eth_dev *dev; - struct idpf_adapter_ext *ad; - uint16_t rx_packet_len; - struct rte_mbuf *rxm; - struct rte_mbuf *nmb; - uint16_t rx_status0; - uint64_t pkt_flags; - uint64_t dma_addr; - uint64_t ts_ns; - uint16_t nb_rx; - - nb_rx = 0; - nb_hold = 0; - rxq = rx_queue; - - ad = IDPF_ADAPTER_TO_EXT(rxq->adapter); - - if (unlikely(rxq == NULL) || unlikely(!rxq->q_started)) - return nb_rx; - - rx_id = rxq->rx_tail; - rx_ring = rxq->rx_ring; - ptype_tbl = rxq->adapter->ptype_tbl; - - if ((rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) != 0) - rxq->hw_register_set = 1; - - while (nb_rx < nb_pkts) { - rxdp = &rx_ring[rx_id]; - rx_status0 = rte_le_to_cpu_16(rxdp->flex_nic_wb.status_error0); - - /* Check the DD bit first */ - if ((rx_status0 & (1 << VIRTCHNL2_RX_FLEX_DESC_STATUS0_DD_S)) == 0) - break; - - nmb = rte_mbuf_raw_alloc(rxq->mp); - if (unlikely(nmb == NULL)) { - dev = &rte_eth_devices[rxq->port_id]; - dev->data->rx_mbuf_alloc_failed++; - PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u " - "queue_id=%u", rxq->port_id, rxq->queue_id); - break; - } - rxd = *rxdp; /* copy descriptor in ring to temp variable*/ - - nb_hold++; - rxm = rxq->sw_ring[rx_id]; - rxq->sw_ring[rx_id] = nmb; - rx_id++; - if (unlikely(rx_id == rxq->nb_rx_desc)) - rx_id = 0; - - /* Prefetch next mbuf */ - rte_prefetch0(rxq->sw_ring[rx_id]); - - /* When next RX descriptor is on a cache line boundary, - * prefetch the next 4 RX descriptors and next 8 pointers - * to mbufs. 
- */ - if ((rx_id & 0x3) == 0) { - rte_prefetch0(&rx_ring[rx_id]); - rte_prefetch0(rxq->sw_ring[rx_id]); - } - dma_addr = - rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb)); - rxdp->read.hdr_addr = 0; - rxdp->read.pkt_addr = dma_addr; - - rx_packet_len = (rte_cpu_to_le_16(rxd.flex_nic_wb.pkt_len) & - VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M); - - rxm->data_off = RTE_PKTMBUF_HEADROOM; - rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM)); - rxm->nb_segs = 1; - rxm->next = NULL; - rxm->pkt_len = rx_packet_len; - rxm->data_len = rx_packet_len; - rxm->port = rxq->port_id; - rxm->ol_flags = 0; - pkt_flags = idpf_rxd_to_pkt_flags(rx_status0); - rxm->packet_type = - ptype_tbl[(uint8_t)(rte_cpu_to_le_16(rxd.flex_nic_wb.ptype_flex_flags0) & - VIRTCHNL2_RX_FLEX_DESC_PTYPE_M)]; - - rxm->ol_flags |= pkt_flags; - - if (idpf_timestamp_dynflag > 0 && - (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) != 0) { - /* timestamp */ - ts_ns = idpf_tstamp_convert_32b_64b(ad, - rxq->hw_register_set, - rte_le_to_cpu_32(rxd.flex_nic_wb.flex_ts.ts_high)); - rxq->hw_register_set = 0; - *RTE_MBUF_DYNFIELD(rxm, - idpf_timestamp_dynfield_offset, - rte_mbuf_timestamp_t *) = ts_ns; - rxm->ol_flags |= idpf_timestamp_dynflag; - } - - rx_pkts[nb_rx++] = rxm; - } - rxq->rx_tail = rx_id; - - idpf_update_rx_tail(rxq, nb_hold, rx_id); - - return nb_rx; -} - -static inline int -idpf_xmit_cleanup(struct idpf_tx_queue *txq) -{ - uint16_t last_desc_cleaned = txq->last_desc_cleaned; - struct idpf_tx_entry *sw_ring = txq->sw_ring; - uint16_t nb_tx_desc = txq->nb_tx_desc; - uint16_t desc_to_clean_to; - uint16_t nb_tx_to_clean; - uint16_t i; - - volatile struct idpf_flex_tx_desc *txd = txq->tx_ring; - - desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh); - if (desc_to_clean_to >= nb_tx_desc) - desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc); - - desc_to_clean_to = sw_ring[desc_to_clean_to].last_id; - /* In the writeback Tx desccriptor, the only significant fields are the 4-bit DTYPE */ - if ((txd[desc_to_clean_to].qw1.cmd_dtype & - rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) != - rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) { - PMD_TX_LOG(DEBUG, "TX descriptor %4u is not done " - "(port=%d queue=%d)", desc_to_clean_to, - txq->port_id, txq->queue_id); - return -1; - } - - if (last_desc_cleaned > desc_to_clean_to) - nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) + - desc_to_clean_to); - else - nb_tx_to_clean = (uint16_t)(desc_to_clean_to - - last_desc_cleaned); - - txd[desc_to_clean_to].qw1.cmd_dtype = 0; - txd[desc_to_clean_to].qw1.buf_size = 0; - for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++) - txd[desc_to_clean_to].qw1.flex.raw[i] = 0; - - txq->last_desc_cleaned = desc_to_clean_to; - txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean); - - return 0; -} - -/* TX function */ -uint16_t -idpf_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t nb_pkts) -{ - volatile struct idpf_flex_tx_desc *txd; - volatile struct idpf_flex_tx_desc *txr; - union idpf_tx_offload tx_offload = {0}; - struct idpf_tx_entry *txe, *txn; - struct idpf_tx_entry *sw_ring; - struct idpf_tx_queue *txq; - struct rte_mbuf *tx_pkt; - struct rte_mbuf *m_seg; - uint64_t buf_dma_addr; - uint64_t ol_flags; - uint16_t tx_last; - uint16_t nb_used; - uint16_t nb_ctx; - uint16_t td_cmd; - uint16_t tx_id; - uint16_t nb_tx; - uint16_t slen; - - nb_tx = 0; - txq = tx_queue; - - if (unlikely(txq == NULL) || unlikely(!txq->q_started)) - return nb_tx; - - sw_ring = txq->sw_ring; - txr = 
txq->tx_ring; - tx_id = txq->tx_tail; - txe = &sw_ring[tx_id]; - - /* Check if the descriptor ring needs to be cleaned. */ - if (txq->nb_free < txq->free_thresh) - (void)idpf_xmit_cleanup(txq); - - for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { - td_cmd = 0; - - tx_pkt = *tx_pkts++; - RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf); - - ol_flags = tx_pkt->ol_flags; - tx_offload.l2_len = tx_pkt->l2_len; - tx_offload.l3_len = tx_pkt->l3_len; - tx_offload.l4_len = tx_pkt->l4_len; - tx_offload.tso_segsz = tx_pkt->tso_segsz; - /* Calculate the number of context descriptors needed. */ - nb_ctx = idpf_calc_context_desc(ol_flags); - - /* The number of descriptors that must be allocated for - * a packet equals to the number of the segments of that - * packet plus 1 context descriptor if needed. - */ - nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx); - tx_last = (uint16_t)(tx_id + nb_used - 1); - - /* Circular ring */ - if (tx_last >= txq->nb_tx_desc) - tx_last = (uint16_t)(tx_last - txq->nb_tx_desc); - - PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u" - " tx_first=%u tx_last=%u", - txq->port_id, txq->queue_id, tx_id, tx_last); - - if (nb_used > txq->nb_free) { - if (idpf_xmit_cleanup(txq) != 0) { - if (nb_tx == 0) - return 0; - goto end_of_tx; - } - if (unlikely(nb_used > txq->rs_thresh)) { - while (nb_used > txq->nb_free) { - if (idpf_xmit_cleanup(txq) != 0) { - if (nb_tx == 0) - return 0; - goto end_of_tx; - } - } - } - } - - if (nb_ctx != 0) { - /* Setup TX context descriptor if required */ - volatile union idpf_flex_tx_ctx_desc *ctx_txd = - (volatile union idpf_flex_tx_ctx_desc *) - &txr[tx_id]; - - txn = &sw_ring[txe->next_id]; - RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf); - if (txe->mbuf != NULL) { - rte_pktmbuf_free_seg(txe->mbuf); - txe->mbuf = NULL; - } - - /* TSO enabled */ - if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0) - idpf_set_splitq_tso_ctx(tx_pkt, tx_offload, - ctx_txd); - - txe->last_id = tx_last; - tx_id = txe->next_id; - txe = txn; - } - - m_seg = tx_pkt; - do { - txd = &txr[tx_id]; - txn = &sw_ring[txe->next_id]; - - if (txe->mbuf != NULL) - rte_pktmbuf_free_seg(txe->mbuf); - txe->mbuf = m_seg; - - /* Setup TX Descriptor */ - slen = m_seg->data_len; - buf_dma_addr = rte_mbuf_data_iova(m_seg); - txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr); - txd->qw1.buf_size = slen; - txd->qw1.cmd_dtype = rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA << - IDPF_FLEX_TXD_QW1_DTYPE_S); - - txe->last_id = tx_last; - tx_id = txe->next_id; - txe = txn; - m_seg = m_seg->next; - } while (m_seg); - - /* The last packet data descriptor needs End Of Packet (EOP) */ - td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP; - txq->nb_used = (uint16_t)(txq->nb_used + nb_used); - txq->nb_free = (uint16_t)(txq->nb_free - nb_used); - - if (txq->nb_used >= txq->rs_thresh) { - PMD_TX_LOG(DEBUG, "Setting RS bit on TXD id=" - "%4u (port=%d queue=%d)", - tx_last, txq->port_id, txq->queue_id); - - td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS; - - /* Update txq RS bit counters */ - txq->nb_used = 0; - } - - if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK) - td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN; - - txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd << IDPF_FLEX_TXD_QW1_CMD_S); - } - -end_of_tx: - rte_wmb(); - - PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u", - txq->port_id, txq->queue_id, tx_id, nb_tx); - - IDPF_PCI_REG_WRITE(txq->qtx_tail, tx_id); - txq->tx_tail = tx_id; - - return nb_tx; -} - -/* TX prep functions */ -uint16_t -idpf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t nb_pkts) -{ -#ifdef RTE_LIBRTE_ETHDEV_DEBUG - int ret; 
-#endif - int i; - uint64_t ol_flags; - struct rte_mbuf *m; - - for (i = 0; i < nb_pkts; i++) { - m = tx_pkts[i]; - ol_flags = m->ol_flags; - - /* Check condition for nb_segs > IDPF_TX_MAX_MTU_SEG. */ - if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0) { - if (m->nb_segs > IDPF_TX_MAX_MTU_SEG) { - rte_errno = EINVAL; - return i; - } - } else if ((m->tso_segsz < IDPF_MIN_TSO_MSS) || - (m->tso_segsz > IDPF_MAX_TSO_MSS) || - (m->pkt_len > IDPF_MAX_TSO_FRAME_SIZE)) { - /* MSS outside the range are considered malicious */ - rte_errno = EINVAL; - return i; - } - - if ((ol_flags & IDPF_TX_OFFLOAD_NOTSUP_MASK) != 0) { - rte_errno = ENOTSUP; - return i; - } - - if (m->pkt_len < IDPF_MIN_FRAME_SIZE) { - rte_errno = EINVAL; - return i; - } - -#ifdef RTE_LIBRTE_ETHDEV_DEBUG - ret = rte_validate_tx_offload(m); - if (ret != 0) { - rte_errno = -ret; - return i; - } -#endif - } - - return i; -} - static void __rte_cold release_rxq_mbufs_vec(struct idpf_rx_queue *rxq) { diff --git a/drivers/net/idpf/idpf_rxtx.h b/drivers/net/idpf/idpf_rxtx.h index 4efbf10295..eab363c3e7 100644 --- a/drivers/net/idpf/idpf_rxtx.h +++ b/drivers/net/idpf/idpf_rxtx.h @@ -8,41 +8,6 @@ #include #include "idpf_ethdev.h" -/* MTS */ -#define GLTSYN_CMD_SYNC_0_0 (PF_TIMESYNC_BASE + 0x0) -#define PF_GLTSYN_SHTIME_0_0 (PF_TIMESYNC_BASE + 0x4) -#define PF_GLTSYN_SHTIME_L_0 (PF_TIMESYNC_BASE + 0x8) -#define PF_GLTSYN_SHTIME_H_0 (PF_TIMESYNC_BASE + 0xC) -#define GLTSYN_ART_L_0 (PF_TIMESYNC_BASE + 0x10) -#define GLTSYN_ART_H_0 (PF_TIMESYNC_BASE + 0x14) -#define PF_GLTSYN_SHTIME_0_1 (PF_TIMESYNC_BASE + 0x24) -#define PF_GLTSYN_SHTIME_L_1 (PF_TIMESYNC_BASE + 0x28) -#define PF_GLTSYN_SHTIME_H_1 (PF_TIMESYNC_BASE + 0x2C) -#define PF_GLTSYN_SHTIME_0_2 (PF_TIMESYNC_BASE + 0x44) -#define PF_GLTSYN_SHTIME_L_2 (PF_TIMESYNC_BASE + 0x48) -#define PF_GLTSYN_SHTIME_H_2 (PF_TIMESYNC_BASE + 0x4C) -#define PF_GLTSYN_SHTIME_0_3 (PF_TIMESYNC_BASE + 0x64) -#define PF_GLTSYN_SHTIME_L_3 (PF_TIMESYNC_BASE + 0x68) -#define PF_GLTSYN_SHTIME_H_3 (PF_TIMESYNC_BASE + 0x6C) - -#define PF_TIMESYNC_BAR4_BASE 0x0E400000 -#define GLTSYN_ENA (PF_TIMESYNC_BAR4_BASE + 0x90) -#define GLTSYN_CMD (PF_TIMESYNC_BAR4_BASE + 0x94) -#define GLTSYC_TIME_L (PF_TIMESYNC_BAR4_BASE + 0x104) -#define GLTSYC_TIME_H (PF_TIMESYNC_BAR4_BASE + 0x108) - -#define GLTSYN_CMD_SYNC_0_4 (PF_TIMESYNC_BAR4_BASE + 0x110) -#define PF_GLTSYN_SHTIME_L_4 (PF_TIMESYNC_BAR4_BASE + 0x118) -#define PF_GLTSYN_SHTIME_H_4 (PF_TIMESYNC_BAR4_BASE + 0x11C) -#define GLTSYN_INCVAL_L (PF_TIMESYNC_BAR4_BASE + 0x150) -#define GLTSYN_INCVAL_H (PF_TIMESYNC_BAR4_BASE + 0x154) -#define GLTSYN_SHADJ_L (PF_TIMESYNC_BAR4_BASE + 0x158) -#define GLTSYN_SHADJ_H (PF_TIMESYNC_BAR4_BASE + 0x15C) - -#define GLTSYN_CMD_SYNC_0_5 (PF_TIMESYNC_BAR4_BASE + 0x130) -#define PF_GLTSYN_SHTIME_L_5 (PF_TIMESYNC_BAR4_BASE + 0x138) -#define PF_GLTSYN_SHTIME_H_5 (PF_TIMESYNC_BAR4_BASE + 0x13C) - /* In QLEN must be whole number of 32 descriptors. 
*/ #define IDPF_ALIGN_RING_DESC 32 #define IDPF_MIN_RING_DESC 32 @@ -62,44 +27,10 @@ #define IDPF_DEFAULT_TX_RS_THRESH 32 #define IDPF_DEFAULT_TX_FREE_THRESH 32 -#define IDPF_TX_MAX_MTU_SEG 10 - -#define IDPF_MIN_TSO_MSS 88 -#define IDPF_MAX_TSO_MSS 9728 -#define IDPF_MAX_TSO_FRAME_SIZE 262143 -#define IDPF_TX_MAX_MTU_SEG 10 - -#define IDPF_TX_CKSUM_OFFLOAD_MASK ( \ - RTE_MBUF_F_TX_IP_CKSUM | \ - RTE_MBUF_F_TX_L4_MASK | \ - RTE_MBUF_F_TX_TCP_SEG) - -#define IDPF_TX_OFFLOAD_MASK ( \ - IDPF_TX_CKSUM_OFFLOAD_MASK | \ - RTE_MBUF_F_TX_IPV4 | \ - RTE_MBUF_F_TX_IPV6) - -#define IDPF_TX_OFFLOAD_NOTSUP_MASK \ - (RTE_MBUF_F_TX_OFFLOAD_MASK ^ IDPF_TX_OFFLOAD_MASK) - -extern uint64_t idpf_timestamp_dynflag; - struct idpf_tx_vec_entry { struct rte_mbuf *mbuf; }; -/* Offload features */ -union idpf_tx_offload { - uint64_t data; - struct { - uint64_t l2_len:7; /* L2 (MAC) Header Length. */ - uint64_t l3_len:9; /* L3 (IP) Header Length. */ - uint64_t l4_len:8; /* L4 Header Length. */ - uint64_t tso_segsz:16; /* TCP TSO segment size */ - /* uint64_t unused : 24; */ - }; -}; - int idpf_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, unsigned int socket_id, const struct rte_eth_rxconf *rx_conf, @@ -118,77 +49,14 @@ int idpf_tx_queue_init(struct rte_eth_dev *dev, uint16_t tx_queue_id); int idpf_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id); int idpf_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id); void idpf_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); -uint16_t idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts); uint16_t idpf_singleq_recv_pkts_avx512(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); -uint16_t idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts); -uint16_t idpf_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t nb_pkts); uint16_t idpf_singleq_xmit_pkts_avx512(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); -uint16_t idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t nb_pkts); -uint16_t idpf_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t nb_pkts); void idpf_stop_queues(struct rte_eth_dev *dev); void idpf_set_rx_function(struct rte_eth_dev *dev); void idpf_set_tx_function(struct rte_eth_dev *dev); -#define IDPF_TIMESYNC_REG_WRAP_GUARD_BAND 10000 -/* Helper function to convert a 32b nanoseconds timestamp to 64b. */ -static inline uint64_t - -idpf_tstamp_convert_32b_64b(struct idpf_adapter_ext *ad, uint32_t flag, - uint32_t in_timestamp) -{ -#ifdef RTE_ARCH_X86_64 - struct idpf_hw *hw = &ad->base.hw; - const uint64_t mask = 0xFFFFFFFF; - uint32_t hi, lo, lo2, delta; - uint64_t ns; - - if (flag != 0) { - IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M); - IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M | - PF_GLTSYN_CMD_SYNC_SHTIME_EN_M); - lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0); - hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0); - /* - * On typical system, the delta between lo and lo2 is ~1000ns, - * so 10000 seems a large-enough but not overly-big guard band. 
- */ - if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND)) - lo2 = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0); - else - lo2 = lo; - - if (lo2 < lo) { - lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0); - hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0); - } - - ad->time_hw = ((uint64_t)hi << 32) | lo; - } - - delta = (in_timestamp - (uint32_t)(ad->time_hw & mask)); - if (delta > (mask / 2)) { - delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp); - ns = ad->time_hw - delta; - } else { - ns = ad->time_hw + delta; - } - - return ns; -#else /* !RTE_ARCH_X86_64 */ - RTE_SET_USED(ad); - RTE_SET_USED(flag); - RTE_SET_USED(in_timestamp); - return 0; -#endif /* RTE_ARCH_X86_64 */ -} - #endif /* _IDPF_RXTX_H_ */ diff --git a/drivers/net/idpf/idpf_rxtx_vec_avx512.c b/drivers/net/idpf/idpf_rxtx_vec_avx512.c index 71a6c59823..ea949635e0 100644 --- a/drivers/net/idpf/idpf_rxtx_vec_avx512.c +++ b/drivers/net/idpf/idpf_rxtx_vec_avx512.c @@ -38,8 +38,8 @@ idpf_singleq_rearm_common(struct idpf_rx_queue *rxq) dma_addr0); } } - rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += - IDPF_RXQ_REARM_THRESH; + rte_atomic64_add(&rxq->rx_stats.mbuf_alloc_failed, + IDPF_RXQ_REARM_THRESH); return; } struct rte_mbuf *mb0, *mb1, *mb2, *mb3; @@ -168,8 +168,8 @@ idpf_singleq_rearm(struct idpf_rx_queue *rxq) dma_addr0); } } - rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += - IDPF_RXQ_REARM_THRESH; + rte_atomic64_add(&rxq->rx_stats.mbuf_alloc_failed, + IDPF_RXQ_REARM_THRESH); return; } }
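
For reference, the 32b-to-64b conversion applied to the Rx timestamps above reduces to extending a truncated 32-bit sample against the cached 64-bit reference (ad->time_hw): if the unsigned difference exceeds half the 32-bit range, the sample is taken to be behind the reference rather than ahead of it. A minimal standalone sketch of just that arithmetic (no register access; the function name is illustrative):

#include <stdint.h>

/* Extend a 32-bit hardware timestamp against a cached 64-bit time,
 * assuming the two are less than 2^31 apart.
 */
static uint64_t
tstamp_extend_32b(uint64_t time_hw, uint32_t in_ts)
{
	uint32_t ref_lo = (uint32_t)(time_hw & 0xFFFFFFFFu);
	uint32_t delta = in_ts - ref_lo;

	if (delta > UINT32_MAX / 2)
		return time_hw - (uint32_t)(ref_lo - in_ts); /* sample is behind */
	return time_hw + delta;                              /* sample is ahead  */
}

For example, time_hw = 0x100000100 with in_ts = 0x200 extends to 0x100000200, while in_ts = 0xFFFFFFF0 steps back to 0xFFFFFFF0.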
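
The IDPF_TIMESYNC_REG_WRAP_GUARD_BAND check in the same helper is the usual pattern for sampling a 64-bit time split across two 32-bit registers: when the low half is within the guard band of wrapping, read it a second time and, if a wrap is seen, re-read both halves so hi and lo stay consistent. A generic sketch of the pattern (the reader callbacks are hypothetical stand-ins for IDPF_READ_REG):

#include <stdint.h>

/* Read a 64-bit counter split across two 32-bit registers, guarding
 * against the low half wrapping between the two reads.
 */
static uint64_t
read_split_time(uint32_t (*rd_lo)(void), uint32_t (*rd_hi)(void),
		uint32_t guard_band)
{
	uint32_t lo = rd_lo();
	uint32_t hi = rd_hi();

	if (lo > UINT32_MAX - guard_band) {
		uint32_t lo2 = rd_lo();

		if (lo2 < lo) { /* low half wrapped; hi may be stale */
			lo = rd_lo();
			hi = rd_hi();
		}
	}
	return ((uint64_t)hi << 32) | lo;
}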
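
On the receive side, the timestamps that idpf_singleq_recv_pkts() stores through idpf_timestamp_dynfield_offset are consumed via the generic mbuf dynamic field and flag. A minimal application-side sketch, assuming the port is configured with RTE_ETH_RX_OFFLOAD_TIMESTAMP (port/queue ids and the burst size are arbitrary):

#include <inttypes.h>
#include <stdio.h>

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>

/* Drain one burst and print the Rx timestamps the PMD wrote into the
 * dynamic mbuf field.
 */
static void
print_rx_timestamps(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[32];
	int ts_off = rte_mbuf_dynfield_lookup(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
	int ts_flag = rte_mbuf_dynflag_lookup(RTE_MBUF_DYNFLAG_RX_TIMESTAMP_NAME, NULL);
	uint16_t nb, i;

	if (ts_off < 0 || ts_flag < 0)
		return; /* timestamp field/flag not registered by the PMD */

	nb = rte_eth_rx_burst(port_id, queue_id, pkts, RTE_DIM(pkts));
	for (i = 0; i < nb; i++) {
		if ((pkts[i]->ol_flags & (1ULL << ts_flag)) != 0)
			printf("pkt %u: ts %" PRIu64 " ns\n", i,
			       *RTE_MBUF_DYNFIELD(pkts[i], ts_off,
						  rte_mbuf_timestamp_t *));
		rte_pktmbuf_free(pkts[i]);
	}
}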
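
On the transmit side, idpf_xmit_cleanup() has one subtle step: counting how many descriptors separate the last cleaned slot from the next completed descriptor on a circular ring. A standalone sketch of that wrap-aware distance (illustrative names, not a drop-in helper):

#include <stdint.h>

/* Descriptors from 'last_cleaned' (exclusive) up to and including
 * 'clean_to' on a ring of 'nb_desc' entries.
 */
static uint16_t
tx_ring_clean_count(uint16_t last_cleaned, uint16_t clean_to, uint16_t nb_desc)
{
	if (last_cleaned > clean_to)
		return (uint16_t)((nb_desc - last_cleaned) + clean_to);
	return (uint16_t)(clean_to - last_cleaned);
}

With nb_desc = 512, last_cleaned = 500 and clean_to = 20 this gives (512 - 500) + 20 = 32 descriptors, i.e. one IDPF_DEFAULT_TX_RS_THRESH worth at the defaults above.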