From patchwork Tue Sep 27 09:56:34 2022
X-Patchwork-Submitter: Zhichao Zeng <zhichaox.zeng@intel.com>
X-Patchwork-Id: 116964
X-Patchwork-Delegate: qi.z.zhang@intel.com
From: Zhichao Zeng <zhichaox.zeng@intel.com>
To: dev@dpdk.org
Cc: qiming.yang@intel.com, yidingx.zhou@intel.com, qi.z.zhang@intel.com,
 Zhichao Zeng, Jingjing Wu, Beilei Xing, Abhijit Sinha, Radu Nicolau,
 Declan Doherty
Subject: [PATCH v3] net/iavf: fix TSO offload for tunnel case
Date: Tue, 27 Sep 2022 17:56:34 +0800
Message-Id: <20220927095634.448796-1-zhichaox.zeng@intel.com>
In-Reply-To: <20220926051725.261950-1-zhichaox.zeng@intel.com>
References: <20220926051725.261950-1-zhichaox.zeng@intel.com>
List-Id: DPDK patches and discussions

This patch fixes the tunnel TSO not enabling issue, simplifies the logic
for calculating the 'Tx Buffer Size' of the data descriptor with IPsec,
and fixes the handling of mbufs whose data size exceeds the TX descriptor
hardware limit (1B to (16K - 1)B), which the NIC treats as malicious
behavior.
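To make the descriptor math concrete: the data descriptor's buffer-size
field is 14 bits wide, so a single descriptor can carry at most
(16K - 1)B = 16383 bytes, and any mbuf segment larger than that must be
spread over several descriptors. The following is a minimal standalone
sketch of that ceiling division, not driver code: MAX_DATA_PER_TXD and
descs_for_seg() are hypothetical names, and the constant is hard-coded
here rather than derived from the IAVF_TXD_QW1_TX_BUF_SZ_* mask/shift as
the patch does.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumption: (16K - 1)B, the largest value a 14-bit buffer-size
     * field can hold; the patch derives this from the QW1 mask/shift. */
    #define MAX_DATA_PER_TXD 16383u

    /* Descriptors needed for one data buffer: plain ceiling division,
     * the same arithmetic as the patch's iavf_calc_pkt_desc(). */
    static unsigned int descs_for_seg(uint32_t data_len)
    {
        return (data_len + MAX_DATA_PER_TXD - 1) / MAX_DATA_PER_TXD;
    }

    int main(void)
    {
        const uint32_t sizes[] = { 1500, 16383, 16384, 32768 };
        size_t i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
            printf("%5" PRIu32 "B -> %u descriptor(s)\n",
                   sizes[i], descs_for_seg(sizes[i]));
        return 0;
    }

Note the boundary: a 16384B segment already needs two descriptors, which
is exactly the case the old per-segment accounting missed.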
Fixes: 1e728b01120c ("net/iavf: rework Tx path")

Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>

---
v3: move macros to iavf header file
---
v2: rework patch
---
 drivers/net/iavf/iavf_rxtx.c | 92 +++++++++++++++++++++---------------
 drivers/net/iavf/iavf_rxtx.h |  4 ++
 2 files changed, 58 insertions(+), 38 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 3deabe1d7e..7e3bffb6f8 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2417,7 +2417,7 @@ iavf_fill_ctx_desc_segmentation_field(volatile uint64_t *field,
 		total_length = m->pkt_len - (m->l2_len + m->l3_len + m->l4_len);
 
 		if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
-			total_length -= m->outer_l3_len;
+			total_length -= m->outer_l3_len + m->outer_l2_len;
 	}
 
 #ifdef RTE_LIBRTE_IAVF_DEBUG_TX
@@ -2583,50 +2583,36 @@ iavf_build_data_desc_cmd_offset_fields(volatile uint64_t *qw1,
 		((uint64_t)l2tag1 << IAVF_TXD_DATA_QW1_L2TAG1_SHIFT));
 }
 
+/* Calculate the number of TX descriptors needed for each pkt */
+static inline uint16_t
+iavf_calc_pkt_desc(struct rte_mbuf *tx_pkt)
+{
+	struct rte_mbuf *txd = tx_pkt;
+	uint16_t count = 0;
+
+	while (txd != NULL) {
+		count += (txd->data_len + IAVF_MAX_DATA_PER_TXD - 1) /
+			IAVF_MAX_DATA_PER_TXD;
+		txd = txd->next;
+	}
+
+	return count;
+}
+
 static inline void
 iavf_fill_data_desc(volatile struct iavf_tx_desc *desc,
-	struct rte_mbuf *m, uint64_t desc_template,
-	uint16_t tlen, uint16_t ipseclen)
+	uint64_t desc_template, uint16_t buffsz,
+	uint64_t buffer_addr)
 {
-	uint32_t hdrlen = m->l2_len;
-	uint32_t bufsz = 0;
-
 	/* fill data descriptor qw1 from template */
 	desc->cmd_type_offset_bsz = desc_template;
 
-	/* set data buffer address */
-	desc->buffer_addr = rte_mbuf_data_iova(m);
-
-	/* calculate data buffer size less set header lengths */
-	if ((m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) &&
-	    (m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
-			RTE_MBUF_F_TX_UDP_SEG))) {
-		hdrlen += m->outer_l3_len;
-		if (m->ol_flags & RTE_MBUF_F_TX_L4_MASK)
-			hdrlen += m->l3_len + m->l4_len;
-		else
-			hdrlen += m->l3_len;
-		if (m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)
-			hdrlen += ipseclen;
-		bufsz = hdrlen + tlen;
-	} else if ((m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) &&
-			(m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
-					RTE_MBUF_F_TX_UDP_SEG))) {
-		hdrlen += m->outer_l3_len + m->l3_len + ipseclen;
-		if (m->ol_flags & RTE_MBUF_F_TX_L4_MASK)
-			hdrlen += m->l4_len;
-		bufsz = hdrlen + tlen;
-
-	} else {
-		bufsz = m->data_len;
-	}
-
 	/* set data buffer size */
 	desc->cmd_type_offset_bsz |=
-		(((uint64_t)bufsz << IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) &
+		(((uint64_t)buffsz << IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) &
 		IAVF_TXD_DATA_QW1_TX_BUF_SZ_MASK);
 
-	desc->buffer_addr = rte_cpu_to_le_64(desc->buffer_addr);
+	desc->buffer_addr = rte_cpu_to_le_64(buffer_addr);
 	desc->cmd_type_offset_bsz = rte_cpu_to_le_64(desc->cmd_type_offset_bsz);
 }
 
@@ -2651,8 +2637,10 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	struct iavf_tx_entry *txe_ring = txq->sw_ring;
 	struct iavf_tx_entry *txe, *txn;
 	struct rte_mbuf *mb, *mb_seg;
+	uint64_t buf_dma_addr;
 	uint16_t desc_idx, desc_idx_last;
 	uint16_t idx;
+	uint16_t slen;
 
 
 	/* Check if the descriptor ring needs to be cleaned. */
@@ -2691,8 +2679,14 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		 * The number of descriptors that must be allocated for
 		 * a packet equals to the number of the segments of that
 		 * packet plus the context and ipsec descriptors if needed.
+		 * Recalculate the needed tx descs when TSO enabled in case
+		 * the mbuf data size exceeds max data size that hw allows
+		 * per tx desc.
 		 */
-		nb_desc_required = nb_desc_data + nb_desc_ctx + nb_desc_ipsec;
+		if (mb->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
+			nb_desc_required = iavf_calc_pkt_desc(mb) + nb_desc_ctx + nb_desc_ipsec;
+		else
+			nb_desc_required = nb_desc_data + nb_desc_ctx + nb_desc_ipsec;
 
 		desc_idx_last = (uint16_t)(desc_idx + nb_desc_required - 1);
 
@@ -2788,8 +2782,30 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 				rte_pktmbuf_free_seg(txe->mbuf);
 
 			txe->mbuf = mb_seg;
-			iavf_fill_data_desc(ddesc, mb_seg,
-					ddesc_template, tlen, ipseclen);
+			slen = mb_seg->data_len;
+			if (mb_seg->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)
+				slen += ipseclen;
+			buf_dma_addr = rte_mbuf_data_iova(mb_seg);
+			while ((mb_seg->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
+					RTE_MBUF_F_TX_UDP_SEG)) &&
+					unlikely(slen > IAVF_MAX_DATA_PER_TXD)) {
+				iavf_fill_data_desc(ddesc, ddesc_template,
+					IAVF_MAX_DATA_PER_TXD, buf_dma_addr);
+
+				IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx);
+
+				buf_dma_addr += IAVF_MAX_DATA_PER_TXD;
+				slen -= IAVF_MAX_DATA_PER_TXD;
+
+				txe->last_id = desc_idx_last;
+				desc_idx = txe->next_id;
+				txe = txn;
+				ddesc = &txr[desc_idx];
+				txn = &txe_ring[txe->next_id];
+			}
+
+			iavf_fill_data_desc(ddesc, ddesc_template,
+				slen, buf_dma_addr);
 
 			IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx);
 
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 1695e43cd5..81b1418db1 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -72,6 +72,10 @@
 #define IAVF_TX_OFFLOAD_NOTSUP_MASK \
 	(RTE_MBUF_F_TX_OFFLOAD_MASK ^ IAVF_TX_OFFLOAD_MASK)
 
+/* HW requires that TX buffer size ranges from 1B up to (16K-1)B. */
+#define IAVF_MAX_DATA_PER_TXD \
+	(IAVF_TXD_QW1_TX_BUF_SZ_MASK >> IAVF_TXD_QW1_TX_BUF_SZ_SHIFT)
+
 extern uint64_t iavf_timestamp_dynflag;
 extern int iavf_timestamp_dynfield_offset;
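For completeness, here is a hypothetical, self-contained sketch of the
splitting behavior in isolation, mirroring the shape of the while loop
added to iavf_xmit_pkts(): full-sized chunks are emitted until the
remainder fits in one descriptor. The ring bookkeeping (txe/txn/desc_idx
advancement) is omitted, and fill_desc()/emit_segment() are illustrative
stand-ins rather than driver functions:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_DATA_PER_TXD 16383u /* assumed (16K - 1)B hardware limit */

    /* Stand-in for iavf_fill_data_desc(): print what one data
     * descriptor would carry instead of writing to the ring. */
    static void fill_desc(uint64_t buf_dma_addr, uint32_t buffsz)
    {
        printf("desc: addr=0x%" PRIx64 " len=%" PRIu32 "\n",
               buf_dma_addr, buffsz);
    }

    /* Split one oversized segment the way the new transmit loop does:
     * emit MAX_DATA_PER_TXD-sized chunks, then the remainder. */
    static void emit_segment(uint64_t buf_dma_addr, uint32_t slen)
    {
        while (slen > MAX_DATA_PER_TXD) {
            fill_desc(buf_dma_addr, MAX_DATA_PER_TXD);
            buf_dma_addr += MAX_DATA_PER_TXD;
            slen -= MAX_DATA_PER_TXD;
        }
        fill_desc(buf_dma_addr, slen); /* final (or only) descriptor */
    }

    int main(void)
    {
        /* 40000B segment -> 16383 + 16383 + 7234, i.e. three descriptors */
        emit_segment(0x100000, 40000);
        return 0;
    }

In the driver itself this loop is additionally gated on
RTE_MBUF_F_TX_TCP_SEG/RTE_MBUF_F_TX_UDP_SEG, so packets that are not
being segmented keep the single-descriptor fast path.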