From patchwork Tue Jan 9 14:11:05 2018
X-Patchwork-Submitter: Xueming Li <xuemingl@mellanox.com>
X-Patchwork-Id: 33246
From: Xueming Li <xuemingl@mellanox.com>
To: Olivier MATZ, Thomas Monjalon, Jingjing Wu, Yongseok Koh
Cc: Xueming Li, Shahaf Shuler, dev@dpdk.org
Date: Tue, 9 Jan 2018 22:11:05 +0800
Message-Id: <20180109141110.146250-2-xuemingl@mellanox.com>
X-Mailer: git-send-email 2.13.3
In-Reply-To: <20180109141110.146250-1-xuemingl@mellanox.com>
References: <20180109141110.146250-1-xuemingl@mellanox.com>
Subject: [dpdk-dev] [PATCH 1/6] net/mlx5: support tx swp tunnel offloading

This commit adds support for generic tunnel TSO and checksum offloads.
The PMD computes the inner/outer header offsets from the mbuf fields;
the hardware then performs the checksum and TSO calculations based on
those offsets and types. This capability is supported only for PFs.
An example of the expected application-side mbuf setup is sketched
after the diff.

Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
 drivers/net/mlx5/mlx5_prm.h  |  12 ++++
 drivers/net/mlx5/mlx5_rxtx.c | 163 ++++++++++++++++++++++++++++---------------
 drivers/net/mlx5/mlx5_rxtx.h |  94 ++++++++++++++++++++-----
 drivers/net/mlx5/mlx5_txq.c  |   1 +
 4 files changed, 195 insertions(+), 75 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 2de310bcb..edf39c249 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -135,6 +135,18 @@
 /* Inner L4 checksum offload (Tunneled packets only). */
 #define MLX5_ETH_WQE_L4_INNER_CSUM (1u << 5)
 
+/* Outer L4 type is UDP. */
+#define MLX5_ETH_OUTER_L4_UDP (1u << 5)
+
+/* Outer L3 type is IPV6. */
+#define MLX5_ETH_OUTER_L3_IPV6 (1u << 4)
+
+/* Inner L4 type is UDP. */
+#define MLX5_ETH_INNER_L4_UDP (1u << 1)
+
+/* Inner L3 type is IPV6. */
+#define MLX5_ETH_INNER_L3_IPV6 (1u << 0)
+
 /* Is flow mark valid. */
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #define MLX5_FLOW_MARK_IS_VALID(val) ((val) & 0xffffff00)
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 3b8f71c28..d79f9fc0e 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -247,6 +247,80 @@ mlx5_copy_to_wq(void *dst, const void *src, size_t n,
 }
 
 /**
+ * Inline TSO headers into the WQE and set checksum fields.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param buf
+ *   Pointer to packet mbuf structure.
+ * @param raw
+ *   Double pointer to the current WQE write offset.
+ * @param cs_flags
+ *   Pointer to checksum flags.
+ * @param swp_offsets
+ *   Pointer to header offsets when using the software parser.
+ * @param swp_types
+ *   Pointer to header types when using the software parser.
+ * @param max_wqe
+ *   Pointer to the available number of WQEs.
+ *
+ * @return
+ *   Size of the headers copied into the WQE on success, a negative errno
+ *   value otherwise. The following errors are defined:
+ *
+ *   -EINVAL: invalid arguments for TSO; the packet headers are too large
+ *   or there are not enough WQEs, so the TSO cannot be executed.
+ *
+ *   -ENOMEM: reached the end of the WQ ring; the TSO WQE can be executed
+ *   only after the WQ ring wraps around.
+ */
+static int
+process_tso(struct mlx5_txq_data *txq, struct rte_mbuf *buf, uint8_t **raw,
+            uint16_t *max_wqe)
+{
+        uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
+        volatile struct mlx5_wqe *wqe = (volatile struct mlx5_wqe *)
+                                        tx_mlx5_wqe(txq, txq->wqe_ci);
+        uint8_t *curr = *raw;
+        const uint8_t tunneled = txq->tunnel_en &&
+                                 (buf->ol_flags & PKT_TX_TUNNEL_MASK);
+        uint16_t pkt_inline_sz = (uintptr_t)curr - (uintptr_t)wqe -
+                                 (MLX5_WQE_DWORD_SIZE * 2 - 2);
+        uint8_t vlan_sz = (buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;
+        uintptr_t end = (uintptr_t)(((uintptr_t)txq->wqes) +
+                                    (1 << txq->wqe_n) * MLX5_WQE_SIZE);
+        unsigned int copy_b;
+        uint16_t tso_header_sz;
+
+        if (vlan_sz)
+                addr += 2 * ETHER_ADDR_LEN + 2;
+        else
+                addr += pkt_inline_sz;
+        tso_header_sz = buf->l2_len + vlan_sz + buf->l3_len + buf->l4_len;
+        if (tunneled)
+                tso_header_sz += buf->outer_l2_len + buf->outer_l3_len;
+        if (unlikely(tso_header_sz > MLX5_MAX_TSO_HEADER)) {
+                txq->stats.oerrors++;
+                return -EINVAL;
+        }
+        copy_b = tso_header_sz - pkt_inline_sz;
+        if (copy_b && ((end - (uintptr_t)curr) > copy_b)) {
+                uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
+
+                if (unlikely(*max_wqe < n))
+                        return -EINVAL;
+                *max_wqe -= n;
+                rte_memcpy((void *)curr, (void *)addr, copy_b);
+                /* Another DWORD will be added in the inline part. */
+                *raw = curr + MLX5_WQE_DS(copy_b) * MLX5_WQE_DWORD_SIZE;
+        } else {
+                return -ENOMEM;
+        }
+        return copy_b;
+}
+
+/**
  * DPDK callback to check the status of a tx descriptor.
  *
  * @param tx_queue
@@ -376,6 +450,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                 uint16_t ehdr;
                 uint8_t cs_flags;
                 uint64_t tso = 0;
+                uint32_t swp_offsets = 0;
+                uint8_t swp_types = 0;
                 uint16_t tso_segsz = 0;
 #ifdef MLX5_PMD_SOFT_COUNTERS
                 uint32_t total_length = 0;
@@ -417,7 +493,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                 if (pkts_n - i > 1)
                         rte_prefetch0(
                             rte_pktmbuf_mtod(*(pkts + 1), volatile void *));
-                cs_flags = txq_ol_cksum_to_cs(txq, buf);
+                cs_flags = txq_ol_flags_to_verbs(txq, buf,
+                                                 (uint8_t *)&swp_offsets,
+                                                 &swp_types);
                 raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
                 /* Replace the Ethernet type by the VLAN if necessary. */
                 if (buf->ol_flags & PKT_TX_VLAN_PKT) {
@@ -445,69 +523,37 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                 raw += MLX5_WQE_DWORD_SIZE;
                 tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
                 if (tso) {
-                        uintptr_t end =
-                                (uintptr_t)(((uintptr_t)txq->wqes) +
-                                            (1 << txq->wqe_n) * MLX5_WQE_SIZE);
-                        unsigned int copy_b;
-                        uint8_t vlan_sz =
-                                (buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;
-                        const uint64_t is_tunneled =
-                                        buf->ol_flags & (PKT_TX_TUNNEL_GRE |
-                                                         PKT_TX_TUNNEL_VXLAN);
-
-                        tso_header_sz = buf->l2_len + vlan_sz +
-                                        buf->l3_len + buf->l4_len;
-                        tso_segsz = buf->tso_segsz;
-                        if (unlikely(tso_segsz == 0)) {
-                                txq->stats.oerrors++;
-                                break;
-                        }
-                        if (is_tunneled && txq->tunnel_en) {
-                                tso_header_sz += buf->outer_l2_len +
-                                                 buf->outer_l3_len;
-                                cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
-                        } else {
-                                cs_flags |= MLX5_ETH_WQE_L4_CSUM;
-                        }
-                        if (unlikely(tso_header_sz > MLX5_MAX_TSO_HEADER)) {
-                                txq->stats.oerrors++;
-                                break;
-                        }
-                        copy_b = tso_header_sz - pkt_inline_sz;
-                        /* First seg must contain all headers. */
-                        assert(copy_b <= length);
-                        if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
-                                uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
+                        int ret;
 
-                                if (unlikely(max_wqe < n))
-                                        break;
-                                max_wqe -= n;
-                                rte_memcpy((void *)raw, (void *)addr, copy_b);
-                                addr += copy_b;
-                                length -= copy_b;
-                                /* Include padding for TSO header. */
-                                copy_b = MLX5_WQE_DS(copy_b) *
-                                         MLX5_WQE_DWORD_SIZE;
-                                pkt_inline_sz += copy_b;
-                                raw += copy_b;
-                        } else {
+                        ret = process_tso(txq, buf, &raw, &max_wqe);
+                        if (ret == -EINVAL) {
+                                break;
+                        } else if (ret == -ENOMEM) {
                                 /* NOP WQE. */
                                 wqe->ctrl = (rte_v128u32_t){
-                                        rte_cpu_to_be_32(txq->wqe_ci << 8),
-                                        rte_cpu_to_be_32(txq->qp_num_8s | 1),
-                                        0,
-                                        0,
+                                                rte_cpu_to_be_32(txq->wqe_ci << 8),
+                                                rte_cpu_to_be_32(txq->qp_num_8s | 1),
+                                                0,
+                                                0,
                                 };
                                 ds = 1;
-#ifdef MLX5_PMD_SOFT_COUNTERS
                                 total_length = 0;
-#endif
                                 k++;
                                 goto next_wqe;
+                        } else {
+                                tso_segsz = buf->tso_segsz;
+                                if (unlikely(tso_segsz == 0)) {
+                                        txq->stats.oerrors++;
+                                        break;
+                                }
+                                addr += ret;
+                                length -= ret;
+                                pkt_inline_sz += ret;
+                                tso_header_sz = pkt_inline_sz;
                         }
                 }
                 /* Inline if enough room. */
-                if (max_inline || tso) {
+                if (max_inline || unlikely(tso)) {
                         uint32_t inl = 0;
                         uintptr_t end = (uintptr_t)
                                 (((uintptr_t)txq->wqes) +
@@ -652,7 +698,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                 ++i;
                 j += sg;
                 /* Initialize known and common part of the WQE structure. */
-                if (tso) {
+                if (unlikely(tso)) {
                         wqe->ctrl = (rte_v128u32_t){
                                 rte_cpu_to_be_32((txq->wqe_ci << 8) |
                                                  MLX5_OPCODE_TSO),
@@ -661,8 +707,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                 0,
                         };
                         wqe->eseg = (rte_v128u32_t){
-                                0,
-                                cs_flags | (rte_cpu_to_be_16(tso_segsz) << 16),
+                                swp_offsets,
+                                cs_flags | (swp_types << 8) |
+                                (rte_cpu_to_be_16(tso_segsz) << 16),
                                 0,
                                 (ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
                         };
@@ -675,8 +722,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                 0,
                         };
                         wqe->eseg = (rte_v128u32_t){
-                                0,
-                                cs_flags,
+                                swp_offsets,
+                                cs_flags | (swp_types << 8),
                                 0,
                                 (ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
                         };
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 4ade8bee1..852594708 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -196,6 +196,7 @@ struct mlx5_txq_data {
         uint16_t tso_en:1; /* When set hardware TSO is enabled. */
         uint16_t tunnel_en:1;
         /* When set TX offload for tunneled packets are supported. */
+        uint16_t swp_en:1; /* When set software parser is supported. */
         uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
         uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
         uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
@@ -623,40 +624,99 @@ mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
 }
 
 /**
- * Convert the Checksum offloads to Verbs.
+ * Convert mbuf Tx offload info to Verbs.
 *
 * @param txq_data
 *   Pointer to the Tx queue.
 * @param buf
 *   Pointer to the mbuf.
+ * @param offsets
+ *   Pointer to the header offsets.
+ * @param swp_types
+ *   Pointer to the SWP types.
 *
 * @return
 *   the converted cs_flags.
 */
 static __rte_always_inline uint8_t
-txq_ol_cksum_to_cs(struct mlx5_txq_data *txq_data, struct rte_mbuf *buf)
+txq_ol_flags_to_verbs(struct mlx5_txq_data *txq_data, struct rte_mbuf *buf,
+                      uint8_t *offsets, uint8_t *swp_types)
 {
         uint8_t cs_flags = 0;
-
-        /* Should we enable HW CKSUM offload */
-        if (buf->ol_flags &
-            (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
-                if (txq_data->tunnel_en &&
-                    (buf->ol_flags &
-                     (PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN))) {
-                        cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
-                                   MLX5_ETH_WQE_L4_INNER_CSUM;
-                        if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
-                                cs_flags |= MLX5_ETH_WQE_L3_CSUM;
-                } else {
-                        cs_flags = MLX5_ETH_WQE_L3_CSUM |
-                                   MLX5_ETH_WQE_L4_CSUM;
-                }
+        uint8_t vlan_sz = (buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;
+        const uint8_t tunnel = txq_data->tunnel_en &&
+                               (buf->ol_flags & PKT_TX_TUNNEL_MASK);
+        const uint8_t tso = txq_data->tso_en &&
+                            (buf->ol_flags & PKT_TX_TCP_SEG);
+        uint16_t off = buf->outer_l2_len + vlan_sz;
+
+        if (likely(!tso && !(buf->ol_flags &
+                             (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
+                              PKT_TX_UDP_CKSUM | PKT_TX_OUTER_IP_CKSUM))))
+                return cs_flags;
+        if (likely(!tunnel)) {
+                if (buf->ol_flags & PKT_TX_IP_CKSUM)
+                        cs_flags = MLX5_ETH_WQE_L3_CSUM;
+                if (tso || (buf->ol_flags & PKT_TX_L4_MASK))
+                        cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+                return cs_flags;
+        }
+        /* Tunneled packets. */
+        if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
+                cs_flags = MLX5_ETH_WQE_L3_CSUM;
+        if (buf->ol_flags & PKT_TX_IP_CKSUM)
+                cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
+        if (tso || (buf->ol_flags & PKT_TX_L4_MASK))
+                cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
+        if (!txq_data->swp_en) /* HW offloading, only set csum flags. */
+                return cs_flags;
+        /* SW parser enabled. */
+        if (tso || (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)) {
+                offsets[1] = off >> 1; /* Outer L3 offset. */
+                if (buf->ol_flags & PKT_TX_OUTER_IPV6)
+                        *swp_types |= MLX5_ETH_OUTER_L3_IPV6;
+        }
+        off += buf->outer_l3_len;
+        /* TODO: is the outer L4 offset required? */
+        if (tso && (buf->ol_flags & PKT_TX_TUNNEL_VXLAN)) {
+                offsets[0] = off >> 1; /* Outer L4 offset. */
+                *swp_types |= MLX5_ETH_OUTER_L4_UDP;
+        }
+        off += buf->l2_len;
+        if (tso || (buf->ol_flags & PKT_TX_IP_CKSUM)) {
+                offsets[3] = off >> 1; /* Inner L3 offset. */
+                if (buf->ol_flags & PKT_TX_IPV6)
+                        *swp_types |= MLX5_ETH_INNER_L3_IPV6;
+        }
+        if (tso || (buf->ol_flags & PKT_TX_L4_MASK)) {
+                off += buf->l3_len;
+                offsets[2] = off >> 1; /* Inner L4 offset. */
+                if ((buf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
+                        *swp_types |= MLX5_ETH_INNER_L4_UDP;
         }
         return cs_flags;
 }
 
 /**
+ * Convert the Checksum offloads to Verbs.
+ *
+ * @param txq_data
+ *   Pointer to the Tx queue.
+ * @param buf
+ *   Pointer to the mbuf.
+ *
+ * @return
+ *   the converted cs_flags.
+ */
+static __rte_always_inline uint8_t
+txq_ol_cksum_to_cs(struct mlx5_txq_data *txq_data, struct rte_mbuf *buf)
+{
+        uint32_t offsets;
+        uint8_t swp_types;
+
+        return txq_ol_flags_to_verbs(txq_data, buf, (uint8_t *)&offsets,
+                                     &swp_types);
+}
+
+/**
  * Count the number of contiguous single segment packets.
 *
 * @param pkts
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index b81c85fed..bd7ba0834 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -729,6 +729,7 @@ txq_set_params(struct mlx5_txq_ctrl *txq_ctrl)
                 txq_ctrl->txq.tso_en = 1;
         }
         txq_ctrl->txq.tunnel_en = config->tunnel_en;
+        txq_ctrl->txq.swp_en = 1;
 }
 
 /**
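
For reference, the sketch below (not part of the patch) illustrates the
application-side mbuf setup this offload path expects: the header length
fields and offload flags are filled before the packet is handed to the
PMD, which then derives cs_flags, swp_offsets and swp_types from them.
It assumes a VXLAN-over-IPv4 tunnel carrying an inner IPv4/TCP packet;
the header lengths, the MSS value and the helper name
setup_vxlan_tso_mbuf() are illustrative only.

#include <rte_mbuf.h>

/* Illustrative sketch -- not part of the patch. */
static void
setup_vxlan_tso_mbuf(struct rte_mbuf *m)
{
	/* Outer headers: Ethernet + IPv4 (no options). */
	m->outer_l2_len = 14;
	m->outer_l3_len = 20;
	/*
	 * Per the mbuf convention for tunnel offloads, l2_len covers the
	 * outer UDP (8B) and VXLAN (8B) headers plus the inner Ethernet
	 * header (14B).
	 */
	m->l2_len = 8 + 8 + 14;
	m->l3_len = 20;		/* inner IPv4, no options */
	m->l4_len = 20;		/* inner TCP, no options */
	m->tso_segsz = 1398;	/* illustrative MSS */
	/*
	 * Request tunnel TSO plus inner/outer checksum offloads; the PMD
	 * computes the SWP header offsets and types from the lengths and
	 * flags set above.
	 */
	m->ol_flags |= PKT_TX_TUNNEL_VXLAN | PKT_TX_TCP_SEG |
		       PKT_TX_OUTER_IP_CKSUM | PKT_TX_IP_CKSUM |
		       PKT_TX_TCP_CKSUM;
}

A packet prepared this way and passed to rte_eth_tx_burst() exercises the
TSO branch above, where the SWP offsets end up in the first dword of the
Ethernet segment of the WQE.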