From patchwork Wed Aug 23 16:01:38 2023
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: David Marchand <david.marchand@redhat.com>
X-Patchwork-Id: 130691
X-Patchwork-Delegate: ferruh.yigit@amd.com
From: David Marchand <david.marchand@redhat.com>
To: dev@dpdk.org
Cc: olivier.matz@6wind.com
Subject: [PATCH v2 3/3] net/tap: rework checksum offloading
Date: Wed, 23 Aug 2023 18:01:38 +0200
Message-ID: <20230823160138.291980-3-david.marchand@redhat.com>
In-Reply-To: <20230823160138.291980-1-david.marchand@redhat.com>
References: <20230822073244.3751885-1-david.marchand@redhat.com>
 <20230823160138.291980-1-david.marchand@redhat.com>

Get rid of all the complicated code that copies header data onto the stack:
- allocate a new segment from the same mempool as the original mbuf,
- copy the header data into this segment,
- chain the new segment in place of the headers of the original mbuf,
- use the existing helpers to compute the IP and TCP/UDP checksums,
- simplify the filling of the iovecs array.

With this rework, special care is needed when releasing mbufs in
pmd_tx_burst().
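For context, the whole rework hinges on three generic mbuf helpers from
rte_mbuf.h: rte_pktmbuf_copy(), rte_pktmbuf_adj() and rte_pktmbuf_chain().
A condensed sketch of the idea (prepend_writable_headers is a hypothetical
name for illustration; error handling trimmed to the minimum):

#include <rte_mbuf.h>

/* Duplicate the first hdrlens bytes of mbuf into a fresh segment taken
 * from the same mempool, trim those bytes from the original mbuf, and
 * chain the original mbuf behind the copy. The copy is private, hence
 * always writable, even when the original mbuf is indirect or shared.
 */
static struct rte_mbuf *
prepend_writable_headers(struct rte_mbuf *mbuf, uint16_t hdrlens)
{
	struct rte_mbuf *seg;

	seg = rte_pktmbuf_copy(mbuf, mbuf->pool, 0, hdrlens);
	if (seg == NULL)
		return NULL;
	/* Strip the header bytes from the original mbuf... */
	rte_pktmbuf_adj(mbuf, hdrlens);
	/* ...and chain the payload behind the writable header copy. */
	if (rte_pktmbuf_chain(seg, mbuf) < 0) {
		/* Caller still owns mbuf (now without its headers). */
		rte_pktmbuf_free(seg);
		return NULL;
	}
	/* seg is the new chain head: callers must free it, not mbuf. */
	return seg;
}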
Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 drivers/net/tap/rte_eth_tap.c | 205 ++++++++++++----------------------
 1 file changed, 73 insertions(+), 132 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 30b45ddc67..57d1126ce3 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -521,79 +521,13 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	return num_rx;
 }
 
-/* Finalize l4 checksum calculation */
-static void
-tap_tx_l4_cksum(uint16_t *l4_cksum, uint16_t l4_phdr_cksum,
-		uint32_t l4_raw_cksum)
-{
-	if (l4_cksum) {
-		uint32_t cksum;
-
-		cksum = __rte_raw_cksum_reduce(l4_raw_cksum);
-		cksum += l4_phdr_cksum;
-
-		cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
-		cksum = (~cksum) & 0xffff;
-		if (cksum == 0)
-			cksum = 0xffff;
-		*l4_cksum = cksum;
-	}
-}
-
-/* Accumulate L4 raw checksums */
-static void
-tap_tx_l4_add_rcksum(char *l4_data, unsigned int l4_len, uint16_t *l4_cksum,
-		uint32_t *l4_raw_cksum)
-{
-	if (l4_cksum == NULL)
-		return;
-
-	*l4_raw_cksum = __rte_raw_cksum(l4_data, l4_len, *l4_raw_cksum);
-}
-
-/* L3 and L4 pseudo headers checksum offloads */
-static void
-tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len,
-		unsigned int l3_len, unsigned int l4_len, uint16_t **l4_cksum,
-		uint16_t *l4_phdr_cksum, uint32_t *l4_raw_cksum)
-{
-	void *l3_hdr = packet + l2_len;
-
-	if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
-		struct rte_ipv4_hdr *iph = l3_hdr;
-		uint16_t cksum;
-
-		iph->hdr_checksum = 0;
-		cksum = rte_raw_cksum(iph, l3_len);
-		iph->hdr_checksum = (cksum == 0xffff) ? cksum : ~cksum;
-	}
-	if (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
-		void *l4_hdr;
-
-		l4_hdr = packet + l2_len + l3_len;
-		if ((ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_UDP_CKSUM)
-			*l4_cksum = &((struct rte_udp_hdr *)l4_hdr)->dgram_cksum;
-		else if ((ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM)
-			*l4_cksum = &((struct rte_tcp_hdr *)l4_hdr)->cksum;
-		else
-			return;
-		**l4_cksum = 0;
-		if (ol_flags & RTE_MBUF_F_TX_IPV4)
-			*l4_phdr_cksum = rte_ipv4_phdr_cksum(l3_hdr, 0);
-		else
-			*l4_phdr_cksum = rte_ipv6_phdr_cksum(l3_hdr, 0);
-		*l4_raw_cksum = __rte_raw_cksum(l4_hdr, l4_len, 0);
-	}
-}
-
 static inline int
 tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 			struct rte_mbuf **pmbufs,
 			uint16_t *num_packets, unsigned long *num_tx_bytes)
 {
-	int i;
-	uint16_t l234_hlen;
 	struct pmd_process_private *process_private;
+	int i;
 
 	process_private = rte_eth_devices[txq->out_port].process_private;
 
@@ -602,19 +536,12 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 		struct iovec iovecs[mbuf->nb_segs + 2];
 		struct tun_pi pi = { .flags = 0, .proto = 0x00 };
 		struct rte_mbuf *seg = mbuf;
-		char m_copy[mbuf->data_len];
+		uint64_t l4_ol_flags;
 		int proto;
 		int n;
 		int j;
 		int k; /* current index in iovecs for copying segments */
-		uint16_t seg_len; /* length of first segment */
-		uint16_t nb_segs;
-		uint16_t *l4_cksum; /* l4 checksum (pseudo header + payload) */
-		uint32_t l4_raw_cksum = 0; /* TCP/UDP payload raw checksum */
-		uint16_t l4_phdr_cksum = 0; /* TCP/UDP pseudo header checksum */
-		uint16_t is_cksum = 0; /* in case cksum should be offloaded */
-
-		l4_cksum = NULL;
+
 		if (txq->type == ETH_TUNTAP_TYPE_TUN) {
 			/*
 			 * TUN and TAP are created with IFF_NO_PI disabled.
@@ -640,73 +567,83 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 		iovecs[k].iov_len = sizeof(pi);
 		k++;
 
-		nb_segs = mbuf->nb_segs;
-		if (txq->csum &&
-		    ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM ||
-		      (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_UDP_CKSUM ||
-		      (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM))) {
-			unsigned int l4_len = 0;
-
-			is_cksum = 1;
-
-			if ((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
-					RTE_MBUF_F_TX_UDP_CKSUM)
-				l4_len = sizeof(struct rte_udp_hdr);
-			else if ((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
-					RTE_MBUF_F_TX_TCP_CKSUM)
-				l4_len = sizeof(struct rte_tcp_hdr);
+		l4_ol_flags = mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK;
+		if (txq->csum && (mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM ||
+				l4_ol_flags == RTE_MBUF_F_TX_UDP_CKSUM ||
+				l4_ol_flags == RTE_MBUF_F_TX_TCP_CKSUM)) {
+			unsigned hdrlens = mbuf->l2_len + mbuf->l3_len;
+			uint16_t *l4_cksum;
+			void *l3_hdr;
+
+			if (l4_ol_flags == RTE_MBUF_F_TX_UDP_CKSUM)
+				hdrlens += sizeof(struct rte_udp_hdr);
+			else if (l4_ol_flags == RTE_MBUF_F_TX_TCP_CKSUM)
+				hdrlens += sizeof(struct rte_tcp_hdr);
+			else if (l4_ol_flags != RTE_MBUF_F_TX_L4_NO_CKSUM)
+				return -1;
 
 			/* Support only packets with at least layer 4
 			 * header included in the first segment */
-			seg_len = rte_pktmbuf_data_len(mbuf);
-			l234_hlen = mbuf->l2_len + mbuf->l3_len + l4_len;
-			if (seg_len < l234_hlen)
+			if (rte_pktmbuf_data_len(mbuf) < hdrlens)
 				return -1;
 
-			/* To change checksums, work on a * copy of l2, l3
-			 * headers + l4 pseudo header
+			/* To change checksums (considering that a mbuf can be
+			 * indirect, for example), copy l2, l3 and l4 headers
+			 * in a new segment and chain it to existing data
 			 */
-			rte_memcpy(m_copy, rte_pktmbuf_mtod(mbuf, void *),
-					l234_hlen);
-			tap_tx_l3_cksum(m_copy, mbuf->ol_flags,
-					mbuf->l2_len, mbuf->l3_len, l4_len,
-					&l4_cksum, &l4_phdr_cksum,
-					&l4_raw_cksum);
-			iovecs[k].iov_base = m_copy;
-			iovecs[k].iov_len = l234_hlen;
-			k++;
+			seg = rte_pktmbuf_copy(mbuf, mbuf->pool, 0, hdrlens);
+			if (seg == NULL)
+				return -1;
+			rte_pktmbuf_adj(mbuf, hdrlens);
+			rte_pktmbuf_chain(seg, mbuf);
+			pmbufs[i] = mbuf = seg;
+
+			l3_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, mbuf->l2_len);
+			if (mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
+				struct rte_ipv4_hdr *iph = l3_hdr;
 
-			/* Update next iovecs[] beyond l2, l3, l4 headers */
-			if (seg_len > l234_hlen) {
-				iovecs[k].iov_len = seg_len - l234_hlen;
-				iovecs[k].iov_base =
-					rte_pktmbuf_mtod(seg, char *) +
-					l234_hlen;
-				tap_tx_l4_add_rcksum(iovecs[k].iov_base,
-					iovecs[k].iov_len, l4_cksum,
-					&l4_raw_cksum);
-				k++;
-				nb_segs++;
+				iph->hdr_checksum = 0;
+				iph->hdr_checksum = rte_ipv4_cksum(iph);
 			}
-			seg = seg->next;
+
+			if (l4_ol_flags == RTE_MBUF_F_TX_L4_NO_CKSUM)
+				goto skip_l4_cksum;
+
+			if (l4_ol_flags == RTE_MBUF_F_TX_UDP_CKSUM) {
+				struct rte_udp_hdr *udp_hdr;
+
+				udp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *,
+					mbuf->l2_len + mbuf->l3_len);
+				l4_cksum = &udp_hdr->dgram_cksum;
+			} else {
+				struct rte_tcp_hdr *tcp_hdr;
+
+				tcp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,
+					mbuf->l2_len + mbuf->l3_len);
+				l4_cksum = &tcp_hdr->cksum;
+			}
+
+			*l4_cksum = 0;
+			if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) {
+				*l4_cksum = rte_ipv4_udptcp_cksum_mbuf(mbuf, l3_hdr,
+					mbuf->l2_len + mbuf->l3_len);
+			} else {
+				*l4_cksum = rte_ipv6_udptcp_cksum_mbuf(mbuf, l3_hdr,
+					mbuf->l2_len + mbuf->l3_len);
+			}
+skip_l4_cksum:
 		}
 
-		for (j = k; j <= nb_segs; j++) {
-			iovecs[j].iov_len = rte_pktmbuf_data_len(seg);
-			iovecs[j].iov_base = rte_pktmbuf_mtod(seg,
-				void *);
-			if (is_cksum)
-				tap_tx_l4_add_rcksum(iovecs[j].iov_base,
-					iovecs[j].iov_len, l4_cksum,
-					&l4_raw_cksum);
+		for (j = 0; j < mbuf->nb_segs; j++) {
+			iovecs[k].iov_len = rte_pktmbuf_data_len(seg);
+			iovecs[k].iov_base = rte_pktmbuf_mtod(seg, void *);
+			k++;
 			seg = seg->next;
 		}
-		if (is_cksum)
-			tap_tx_l4_cksum(l4_cksum, l4_phdr_cksum, l4_raw_cksum);
-
 		/* copy the tx frame data */
-		n = writev(process_private->txq_fds[txq->queue_id], iovecs, j);
+		n = writev(process_private->txq_fds[txq->queue_id], iovecs, k);
 		if (n <= 0)
 			return -1;
 
@@ -801,11 +738,15 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			break;
 		}
 		num_tx++;
-		/* free original mbuf */
-		rte_pktmbuf_free(mbuf_in);
-		/* free tso mbufs */
-		if (num_tso_mbufs > 0)
+		if (num_tso_mbufs == 0) {
+			/* tap_write_mbufs may prepend a segment to mbuf_in */
+			rte_pktmbuf_free(mbuf[0]);
+		} else {
+			/* free original mbuf */
+			rte_pktmbuf_free(mbuf_in);
+			/* free tso mbufs */
 			rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
+		}
 	}
 
 	txq->stats.opackets += num_packets;
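
Note on the checksum helpers used above: rte_ipv4_udptcp_cksum_mbuf() and
rte_ipv6_udptcp_cksum_mbuf() walk the whole mbuf segment chain, so the L4
payload no longer needs to be gathered into a flat buffer. A minimal usage
sketch for the IPv4/UDP case (fill_ipv4_udp_cksums is a hypothetical name;
it assumes l2_len/l3_len are filled in and that the headers sit in a
writable first segment, which the copy/chain step above guarantees):

#include <rte_ip.h>
#include <rte_mbuf.h>
#include <rte_udp.h>

static void
fill_ipv4_udp_cksums(struct rte_mbuf *m)
{
	struct rte_ipv4_hdr *iph;
	struct rte_udp_hdr *udph;

	/* The IPv4 header checksum is computed over the header only. */
	iph = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, m->l2_len);
	iph->hdr_checksum = 0;
	iph->hdr_checksum = rte_ipv4_cksum(iph);

	udph = rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
			m->l2_len + m->l3_len);
	/* Zero the field first: it is part of the checksummed data. */
	udph->dgram_cksum = 0;
	udph->dgram_cksum = rte_ipv4_udptcp_cksum_mbuf(m, iph,
			m->l2_len + m->l3_len);
}

This is also why pmd_tx_burst() now frees mbuf[0] instead of mbuf_in in the
non-TSO path: tap_write_mbufs() may have prepended a header segment and made
it the new chain head, so freeing the stale mbuf_in pointer would leak that
first segment.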