[dpdk-dev,v3,5/7] net/mlx4: get back Tx checksum offloads
Checks
Commit Message
From: Moti Haimovsky <motih@mellanox.com>
This patch adds hardware offloading support for IPv4, UDP and TCP
checksum calculation.
This commit also includes support for offloading IPv4, UDP and TCP
tunnel checksum calculation to the hardware.
Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
drivers/net/mlx4/mlx4.c | 9 +++++++++
drivers/net/mlx4/mlx4.h | 2 ++
drivers/net/mlx4/mlx4_ethdev.c | 6 ++++++
drivers/net/mlx4/mlx4_prm.h | 2 ++
drivers/net/mlx4/mlx4_rxtx.c | 25 +++++++++++++++++++++----
drivers/net/mlx4/mlx4_rxtx.h | 2 ++
drivers/net/mlx4/mlx4_txq.c | 2 ++
7 files changed, 44 insertions(+), 4 deletions(-)
@@ -397,6 +397,7 @@ struct mlx4_conf {
.ports.present = 0,
};
unsigned int vf;
+ unsigned int tunnel_en;
int i;
(void)pci_drv;
@@ -456,6 +457,9 @@ struct mlx4_conf {
rte_errno = ENODEV;
goto error;
}
+ /* Only cx3-pro supports L3 tunneling */
+ tunnel_en = (device_attr.vendor_part_id ==
+ PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO);
INFO("%u port(s) detected", device_attr.phys_port_cnt);
conf.ports.present |= (UINT64_C(1) << device_attr.phys_port_cnt) - 1;
if (mlx4_args(pci_dev->device.devargs, &conf)) {
@@ -529,6 +533,11 @@ struct mlx4_conf {
priv->pd = pd;
priv->mtu = ETHER_MTU;
priv->vf = vf;
+ priv->hw_csum =
+ !!(device_attr.device_cap_flags & IBV_DEVICE_RAW_IP_CSUM);
+ priv->hw_csum_l2tun = tunnel_en;
+ DEBUG("L2 tunnel checksum offloads are %ssupported",
+ (priv->hw_csum_l2tun ? "" : "not "));
/* Configure the first MAC address by default. */
if (mlx4_get_mac(priv, &mac.addr_bytes)) {
ERROR("cannot get MAC address, is mlx4_en loaded?"
@@ -93,6 +93,8 @@ struct priv {
unsigned int vf:1; /* This is a VF device. */
unsigned int intr_alarm:1; /* An interrupt alarm is scheduled. */
unsigned int isolated:1; /* Toggle isolated mode. */
+ unsigned int hw_csum:1; /* Checksum offload is supported. */
+ unsigned int hw_csum_l2tun:1; /* Checksum support for L2 tunnels. */
struct rte_intr_handle intr_handle; /* Port interrupt handle. */
struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
LIST_HEAD(mlx4_flows, rte_flow) flows;
@@ -553,6 +553,12 @@
info->max_mac_addrs = 1;
info->rx_offload_capa = 0;
info->tx_offload_capa = 0;
+ if (priv->hw_csum)
+ info->tx_offload_capa |= (DEV_TX_OFFLOAD_IPV4_CKSUM |
+ DEV_TX_OFFLOAD_UDP_CKSUM |
+ DEV_TX_OFFLOAD_TCP_CKSUM);
+ if (priv->hw_csum_l2tun)
+ info->tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
if (mlx4_get_ifname(priv, &ifname) == 0)
info->if_index = if_nametoindex(ifname);
info->speed_capa =
@@ -64,6 +64,8 @@
/* Work queue element (WQE) flags. */
#define MLX4_BIT_WQE_OWN 0x80000000
+#define MLX4_WQE_CTRL_IIP_HDR_CSUM (1 << 28)
+#define MLX4_WQE_CTRL_IL4_HDR_CSUM (1 << 27)
#define MLX4_SIZE_TO_TXBBS(size) \
(RTE_ALIGN((size), (MLX4_TXBB_SIZE)) >> (MLX4_TXBB_SHIFT))
@@ -434,12 +434,29 @@ struct pv {
txq->elts_comp_cd -= nr_txbbs;
if (unlikely(txq->elts_comp_cd <= 0)) {
txq->elts_comp_cd = txq->elts_comp_cd_init;
- srcrb_flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT |
- MLX4_WQE_CTRL_CQ_UPDATE);
+ srcrb_flags = MLX4_WQE_CTRL_SOLICIT | MLX4_WQE_CTRL_CQ_UPDATE;
} else {
- srcrb_flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT);
+ srcrb_flags = MLX4_WQE_CTRL_SOLICIT;
}
- ctrl->srcrb_flags = srcrb_flags;
+ /* Enable HW checksum offload if requested */
+ if (txq->csum &&
+ (pkt->ol_flags &
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))) {
+ const uint64_t is_tunneled = pkt->ol_flags &
+ (PKT_TX_TUNNEL_GRE |
+ PKT_TX_TUNNEL_VXLAN);
+
+ if (is_tunneled && txq->csum_l2tun) {
+ owner_opcode |= MLX4_WQE_CTRL_IIP_HDR_CSUM |
+ MLX4_WQE_CTRL_IL4_HDR_CSUM;
+ if (pkt->ol_flags & PKT_TX_OUTER_IP_CKSUM)
+ srcrb_flags |= MLX4_WQE_CTRL_IP_HDR_CSUM;
+ } else {
+ srcrb_flags |= MLX4_WQE_CTRL_IP_HDR_CSUM |
+ MLX4_WQE_CTRL_TCP_UDP_CSUM;
+ }
+ }
+ ctrl->srcrb_flags = rte_cpu_to_be_32(srcrb_flags);
/*
* Make sure descriptor is fully written before
* setting ownership bit (because HW can start
@@ -110,6 +110,8 @@ struct txq {
struct txq_elt (*elts)[]; /**< Tx elements. */
struct mlx4_txq_stats stats; /**< Tx queue counters. */
uint32_t max_inline; /**< Max inline send size. */
+ uint32_t csum:1; /**< Checksum is supported and enabled */
+ uint32_t csum_l2tun:1; /**< L2 tun Checksum is supported and enabled */
char *bounce_buf;
/**< memory used for storing the first DWORD of data TXBBs. */
struct {
@@ -340,6 +340,8 @@ struct txq_mp2mr_mbuf_check_data {
(void *)dev, strerror(rte_errno));
goto error;
}
+ tmpl.csum = priv->hw_csum;
+ tmpl.csum_l2tun = priv->hw_csum_l2tun;
DEBUG("priv->device_attr.max_qp_wr is %d",
priv->device_attr.max_qp_wr);
DEBUG("priv->device_attr.max_sge is %d",