@@ -1139,6 +1139,97 @@ struct mlx5_dev_spawn_data {
}
/**
+ * Configure the minimal amount of data to inline into a WQE
+ * when sending packets; the result is config->txq_inline_min.
+ *
+ * - a txq_inline_min value specified in devargs (anything but
+ *   MLX5_ARG_UNSET) has the highest priority and is kept as is
+ * - otherwise, if the eth_net_offloads HCA attribute is set, the size
+ *   follows wqe_inline_mode (and vport_inline_mode if needed)
+ * - as a fallback, L2 mode (18 bytes) is assumed for ConnectX-4/4LX
+ *   (PF and VF device IDs) and none (0 bytes) for other NICs
+ *
+ * @param spawn
+ *   Device spawn data; only pci_dev->id.device_id is read here.
+ * @param[in,out] config
+ *   Device configuration; hca_attr is read, txq_inline_min is set.
+ */
+static void
+mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
+ struct mlx5_dev_config *config)
+{
+ if (config->txq_inline_min != MLX5_ARG_UNSET) {
+ /* Application defines size of inlined data explicitly. */
+ goto exit;
+ }
+ if (config->hca_attr.eth_net_offloads) {
+ /* We have DevX enabled, inline mode queried successfully. */
+ switch (config->hca_attr.wqe_inline_mode) {
+ case MLX5_CAP_INLINE_MODE_L2:
+ /* Outer L2 header must be inlined. */
+ config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
+ goto exit;
+ case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+ /* No inline data is required by the NIC. */
+ config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
+ goto exit;
+ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+ /* Inline mode is defined by the NIC vport context. */
+ if (!config->hca_attr.eth_virt)
+ break; /* No vport inline mode available, use PCI ID. */
+ switch (config->hca_attr.vport_inline_mode) {
+ case MLX5_INLINE_MODE_NONE:
+ config->txq_inline_min =
+ MLX5_INLINE_HSIZE_NONE;
+ goto exit;
+ case MLX5_INLINE_MODE_L2:
+ config->txq_inline_min =
+ MLX5_INLINE_HSIZE_L2;
+ goto exit;
+ case MLX5_INLINE_MODE_IP:
+ config->txq_inline_min =
+ MLX5_INLINE_HSIZE_L3;
+ goto exit;
+ case MLX5_INLINE_MODE_TCP_UDP:
+ config->txq_inline_min =
+ MLX5_INLINE_HSIZE_L4;
+ goto exit;
+ case MLX5_INLINE_MODE_INNER_L2:
+ config->txq_inline_min =
+ MLX5_INLINE_HSIZE_INNER_L2;
+ goto exit;
+ case MLX5_INLINE_MODE_INNER_IP:
+ config->txq_inline_min =
+ MLX5_INLINE_HSIZE_INNER_L3;
+ goto exit;
+ case MLX5_INLINE_MODE_INNER_TCP_UDP:
+ config->txq_inline_min =
+ MLX5_INLINE_HSIZE_INNER_L4;
+ goto exit;
+ }
+ }
+ }
+ /*
+  * We get here if DevX could not provide the inline data size
+  * (capability missing or mode value not handled above).
+  * Fall back to the PCI device ID to recognize older NICs.
+  */
+ switch (spawn->pci_dev->id.device_id) {
+ case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
+ case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
+ case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
+ case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
+ config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
+ break;
+ default:
+ config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
+ break;
+ }
+exit:
+ DRV_LOG(DEBUG, "min tx inline configured: %d", config->txq_inline_min);
+}
+
+/**
* Spawn an Ethernet device from Verbs information.
*
* @param dpdk_dev
@@ -1631,6 +1722,8 @@ struct mlx5_dev_spawn_data {
#else
config.dv_esw_en = 0;
#endif
+ /* Detect minimal data bytes to inline. */
+ mlx5_set_min_inline(spawn, &config);
/* Store device configuration on private structure. */
priv->config = config;
if (config.dv_flow_en) {
@@ -170,6 +170,10 @@ struct mlx5_hca_attr {
uint32_t eswitch_manager:1;
uint32_t flow_counters_dump:1;
uint8_t flow_counter_bulk_alloc_bitmap;
+ uint32_t eth_net_offloads:1;
+ uint32_t eth_virt:1;
+ uint32_t wqe_inline_mode:2;
+ uint32_t vport_inline_mode:3;
};
/* Flow list . */
@@ -60,6 +60,24 @@
/* Maximum Packet headers size (L2+L3+L4) for TSO. */
#define MLX5_MAX_TSO_HEADER (128u + 34u)
+/* Inline header sizes in bytes (VLAN, IPv6, TCP; VXLAN/UDP for inner). */
+#define MLX5_INLINE_HSIZE_NONE 0
+#define MLX5_INLINE_HSIZE_L2 (sizeof(struct rte_ether_hdr) + \
+ sizeof(struct rte_vlan_hdr))
+#define MLX5_INLINE_HSIZE_L3 (MLX5_INLINE_HSIZE_L2 + \
+ sizeof(struct rte_ipv6_hdr))
+#define MLX5_INLINE_HSIZE_L4 (MLX5_INLINE_HSIZE_L3 + \
+ sizeof(struct rte_tcp_hdr))
+#define MLX5_INLINE_HSIZE_INNER_L2 (MLX5_INLINE_HSIZE_L3 + \
+ sizeof(struct rte_udp_hdr) + \
+ sizeof(struct rte_vxlan_hdr) + \
+ sizeof(struct rte_ether_hdr) + \
+ sizeof(struct rte_vlan_hdr))
+#define MLX5_INLINE_HSIZE_INNER_L3 (MLX5_INLINE_HSIZE_INNER_L2 + \
+ sizeof(struct rte_ipv6_hdr))
+#define MLX5_INLINE_HSIZE_INNER_L4 (MLX5_INLINE_HSIZE_INNER_L3 + \
+ sizeof(struct rte_tcp_hdr))
+
/* Threshold of buffer replenishment for vectorized Rx. */
#define MLX5_VPMD_RXQ_RPLNSH_THRESH(n) \
(RTE_MIN(MLX5_VPMD_RX_MAX_BURST, (unsigned int)(n) >> 2))
@@ -230,6 +230,59 @@ struct mlx5_devx_obj *
}
/**
+ * Query the NIC vport context with a DevX general command.
+ * Only attr->vport_inline_mode (min_wqe_inline_mode) is filled.
+ *
+ * @param[in] ctx
+ *   ibv context returned from mlx5dv_open_device.
+ * @param[in] vport
+ *   vport number; a non-zero value also sets the other_vport flag.
+ * @param[out] attr
+ *   Device attributes; vport_inline_mode is written on success.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+static int
+mlx5_devx_cmd_query_nic_vport_context(struct ibv_context *ctx,
+ unsigned int vport,
+ struct mlx5_hca_attr *attr)
+{
+ uint32_t in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
+ uint32_t out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0};
+ void *vctx;
+ int status, syndrome, rc;
+
+ /* Query NIC vport context to determine inline mode. */
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+ rc = mlx5_glue->devx_general_cmd(ctx,
+ in, sizeof(in),
+ out, sizeof(out));
+ if (rc)
+ goto error;
+ status = MLX5_GET(query_nic_vport_context_out, out, status);
+ syndrome = MLX5_GET(query_nic_vport_context_out, out, syndrome);
+ if (status) {
+ DRV_LOG(DEBUG, "Failed to query NIC vport context, "
+ "status %x, syndrome = %x",
+ status, syndrome);
+ return -1;
+ }
+ vctx = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+ nic_vport_context);
+ attr->vport_inline_mode = MLX5_GET(nic_vport_context, vctx,
+ min_wqe_inline_mode);
+ return 0;
+error:
+ rc = (rc > 0) ? -rc : rc; /* Always report failure as a negative value. */
+ return rc;
+}
+
+/**
* Query HCA attributes.
* Using those attributes we can check on run time if the device
* is having the required capabilities.
@@ -259,7 +312,7 @@ struct mlx5_devx_obj *
rc = mlx5_glue->devx_general_cmd(ctx,
in, sizeof(in), out, sizeof(out));
if (rc)
- return rc;
+ goto error;
status = MLX5_GET(query_hca_cap_out, out, status);
syndrome = MLX5_GET(query_hca_cap_out, out, syndrome);
if (status) {
@@ -274,5 +327,50 @@ struct mlx5_devx_obj *
attr->flow_counters_dump = MLX5_GET(cmd_hca_cap, hcattr,
flow_counters_dump);
attr->eswitch_manager = MLX5_GET(cmd_hca_cap, hcattr, eswitch_manager);
+ attr->eth_net_offloads = MLX5_GET(cmd_hca_cap, hcattr,
+ eth_net_offloads);
+ attr->eth_virt = MLX5_GET(cmd_hca_cap, hcattr, eth_virt);
+ if (!attr->eth_net_offloads)
+ return 0;
+
+ /* Query HCA offloads for Ethernet protocol. */
+ memset(in, 0, sizeof(in));
+ memset(out, 0, sizeof(out));
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod,
+ MLX5_GET_HCA_CAP_OP_MOD_ETHERNET_OFFLOAD_CAPS |
+ MLX5_HCA_CAP_OPMOD_GET_CUR);
+
+ rc = mlx5_glue->devx_general_cmd(ctx,
+ in, sizeof(in),
+ out, sizeof(out));
+ if (rc) {
+ attr->eth_net_offloads = 0;
+ goto error;
+ }
+ status = MLX5_GET(query_hca_cap_out, out, status);
+ syndrome = MLX5_GET(query_hca_cap_out, out, syndrome);
+ if (status) {
+ DRV_LOG(DEBUG, "Failed to query devx HCA capabilities, "
+ "status %x, syndrome = %x",
+ status, syndrome);
+ attr->eth_net_offloads = 0;
+ return -1;
+ }
+ hcattr = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
+ attr->wqe_inline_mode = MLX5_GET(per_protocol_networking_offload_caps,
+ hcattr, wqe_inline_mode);
+ if (attr->wqe_inline_mode != MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+ return 0;
+ if (attr->eth_virt) {
+ rc = mlx5_devx_cmd_query_nic_vport_context(ctx, 0, attr);
+ if (rc) {
+ attr->eth_virt = 0;
+ goto error;
+ }
+ }
return 0;
+error:
+ rc = (rc > 0) ? -rc : rc;
+ return rc;
}
@@ -625,6 +625,7 @@ enum {
enum {
MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
MLX5_CMD_OP_CREATE_MKEY = 0x200,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754, /* NIC vport context query. */
MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939,
MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b,
};
@@ -784,7 +785,8 @@ struct mlx5_ifc_create_mkey_in_bits {
enum {
MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0 << 1,
- MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP = 0xc << 1,
+ MLX5_GET_HCA_CAP_OP_MOD_ETHERNET_OFFLOAD_CAPS = 0x1 << 1, /* per_protocol_networking_offload_caps */
+ MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP = 0xc << 1,
};
enum {
@@ -792,6 +794,23 @@ enum {
MLX5_HCA_CAP_OPMOD_GET_CUR = 1,
};
+enum { /* Values compared against the wqe_inline_mode HCA attribute. */
+ MLX5_CAP_INLINE_MODE_L2, /* Outer L2 header must be inlined. */
+ MLX5_CAP_INLINE_MODE_VPORT_CONTEXT, /* Defined by NIC vport context. */
+ MLX5_CAP_INLINE_MODE_NOT_REQUIRED, /* No inline data is required. */
+};
+
+enum { /* Values compared against the vport_inline_mode attribute. */
+ MLX5_INLINE_MODE_NONE, /* Mapped to MLX5_INLINE_HSIZE_NONE. */
+ MLX5_INLINE_MODE_L2, /* Mapped to MLX5_INLINE_HSIZE_L2. */
+ MLX5_INLINE_MODE_IP, /* Mapped to MLX5_INLINE_HSIZE_L3. */
+ MLX5_INLINE_MODE_TCP_UDP, /* Mapped to MLX5_INLINE_HSIZE_L4. */
+ MLX5_INLINE_MODE_RESERVED4, /* No size mapping in mlx5_set_min_inline(). */
+ MLX5_INLINE_MODE_INNER_L2, /* Mapped to MLX5_INLINE_HSIZE_INNER_L2. */
+ MLX5_INLINE_MODE_INNER_IP, /* Mapped to MLX5_INLINE_HSIZE_INNER_L3. */
+ MLX5_INLINE_MODE_INNER_TCP_UDP, /* Mapped to MLX5_INLINE_HSIZE_INNER_L4. */
+};
+
struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_0[0x30];
u8 vhca_id[0x10];
@@ -1064,6 +1083,42 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_61f[0x1e1];
};
+struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
+ u8 csum_cap[0x1]; /* Array sizes are field widths in bits. */
+ u8 vlan_cap[0x1];
+ u8 lro_cap[0x1];
+ u8 lro_psh_flag[0x1];
+ u8 lro_time_stamp[0x1];
+ u8 reserved_at_5[0x2]; /* reserved_at_<N>: padding starting at bit N. */
+ u8 wqe_vlan_insert[0x1];
+ u8 self_lb_en_modifiable[0x1];
+ u8 reserved_at_9[0x2];
+ u8 max_lso_cap[0x5];
+ u8 multi_pkt_send_wqe[0x2];
+ u8 wqe_inline_mode[0x2]; /* Compared against MLX5_CAP_INLINE_MODE_*. */
+ u8 rss_ind_tbl_cap[0x4];
+ u8 reg_umr_sq[0x1];
+ u8 scatter_fcs[0x1];
+ u8 enhanced_multi_pkt_send_wqe[0x1];
+ u8 tunnel_lso_const_out_ip_id[0x1];
+ u8 reserved_at_1c[0x2];
+ u8 tunnel_stateless_gre[0x1];
+ u8 tunnel_stateless_vxlan[0x1];
+ u8 swp[0x1];
+ u8 swp_csum[0x1];
+ u8 swp_lso[0x1];
+ u8 reserved_at_23[0xd];
+ u8 max_vxlan_udp_ports[0x8];
+ u8 reserved_at_38[0x6];
+ u8 max_geneve_opt_len[0x1];
+ u8 tunnel_stateless_geneve_rx[0x1];
+ u8 reserved_at_40[0x10];
+ u8 lro_min_mss_size[0x10];
+ u8 reserved_at_60[0x120];
+ u8 lro_timer_supported_periods[4][0x20];
+ u8 reserved_at_200[0x600]; /* Pads the layout to 0x800 bits. */
+};
+
struct mlx5_ifc_qos_cap_bits {
u8 packet_pacing[0x1];
u8 esw_scheduling[0x1];
@@ -1091,6 +1146,8 @@ struct mlx5_ifc_qos_cap_bits {
union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
+ struct mlx5_ifc_per_protocol_networking_offload_caps_bits
+ per_protocol_networking_offload_caps; /* Ethernet offload caps view. */
struct mlx5_ifc_qos_cap_bits qos_cap;
u8 reserved_at_0[0x8000];
};
@@ -1111,6 +1168,69 @@ struct mlx5_ifc_query_hca_cap_in_bits {
u8 reserved_at_40[0x40];
};
+struct mlx5_ifc_mac_address_layout_bits {
+ u8 reserved_at_0[0x10]; /* Array sizes are field widths in bits. */
+ u8 mac_addr_47_32[0x10]; /* MAC address bits 47..32 (per field name). */
+ u8 mac_addr_31_0[0x20]; /* MAC address bits 31..0 (per field name). */
+};
+
+struct mlx5_ifc_nic_vport_context_bits {
+ u8 reserved_at_0[0x5]; /* Array sizes are field widths in bits. */
+ u8 min_wqe_inline_mode[0x3]; /* Compared against MLX5_INLINE_MODE_*. */
+ u8 reserved_at_8[0x15];
+ u8 disable_mc_local_lb[0x1];
+ u8 disable_uc_local_lb[0x1];
+ u8 roce_en[0x1];
+ u8 arm_change_event[0x1];
+ u8 reserved_at_21[0x1a];
+ u8 event_on_mtu[0x1];
+ u8 event_on_promisc_change[0x1];
+ u8 event_on_vlan_change[0x1];
+ u8 event_on_mc_address_change[0x1];
+ u8 event_on_uc_address_change[0x1];
+ u8 reserved_at_40[0xc];
+ u8 affiliation_criteria[0x4];
+ u8 affiliated_vhca_id[0x10];
+ u8 reserved_at_60[0xd0];
+ u8 mtu[0x10];
+ u8 system_image_guid[0x40];
+ u8 port_guid[0x40];
+ u8 node_guid[0x40];
+ u8 reserved_at_200[0x140];
+ u8 qkey_violation_counter[0x10];
+ u8 reserved_at_350[0x430];
+ u8 promisc_uc[0x1];
+ u8 promisc_mc[0x1];
+ u8 promisc_all[0x1];
+ u8 reserved_at_783[0x2];
+ u8 allowed_list_type[0x3];
+ u8 reserved_at_788[0xc];
+ u8 allowed_list_size[0xc];
+ struct mlx5_ifc_mac_address_layout_bits permanent_address;
+ u8 reserved_at_7e0[0x20]; /* Layout ends at bit 0x800. */
+};
+
+struct mlx5_ifc_query_nic_vport_context_out_bits {
+ u8 status[0x8]; /* Non-zero is treated as command failure. */
+ u8 reserved_at_8[0x18];
+ u8 syndrome[0x20]; /* Logged together with status on failure. */
+ u8 reserved_at_40[0x40];
+ struct mlx5_ifc_nic_vport_context_bits nic_vport_context;
+};
+
+struct mlx5_ifc_query_nic_vport_context_in_bits {
+ u8 opcode[0x10]; /* MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT. */
+ u8 reserved_at_10[0x10];
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+ u8 other_vport[0x1]; /* Set to 1 when a non-zero vport is queried. */
+ u8 reserved_at_41[0xf];
+ u8 vport_number[0x10]; /* vport whose context is requested. */
+ u8 reserved_at_60[0x5];
+ u8 allowed_list_type[0x3];
+ u8 reserved_at_68[0x18];
+};
+
/* CQE format mask. */
#define MLX5E_CQE_FORMAT_MASK 0xc