[v2,3/9] net/mlx5: add Direct Rules E-Switch support
diff mbox series

Message ID 1555586930-109097-4-git-send-email-orika@mellanox.com
State Superseded, archived
Delegated to: Shahaf Shuler
Headers show
Series
  • net/mlx5: add Direct Verbs E-Switch support
Related show

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail apply issues

Commit Message

Ori Kam April 18, 2019, 11:28 a.m. UTC
This commit checks for DR E-Switch support.
The support depends on both the device and the kernel.
This commit also enables the user to manually disable this feature.

Signed-off-by: Ori Kam <orika@mellanox.com>
---
v2:
* Address ML comments.
---
 drivers/net/mlx5/Makefile         |   5 +
 drivers/net/mlx5/meson.build      |   2 +
 drivers/net/mlx5/mlx5.c           |  53 +++++-
 drivers/net/mlx5/mlx5.h           |  12 ++
 drivers/net/mlx5/mlx5_devx_cmds.c |  44 +++++
 drivers/net/mlx5/mlx5_flow.c      |   2 +-
 drivers/net/mlx5/mlx5_prm.h       | 328 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 440 insertions(+), 6 deletions(-)

Comments

Yongseok Koh April 18, 2019, 12:11 p.m. UTC | #1
On Thu, Apr 18, 2019 at 11:28:44AM +0000, Ori Kam wrote:
> This commit checks for DR E-Switch support.
> The support depends on both the device and the kernel.
> This commit also enables the user to manually disable this feature.
> 
> Signed-off-by: Ori Kam <orika@mellanox.com>
> ---

Only one minor nit below.

With the fix,
Acked-by: Yongseok Koh <yskoh@mellanox.com>

> v2:
> * Address ML comments.
> ---
>  drivers/net/mlx5/Makefile         |   5 +
>  drivers/net/mlx5/meson.build      |   2 +
>  drivers/net/mlx5/mlx5.c           |  53 +++++-
>  drivers/net/mlx5/mlx5.h           |  12 ++
>  drivers/net/mlx5/mlx5_devx_cmds.c |  44 +++++
>  drivers/net/mlx5/mlx5_flow.c      |   2 +-
>  drivers/net/mlx5/mlx5_prm.h       | 328 ++++++++++++++++++++++++++++++++++++++
>  7 files changed, 440 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> index 93bc869..2b72a33 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -161,6 +161,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
>  		enum MLX5DV_DR_NS_TYPE_TERMINATING \
>  		$(AUTOCONF_OUTPUT)
>  	$Q sh -- '$<' '$@' \
> +		HAVE_MLX5DV_DR_ESWITCH \
> +		infiniband/mlx5dv.h \
> +		enum MLX5DV_DR_NS_DOMAIN_FDB_BYPASS \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
>  		HAVE_IBV_DEVX_OBJ \
>  		infiniband/mlx5dv.h \
>  		func mlx5dv_devx_obj_create \
> diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> index 0037e15..9dfd28d 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -113,6 +113,8 @@ if build
>  		'MLX5DV_FLOW_ACTION_COUNTERS_DEVX' ],
>  		[ 'HAVE_MLX5DV_DR', 'infiniband/mlx5dv.h',
>  		'MLX5DV_DR_NS_TYPE_TERMINATING' ],
> +		[ 'HAVE_MLX5DV_DR_ESWITCH', 'infiniband/mlx5dv.h',
> +		'MLX5DV_DR_NS_DOMAIN_FDB_BYPASS' ],
>  		[ 'HAVE_SUPPORTED_40000baseKR4_Full', 'linux/ethtool.h',
>  		'SUPPORTED_40000baseKR4_Full' ],
>  		[ 'HAVE_SUPPORTED_40000baseCR4_Full', 'linux/ethtool.h',
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index 9ff50df..ff24e1d 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -101,6 +101,9 @@
>  /* Allow L3 VXLAN flow creation. */
>  #define MLX5_L3_VXLAN_EN "l3_vxlan_en"
>  
> +/* Activate DV E-Switch flow steering. */
> +#define MLX5_DV_ESW_EN "dv_esw_en"
> +
>  /* Activate DV flow steering. */
>  #define MLX5_DV_FLOW_EN "dv_flow_en"
>  
> @@ -344,6 +347,18 @@ struct mlx5_dev_spawn_data {
>  	}
>  	pthread_mutex_init(&sh->dv_mutex, NULL);
>  	sh->tx_ns = ns;
> +#ifdef HAVE_MLX5DV_DR_ESWITCH
> +	if (priv->config.dv_esw_en) {
> +		ns  = mlx5_glue->dr_create_ns(sh->ctx,
> +					      MLX5DV_DR_NS_DOMAIN_FDB_BYPASS);
> +		if (!ns) {
> +			DRV_LOG(ERR, "FDB mlx5dv_dr_create_ns failed");
> +			err = errno;
> +			goto error;
> +		}
> +		sh->fdb_ns = ns;
> +	}
> +#endif
>  	sh->dv_refcnt++;
>  	priv->dr_shared = 1;
>  	return 0;
> @@ -358,6 +373,10 @@ struct mlx5_dev_spawn_data {
>  		mlx5dv_dr_destroy_ns(sh->tx_ns);
>  		sh->tx_ns = NULL;
>  	}
> +	if (sh->fdb_ns) {
> +		mlx5_glue->dr_destroy_ns(sh->fdb_ns);
> +		sh->fdb_ns = NULL;
> +	}
>  	return err;
>  #else
>  	(void)priv;
> @@ -393,6 +412,12 @@ struct mlx5_dev_spawn_data {
>  		mlx5dv_dr_destroy_ns(sh->tx_ns);
>  		sh->tx_ns = NULL;
>  	}
> +#ifdef HAVE_MLX5DV_DR_ESWITCH
> +	if (sh->fdb_ns) {
> +		mlx5_glue->dr_destroy_ns(sh->fdb_ns);
> +		sh->fdb_ns = NULL;
> +	}
> +#endif
>  	pthread_mutex_destroy(&sh->dv_mutex);
>  #else
>  	(void)priv;
> @@ -861,6 +886,8 @@ struct mlx5_dev_spawn_data {
>  		config->l3_vxlan_en = !!tmp;
>  	} else if (strcmp(MLX5_VF_NL_EN, key) == 0) {
>  		config->vf_nl_en = !!tmp;
> +	} else if (strcmp(MLX5_DV_ESW_EN, key) == 0) {
> +		config->dv_esw_en = !!tmp;
>  	} else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) {
>  		config->dv_flow_en = !!tmp;
>  	} else if (strcmp(MLX5_MR_EXT_MEMSEG_EN, key) == 0) {
> @@ -905,6 +932,7 @@ struct mlx5_dev_spawn_data {
>  		MLX5_RX_VEC_EN,
>  		MLX5_L3_VXLAN_EN,
>  		MLX5_VF_NL_EN,
> +		MLX5_DV_ESW_EN,
>  		MLX5_DV_FLOW_EN,
>  		MLX5_MR_EXT_MEMSEG_EN,
>  		MLX5_REPRESENTOR,
> @@ -1458,11 +1486,6 @@ struct mlx5_dev_spawn_data {
>  			priv->tcf_context = NULL;
>  		}
>  	}
> -	if (config.dv_flow_en) {
> -		err = mlx5_alloc_shared_dr(priv);
> -		if (err)
> -			goto error;
> -	}
>  	TAILQ_INIT(&priv->flows);
>  	TAILQ_INIT(&priv->ctrl_flows);
>  	/* Hint libmlx5 to use PMD allocator for data plane resources */
> @@ -1484,8 +1507,27 @@ struct mlx5_dev_spawn_data {
>  	 * Verbs context returned by ibv_open_device().
>  	 */
>  	mlx5_link_update(eth_dev, 0);
> +#ifdef HAVE_IBV_DEVX_OBJ
> +	err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config.hca_attr);
> +	if (err) {
> +		err = -err;
> +		goto error;
> +	}
> +#endif
> +#ifdef HAVE_MLX5DV_DR_ESWITCH
> +	if (!(config.hca_attr.eswitch_manager && config.dv_flow_en &&
> +	      (switch_info->representor || switch_info->master)))
> +		config.dv_esw_en = 0;
> +#else
> +	config.dv_esw_en = 0;
> +#endif
>  	/* Store device configuration on private structure. */
>  	priv->config = config;
> +	if (config.dv_flow_en) {
> +		err = mlx5_alloc_shared_dr(priv);
> +		if (err)
> +			goto error;
> +	}
>  	/* Supported Verbs flow priority number detection. */
>  	err = mlx5_flow_discover_priorities(eth_dev);
>  	if (err < 0) {
> @@ -1876,6 +1918,7 @@ struct mlx5_dev_spawn_data {
>  			.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN,
>  			.min_rxqs_num = MLX5_MPRQ_MIN_RXQS,
>  		},
> +		.dv_esw_en = 1,
>  	};
>  	/* Device specific configuration. */
>  	switch (pci_dev->id.device_id) {
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 14c7f3c..b9946f6 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -138,6 +138,11 @@ struct mlx5_devx_counter_set {
>  	int id; /* Flow counter ID */
>  };
>  
> +/* HCA attributes. */
> +struct mlx5_hca_attr {
> +	uint32_t eswitch_manager:1;
> +};
> +
>  /* Flow list . */
>  TAILQ_HEAD(mlx5_flows, rte_flow);
>  
> @@ -171,6 +176,7 @@ struct mlx5_dev_config {
>  	/* Whether memseg should be extended for MR creation. */
>  	unsigned int l3_vxlan_en:1; /* Enable L3 VXLAN flow creation. */
>  	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
> +	unsigned int dv_esw_en:1; /* Enable E-Switch DV flow. */
>  	unsigned int dv_flow_en:1; /* Enable DV flow. */
>  	unsigned int swp:1; /* Tx generic tunnel checksum and TSO offload. */
>  	unsigned int devx:1; /* Whether devx interface is available or not. */
> @@ -192,6 +198,7 @@ struct mlx5_dev_config {
>  	int txqs_inline; /* Queue number threshold for inlining. */
>  	int txqs_vec; /* Queue number threshold for vectorized Tx. */
>  	int inline_max_packet_sz; /* Max packet size for inlining. */
> +	struct mlx5_hca_attr hca_attr; /* HCA attributes. */
>  };
>  
>  /**
> @@ -241,6 +248,7 @@ struct mlx5_flow_tbl_resource {
>  };
>  
>  #define MLX5_MAX_TABLES 1024
> +#define MLX5_MAX_TABLES_FDB 32
>  #define MLX5_GROUP_FACTOR 1
>  
>  /*
> @@ -260,6 +268,8 @@ struct mlx5_ibv_shared {
>  	/* Shared DV/DR flow data section. */
>  	pthread_mutex_t dv_mutex; /* DV context mutex. */
>  	uint32_t dv_refcnt; /* DV/DR data reference counter. */
> +	void *fdb_ns; /* FDB Direct Rules name space handle. */
> +	struct mlx5_flow_tbl_resource fdb_tbl[MLX5_MAX_TABLES_FDB];

Please add a comment for fdb_tbl.

>  	void *rx_ns; /* RX Direct Rules name space handle. */
>  	struct mlx5_flow_tbl_resource rx_tbl[MLX5_MAX_TABLES];
>  	/* RX Direct Rules tables. */
> @@ -539,4 +549,6 @@ int mlx5_devx_cmd_flow_counter_alloc(struct ibv_context *ctx,
>  int mlx5_devx_cmd_flow_counter_query(struct mlx5_devx_counter_set *dcx,
>  				     int clear,
>  				     uint64_t *pkts, uint64_t *bytes);
> +int mlx5_devx_cmd_query_hca_attr(struct ibv_context *ctx,
> +				 struct mlx5_hca_attr *attr);
>  #endif /* RTE_PMD_MLX5_H_ */
> diff --git a/drivers/net/mlx5/mlx5_devx_cmds.c b/drivers/net/mlx5/mlx5_devx_cmds.c
> index a9dff58..e5776c4 100644
> --- a/drivers/net/mlx5/mlx5_devx_cmds.c
> +++ b/drivers/net/mlx5/mlx5_devx_cmds.c
> @@ -105,3 +105,47 @@ int mlx5_devx_cmd_flow_counter_free(struct mlx5dv_devx_obj *obj)
>  	*bytes = MLX5_GET64(traffic_counter, stats, octets);
>  	return 0;
>  }
> +
> +/**
> + * Query HCA attributes.
> + * Using those attributes we can check on run time if the device
> + * is having the required capabilities.
> + *
> + * @param[in] ctx
> + *   ibv contexts returned from mlx5dv_open_device.
> + * @param[out] attr
> + *   Attributes device values.
> + *
> + * @return
> + *   0 on success, a negative value otherwise.
> + */
> +int
> +mlx5_devx_cmd_query_hca_attr(struct ibv_context *ctx,
> +			     struct mlx5_hca_attr *attr)
> +{
> +	uint32_t in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {0};
> +	uint32_t out[MLX5_ST_SZ_DW(query_hca_cap_out)] = {0};
> +	void *hcattr;
> +	int status, syndrome, rc;
> +
> +	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
> +	MLX5_SET(query_hca_cap_in, in, op_mod,
> +		 MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE |
> +		 MLX5_HCA_CAP_OPMOD_GET_CUR);
> +
> +	rc = mlx5_glue->devx_general_cmd(ctx,
> +					 in, sizeof(in), out, sizeof(out));
> +	if (rc)
> +		return rc;
> +	status = MLX5_GET(query_hca_cap_out, out, status);
> +	syndrome = MLX5_GET(query_hca_cap_out, out, syndrome);
> +	if (status) {
> +		DRV_LOG(DEBUG, "Failed to query devx HCA capabilities, "
> +			"status %x, syndrome = %x",
> +			status, syndrome);
> +		return -1;
> +	}
> +	hcattr = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
> +	attr->eswitch_manager = MLX5_GET(cmd_hca_cap, hcattr, eswitch_manager);
> +	return 0;
> +}
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index a0683ee..b1effda 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -1784,7 +1784,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
>  	struct mlx5_priv *priv = dev->data->dev_private;
>  	enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX;
>  
> -	if (attr->transfer)
> +	if (attr->transfer && !priv->config.dv_esw_en)
>  		type = MLX5_FLOW_TYPE_TCF;
>  	else
>  		type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
> diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
> index b15266f..8c42380 100644
> --- a/drivers/net/mlx5/mlx5_prm.h
> +++ b/drivers/net/mlx5/mlx5_prm.h
> @@ -529,6 +529,7 @@ enum {
>  };
>  
>  enum {
> +	MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
>  	MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939,
>  	MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b,
>  };
> @@ -591,6 +592,333 @@ struct mlx5_ifc_query_flow_counter_in_bits {
>  	u8         flow_counter_id[0x20];
>  };
>  
> +enum {
> +	MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0 << 1,
> +	MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP        = 0xc << 1,
> +};
> +
> +enum {
> +	MLX5_HCA_CAP_OPMOD_GET_MAX   = 0,
> +	MLX5_HCA_CAP_OPMOD_GET_CUR   = 1,
> +};
> +
> +struct mlx5_ifc_cmd_hca_cap_bits {
> +	u8 reserved_at_0[0x30];
> +	u8 vhca_id[0x10];
> +	u8 reserved_at_40[0x40];
> +	u8 log_max_srq_sz[0x8];
> +	u8 log_max_qp_sz[0x8];
> +	u8 reserved_at_90[0xb];
> +	u8 log_max_qp[0x5];
> +	u8 reserved_at_a0[0xb];
> +	u8 log_max_srq[0x5];
> +	u8 reserved_at_b0[0x10];
> +	u8 reserved_at_c0[0x8];
> +	u8 log_max_cq_sz[0x8];
> +	u8 reserved_at_d0[0xb];
> +	u8 log_max_cq[0x5];
> +	u8 log_max_eq_sz[0x8];
> +	u8 reserved_at_e8[0x2];
> +	u8 log_max_mkey[0x6];
> +	u8 reserved_at_f0[0x8];
> +	u8 dump_fill_mkey[0x1];
> +	u8 reserved_at_f9[0x3];
> +	u8 log_max_eq[0x4];
> +	u8 max_indirection[0x8];
> +	u8 fixed_buffer_size[0x1];
> +	u8 log_max_mrw_sz[0x7];
> +	u8 force_teardown[0x1];
> +	u8 reserved_at_111[0x1];
> +	u8 log_max_bsf_list_size[0x6];
> +	u8 umr_extended_translation_offset[0x1];
> +	u8 null_mkey[0x1];
> +	u8 log_max_klm_list_size[0x6];
> +	u8 reserved_at_120[0xa];
> +	u8 log_max_ra_req_dc[0x6];
> +	u8 reserved_at_130[0xa];
> +	u8 log_max_ra_res_dc[0x6];
> +	u8 reserved_at_140[0xa];
> +	u8 log_max_ra_req_qp[0x6];
> +	u8 reserved_at_150[0xa];
> +	u8 log_max_ra_res_qp[0x6];
> +	u8 end_pad[0x1];
> +	u8 cc_query_allowed[0x1];
> +	u8 cc_modify_allowed[0x1];
> +	u8 start_pad[0x1];
> +	u8 cache_line_128byte[0x1];
> +	u8 reserved_at_165[0xa];
> +	u8 qcam_reg[0x1];
> +	u8 gid_table_size[0x10];
> +	u8 out_of_seq_cnt[0x1];
> +	u8 vport_counters[0x1];
> +	u8 retransmission_q_counters[0x1];
> +	u8 debug[0x1];
> +	u8 modify_rq_counter_set_id[0x1];
> +	u8 rq_delay_drop[0x1];
> +	u8 max_qp_cnt[0xa];
> +	u8 pkey_table_size[0x10];
> +	u8 vport_group_manager[0x1];
> +	u8 vhca_group_manager[0x1];
> +	u8 ib_virt[0x1];
> +	u8 eth_virt[0x1];
> +	u8 vnic_env_queue_counters[0x1];
> +	u8 ets[0x1];
> +	u8 nic_flow_table[0x1];
> +	u8 eswitch_manager[0x1];
> +	u8 device_memory[0x1];
> +	u8 mcam_reg[0x1];
> +	u8 pcam_reg[0x1];
> +	u8 local_ca_ack_delay[0x5];
> +	u8 port_module_event[0x1];
> +	u8 enhanced_error_q_counters[0x1];
> +	u8 ports_check[0x1];
> +	u8 reserved_at_1b3[0x1];
> +	u8 disable_link_up[0x1];
> +	u8 beacon_led[0x1];
> +	u8 port_type[0x2];
> +	u8 num_ports[0x8];
> +	u8 reserved_at_1c0[0x1];
> +	u8 pps[0x1];
> +	u8 pps_modify[0x1];
> +	u8 log_max_msg[0x5];
> +	u8 reserved_at_1c8[0x4];
> +	u8 max_tc[0x4];
> +	u8 temp_warn_event[0x1];
> +	u8 dcbx[0x1];
> +	u8 general_notification_event[0x1];
> +	u8 reserved_at_1d3[0x2];
> +	u8 fpga[0x1];
> +	u8 rol_s[0x1];
> +	u8 rol_g[0x1];
> +	u8 reserved_at_1d8[0x1];
> +	u8 wol_s[0x1];
> +	u8 wol_g[0x1];
> +	u8 wol_a[0x1];
> +	u8 wol_b[0x1];
> +	u8 wol_m[0x1];
> +	u8 wol_u[0x1];
> +	u8 wol_p[0x1];
> +	u8 stat_rate_support[0x10];
> +	u8 reserved_at_1f0[0xc];
> +	u8 cqe_version[0x4];
> +	u8 compact_address_vector[0x1];
> +	u8 striding_rq[0x1];
> +	u8 reserved_at_202[0x1];
> +	u8 ipoib_enhanced_offloads[0x1];
> +	u8 ipoib_basic_offloads[0x1];
> +	u8 reserved_at_205[0x1];
> +	u8 repeated_block_disabled[0x1];
> +	u8 umr_modify_entity_size_disabled[0x1];
> +	u8 umr_modify_atomic_disabled[0x1];
> +	u8 umr_indirect_mkey_disabled[0x1];
> +	u8 umr_fence[0x2];
> +	u8 reserved_at_20c[0x3];
> +	u8 drain_sigerr[0x1];
> +	u8 cmdif_checksum[0x2];
> +	u8 sigerr_cqe[0x1];
> +	u8 reserved_at_213[0x1];
> +	u8 wq_signature[0x1];
> +	u8 sctr_data_cqe[0x1];
> +	u8 reserved_at_216[0x1];
> +	u8 sho[0x1];
> +	u8 tph[0x1];
> +	u8 rf[0x1];
> +	u8 dct[0x1];
> +	u8 qos[0x1];
> +	u8 eth_net_offloads[0x1];
> +	u8 roce[0x1];
> +	u8 atomic[0x1];
> +	u8 reserved_at_21f[0x1];
> +	u8 cq_oi[0x1];
> +	u8 cq_resize[0x1];
> +	u8 cq_moderation[0x1];
> +	u8 reserved_at_223[0x3];
> +	u8 cq_eq_remap[0x1];
> +	u8 pg[0x1];
> +	u8 block_lb_mc[0x1];
> +	u8 reserved_at_229[0x1];
> +	u8 scqe_break_moderation[0x1];
> +	u8 cq_period_start_from_cqe[0x1];
> +	u8 cd[0x1];
> +	u8 reserved_at_22d[0x1];
> +	u8 apm[0x1];
> +	u8 vector_calc[0x1];
> +	u8 umr_ptr_rlky[0x1];
> +	u8 imaicl[0x1];
> +	u8 reserved_at_232[0x4];
> +	u8 qkv[0x1];
> +	u8 pkv[0x1];
> +	u8 set_deth_sqpn[0x1];
> +	u8 reserved_at_239[0x3];
> +	u8 xrc[0x1];
> +	u8 ud[0x1];
> +	u8 uc[0x1];
> +	u8 rc[0x1];
> +	u8 uar_4k[0x1];
> +	u8 reserved_at_241[0x9];
> +	u8 uar_sz[0x6];
> +	u8 reserved_at_250[0x8];
> +	u8 log_pg_sz[0x8];
> +	u8 bf[0x1];
> +	u8 driver_version[0x1];
> +	u8 pad_tx_eth_packet[0x1];
> +	u8 reserved_at_263[0x8];
> +	u8 log_bf_reg_size[0x5];
> +	u8 reserved_at_270[0xb];
> +	u8 lag_master[0x1];
> +	u8 num_lag_ports[0x4];
> +	u8 reserved_at_280[0x10];
> +	u8 max_wqe_sz_sq[0x10];
> +	u8 reserved_at_2a0[0x10];
> +	u8 max_wqe_sz_rq[0x10];
> +	u8 max_flow_counter_31_16[0x10];
> +	u8 max_wqe_sz_sq_dc[0x10];
> +	u8 reserved_at_2e0[0x7];
> +	u8 max_qp_mcg[0x19];
> +	u8 reserved_at_300[0x10];
> +	u8 flow_counter_bulk_alloc[0x08];
> +	u8 log_max_mcg[0x8];
> +	u8 reserved_at_320[0x3];
> +	u8 log_max_transport_domain[0x5];
> +	u8 reserved_at_328[0x3];
> +	u8 log_max_pd[0x5];
> +	u8 reserved_at_330[0xb];
> +	u8 log_max_xrcd[0x5];
> +	u8 nic_receive_steering_discard[0x1];
> +	u8 receive_discard_vport_down[0x1];
> +	u8 transmit_discard_vport_down[0x1];
> +	u8 reserved_at_343[0x5];
> +	u8 log_max_flow_counter_bulk[0x8];
> +	u8 max_flow_counter_15_0[0x10];
> +	u8 reserved_at_360[0x3];
> +	u8 log_max_rq[0x5];
> +	u8 reserved_at_368[0x3];
> +	u8 log_max_sq[0x5];
> +	u8 reserved_at_370[0x3];
> +	u8 log_max_tir[0x5];
> +	u8 reserved_at_378[0x3];
> +	u8 log_max_tis[0x5];
> +	u8 basic_cyclic_rcv_wqe[0x1];
> +	u8 reserved_at_381[0x2];
> +	u8 log_max_rmp[0x5];
> +	u8 reserved_at_388[0x3];
> +	u8 log_max_rqt[0x5];
> +	u8 reserved_at_390[0x3];
> +	u8 log_max_rqt_size[0x5];
> +	u8 reserved_at_398[0x3];
> +	u8 log_max_tis_per_sq[0x5];
> +	u8 ext_stride_num_range[0x1];
> +	u8 reserved_at_3a1[0x2];
> +	u8 log_max_stride_sz_rq[0x5];
> +	u8 reserved_at_3a8[0x3];
> +	u8 log_min_stride_sz_rq[0x5];
> +	u8 reserved_at_3b0[0x3];
> +	u8 log_max_stride_sz_sq[0x5];
> +	u8 reserved_at_3b8[0x3];
> +	u8 log_min_stride_sz_sq[0x5];
> +	u8 hairpin[0x1];
> +	u8 reserved_at_3c1[0x2];
> +	u8 log_max_hairpin_queues[0x5];
> +	u8 reserved_at_3c8[0x3];
> +	u8 log_max_hairpin_wq_data_sz[0x5];
> +	u8 reserved_at_3d0[0x3];
> +	u8 log_max_hairpin_num_packets[0x5];
> +	u8 reserved_at_3d8[0x3];
> +	u8 log_max_wq_sz[0x5];
> +	u8 nic_vport_change_event[0x1];
> +	u8 disable_local_lb_uc[0x1];
> +	u8 disable_local_lb_mc[0x1];
> +	u8 log_min_hairpin_wq_data_sz[0x5];
> +	u8 reserved_at_3e8[0x3];
> +	u8 log_max_vlan_list[0x5];
> +	u8 reserved_at_3f0[0x3];
> +	u8 log_max_current_mc_list[0x5];
> +	u8 reserved_at_3f8[0x3];
> +	u8 log_max_current_uc_list[0x5];
> +	u8 general_obj_types[0x40];
> +	u8 reserved_at_440[0x20];
> +	u8 reserved_at_460[0x10];
> +	u8 max_num_eqs[0x10];
> +	u8 reserved_at_480[0x3];
> +	u8 log_max_l2_table[0x5];
> +	u8 reserved_at_488[0x8];
> +	u8 log_uar_page_sz[0x10];
> +	u8 reserved_at_4a0[0x20];
> +	u8 device_frequency_mhz[0x20];
> +	u8 device_frequency_khz[0x20];
> +	u8 reserved_at_500[0x20];
> +	u8 num_of_uars_per_page[0x20];
> +	u8 flex_parser_protocols[0x20];
> +	u8 reserved_at_560[0x20];
> +	u8 reserved_at_580[0x3c];
> +	u8 mini_cqe_resp_stride_index[0x1];
> +	u8 cqe_128_always[0x1];
> +	u8 cqe_compression_128[0x1];
> +	u8 cqe_compression[0x1];
> +	u8 cqe_compression_timeout[0x10];
> +	u8 cqe_compression_max_num[0x10];
> +	u8 reserved_at_5e0[0x10];
> +	u8 tag_matching[0x1];
> +	u8 rndv_offload_rc[0x1];
> +	u8 rndv_offload_dc[0x1];
> +	u8 log_tag_matching_list_sz[0x5];
> +	u8 reserved_at_5f8[0x3];
> +	u8 log_max_xrq[0x5];
> +	u8 affiliate_nic_vport_criteria[0x8];
> +	u8 native_port_num[0x8];
> +	u8 num_vhca_ports[0x8];
> +	u8 reserved_at_618[0x6];
> +	u8 sw_owner_id[0x1];
> +	u8 reserved_at_61f[0x1e1];
> +};
> +
> +struct mlx5_ifc_qos_cap_bits {
> +	u8 packet_pacing[0x1];
> +	u8 esw_scheduling[0x1];
> +	u8 esw_bw_share[0x1];
> +	u8 esw_rate_limit[0x1];
> +	u8 reserved_at_4[0x1];
> +	u8 packet_pacing_burst_bound[0x1];
> +	u8 packet_pacing_typical_size[0x1];
> +	u8 flow_meter_srtcm[0x1];
> +	u8 reserved_at_8[0x8];
> +	u8 log_max_flow_meter[0x8];
> +	u8 flow_meter_reg_id[0x8];
> +	u8 reserved_at_25[0x20];
> +	u8 packet_pacing_max_rate[0x20];
> +	u8 packet_pacing_min_rate[0x20];
> +	u8 reserved_at_80[0x10];
> +	u8 packet_pacing_rate_table_size[0x10];
> +	u8 esw_element_type[0x10];
> +	u8 esw_tsar_type[0x10];
> +	u8 reserved_at_c0[0x10];
> +	u8 max_qos_para_vport[0x10];
> +	u8 max_tsar_bw_share[0x20];
> +	u8 reserved_at_100[0x6e8];
> +};
> +
> +union mlx5_ifc_hca_cap_union_bits {
> +	struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
> +	struct mlx5_ifc_qos_cap_bits qos_cap;
> +	u8 reserved_at_0[0x8000];
> +};
> +
> +struct mlx5_ifc_query_hca_cap_out_bits {
> +	u8 status[0x8];
> +	u8 reserved_at_8[0x18];
> +	u8 syndrome[0x20];
> +	u8 reserved_at_40[0x40];
> +	union mlx5_ifc_hca_cap_union_bits capability;
> +};
> +
> +struct mlx5_ifc_query_hca_cap_in_bits {
> +	u8 opcode[0x10];
> +	u8 reserved_at_10[0x10];
> +	u8 reserved_at_20[0x10];
> +	u8 op_mod[0x10];
> +	u8 reserved_at_40[0x40];
> +};
> +
>  /* CQE format mask. */
>  #define MLX5E_CQE_FORMAT_MASK 0xc
>  
> -- 
> 1.8.3.1
>

Patch
diff mbox series

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 93bc869..2b72a33 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -161,6 +161,11 @@  mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		enum MLX5DV_DR_NS_TYPE_TERMINATING \
 		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
+		HAVE_MLX5DV_DR_ESWITCH \
+		infiniband/mlx5dv.h \
+		enum MLX5DV_DR_NS_DOMAIN_FDB_BYPASS \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
 		HAVE_IBV_DEVX_OBJ \
 		infiniband/mlx5dv.h \
 		func mlx5dv_devx_obj_create \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index 0037e15..9dfd28d 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -113,6 +113,8 @@  if build
 		'MLX5DV_FLOW_ACTION_COUNTERS_DEVX' ],
 		[ 'HAVE_MLX5DV_DR', 'infiniband/mlx5dv.h',
 		'MLX5DV_DR_NS_TYPE_TERMINATING' ],
+		[ 'HAVE_MLX5DV_DR_ESWITCH', 'infiniband/mlx5dv.h',
+		'MLX5DV_DR_NS_DOMAIN_FDB_BYPASS' ],
 		[ 'HAVE_SUPPORTED_40000baseKR4_Full', 'linux/ethtool.h',
 		'SUPPORTED_40000baseKR4_Full' ],
 		[ 'HAVE_SUPPORTED_40000baseCR4_Full', 'linux/ethtool.h',
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 9ff50df..ff24e1d 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -101,6 +101,9 @@ 
 /* Allow L3 VXLAN flow creation. */
 #define MLX5_L3_VXLAN_EN "l3_vxlan_en"
 
+/* Activate DV E-Switch flow steering. */
+#define MLX5_DV_ESW_EN "dv_esw_en"
+
 /* Activate DV flow steering. */
 #define MLX5_DV_FLOW_EN "dv_flow_en"
 
@@ -344,6 +347,18 @@  struct mlx5_dev_spawn_data {
 	}
 	pthread_mutex_init(&sh->dv_mutex, NULL);
 	sh->tx_ns = ns;
+#ifdef HAVE_MLX5DV_DR_ESWITCH
+	if (priv->config.dv_esw_en) {
+		ns  = mlx5_glue->dr_create_ns(sh->ctx,
+					      MLX5DV_DR_NS_DOMAIN_FDB_BYPASS);
+		if (!ns) {
+			DRV_LOG(ERR, "FDB mlx5dv_dr_create_ns failed");
+			err = errno;
+			goto error;
+		}
+		sh->fdb_ns = ns;
+	}
+#endif
 	sh->dv_refcnt++;
 	priv->dr_shared = 1;
 	return 0;
@@ -358,6 +373,10 @@  struct mlx5_dev_spawn_data {
 		mlx5dv_dr_destroy_ns(sh->tx_ns);
 		sh->tx_ns = NULL;
 	}
+	if (sh->fdb_ns) {
+		mlx5_glue->dr_destroy_ns(sh->fdb_ns);
+		sh->fdb_ns = NULL;
+	}
 	return err;
 #else
 	(void)priv;
@@ -393,6 +412,12 @@  struct mlx5_dev_spawn_data {
 		mlx5dv_dr_destroy_ns(sh->tx_ns);
 		sh->tx_ns = NULL;
 	}
+#ifdef HAVE_MLX5DV_DR_ESWITCH
+	if (sh->fdb_ns) {
+		mlx5_glue->dr_destroy_ns(sh->fdb_ns);
+		sh->fdb_ns = NULL;
+	}
+#endif
 	pthread_mutex_destroy(&sh->dv_mutex);
 #else
 	(void)priv;
@@ -861,6 +886,8 @@  struct mlx5_dev_spawn_data {
 		config->l3_vxlan_en = !!tmp;
 	} else if (strcmp(MLX5_VF_NL_EN, key) == 0) {
 		config->vf_nl_en = !!tmp;
+	} else if (strcmp(MLX5_DV_ESW_EN, key) == 0) {
+		config->dv_esw_en = !!tmp;
 	} else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) {
 		config->dv_flow_en = !!tmp;
 	} else if (strcmp(MLX5_MR_EXT_MEMSEG_EN, key) == 0) {
@@ -905,6 +932,7 @@  struct mlx5_dev_spawn_data {
 		MLX5_RX_VEC_EN,
 		MLX5_L3_VXLAN_EN,
 		MLX5_VF_NL_EN,
+		MLX5_DV_ESW_EN,
 		MLX5_DV_FLOW_EN,
 		MLX5_MR_EXT_MEMSEG_EN,
 		MLX5_REPRESENTOR,
@@ -1458,11 +1486,6 @@  struct mlx5_dev_spawn_data {
 			priv->tcf_context = NULL;
 		}
 	}
-	if (config.dv_flow_en) {
-		err = mlx5_alloc_shared_dr(priv);
-		if (err)
-			goto error;
-	}
 	TAILQ_INIT(&priv->flows);
 	TAILQ_INIT(&priv->ctrl_flows);
 	/* Hint libmlx5 to use PMD allocator for data plane resources */
@@ -1484,8 +1507,27 @@  struct mlx5_dev_spawn_data {
 	 * Verbs context returned by ibv_open_device().
 	 */
 	mlx5_link_update(eth_dev, 0);
+#ifdef HAVE_IBV_DEVX_OBJ
+	err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config.hca_attr);
+	if (err) {
+		err = -err;
+		goto error;
+	}
+#endif
+#ifdef HAVE_MLX5DV_DR_ESWITCH
+	if (!(config.hca_attr.eswitch_manager && config.dv_flow_en &&
+	      (switch_info->representor || switch_info->master)))
+		config.dv_esw_en = 0;
+#else
+	config.dv_esw_en = 0;
+#endif
 	/* Store device configuration on private structure. */
 	priv->config = config;
+	if (config.dv_flow_en) {
+		err = mlx5_alloc_shared_dr(priv);
+		if (err)
+			goto error;
+	}
 	/* Supported Verbs flow priority number detection. */
 	err = mlx5_flow_discover_priorities(eth_dev);
 	if (err < 0) {
@@ -1876,6 +1918,7 @@  struct mlx5_dev_spawn_data {
 			.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN,
 			.min_rxqs_num = MLX5_MPRQ_MIN_RXQS,
 		},
+		.dv_esw_en = 1,
 	};
 	/* Device specific configuration. */
 	switch (pci_dev->id.device_id) {
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 14c7f3c..b9946f6 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -138,6 +138,11 @@  struct mlx5_devx_counter_set {
 	int id; /* Flow counter ID */
 };
 
+/* HCA attributes. */
+struct mlx5_hca_attr {
+	uint32_t eswitch_manager:1;
+};
+
 /* Flow list . */
 TAILQ_HEAD(mlx5_flows, rte_flow);
 
@@ -171,6 +176,7 @@  struct mlx5_dev_config {
 	/* Whether memseg should be extended for MR creation. */
 	unsigned int l3_vxlan_en:1; /* Enable L3 VXLAN flow creation. */
 	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
+	unsigned int dv_esw_en:1; /* Enable E-Switch DV flow. */
 	unsigned int dv_flow_en:1; /* Enable DV flow. */
 	unsigned int swp:1; /* Tx generic tunnel checksum and TSO offload. */
 	unsigned int devx:1; /* Whether devx interface is available or not. */
@@ -192,6 +198,7 @@  struct mlx5_dev_config {
 	int txqs_inline; /* Queue number threshold for inlining. */
 	int txqs_vec; /* Queue number threshold for vectorized Tx. */
 	int inline_max_packet_sz; /* Max packet size for inlining. */
+	struct mlx5_hca_attr hca_attr; /* HCA attributes. */
 };
 
 /**
@@ -241,6 +248,7 @@  struct mlx5_flow_tbl_resource {
 };
 
 #define MLX5_MAX_TABLES 1024
+#define MLX5_MAX_TABLES_FDB 32
 #define MLX5_GROUP_FACTOR 1
 
 /*
@@ -260,6 +268,8 @@  struct mlx5_ibv_shared {
 	/* Shared DV/DR flow data section. */
 	pthread_mutex_t dv_mutex; /* DV context mutex. */
 	uint32_t dv_refcnt; /* DV/DR data reference counter. */
+	void *fdb_ns; /* FDB Direct Rules name space handle. */
+	struct mlx5_flow_tbl_resource fdb_tbl[MLX5_MAX_TABLES_FDB];
 	void *rx_ns; /* RX Direct Rules name space handle. */
 	struct mlx5_flow_tbl_resource rx_tbl[MLX5_MAX_TABLES];
 	/* RX Direct Rules tables. */
@@ -539,4 +549,6 @@  int mlx5_devx_cmd_flow_counter_alloc(struct ibv_context *ctx,
 int mlx5_devx_cmd_flow_counter_query(struct mlx5_devx_counter_set *dcx,
 				     int clear,
 				     uint64_t *pkts, uint64_t *bytes);
+int mlx5_devx_cmd_query_hca_attr(struct ibv_context *ctx,
+				 struct mlx5_hca_attr *attr);
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_devx_cmds.c b/drivers/net/mlx5/mlx5_devx_cmds.c
index a9dff58..e5776c4 100644
--- a/drivers/net/mlx5/mlx5_devx_cmds.c
+++ b/drivers/net/mlx5/mlx5_devx_cmds.c
@@ -105,3 +105,47 @@  int mlx5_devx_cmd_flow_counter_free(struct mlx5dv_devx_obj *obj)
 	*bytes = MLX5_GET64(traffic_counter, stats, octets);
 	return 0;
 }
+
+/**
+ * Query HCA attributes.
+ * Using those attributes we can check at run time whether the device
+ * has the required capabilities.
+ *
+ * @param[in] ctx
+ *   ibv contexts returned from mlx5dv_open_device.
+ * @param[out] attr
+ *   Attributes device values.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int
+mlx5_devx_cmd_query_hca_attr(struct ibv_context *ctx,
+			     struct mlx5_hca_attr *attr)
+{
+	uint32_t in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(query_hca_cap_out)] = {0};
+	void *hcattr;
+	int status, syndrome, rc;
+
+	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+	MLX5_SET(query_hca_cap_in, in, op_mod,
+		 MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE |
+		 MLX5_HCA_CAP_OPMOD_GET_CUR);
+
+	rc = mlx5_glue->devx_general_cmd(ctx,
+					 in, sizeof(in), out, sizeof(out));
+	if (rc)
+		return rc;
+	status = MLX5_GET(query_hca_cap_out, out, status);
+	syndrome = MLX5_GET(query_hca_cap_out, out, syndrome);
+	if (status) {
+		DRV_LOG(DEBUG, "Failed to query devx HCA capabilities, "
+			"status %x, syndrome = %x",
+			status, syndrome);
+		return -1;
+	}
+	hcattr = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
+	attr->eswitch_manager = MLX5_GET(cmd_hca_cap, hcattr, eswitch_manager);
+	return 0;
+}
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index a0683ee..b1effda 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1784,7 +1784,7 @@  uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 	struct mlx5_priv *priv = dev->data->dev_private;
 	enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX;
 
-	if (attr->transfer)
+	if (attr->transfer && !priv->config.dv_esw_en)
 		type = MLX5_FLOW_TYPE_TCF;
 	else
 		type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index b15266f..8c42380 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -529,6 +529,7 @@  enum {
 };
 
 enum {
+	MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
 	MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939,
 	MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b,
 };
@@ -591,6 +592,333 @@  struct mlx5_ifc_query_flow_counter_in_bits {
 	u8         flow_counter_id[0x20];
 };
 
+enum { /* Capability type for op_mod; << 1 leaves bit 0 free. */
+	MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0 << 1,
+	MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP        = 0xc << 1,
+};
+
+enum { /* op_mod bit 0 values, OR'ed with a capability type. */
+	MLX5_HCA_CAP_OPMOD_GET_MAX   = 0,
+	MLX5_HCA_CAP_OPMOD_GET_CUR   = 1,
+};
+
+struct mlx5_ifc_cmd_hca_cap_bits { /* General device capabilities. */
+	u8 reserved_at_0[0x30]; /* [n] = field width; name = offset. */
+	u8 vhca_id[0x10];
+	u8 reserved_at_40[0x40];
+	u8 log_max_srq_sz[0x8];
+	u8 log_max_qp_sz[0x8];
+	u8 reserved_at_90[0xb];
+	u8 log_max_qp[0x5];
+	u8 reserved_at_a0[0xb];
+	u8 log_max_srq[0x5];
+	u8 reserved_at_b0[0x10];
+	u8 reserved_at_c0[0x8];
+	u8 log_max_cq_sz[0x8];
+	u8 reserved_at_d0[0xb];
+	u8 log_max_cq[0x5];
+	u8 log_max_eq_sz[0x8];
+	u8 reserved_at_e8[0x2];
+	u8 log_max_mkey[0x6];
+	u8 reserved_at_f0[0x8];
+	u8 dump_fill_mkey[0x1];
+	u8 reserved_at_f9[0x3];
+	u8 log_max_eq[0x4];
+	u8 max_indirection[0x8];
+	u8 fixed_buffer_size[0x1];
+	u8 log_max_mrw_sz[0x7];
+	u8 force_teardown[0x1];
+	u8 reserved_at_111[0x1];
+	u8 log_max_bsf_list_size[0x6];
+	u8 umr_extended_translation_offset[0x1];
+	u8 null_mkey[0x1];
+	u8 log_max_klm_list_size[0x6];
+	u8 reserved_at_120[0xa];
+	u8 log_max_ra_req_dc[0x6];
+	u8 reserved_at_130[0xa];
+	u8 log_max_ra_res_dc[0x6];
+	u8 reserved_at_140[0xa];
+	u8 log_max_ra_req_qp[0x6];
+	u8 reserved_at_150[0xa];
+	u8 log_max_ra_res_qp[0x6];
+	u8 end_pad[0x1];
+	u8 cc_query_allowed[0x1];
+	u8 cc_modify_allowed[0x1];
+	u8 start_pad[0x1];
+	u8 cache_line_128byte[0x1];
+	u8 reserved_at_165[0xa];
+	u8 qcam_reg[0x1];
+	u8 gid_table_size[0x10];
+	u8 out_of_seq_cnt[0x1];
+	u8 vport_counters[0x1];
+	u8 retransmission_q_counters[0x1];
+	u8 debug[0x1];
+	u8 modify_rq_counter_set_id[0x1];
+	u8 rq_delay_drop[0x1];
+	u8 max_qp_cnt[0xa];
+	u8 pkey_table_size[0x10];
+	u8 vport_group_manager[0x1];
+	u8 vhca_group_manager[0x1];
+	u8 ib_virt[0x1];
+	u8 eth_virt[0x1];
+	u8 vnic_env_queue_counters[0x1];
+	u8 ets[0x1];
+	u8 nic_flow_table[0x1];
+	u8 eswitch_manager[0x1]; /* Read by mlx5_devx_cmd_query_hca_attr(). */
+	u8 device_memory[0x1];
+	u8 mcam_reg[0x1];
+	u8 pcam_reg[0x1];
+	u8 local_ca_ack_delay[0x5];
+	u8 port_module_event[0x1];
+	u8 enhanced_error_q_counters[0x1];
+	u8 ports_check[0x1];
+	u8 reserved_at_1b3[0x1];
+	u8 disable_link_up[0x1];
+	u8 beacon_led[0x1];
+	u8 port_type[0x2];
+	u8 num_ports[0x8];
+	u8 reserved_at_1c0[0x1];
+	u8 pps[0x1];
+	u8 pps_modify[0x1];
+	u8 log_max_msg[0x5];
+	u8 reserved_at_1c8[0x4];
+	u8 max_tc[0x4];
+	u8 temp_warn_event[0x1];
+	u8 dcbx[0x1];
+	u8 general_notification_event[0x1];
+	u8 reserved_at_1d3[0x2];
+	u8 fpga[0x1];
+	u8 rol_s[0x1];
+	u8 rol_g[0x1];
+	u8 reserved_at_1d8[0x1];
+	u8 wol_s[0x1];
+	u8 wol_g[0x1];
+	u8 wol_a[0x1];
+	u8 wol_b[0x1];
+	u8 wol_m[0x1];
+	u8 wol_u[0x1];
+	u8 wol_p[0x1];
+	u8 stat_rate_support[0x10];
+	u8 reserved_at_1f0[0xc];
+	u8 cqe_version[0x4];
+	u8 compact_address_vector[0x1];
+	u8 striding_rq[0x1];
+	u8 reserved_at_202[0x1];
+	u8 ipoib_enhanced_offloads[0x1];
+	u8 ipoib_basic_offloads[0x1];
+	u8 reserved_at_205[0x1];
+	u8 repeated_block_disabled[0x1];
+	u8 umr_modify_entity_size_disabled[0x1];
+	u8 umr_modify_atomic_disabled[0x1];
+	u8 umr_indirect_mkey_disabled[0x1];
+	u8 umr_fence[0x2];
+	u8 reserved_at_20c[0x3];
+	u8 drain_sigerr[0x1];
+	u8 cmdif_checksum[0x2];
+	u8 sigerr_cqe[0x1];
+	u8 reserved_at_213[0x1];
+	u8 wq_signature[0x1];
+	u8 sctr_data_cqe[0x1];
+	u8 reserved_at_216[0x1];
+	u8 sho[0x1];
+	u8 tph[0x1];
+	u8 rf[0x1];
+	u8 dct[0x1];
+	u8 qos[0x1];
+	u8 eth_net_offloads[0x1];
+	u8 roce[0x1];
+	u8 atomic[0x1];
+	u8 reserved_at_21f[0x1];
+	u8 cq_oi[0x1];
+	u8 cq_resize[0x1];
+	u8 cq_moderation[0x1];
+	u8 reserved_at_223[0x3];
+	u8 cq_eq_remap[0x1];
+	u8 pg[0x1];
+	u8 block_lb_mc[0x1];
+	u8 reserved_at_229[0x1];
+	u8 scqe_break_moderation[0x1];
+	u8 cq_period_start_from_cqe[0x1];
+	u8 cd[0x1];
+	u8 reserved_at_22d[0x1];
+	u8 apm[0x1];
+	u8 vector_calc[0x1];
+	u8 umr_ptr_rlky[0x1];
+	u8 imaicl[0x1];
+	u8 reserved_at_232[0x4];
+	u8 qkv[0x1];
+	u8 pkv[0x1];
+	u8 set_deth_sqpn[0x1];
+	u8 reserved_at_239[0x3];
+	u8 xrc[0x1];
+	u8 ud[0x1];
+	u8 uc[0x1];
+	u8 rc[0x1];
+	u8 uar_4k[0x1];
+	u8 reserved_at_241[0x9];
+	u8 uar_sz[0x6];
+	u8 reserved_at_250[0x8];
+	u8 log_pg_sz[0x8];
+	u8 bf[0x1];
+	u8 driver_version[0x1];
+	u8 pad_tx_eth_packet[0x1];
+	u8 reserved_at_263[0x8];
+	u8 log_bf_reg_size[0x5];
+	u8 reserved_at_270[0xb];
+	u8 lag_master[0x1];
+	u8 num_lag_ports[0x4];
+	u8 reserved_at_280[0x10];
+	u8 max_wqe_sz_sq[0x10];
+	u8 reserved_at_2a0[0x10];
+	u8 max_wqe_sz_rq[0x10];
+	u8 max_flow_counter_31_16[0x10];
+	u8 max_wqe_sz_sq_dc[0x10];
+	u8 reserved_at_2e0[0x7];
+	u8 max_qp_mcg[0x19];
+	u8 reserved_at_300[0x10];
+	u8 flow_counter_bulk_alloc[0x08];
+	u8 log_max_mcg[0x8];
+	u8 reserved_at_320[0x3];
+	u8 log_max_transport_domain[0x5];
+	u8 reserved_at_328[0x3];
+	u8 log_max_pd[0x5];
+	u8 reserved_at_330[0xb];
+	u8 log_max_xrcd[0x5];
+	u8 nic_receive_steering_discard[0x1];
+	u8 receive_discard_vport_down[0x1];
+	u8 transmit_discard_vport_down[0x1];
+	u8 reserved_at_343[0x5];
+	u8 log_max_flow_counter_bulk[0x8];
+	u8 max_flow_counter_15_0[0x10];
+	u8 reserved_at_360[0x3];
+	u8 log_max_rq[0x5];
+	u8 reserved_at_368[0x3];
+	u8 log_max_sq[0x5];
+	u8 reserved_at_370[0x3];
+	u8 log_max_tir[0x5];
+	u8 reserved_at_378[0x3];
+	u8 log_max_tis[0x5];
+	u8 basic_cyclic_rcv_wqe[0x1];
+	u8 reserved_at_381[0x2];
+	u8 log_max_rmp[0x5];
+	u8 reserved_at_388[0x3];
+	u8 log_max_rqt[0x5];
+	u8 reserved_at_390[0x3];
+	u8 log_max_rqt_size[0x5];
+	u8 reserved_at_398[0x3];
+	u8 log_max_tis_per_sq[0x5];
+	u8 ext_stride_num_range[0x1];
+	u8 reserved_at_3a1[0x2];
+	u8 log_max_stride_sz_rq[0x5];
+	u8 reserved_at_3a8[0x3];
+	u8 log_min_stride_sz_rq[0x5];
+	u8 reserved_at_3b0[0x3];
+	u8 log_max_stride_sz_sq[0x5];
+	u8 reserved_at_3b8[0x3];
+	u8 log_min_stride_sz_sq[0x5];
+	u8 hairpin[0x1];
+	u8 reserved_at_3c1[0x2];
+	u8 log_max_hairpin_queues[0x5];
+	u8 reserved_at_3c8[0x3];
+	u8 log_max_hairpin_wq_data_sz[0x5];
+	u8 reserved_at_3d0[0x3];
+	u8 log_max_hairpin_num_packets[0x5];
+	u8 reserved_at_3d8[0x3];
+	u8 log_max_wq_sz[0x5];
+	u8 nic_vport_change_event[0x1];
+	u8 disable_local_lb_uc[0x1];
+	u8 disable_local_lb_mc[0x1];
+	u8 log_min_hairpin_wq_data_sz[0x5];
+	u8 reserved_at_3e8[0x3];
+	u8 log_max_vlan_list[0x5];
+	u8 reserved_at_3f0[0x3];
+	u8 log_max_current_mc_list[0x5];
+	u8 reserved_at_3f8[0x3];
+	u8 log_max_current_uc_list[0x5];
+	u8 general_obj_types[0x40];
+	u8 reserved_at_440[0x20];
+	u8 reserved_at_460[0x10];
+	u8 max_num_eqs[0x10];
+	u8 reserved_at_480[0x3];
+	u8 log_max_l2_table[0x5];
+	u8 reserved_at_488[0x8];
+	u8 log_uar_page_sz[0x10];
+	u8 reserved_at_4a0[0x20];
+	u8 device_frequency_mhz[0x20];
+	u8 device_frequency_khz[0x20];
+	u8 reserved_at_500[0x20];
+	u8 num_of_uars_per_page[0x20];
+	u8 flex_parser_protocols[0x20];
+	u8 reserved_at_560[0x20];
+	u8 reserved_at_580[0x3c];
+	u8 mini_cqe_resp_stride_index[0x1];
+	u8 cqe_128_always[0x1];
+	u8 cqe_compression_128[0x1];
+	u8 cqe_compression[0x1];
+	u8 cqe_compression_timeout[0x10];
+	u8 cqe_compression_max_num[0x10];
+	u8 reserved_at_5e0[0x10];
+	u8 tag_matching[0x1];
+	u8 rndv_offload_rc[0x1];
+	u8 rndv_offload_dc[0x1];
+	u8 log_tag_matching_list_sz[0x5];
+	u8 reserved_at_5f8[0x3];
+	u8 log_max_xrq[0x5];
+	u8 affiliate_nic_vport_criteria[0x8];
+	u8 native_port_num[0x8];
+	u8 num_vhca_ports[0x8];
+	u8 reserved_at_618[0x6];
+	u8 sw_owner_id[0x1];
+	u8 reserved_at_61f[0x1e1]; /* Pads the layout to 0x800. */
+};
+
+struct mlx5_ifc_qos_cap_bits { /* QoS view of the capability union. */
+	u8 packet_pacing[0x1]; /* [n] = field width; name = offset. */
+	u8 esw_scheduling[0x1];
+	u8 esw_bw_share[0x1];
+	u8 esw_rate_limit[0x1];
+	u8 reserved_at_4[0x1];
+	u8 packet_pacing_burst_bound[0x1];
+	u8 packet_pacing_typical_size[0x1];
+	u8 flow_meter_srtcm[0x1];
+	u8 reserved_at_8[0x8];
+	u8 log_max_flow_meter[0x8];
+	u8 flow_meter_reg_id[0x8];
+	u8 reserved_at_20[0x20];
+	u8 packet_pacing_max_rate[0x20];
+	u8 packet_pacing_min_rate[0x20];
+	u8 reserved_at_80[0x10];
+	u8 packet_pacing_rate_table_size[0x10];
+	u8 esw_element_type[0x10];
+	u8 esw_tsar_type[0x10];
+	u8 reserved_at_c0[0x10];
+	u8 max_qos_para_vport[0x10];
+	u8 max_tsar_bw_share[0x20];
+	u8 reserved_at_100[0x6e8]; /* NOTE(review): ends at 0x7e8? */
+};
+
+union mlx5_ifc_hca_cap_union_bits { /* Overlaid capability layouts. */
+	struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
+	struct mlx5_ifc_qos_cap_bits qos_cap;
+	u8 reserved_at_0[0x8000]; /* Fixes the union size at 0x8000. */
+};
+
+struct mlx5_ifc_query_hca_cap_out_bits { /* QUERY_HCA_CAP reply. */
+	u8 status[0x8]; /* Non-zero fails mlx5_devx_cmd_query_hca_attr(). */
+	u8 reserved_at_8[0x18];
+	u8 syndrome[0x20]; /* Logged together with status on failure. */
+	u8 reserved_at_40[0x40];
+	union mlx5_ifc_hca_cap_union_bits capability;
+};
+
+struct mlx5_ifc_query_hca_cap_in_bits { /* QUERY_HCA_CAP request. */
+	u8 opcode[0x10]; /* Set to MLX5_CMD_OP_QUERY_HCA_CAP. */
+	u8 reserved_at_10[0x10];
+	u8 reserved_at_20[0x10];
+	u8 op_mod[0x10]; /* Capability type OR'ed with a GET_* value. */
+	u8 reserved_at_40[0x40];
+};
+
 /* CQE format mask. */
 #define MLX5E_CQE_FORMAT_MASK 0xc