@@ -346,99 +346,107 @@ Run-time configuration
greater or equal to this value. The default value is 12, valid only if
``mprq_en`` is set.
-- ``txq_inline`` parameter [int]
-
- Amount of data to be inlined during TX operations. Improves latency.
- Can improve PPS performance when PCI back pressure is detected and may be
- useful for scenarios involving heavy traffic on many queues.
-
- Because additional software logic is necessary to handle this mode, this
- option should be used with care, as it can lower performance when back
- pressure is not expected.
+- ``txq_inline_min`` parameter [int]
+
+ Minimal amount of data to be inlined into the WQE during Tx operations. NICs
+ may require this minimal amount of data to operate correctly. The exact value
+ may depend on the NIC operation mode, requested offloads, etc.
+
+ If the ``txq_inline_min`` key is present, the specified value (which may be
+ aligned by the driver in order not to exceed the limits and to provide better
+ descriptor space utilization) is used by the driver, and it is guaranteed that
+ the requested amount of data bytes is inlined into the WQE in addition to any
+ other inline settings.
+
+ If the ``txq_inline_min`` key is not present, the value may be queried by the
+ driver from the NIC via DevX if this feature is available. If DevX is not
+ enabled/supported, the value 18 (assuming the L2 header, including VLAN) is set
+ for ConnectX-4, the value 58 (assuming L2-L4 headers, required by configurations
+ over E-Switch) is set for ConnectX-4 Lx, and 0 is set by default for ConnectX-5
+ and newer NICs. If a packet is shorter than the ``txq_inline_min`` value, the
+ entire packet is inlined.
+
+ Please note that this minimal data inlining disengages the eMPW feature
+ (Enhanced Multi-Packet Write), because the latter does not support partial
+ packet inlining. This is not very critical, since minimal data inlining is
+ mostly required by ConnectX-4 and ConnectX-4 Lx, and these NICs do not support
+ the eMPW feature.
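+
+ For illustration only, the following minimal sketch (with hypothetical names,
+ not the actual driver code) shows how such a fallback default could be
+ selected when the key is absent and DevX is unavailable::
+
+    /* Illustrative fallback defaults; values are taken from the text above. */
+    enum nic_family { NIC_CONNECTX4, NIC_CONNECTX4LX, NIC_CONNECTX5_OR_NEWER };
+
+    static int
+    default_txq_inline_min(enum nic_family nic)
+    {
+            switch (nic) {
+            case NIC_CONNECTX4:
+                    return 18; /* L2 header including VLAN. */
+            case NIC_CONNECTX4LX:
+                    return 58; /* L2-L4 headers, needed over E-Switch. */
+            default:
+                    return 0;  /* ConnectX-5 and newer. */
+            }
+    }
+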
- ``txqs_min_inline`` parameter [int]
- Enable inline send only when the number of TX queues is greater or equal
+ Enable inline data send only when the number of TX queues is greater than or equal
to this value.
- This option should be used in combination with ``txq_inline`` above.
-
- On ConnectX-4, ConnectX-4 LX, ConnectX-5, ConnectX-6 and BlueField without
- Enhanced MPW:
-
- - Disabled by default.
- - In case ``txq_inline`` is set recommendation is 4.
+ This option should be used in combination with ``txq_inline_max`` and
+ ``txq_inline_mpw`` below and does not affect the ``txq_inline_min`` setting above.
- On ConnectX-5, ConnectX-6 and BlueField with Enhanced MPW:
+ If this option is not specified, the default value of 8 is used.
- - Set to 8 by default.
+ Data inlining consumes CPU cycles, so this option is intended to enable
+ inlining automatically only when there are enough Tx queues, which means there
+ are enough CPU cores, PCI bandwidth is becoming the more critical resource,
+ and the CPU is no longer expected to be the bottleneck.
-- ``txqs_max_vec`` parameter [int]
+ Copying data into the WQE improves latency and can improve PPS performance
+ when PCI back pressure is detected; it may be useful for scenarios involving
+ heavy traffic on many queues.
- Enable vectorized Tx only when the number of TX queues is less than or
- equal to this value. Effective only when ``tx_vec_en`` is enabled.
-
- On ConnectX-5:
-
- - Set to 8 by default on ARMv8.
- - Set to 4 by default otherwise.
-
- On BlueField
+ Because additional software logic is necessary to handle this mode, this
+ option should be used with care, as it may lower performance when back
+ pressure is not expected.
- - Set to 16 by default.
+- ``txq_inline_max`` parameter [int]
+
+ Specifies the maximal packet length to be completely inlined into the WQE
+ Ethernet Segment for the ordinary SEND method. If a packet is larger than the
+ specified value, the packet data is not copied by the driver at all and the
+ data buffer is addressed with a pointer. If the packet length is less than or
+ equal to this value, all packet data is copied into the WQE. This may improve
+ PCI bandwidth utilization for short packets significantly but requires extra
+ CPU cycles.
+
+ The data inline feature is controlled by the number of Tx queues: if the number
+ of Tx queues is larger than the ``txqs_min_inline`` key parameter, the inline
+ feature is engaged; if there are not enough Tx queues (which means not enough
+ CPU cores, i.e. CPU resources are scarce), data inlining is not performed by
+ the driver. Setting ``txqs_min_inline`` to zero always enables data inlining.
+
+ The default ``txq_inline_max`` value is 290. The specified value may be adjusted
+ by the driver in order not to exceed the limit (930 bytes) and to provide better
+ WQE space filling without gaps; the adjustment is reflected in the debug log.
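+
+ For illustration only, a simplified sketch (hypothetical names, not the
+ driver's actual data path) of the inlining decision for the ordinary SEND
+ method::
+
+    /* Inline packet data only if enough Tx queues exist and the packet fits. */
+    static int
+    send_inline_data(unsigned int nb_txq, unsigned int txqs_min_inline,
+                     unsigned int pkt_len, unsigned int txq_inline_max)
+    {
+            if (nb_txq < txqs_min_inline)
+                    return 0; /* Too few queues: CPU cycles are the scarce resource. */
+            return pkt_len <= txq_inline_max; /* Copy into the WQE only if it fits. */
+    }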
+
+- ``txq_inline_mpw`` parameter [int]
+
+ Specifies the maximal packet length to be completely inlined into the WQE for
+ the Enhanced MPW method. If a packet is larger than the specified value, the
+ packet data is not copied, and the data buffer is addressed with a pointer. If
+ the packet length is less than or equal to this value, all packet data is copied
+ into the WQE. This may improve PCI bandwidth utilization for short packets
+ significantly but requires extra CPU cycles.
+
+ The data inline feature is controlled by the number of Tx queues: if the number
+ of Tx queues is larger than the ``txqs_min_inline`` key parameter, the inline
+ feature is engaged; if there are not enough Tx queues (which means not enough
+ CPU cores, i.e. CPU resources are scarce), data inlining is not performed by
+ the driver. Setting ``txqs_min_inline`` to zero always enables data inlining.
+
+ The default ``txq_inline_mpw`` value is 188. The specified value may be adjusted
+ by the driver in order not to exceed the limit (930 bytes) and to provide better
+ WQE space filling without gaps; the adjustment is reflected in the debug log.
+ Since multiple packets may be included in the same WQE with the Enhanced
+ Multi-Packet Write method and the overall WQE size is limited, it is not
+ recommended to specify large values for ``txq_inline_mpw``.
- ``txq_mpw_en`` parameter [int]
- A nonzero value enables multi-packet send (MPS) for ConnectX-4 Lx and
- enhanced multi-packet send (Enhanced MPS) for ConnectX-5, ConnectX-6 and BlueField.
- MPS allows the TX burst function to pack up multiple packets in a
- single descriptor session in order to save PCI bandwidth and improve
- performance at the cost of a slightly higher CPU usage. When
- ``txq_inline`` is set along with ``txq_mpw_en``, TX burst function tries
- to copy entire packet data on to TX descriptor instead of including
- pointer of packet only if there is enough room remained in the
- descriptor. ``txq_inline`` sets per-descriptor space for either pointers
- or inlined packets. In addition, Enhanced MPS supports hybrid mode -
- mixing inlined packets and pointers in the same descriptor.
-
- This option cannot be used with certain offloads such as ``DEV_TX_OFFLOAD_TCP_TSO,
- DEV_TX_OFFLOAD_VXLAN_TNL_TSO, DEV_TX_OFFLOAD_GRE_TNL_TSO, DEV_TX_OFFLOAD_VLAN_INSERT``.
- When those offloads are requested the MPS send function will not be used.
-
- It is currently only supported on the ConnectX-4 Lx, ConnectX-5, ConnectX-6 and BlueField
- families of adapters.
- On ConnectX-4 Lx the MPW is considered un-secure hence disabled by default.
- Users which enable the MPW should be aware that application which provides incorrect
- mbuf descriptors in the Tx burst can lead to serious errors in the host including, on some cases,
- NIC to get stuck.
- On ConnectX-5, ConnectX-6 and BlueField the MPW is secure and enabled by default.
-
-- ``txq_mpw_hdr_dseg_en`` parameter [int]
-
- A nonzero value enables including two pointers in the first block of TX
- descriptor. This can be used to lessen CPU load for memory copy.
-
- Effective only when Enhanced MPS is supported. Disabled by default.
-
-- ``txq_max_inline_len`` parameter [int]
-
- Maximum size of packet to be inlined. This limits the size of packet to
- be inlined. If the size of a packet is larger than configured value, the
- packet isn't inlined even though there's enough space remained in the
- descriptor. Instead, the packet is included with pointer.
-
- Effective only when Enhanced MPS is supported. The default value is 256.
-
-- ``tx_vec_en`` parameter [int]
-
- A nonzero value enables Tx vector on ConnectX-5, ConnectX-6 and BlueField NICs if the number of
- global Tx queues on the port is less than ``txqs_max_vec``.
-
- This option cannot be used with certain offloads such as ``DEV_TX_OFFLOAD_TCP_TSO,
- DEV_TX_OFFLOAD_VXLAN_TNL_TSO, DEV_TX_OFFLOAD_GRE_TNL_TSO, DEV_TX_OFFLOAD_VLAN_INSERT``.
- When those offloads are requested the MPS send function will not be used.
-
- Enabled by default on ConnectX-5, ConnectX-6 and BlueField.
+ A nonzero value enables Enhanced Multi-Packet Write (eMPW) for ConnectX-5,
+ ConnectX-6 and BlueField. eMPW allows the TX burst function to pack up multiple
+ packets in a single descriptor session in order to save PCI bandwidth and improve
+ performance at the cost of a slightly higher CPU usage. When ``txq_inline_mpw``
+ is set along with ``txq_mpw_en``, the TX burst function copies the entire packet
+ data onto the TX descriptor instead of including only a pointer to the packet.
+
+ The Enhanced Multi-Packet Write feature is enabled by default if the NIC
+ supports it and can be disabled by explicitly specifying 0 for the
+ ``txq_mpw_en`` option. Also, if minimal data inlining is requested by a
+ non-zero ``txq_inline_min`` option or reported by the NIC, the eMPW feature
+ is disengaged.
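+
+ For illustration only, a simplified sketch (hypothetical names, not the actual
+ transmit path) of when packet data may be inlined under eMPW, combining the
+ options above::
+
+    /* eMPW inlining: eMPW must be enabled and no minimal inlining required. */
+    static int
+    empw_inline_data(int txq_mpw_en, int txq_inline_min,
+                     unsigned int pkt_len, unsigned int txq_inline_mpw)
+    {
+            if (!txq_mpw_en || txq_inline_min > 0)
+                    return 0; /* eMPW is disengaged; see txq_inline_min above. */
+            return pkt_len <= txq_inline_mpw; /* Short packets are copied into the WQE. */
+    }
+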
- ``rx_vec_en`` parameter [int]
@@ -68,6 +68,15 @@
/* Device parameter to set the minimum number of Rx queues to enable MPRQ. */
#define MLX5_RXQS_MIN_MPRQ "rxqs_min_mprq"
+/* Device parameter to limit packet size to inline with ordinary SEND. */
+#define MLX5_TXQ_INLINE_MAX "txq_inline_max"
+
+/* Device parameter to configure minimal data size to inline. */
+#define MLX5_TXQ_INLINE_MIN "txq_inline_min"
+
+/* Device parameter to limit packet size to inline with Enhanced MPW. */
+#define MLX5_TXQ_INLINE_MPW "txq_inline_mpw"
+
/*
* Device parameter to configure the number of TX queues threshold for
* enabling inline send.
@@ -884,6 +893,12 @@ struct mlx5_dev_spawn_data {
config->mprq.max_memcpy_len = tmp;
} else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) {
config->mprq.min_rxqs_num = tmp;
+ } else if (strcmp(MLX5_TXQ_INLINE_MAX, key) == 0) {
+ config->txq_inline_max = tmp;
+ } else if (strcmp(MLX5_TXQ_INLINE_MIN, key) == 0) {
+ config->txq_inline_min = tmp;
+ } else if (strcmp(MLX5_TXQ_INLINE_MPW, key) == 0) {
+ config->txq_inline_mpw = tmp;
} else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
config->txqs_inline = tmp;
} else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
@@ -932,6 +947,9 @@ struct mlx5_dev_spawn_data {
MLX5_RX_MPRQ_LOG_STRIDE_NUM,
MLX5_RX_MPRQ_MAX_MEMCPY_LEN,
MLX5_RXQS_MIN_MPRQ,
+ MLX5_TXQ_INLINE_MIN,
+ MLX5_TXQ_INLINE_MAX,
+ MLX5_TXQ_INLINE_MPW,
MLX5_TXQS_MIN_INLINE,
MLX5_TXQ_MPW_EN,
MLX5_RX_VEC_EN,
@@ -1882,6 +1900,9 @@ struct mlx5_dev_spawn_data {
.hw_padding = 0,
.mps = MLX5_ARG_UNSET,
.rx_vec_en = 1,
+ .txq_inline_max = MLX5_ARG_UNSET,
+ .txq_inline_min = MLX5_ARG_UNSET,
+ .txq_inline_mpw = MLX5_ARG_UNSET,
.txqs_inline = MLX5_ARG_UNSET,
.vf_nl_en = 1,
.mr_ext_memseg_en = 1,
@@ -188,6 +188,7 @@ struct mlx5_dev_config {
unsigned int cqe_comp:1; /* CQE compression is enabled. */
unsigned int cqe_pad:1; /* CQE padding is enabled. */
unsigned int tso:1; /* Whether TSO is supported. */
+ unsigned int tx_inline:1; /* Engage TX data inlining. */
unsigned int rx_vec_en:1; /* Rx vector is enabled. */
unsigned int mr_ext_memseg_en:1;
/* Whether memseg should be extended for MR creation. */
@@ -213,6 +214,9 @@ struct mlx5_dev_config {
unsigned int ind_table_max_size; /* Maximum indirection table size. */
unsigned int max_dump_files_num; /* Maximum dump files per queue. */
int txqs_inline; /* Queue number threshold for inlining. */
+ int txq_inline_min; /* Minimal amount of data bytes to inline. */
+ int txq_inline_max; /* Max packet size for inlining with SEND. */
+ int txq_inline_mpw; /* Max packet size for inlining with eMPW. */
struct mlx5_hca_attr hca_attr; /* HCA attributes. */
};