[v1,02/16] net/mlx5: introduce send scheduling devargs

Message ID 1594374530-24659-2-git-send-email-viacheslavo@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Raslan Darawsheh
Headers
Series [v1,01/16] common/mlx5: update common part to support packet pacing |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Slava Ovsiienko July 10, 2020, 9:48 a.m. UTC
  This patch introduces the new devargs:

tx_pp - enables accurate packet send scheduling on mbuf timestamps
  in the PMD. On the device start if "rte_dynflag_timestamp"
  dynamic flag is registered and this devarg non-zero value is
  specified, the driver initializes all necessary internal
  infrastructure to provide packet scheduling. The parameter
  value specifies scheduling granularity in nanoseconds.

tx_skew - the parameter adjusts the send packet scheduling on
  timestamps and represents the average delay between beginning
  of the transmitting descriptor processing by the hardware and
  appearance of actual packet data on the wire. The value should
  be provided in nanoseconds and is valid only if tx_pp parameter
  is specified. The default value is zero.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 doc/guides/nics/mlx5.rst         | 37 ++++++++++++++++++++++++++
 drivers/net/mlx5/linux/mlx5_os.c | 57 ++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5.c          | 39 ++++++++++++++++++++++++---
 drivers/net/mlx5/mlx5.h          |  2 ++
 4 files changed, 132 insertions(+), 3 deletions(-)
  

Patch

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index b51aa67..6b06d16 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -241,6 +241,24 @@  Limitations
   reduce the requested Tx size or adjust data inline settings with
   ``txq_inline_max`` and ``txq_inline_mpw`` devargs keys.
 
+- To provide the packet send scheduling on mbuf timestamps the ``tx_pp``
+  parameter should be specified, RTE_MBUF_DYNFIELD_TIMESTAMP_NAME and
+  RTE_MBUF_DYNFLAG_TIMESTAMP_NAME should be registered by application.
+  When PMD sees the RTE_MBUF_DYNFLAG_TIMESTAMP_NAME set on the packet
+  being sent it tries to synchronize the time of packet appearing on
+  the wire with the specified packet timestamp. It the specified one
+  is in the past it should be ignored, if one is in the distant future
+  it should be capped with some reasonable value (in range of seconds).
+  These specific cases ("too late" and "distant future") can be optionally
+  reported via device xstats to assist applications to detect the
+  time-related problems.
+
+  There is no any packet reordering according timestamps is supposed,
+  neither within packet burst, nor between packets, it is an entirely
+  application responsibility to generate packets and its timestamps
+  in desired order. The timestamps can be put only in the first packet
+  in the burst providing the entire burst scheduling.
+
 - E-Switch decapsulation Flow:
 
   - can be applied to PF port only.
@@ -700,6 +718,25 @@  Driver options
   variable "MLX5_SHUT_UP_BF" value is used. If there is no "MLX5_SHUT_UP_BF",
   the default ``tx_db_nc`` value is zero for ARM64 hosts and one for others.
 
+- ``tx_pp`` parameter [int]
+
+  If a nonzero value is specified the driver creates all necessary internal
+  objects to provide accurate packet send scheduling on mbuf timestamps.
+  The positive value specifies the scheduling granularity in nanoseconds,
+  the packet send will be accurate up to specified digits. The allowed range is
+  from 500 to 1 million of nanoseconds. The negative value specifies the module
+  of granularity and engages the special test mode the check the schedule rate.
+  By default (if the ``tx_pp`` is not specified) send scheduling on timestamps
+  feature is disabled.
+
+- ``tx_skew`` parameter [int]
+
+  The parameter adjusts the send packet scheduling on timestamps and represents
+  the daverage delay between beginning of the transmitting descriptor processing
+  by the hardware and appearance of actual packet data on the wire. The value
+  should be provided in nanoseconds and is valid only if ``tx_pp`` parameter is
+  specified. The default value is zero.
+
 - ``tx_vec_en`` parameter [int]
 
   A nonzero value enables Tx vector on ConnectX-5, ConnectX-6, ConnectX-6 Dx
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 2dc57b2..daccd1c 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -879,6 +879,63 @@ 
 		}
 #endif
 	}
+	if (config.tx_pp) {
+		DRV_LOG(DEBUG, "Timestamp counter frequency %u kHz",
+			config.hca_attr.dev_freq_khz);
+		DRV_LOG(DEBUG, "Packet pacing is %ssupported",
+			config.hca_attr.qos.packet_pacing ? "" : "not ");
+		DRV_LOG(DEBUG, "Cross channel ops are %ssupported",
+			config.hca_attr.cross_channel ? "" : "not ");
+		DRV_LOG(DEBUG, "WQE index ignore is %ssupported",
+			config.hca_attr.wqe_index_ignore ? "" : "not ");
+		DRV_LOG(DEBUG, "Non-wire SQ feature is %ssupported",
+			config.hca_attr.non_wire_sq ? "" : "not ");
+		DRV_LOG(DEBUG, "Static WQE SQ feature is %ssupported (%d)",
+			config.hca_attr.log_max_static_sq_wq ? "" : "not ",
+			config.hca_attr.log_max_static_sq_wq);
+		DRV_LOG(DEBUG, "WQE rate PP mode is %ssupported",
+			config.hca_attr.qos.wqe_rate_pp ? "" : "not ");
+		if (!config.devx) {
+			DRV_LOG(ERR, "DevX is required for packet pacing");
+			err = ENODEV;
+			goto error;
+		}
+		if (!config.hca_attr.qos.packet_pacing) {
+			DRV_LOG(ERR, "Packet pacing is not supported");
+			err = ENODEV;
+			goto error;
+		}
+		if (!config.hca_attr.cross_channel) {
+			DRV_LOG(ERR, "Cross channel operations are"
+				     " required for packet pacing");
+			err = ENODEV;
+			goto error;
+		}
+		if (!config.hca_attr.wqe_index_ignore) {
+			DRV_LOG(ERR, "WQE index ignore feature is"
+				     " required for packet pacing");
+			err = ENODEV;
+			goto error;
+		}
+		if (!config.hca_attr.non_wire_sq) {
+			DRV_LOG(ERR, "Non-wire SQ feature is"
+				     " required for packet pacing");
+			err = ENODEV;
+			goto error;
+		}
+		if (!config.hca_attr.log_max_static_sq_wq) {
+			DRV_LOG(ERR, "Static WQE SQ feature is"
+				     " required for packet pacing");
+			err = ENODEV;
+			goto error;
+		}
+		if (!config.hca_attr.qos.wqe_rate_pp) {
+			DRV_LOG(ERR, "WQE rate mode is required"
+				     " for packet pacing");
+			err = ENODEV;
+			goto error;
+		}
+	}
 	if (config.mprq.enabled && mprq) {
 		if (config.mprq.stride_num_n &&
 		    (config.mprq.stride_num_n > mprq_max_stride_num_n ||
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 86b7671..13242a5 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -120,6 +120,19 @@ 
 #define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len"
 
 /*
+ * Device parameter to enable Tx scheduling on timestamps
+ * andspecify the packet pacing granularity in nanoseconds.
+ */
+#define MLX5_TX_PP "tx_pp"
+
+/*
+ * Device parameter to specify skew in nanoseconds on Tx datapath,
+ * it represents the time between SQ start WQE processing and
+ * appearing actual packet data on the wire.
+ */
+#define MLX5_TX_SKEW "tx_skew"
+
+/*
  * Device parameter to enable hardware Tx vector.
  * Deprecated, ignored (no vectorized Tx routines anymore).
  */
@@ -1271,18 +1284,26 @@  struct mlx5_dev_ctx_shared *
 mlx5_args_check(const char *key, const char *val, void *opaque)
 {
 	struct mlx5_dev_config *config = opaque;
-	unsigned long tmp;
+	unsigned long mod;
+	signed long tmp;
 
 	/* No-op, port representors are processed in mlx5_dev_spawn(). */
 	if (!strcmp(MLX5_REPRESENTOR, key))
 		return 0;
 	errno = 0;
-	tmp = strtoul(val, NULL, 0);
+	tmp = strtol(val, NULL, 0);
 	if (errno) {
 		rte_errno = errno;
 		DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val);
 		return -rte_errno;
 	}
+	if (tmp < 0 && strcmp(MLX5_TX_PP, key) && strcmp(MLX5_TX_SKEW, key)) {
+		/* Negative values are acceptable for some keys only. */
+		rte_errno = EINVAL;
+		DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val);
+		return -rte_errno;
+	}
+	mod = tmp >= 0 ? tmp : -tmp;
 	if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
 		config->cqe_comp = !!tmp;
 	} else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) {
@@ -1333,6 +1354,15 @@  struct mlx5_dev_ctx_shared *
 		config->txq_inline_mpw = tmp;
 	} else if (strcmp(MLX5_TX_VEC_EN, key) == 0) {
 		DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
+	} else if (strcmp(MLX5_TX_PP, key) == 0) {
+		if (!mod) {
+			DRV_LOG(ERR, "Zero Tx packet pacing parameter");
+			rte_errno = EINVAL;
+			return -rte_errno;
+		}
+		config->tx_pp = tmp;
+	} else if (strcmp(MLX5_TX_SKEW, key) == 0) {
+		config->tx_skew = tmp;
 	} else if (strcmp(MLX5_RX_VEC_EN, key) == 0) {
 		config->rx_vec_en = !!tmp;
 	} else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) {
@@ -1415,6 +1445,8 @@  struct mlx5_dev_ctx_shared *
 		MLX5_TXQ_MPW_HDR_DSEG_EN,
 		MLX5_TXQ_MAX_INLINE_LEN,
 		MLX5_TX_DB_NC,
+		MLX5_TX_PP,
+		MLX5_TX_SKEW,
 		MLX5_TX_VEC_EN,
 		MLX5_RX_VEC_EN,
 		MLX5_L3_VXLAN_EN,
@@ -1693,7 +1725,8 @@  struct mlx5_dev_ctx_shared *
 {
 	static const char *const dynf_names[] = {
 		RTE_PMD_MLX5_FINE_GRANULARITY_INLINE,
-		RTE_MBUF_DYNFLAG_METADATA_NAME
+		RTE_MBUF_DYNFLAG_METADATA_NAME,
+		RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME
 	};
 	unsigned int i;
 
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 46e66eb..84cd3e1 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -240,6 +240,8 @@  struct mlx5_dev_config {
 	int txq_inline_min; /* Minimal amount of data bytes to inline. */
 	int txq_inline_max; /* Max packet size for inlining with SEND. */
 	int txq_inline_mpw; /* Max packet size for inlining with eMPW. */
+	int tx_pp; /* Timestamp scheduling granularity in nanoseconds. */
+	int tx_skew; /* Tx scheduling skew between WQE and data on wire. */
 	struct mlx5_hca_attr hca_attr; /* HCA attributes. */
 	struct mlx5_lro_config lro; /* LRO configuration. */
 };