[v3,11/12] mldev: support to retrieve error information

Message ID 20230207151316.835441-12-jerinj@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Headers
Series mldev: introduce machine learning device library |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Jerin Jacob Kollanukkaran Feb. 7, 2023, 3:13 p.m. UTC
From: Srikanth Yalavarthi <syalavarthi@marvell.com>

Added a function to get error information for an ML op.
This information can include both a driver-specific error
message and an error code.

Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
Signed-off-by: Jerin Jacob <jerinj@marvell.com>
---
 lib/mldev/rte_mldev.c      | 31 +++++++++++++++++++++++++++++++
 lib/mldev/rte_mldev_core.h | 22 ++++++++++++++++++++++
 lib/mldev/version.map      |  1 +
 3 files changed, 54 insertions(+)
  

Patch

diff --git a/lib/mldev/rte_mldev.c b/lib/mldev/rte_mldev.c
index f6c5282f39..9258f44466 100644
--- a/lib/mldev/rte_mldev.c
+++ b/lib/mldev/rte_mldev.c
@@ -874,4 +874,35 @@  rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uin
 	return (*dev->dequeue_burst)(dev, qp_id, ops, nb_ops);
 }
 
+int
+rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error)
+{
+	struct rte_ml_dev *dev;
+
+#ifdef RTE_LIBRTE_ML_DEV_DEBUG /* debug builds only: validate dev_id, callback, op and error */
+	if (!rte_ml_dev_is_valid_dev(dev_id)) {
+		RTE_MLDEV_LOG(ERR, "Invalid dev_id = %d\n", dev_id);
+		return -EINVAL;
+	}
+
+	dev = rte_ml_dev_pmd_get_dev(dev_id);
+	if (*dev->op_error_get == NULL) /* callback pointer is not set for this device */
+		return -ENOTSUP;
+
+	if (op == NULL) {
+		RTE_MLDEV_LOG(ERR, "Dev %d, op cannot be NULL\n", dev_id);
+		return -EINVAL;
+	}
+
+	if (error == NULL) {
+		RTE_MLDEV_LOG(ERR, "Dev %d, error cannot be NULL\n", dev_id);
+		return -EINVAL;
+	}
+#else /* non-debug build: no validation, arguments are used exactly as passed */
+	dev = rte_ml_dev_pmd_get_dev(dev_id);
+#endif /* RTE_LIBRTE_ML_DEV_DEBUG */
+
+	return (*dev->op_error_get)(dev, op, error); /* callback's return value is passed through */
+}
+
 RTE_LOG_REGISTER_DEFAULT(rte_ml_dev_logtype, INFO);
diff --git a/lib/mldev/rte_mldev_core.h b/lib/mldev/rte_mldev_core.h
index bc94420000..14c33175d2 100644
--- a/lib/mldev/rte_mldev_core.h
+++ b/lib/mldev/rte_mldev_core.h
@@ -73,6 +73,25 @@  typedef uint16_t (*mldev_enqueue_t)(struct rte_ml_dev *dev, uint16_t qp_id, stru
 typedef uint16_t (*mldev_dequeue_t)(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,
 				    uint16_t nb_ops);
 
+/**
+ * @internal
+ *
+ * Callback type to get error information for an Op; invoked by rte_ml_op_error_get().
+ *
+ * @param dev
+ *	ML device pointer.
+ * @param op
+ *	ML Op handle.
+ * @param error
+ *	Pointer to the error structure, as passed to rte_ml_op_error_get().
+ *
+ * @return
+ *	- 0 on success.
+ *	- < 0, error code on failure.
+ */
+typedef int (*mldev_op_error_get_t)(struct rte_ml_dev *dev, struct rte_ml_op *op,
+				    struct rte_ml_op_error *error);
+
 /**
  * Definitions of all functions exported by a driver through the generic structure of type
  * *ml_dev_ops* supplied in the *rte_ml_dev* structure associated with a device.
@@ -619,6 +638,9 @@  struct rte_ml_dev {
 	/** Pointer to PMD dequeue function. */
 	mldev_dequeue_t dequeue_burst;
 
+	/** Pointer to PMD Op error get function. */
+	mldev_op_error_get_t op_error_get;
+
 	/** Pointer to device data. */
 	struct rte_ml_dev_data *data;
 
diff --git a/lib/mldev/version.map b/lib/mldev/version.map
index 1e7c1ab2b2..ea91912f5f 100644
--- a/lib/mldev/version.map
+++ b/lib/mldev/version.map
@@ -30,6 +30,7 @@  EXPERIMENTAL {
 	rte_ml_model_start;
 	rte_ml_model_stop;
 	rte_ml_model_unload;
+	rte_ml_op_error_get;
 	rte_ml_op_pool_create;
 	rte_ml_op_pool_free;