[RFC,1/1] mldev: introduce machine learning device library

Message ID 20220803132839.2747858-2-jerinj@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series: mldev: introduce machine learning device library

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS

Commit Message

Jerin Jacob Kollanukkaran Aug. 3, 2022, 1:28 p.m. UTC
  From: Jerin Jacob <jerinj@marvell.com>

Add mldev API specification to standardize and use the machine learning
device and inference operations in a vendor-neutral way.

The following operations are abstracted through APIs:

- ML device capability probe
- ML device configuration
- ML device queue pair configuration
- ML device state management
- ML device stat/xstat operations
- ML model load/unload/start/stop operations
- ML model information probe
- ML IO operations to find size for input and output buffers
- ML quantize and dequantize operations
- ML ops pool creation and free operations
- ML device enqueue/dequeue fastpath inference operations

Signed-off-by: Jerin Jacob <jerinj@marvell.com>
Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
 config/rte_config.h             |    3 +
 doc/api/doxy-api-index.md       |    1 +
 doc/api/doxy-api.conf.in        |    1 +
 doc/guides/prog_guide/index.rst |    1 +
 doc/guides/prog_guide/mldev.rst |  164 +++++
 lib/eal/common/eal_common_log.c |    1 +
 lib/eal/include/rte_log.h       |    1 +
 lib/meson.build                 |    1 +
 lib/mldev/meson.build           |   12 +
 lib/mldev/rte_mldev.c           |    5 +
 lib/mldev/rte_mldev.h           | 1081 +++++++++++++++++++++++++++++++
 lib/mldev/version.map           |    5 +
 12 files changed, 1276 insertions(+)
 create mode 100644 doc/guides/prog_guide/mldev.rst
 create mode 100644 lib/mldev/meson.build
 create mode 100644 lib/mldev/rte_mldev.c
 create mode 100644 lib/mldev/rte_mldev.h
 create mode 100644 lib/mldev/version.map
  

Patch

diff --git a/config/rte_config.h b/config/rte_config.h
index 46549cb062..2adbef3f51 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -81,6 +81,9 @@ 
 /* rawdev defines */
 #define RTE_RAWDEV_MAX_DEVS 64
 
+/* mldev defines */
+#define RTE_MLDEV_MAX_DEVS 64
+
 /* ip_fragmentation defines */
 #define RTE_LIBRTE_IP_FRAG_MAX_FRAG 8
 // RTE_LIBRTE_IP_FRAG_TBL_STAT is not set
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index 186a258be4..d55cca5b97 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -22,6 +22,7 @@  The public API headers are grouped by topics:
   [compress](@ref rte_comp.h),
   [regexdev](@ref rte_regexdev.h),
   [dmadev](@ref rte_dmadev.h),
+  [mldev](@ref rte_mldev.h),
   [eventdev](@ref rte_eventdev.h),
   [event_eth_rx_adapter](@ref rte_event_eth_rx_adapter.h),
   [event_eth_tx_adapter](@ref rte_event_eth_tx_adapter.h),
diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in
index 608494a7c0..82b28e8b18 100644
--- a/doc/api/doxy-api.conf.in
+++ b/doc/api/doxy-api.conf.in
@@ -59,6 +59,7 @@  INPUT                   = @TOPDIR@/doc/api/doxy-api-index.md \
                           @TOPDIR@/lib/mempool \
                           @TOPDIR@/lib/meter \
                           @TOPDIR@/lib/metrics \
+                          @TOPDIR@/lib/mldev \
                           @TOPDIR@/lib/node \
                           @TOPDIR@/lib/net \
                           @TOPDIR@/lib/pcapng \
diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst
index 8564883018..d7f2a28bdb 100644
--- a/doc/guides/prog_guide/index.rst
+++ b/doc/guides/prog_guide/index.rst
@@ -30,6 +30,7 @@  Programmer's Guide
     regexdev
     dmadev
     gpudev
+    mldev
     rte_security
     rawdev
     link_bonding_poll_mode_drv_lib
diff --git a/doc/guides/prog_guide/mldev.rst b/doc/guides/prog_guide/mldev.rst
new file mode 100644
index 0000000000..2ce8e2f7fe
--- /dev/null
+++ b/doc/guides/prog_guide/mldev.rst
@@ -0,0 +1,164 @@ 
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(C) 2022 Marvell International Ltd.
+
+Machine Learning Device Library
+===============================
+
+The MLDEV library provides a Machine Learning device framework for the management and
+provisioning of hardware and software ML poll mode drivers, defining APIs which
+support a number of ML operations including device handling and inference processing.
+ML model creation and training are outside the scope of this library.
+
+Design Principles
+-----------------
+
+The MLDEV library follows the same basic principles as those used in DPDK's
+Ethernet Device framework and the Crypto framework. The MLDEV framework provides
+a generic Machine Learning device framework which supports both physical (hardware)
+and virtual (software) ML devices, as well as an ML API to manage and configure ML
+devices. The API also supports performing ML inference operations through ML poll
+mode drivers.
+
+
+Device Operations
+-----------------
+
+Device Creation
+~~~~~~~~~~~~~~~
+
+Physical ML devices are discovered during the PCI probe/enumeration, through the
+EAL functions which are executed at DPDK initialization, based on their PCI device
+identifier, each a unique PCI BDF (bus, device, function). Physical ML devices,
+like other physical devices in DPDK, can be allowed or blocked
+using the EAL command line options.
+
+
+Device Identification
+~~~~~~~~~~~~~~~~~~~~~
+
+Each device, whether virtual or physical, is uniquely designated by two
+identifiers:
+
+- A unique device index used to designate the ML device in all functions
+  exported by the MLDEV API.
+
+- A device name used to designate the ML device in console messages, for
+  administration or debugging purposes.
+
+Device Features and Capabilities
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ML devices may support different feature sets. The ``rte_ml_dev_info_get`` API
+can be used to query a device; it returns the information of the device and
+its supported features.
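+
+As a minimal sketch (``dev_id`` is assumed to be a valid device identifier):
+
+.. code-block:: c
+
+   struct rte_ml_dev_info dev_info;
+
+   if (rte_ml_dev_info_get(dev_id, &dev_info) != 0)
+       rte_exit(EXIT_FAILURE, "Failed to get ML device info\n");
+
+   printf("max models: %d, max queue pairs: %u\n",
+          dev_info.max_models, dev_info.max_queue_pairs);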
+
+Device Configuration
+~~~~~~~~~~~~~~~~~~~~
+
+The configuration of each ML device includes the following operations:
+
+- Allocation of resources, including hardware resources if a physical device.
+- Resetting the device into a well-known default state.
+- Initialization of statistics counters.
+
+The ``rte_ml_dev_configure`` API is used to configure an ML device.
+
+.. code-block:: c
+
+   int rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *cfg);
+
+The ``rte_ml_dev_config`` structure is used to pass the configuration parameters
+for the ML device, for example the number of queue pairs, maximum number of models,
+maximum size of model and so on.
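+
+A possible configuration sketch, assuming a single model and a single queue pair:
+
+.. code-block:: c
+
+   struct rte_ml_dev_config ml_config = {
+       .socket_id = rte_ml_dev_socket_id(dev_id),
+       .max_nb_models = 1,
+       .nb_queue_pairs = 1,
+   };
+
+   if (rte_ml_dev_configure(dev_id, &ml_config) < 0)
+       rte_exit(EXIT_FAILURE, "ML device configuration failed\n");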
+
+Configuration of Queue Pairs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Each ML device can be configured with multiple queue pairs.
+Each queue pair is configured using ``rte_ml_dev_queue_pair_setup``, as shown in
+the sketch below.
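+
+A sketch setting up a single queue pair; the descriptor count is illustrative and
+must not exceed ``rte_ml_dev_info::max_desc``:
+
+.. code-block:: c
+
+   struct rte_ml_dev_qp_conf qp_conf = {
+       .nb_desc = 256,
+       .cb = NULL, /* no flush callback at device stop */
+   };
+
+   if (rte_ml_dev_queue_pair_setup(dev_id, 0, &qp_conf, SOCKET_ID_ANY) < 0)
+       rte_exit(EXIT_FAILURE, "Queue pair setup failed\n");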
+
+Logical Cores, Memory and Queues Pair Relationships
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Multiple logical cores should never share the same queue pair for enqueuing or
+dequeuing operations on the same ML device, since this would require global locks
+and hinder performance.
+
+Configuration of Machine Learning models
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Pre-trained ML models that are built using external ML compiler / training frameworks
+are used to perform inference operations. These models are configured on an ML device
+in a two-stage process that includes loading the model on an ML device, and starting
+the model to accept inference operations. Inference operations can be queued for a
+model only when the model is in the started state. The model load stage assigns a
+model ID, which is unique for the model in a driver's context. The model ID is used
+during all subsequent slow-path and fast-path operations.
+
+Model loading and starting are done through the ``rte_ml_model_load`` and
+``rte_ml_model_start`` functions.
+
+Similarly, stopping and unloading are done through the ``rte_ml_model_stop`` and
+``rte_ml_model_unload`` functions.
+
+The stop and unload functions release the resources allocated for the
+model. Inference tasks cannot be queued for a model that is stopped.
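+
+A load and start sketch, assuming the model binary has already been read into
+``model_buffer`` of ``model_size`` bytes (the buffer format is PMD specific):
+
+.. code-block:: c
+
+   struct rte_ml_model_params params = {
+       .addr = model_buffer,
+       .size = model_size,
+   };
+   int16_t model_id;
+
+   if (rte_ml_model_load(dev_id, &params, &model_id) != 0)
+       rte_exit(EXIT_FAILURE, "Model load failed\n");
+
+   if (rte_ml_model_start(dev_id, model_id) != 0)
+       rte_exit(EXIT_FAILURE, "Model start failed\n");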
+
+Detailed information related to the model can be retrieved from the driver using the
+function ``rte_ml_model_info_get``. Model information is accessible to the application
+through the ``rte_ml_model_info`` structure. Information available to the user
+includes the details related to the inputs and outputs, and the maximum batch size
+supported by the model.
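+
+For instance, an application can inspect the model I/O requirements as follows:
+
+.. code-block:: c
+
+   struct rte_ml_model_info info;
+   uint32_t i;
+
+   if (rte_ml_model_info_get(dev_id, model_id, &info) != 0)
+       rte_exit(EXIT_FAILURE, "Failed to get model info\n");
+
+   for (i = 0; i < info.nb_inputs; i++)
+       printf("input %s: qtype %d, dtype %d\n", info.input_info[i].name,
+              info.input_info[i].qtype, info.input_info[i].dtype);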
+
+The user can optionally update the model parameters, such as weights and bias, without
+unloading the model, through the ``rte_ml_model_params_update`` function. A model must
+be in the stopped state to update the parameters. The model has to be started again in
+order to enqueue inference requests after a parameter update, as sketched below.
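+
+The stop / update / restart sequence is sketched below, where ``wb_buffer`` is assumed
+to hold the new weights and bias of ``rte_ml_model_info::wb_size`` bytes:
+
+.. code-block:: c
+
+   rte_ml_model_stop(dev_id, model_id);
+   rte_ml_model_params_update(dev_id, model_id, wb_buffer);
+   rte_ml_model_start(dev_id, model_id);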
+
+Enqueue / Dequeue
+~~~~~~~~~~~~~~~~~
+
+The burst enqueue API uses an ML device identifier and a queue pair identifier
+to specify the device queue pair to schedule the processing on. The ``nb_ops``
+parameter is the number of operations to process which are supplied in the
+``ops`` array of ``rte_ml_op`` structures. The enqueue function returns the
+number of operations it enqueued for processing; a return value equal to
+``nb_ops`` means that all operations have been enqueued.
+
+The dequeue API uses the same format as the enqueue API, but
+the ``nb_ops`` and ``ops`` parameters are now used to specify the maximum number of
+processed operations the user wishes to retrieve and the location in which to store them.
+The API call returns the actual number of processed operations returned; this
+can never be larger than ``nb_ops``.
+
+``rte_ml_op`` provides the required information to the driver to queue an ML inference
+task. The ML op specifies the model to be used and the number of batches to be executed
+in the inference task. Input and output buffer information is specified through the
+structure ``rte_ml_buff_seg``, which supports segmented data. Input is provided through
+``rte_ml_op::input`` and output through ``rte_ml_op::output``. Data pointed to by each
+op must not be released until that op has been dequeued. A complete enqueue/dequeue
+sequence is sketched below.
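+
+The sketch enqueues a single inference and polls for its completion. The quantized
+input and output buffers (``qbuffer``, ``obuffer``) and their sizes are assumed to be
+prepared as described in the next section; a real application would also set
+``iova_addr`` when required by the PMD:
+
+.. code-block:: c
+
+   struct rte_mempool *op_pool;
+   struct rte_ml_op *op;
+
+   op_pool = rte_ml_op_pool_create("ml_op_pool", 128, 16, 0, SOCKET_ID_ANY);
+   if (op_pool == NULL || rte_mempool_get(op_pool, (void **)&op) != 0)
+       rte_exit(EXIT_FAILURE, "Failed to allocate an ML op\n");
+
+   op->model_id = model_id;
+   op->nb_batches = 1;
+   op->mempool = op_pool;
+   op->input.addr = qbuffer;   /* quantized input data */
+   op->input.length = input_qsize;
+   op->input.next = NULL;
+   op->output.addr = obuffer;  /* buffer for quantized output */
+   op->output.length = output_qsize;
+   op->output.next = NULL;
+
+   while (rte_ml_enqueue_burst(dev_id, 0, &op, 1) == 0)
+       ; /* retry while the queue pair is full */
+
+   while (rte_ml_dequeue_burst(dev_id, 0, &op, 1) == 0)
+       ; /* poll until the op is completed */
+
+   if (op->status != RTE_ML_OP_STATUS_SUCCESS)
+       printf("Inference failed\n");
+
+   rte_mempool_put(op_pool, op);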
+
+
+Quantize and Dequantize
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Inference operations performed with lower precision types can improve the throughput
+and efficiency of the inference execution, with a minimal loss of accuracy that is within
+the tolerance limits. Quantization and dequantization is the process of converting data
+from a higher precision type to a lower precision type and vice-versa. The ML library
+provides the functions ``rte_ml_io_quantize`` and ``rte_ml_io_dequantize`` to enable data
+type conversions. The user needs to provide the addresses of the quantized and dequantized
+data buffers to the functions, along with the number of batches in the buffers.
+
+For quantization, the dequantized data is assumed to be of the type ``dtype`` provided by
+``rte_ml_model_info::input`` and the data is converted to the ``qtype`` provided by
+``rte_ml_model_info::input``.
+
+For dequantization, the quantized data is assumed to be of the type ``qtype`` provided by
+``rte_ml_model_info::output`` and the data is converted to the ``dtype`` provided by
+``rte_ml_model_info::output``.
+
+The sizes of the buffers required for the input and output can be calculated using the
+functions ``rte_ml_io_input_size_get`` and ``rte_ml_io_output_size_get``. These functions
+return the buffer sizes for both quantized and dequantized data for the given number of
+batches, as sketched below.
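+
+A quantization sketch for a single batch, where ``fbuffer`` is assumed to hold the
+application's dequantized (higher precision) input data:
+
+.. code-block:: c
+
+   uint64_t input_qsize, input_dsize;
+   void *qbuffer;
+
+   rte_ml_io_input_size_get(dev_id, model_id, 1, &input_qsize, &input_dsize);
+
+   qbuffer = rte_zmalloc(NULL, input_qsize, 0);
+   if (qbuffer == NULL)
+       rte_exit(EXIT_FAILURE, "Failed to allocate the quantized buffer\n");
+
+   /* fbuffer must hold input_dsize bytes of dtype data */
+   rte_ml_io_quantize(dev_id, model_id, 1, fbuffer, qbuffer);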
+
diff --git a/lib/eal/common/eal_common_log.c b/lib/eal/common/eal_common_log.c
index bd7b188ceb..5cb1b15dbe 100644
--- a/lib/eal/common/eal_common_log.c
+++ b/lib/eal/common/eal_common_log.c
@@ -369,6 +369,7 @@  static const struct logtype logtype_strings[] = {
 	{RTE_LOGTYPE_EFD,        "lib.efd"},
 	{RTE_LOGTYPE_EVENTDEV,   "lib.eventdev"},
 	{RTE_LOGTYPE_GSO,        "lib.gso"},
+	{RTE_LOGTYPE_MLDEV,      "lib.mldev"},
 	{RTE_LOGTYPE_USER1,      "user1"},
 	{RTE_LOGTYPE_USER2,      "user2"},
 	{RTE_LOGTYPE_USER3,      "user3"},
diff --git a/lib/eal/include/rte_log.h b/lib/eal/include/rte_log.h
index 25ce42cdfc..226be9c778 100644
--- a/lib/eal/include/rte_log.h
+++ b/lib/eal/include/rte_log.h
@@ -48,6 +48,7 @@  extern "C" {
 #define RTE_LOGTYPE_EFD       18 /**< Log related to EFD. */
 #define RTE_LOGTYPE_EVENTDEV  19 /**< Log related to eventdev. */
 #define RTE_LOGTYPE_GSO       20 /**< Log related to GSO. */
+#define RTE_LOGTYPE_MLDEV     21 /**< Log related to mldev. */
 
 /* these log types can be used in an application */
 #define RTE_LOGTYPE_USER1     24 /**< User-defined log type 1. */
diff --git a/lib/meson.build b/lib/meson.build
index c648f7d800..32c45f55ce 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -63,6 +63,7 @@  libraries = [
         'flow_classify', # flow_classify lib depends on pkt framework table lib
         'graph',
         'node',
+        'mldev',
 ]
 
 optional_libs = [
diff --git a/lib/mldev/meson.build b/lib/mldev/meson.build
new file mode 100644
index 0000000000..e1e0ffe975
--- /dev/null
+++ b/lib/mldev/meson.build
@@ -0,0 +1,12 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 Marvell.
+
+sources = files(
+        'rte_mldev.c',
+)
+
+headers = files(
+        'rte_mldev.h',
+)
+
+deps += ['mempool']
diff --git a/lib/mldev/rte_mldev.c b/lib/mldev/rte_mldev.c
new file mode 100644
index 0000000000..c6644e6c12
--- /dev/null
+++ b/lib/mldev/rte_mldev.c
@@ -0,0 +1,5 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Marvell.
+ */
+
+#include <rte_mldev.h>
diff --git a/lib/mldev/rte_mldev.h b/lib/mldev/rte_mldev.h
new file mode 100644
index 0000000000..f55cc8ffb3
--- /dev/null
+++ b/lib/mldev/rte_mldev.h
@@ -0,0 +1,1081 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Marvell.
+ */
+
+#ifndef RTE_MLDEV_H
+#define RTE_MLDEV_H
+
+/**
+ * @file rte_mldev.h
+ *
+ * @warning
+ * @b EXPERIMENTAL:
+ * All functions in this file may be changed or removed without prior notice.
+ *
+ * ML (Machine Learning) device API.
+ *
+ * The ML framework is built on the following model:
+ *
+ *
+ *     +-----------------+               rte_ml_[en|de]queue_burst()
+ *     |                 |                          |
+ *     |     Machine     o------+     +--------+    |
+ *     |     Learning    |      |     | queue  |    |    +------+
+ *     |     Inference   o------+-----o        |<===o===>|Core 0|
+ *     |     Engine      |      |     | pair 0 |         +------+
+ *     |                 o----+ |     +--------+
+ *     |                 |    | |
+ *     +-----------------+    | |     +--------+
+ *              ^             | |     | queue  |         +------+
+ *              |             | +-----o        |<=======>|Core 1|
+ *              |             |       | pair 1 |         +------+
+ *              |             |       +--------+
+ *     +--------+--------+    |
+ *     | +-------------+ |    |       +--------+
+ *     | |   Model 0   | |    |       | queue  |         +------+
+ *     | +-------------+ |    +-------o        |<=======>|Core N|
+ *     | +-------------+ |            | pair N |         +------+
+ *     | |   Model 1   | |            +--------+
+ *     | +-------------+ |
+ *     | +-------------+ |<------- rte_ml_model_load()
+ * | |   Model ..  | |-------> rte_ml_model_info_get()
+ *     | +-------------+ |<------- rte_ml_model_start()
+ *     | +-------------+ |<------- rte_ml_model_stop()
+ *     | |   Model N   | |<------- rte_ml_model_params_update()
+ *     | +-------------+ |<------- rte_ml_model_unload()
+ *     +-----------------+
+ *
+ * ML Device: A hardware or software-based implementation of ML device API for
+ * running inferences using a pre-trained ML model.
+ *
+ * ML Model: An ML model is an algorithm trained over a dataset. A model consists of
+ * the procedure/algorithm and the data/pattern required to make predictions on live data.
+ * Once the model is created and trained outside of the DPDK scope, it can be loaded
+ * via rte_ml_model_load() and started using the rte_ml_model_start() API.
+ * rte_ml_model_params_update() can be used to update model parameters such as weights
+ * and bias without unloading the model via rte_ml_model_unload().
+ *
+ * ML Inference: ML inference is the process of feeding data to a started model via the
+ * rte_ml_enqueue_burst() API and retrieving the calculated outputs/predictions via the
+ * rte_ml_dequeue_burst() API.
+ *
+ * In all functions of the ML device API, the ML device is designated by an
+ * integer >= 0, the device identifier *dev_id*.
+ *
+ * The functions exported by the ML device API to setup a device designated by
+ * its device identifier must be invoked in the following order:
+ *
+ *      - rte_ml_dev_configure()
+ *      - rte_ml_dev_queue_pair_setup()
+ *      - rte_ml_dev_start()
+ *
+ * A model is required to run inference operations with user specified inputs.
+ * An application needs to invoke the ML model API in the following order before queueing
+ * inference jobs:
+ *
+ *      - rte_ml_model_load()
+ *      - rte_ml_model_start()
+ *
+ * The rte_ml_model_info_get() API is provided to retrieve information related to the model.
+ * The information includes the shape and type of the inputs and outputs required for inference.
+ *
+ * Data quantization and dequantization are among the main aspects of the ML domain. They involve
+ * conversion of input data from a higher precision to a lower precision data type and vice-versa
+ * for the output. APIs are provided to enable quantization through rte_ml_io_quantize() and
+ * dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input
+ * and output buffers holding data for multiple batches.
+ *
+ * Two utility APIs, rte_ml_io_input_size_get() and rte_ml_io_output_size_get(), can be used to
+ * get the sizes of quantized and dequantized multi-batch input and output buffers.
+ *
+ * The user can optionally update the model parameters with rte_ml_model_params_update() after
+ * invoking the rte_ml_model_stop() API on a given model ID.
+ *
+ * The application can invoke, in any order, the functions exported by the ML API to enqueue
+ * inference jobs and dequeue inference responses.
+ *
+ * If the application wants to change the device configuration (i.e., call
+ * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then the application must stop the
+ * device using rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then
+ * the application must call rte_ml_model_stop() followed by rte_ml_model_params_update() API
+ * for the given model. The application does not need to call rte_ml_dev_stop() API for
+ * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc.
+ *
+ * Once the device is in the started state after invoking the rte_ml_dev_start() API and the model
+ * is in the started state after invoking the rte_ml_model_start() API, the application can call
+ * rte_ml_enqueue_burst() and rte_ml_dequeue_burst() APIs on the destined device and model ID.
+ *
+ * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
+ *
+ * Typical application utilisation of the ML API will follow the programming flow below.
+ *
+ * - rte_ml_dev_configure()
+ * - rte_ml_dev_queue_pair_setup()
+ * - rte_ml_model_load()
+ * - rte_ml_model_start()
+ * - rte_ml_model_info_get()
+ * - rte_ml_dev_start()
+ * - rte_ml_enqueue_burst()
+ * - rte_ml_dequeue_burst()
+ * - rte_ml_model_stop()
+ * - rte_ml_model_unload()
+ * - rte_ml_dev_stop()
+ * - rte_ml_dev_close()
+ *
+ * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD
+ * are lock-free functions which are assumed not to be invoked in parallel on different logical
+ * cores on the same target object. For instance, the dequeue function of a poll mode driver
+ * cannot be invoked in parallel on two logical cores to operate on the same queue pair. Of
+ * course, this function can be invoked in parallel by different logical cores on different queue
+ * pairs. It is the responsibility of the user application to enforce this rule.
+ */
+
+#include <rte_common.h>
+#include <rte_mempool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_ML_STR_MAX 128
+/**< Maximum length of name string */
+
+/* Device operations */
+
+/**
+ * Get the total number of ML devices that have been successfully initialised.
+ *
+ * @return
+ *   - The total number of usable ML devices.
+ */
+__rte_experimental
+uint16_t
+rte_ml_dev_count(void);
+
+/**
+ * Check if the device is in ready state.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ *
+ * @return
+ *   - 0 if device is not in ready state.
+ *   - 1 if device is in ready state.
+ */
+__rte_experimental
+int
+rte_ml_dev_is_valid_dev(int16_t dev_id);
+
+/**
+ * Return the NUMA socket to which a device is connected.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ *
+ * @return
+ *   - The NUMA socket id to which the device is connected
+ *   - 0 if the socket could not be determined.
+ *   - -EINVAL: if the dev_id value is not valid.
+ */
+__rte_experimental
+int
+rte_ml_dev_socket_id(int16_t dev_id);
+
+/** ML device information */
+struct rte_ml_dev_info {
+	const char *driver_name;
+	/**< Driver name */
+	int16_t max_models;
+	/**< Maximum number of models supported by the device.
+	 * @see struct rte_ml_dev_config::max_nb_models
+	 */
+	uint16_t max_queue_pairs;
+	/**< Maximum number of queue pairs supported by the device.
+	 * @see struct rte_ml_dev_config::nb_queue_pairs
+	 */
+	uint16_t max_desc;
+	/**< Maximum number of descriptors allowed per queue pair by the device.
+	 * @see struct rte_ml_dev_qp_conf::nb_desc
+	 */
+	uint16_t max_segments;
+	/**< Maximum number of scatter-gather entries supported by the device.
+	 * @see struct rte_ml_buff_seg, struct rte_ml_buff_seg::next
+	 */
+};
+
+/**
+ * Retrieve the information of the device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param dev_info
+ *   A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device.
+ *
+ * @return
+ *   - 0: Success, driver updates the information of the ML device
+ *   - < 0: Error code returned by the driver info get function.
+ */
+__rte_experimental
+int
+rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info);
+
+/** ML device configuration structure */
+struct rte_ml_dev_config {
+	int socket_id;
+	/**< Socket to allocate resources on. */
+	int16_t max_nb_models;
+	/**< Maximum number of models allowed to be loaded on the device.
+	 * This value cannot exceed the max_models previously provided in
+	 * struct rte_ml_dev_info::max_models
+	 */
+	uint16_t nb_queue_pairs;
+	/**< Number of queue pairs to configure on this device.
+	 * This value cannot exceed the max_queue_pairs previously provided in
+	 * struct rte_ml_dev_info::max_queue_pairs
+	 */
+};
+
+/**
+ * Configure an ML device.
+ *
+ * This function must be invoked first before any other function in the API.
+ * This function can also be re-invoked when a device is in the stopped state.
+ *
+ * The caller may use rte_ml_dev_info_get() to get the capabilities and resources available
+ * for this ML device.
+ *
+ * @param dev_id
+ *   The identifier of the device to configure.
+ * @param config
+ *   The ML device configuration structure.
+ *
+ * @return
+ *   - 0: Success, device configured.
+ *   - < 0: Error code returned by the driver configuration function.
+ */
+__rte_experimental
+int
+rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config);
+
+/* Forward declaration */
+struct rte_ml_op;
+
+/** Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op. */
+typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op);
+
+/** ML device queue pair configuration structure. */
+struct rte_ml_dev_qp_conf {
+	uint32_t nb_desc;
+	/**< Number of descriptors per queue pair.
+	 * This value cannot exceed the max_desc previously provided in
+	 * struct rte_ml_dev_info::max_desc
+	 */
+	rte_ml_dev_stop_flush_t cb;
+	/**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op.
+	 * Value NULL is allowed, in which case callback will not be invoked.
+	 * This function can be used to properly dispose of outstanding ML ops from all
+	 * queue pairs, for example ops containing memory pointers.
+	 * @see rte_ml_dev_stop()
+	 */
+};
+
+/**
+ * Set up a queue pair for a device. This should only be called when the device is stopped.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param queue_pair_id
+ *   The index of the queue pair to set up. The value must be in the range [0, nb_queue_pairs - 1]
+ * previously supplied to rte_ml_dev_configure().
+ * @param qp_conf
+ *   The pointer to the configuration data to be used for the queue pair.
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in case of NUMA.
+ * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated
+ * for the queue pair.
+ *
+ * @return
+ *   - 0: Success, queue pair correctly set up.
+ *   - < 0: Queue pair configuration failed.
+ */
+__rte_experimental
+int
+rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id,
+			    const struct rte_ml_dev_qp_conf *qp_conf, int socket_id);
+
+/**
+ * Start an ML device.
+ *
+ * The device start step consists of setting the configured features and enabling the ML device
+ * to accept inference jobs.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ *
+ * @return
+ *   - 0: Success, device started.
+ *   - <0: Error code of the driver device start function.
+ */
+__rte_experimental
+int
+rte_ml_dev_start(int16_t dev_id);
+
+/**
+ * Stop an ML device. A stopped device cannot accept inference jobs.
+ * The device can be restarted with a call to rte_ml_dev_start().
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ *
+ * @return
+ *   - 0: Success, device stopped.
+ *   - <0: Error code of the driver device stop function.
+ */
+__rte_experimental
+int
+rte_ml_dev_stop(int16_t dev_id);
+
+/**
+ * Close an ML device. The device cannot be restarted!
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ *
+ * @return
+ *  - 0 on successfully closing device.
+ *  - <0 on failure to close device.
+ */
+__rte_experimental
+int
+rte_ml_dev_close(int16_t dev_id);
+
+/** Status of ML operation */
+enum rte_ml_op_status {
+	RTE_ML_OP_STATUS_SUCCESS = 0,
+	/**< Operation completed successfully */
+	RTE_ML_OP_STATUS_NOT_PROCESSED,
+	/**< Operation has not yet been processed by the device.
+	 * When an ML op is enqueued to the device, the driver sets the status as
+	 * RTE_ML_OP_STATUS_NOT_PROCESSED. Upon the ML operation completion,
+	 * the respective status will be updated by the driver.
+	 */
+	RTE_ML_OP_STATUS_ERROR,
+	/**< Operation completed with error.
+	 * Application can invoke rte_ml_op_error_get() to get PMD specific
+	 * error code if needed.
+	 */
+};
+
+/** ML operation's input and output buffer representation as scatter gather list
+ */
+struct rte_ml_buff_seg {
+	rte_iova_t iova_addr;
+	/**< IOVA address of segment buffer. */
+	void *addr;
+	/**< Virtual address of segment buffer. */
+	uint32_t length;
+	/**< Segment length. */
+	uint32_t reserved;
+	/**< Reserved for future use. */
+	struct rte_ml_buff_seg *next;
+	/**< Points to next segment. Value NULL represents the last segment. */
+};
+
+/**
+ * ML Operation.
+ *
+ * This structure contains data related to performing an ML operation on the buffers using
+ * the model specified through model_id.
+ */
+struct rte_ml_op {
+	int16_t model_id;
+	/**< Model ID to be used for the operation. */
+	uint16_t nb_batches;
+	/**< Number of batches. Minimum value must be one.
+	 * The input buffer must hold inference data for each batch contiguously.
+	 */
+	uint32_t reserved;
+	/**< Reserved for future use. */
+	struct rte_mempool *mempool;
+	/**< Pool from which operation is allocated. */
+	struct rte_ml_buff_seg input;
+	/**< Input buffer to hold the inference data. */
+	struct rte_ml_buff_seg output;
+	/**< Output buffer to hold the inference output by the driver. */
+	RTE_STD_C11
+	union {
+		uint64_t user_u64;
+		/**< User data as uint64_t.*/
+		void *user_ptr;
+		/**< User data as void*.*/
+	};
+	enum rte_ml_op_status status;
+	/**< Operation status. */
+} __rte_cache_aligned;
+
+/* Enqueue/Dequeue operations */
+
+/**
+ * Enqueue a burst of ML inferences for processing on an ML device.
+ *
+ * The rte_ml_enqueue_burst() function is invoked to place ML inference
+ * operations on the queue *qp_id* of the device designated by its *dev_id*.
+ *
+ * The *nb_ops* parameter is the number of inferences to process which are
+ * supplied in the *ops* array of *rte_ml_op* structures.
+ *
+ * The rte_ml_enqueue_burst() function returns the number of inferences it
+ * actually enqueued for processing. A return value equal to *nb_ops* means that
+ * all operations have been enqueued.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param qp_id
+ *   The index of the queue pair which inferences are to be enqueued for processing.
+ * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
+ * *rte_ml_dev_configure*.
+ * @param ops
+ *   The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the
+ * ML inferences to be processed.
+ * @param nb_ops
+ *   The number of operations to process.
+ *
+ * @return
+ *   The number of inference operations actually enqueued to the ML device.
+ * The return value can be less than the value of the *nb_ops* parameter when the ML device queue
+ * is full or if invalid parameters are specified in a *rte_ml_op*.
+ */
+__rte_experimental
+uint16_t
+rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
+
+/**
+ * Dequeue a burst of processed ML inference operations from a queue on the ML device.
+ * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied
+ * in the *ops* array.
+ *
+ * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued,
+ * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array.
+ *
+ * A return value equal to *nb_ops* indicates that the queue contained at least *nb_ops*
+ * operations, and this is likely to signify that other processed operations remain in the
+ * device's output queue. Applications implementing a "retrieve as many processed operations
+ * as possible" policy can check
+ * this specific case and keep invoking the rte_ml_dequeue_burst() function until a value less than
+ * *nb_ops* is returned.
+ *
+ * The rte_ml_dequeue_burst() function does not provide any error notification to avoid
+ * the corresponding overhead.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param qp_id
+ *   The index of the queue pair from which to retrieve processed packets.
+ * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
+ * rte_ml_dev_configure().
+ * @param ops
+ *   The address of an array of pointers to *rte_ml_op* structures that must be large enough to
+ * store *nb_ops* pointers in it.
+ * @param nb_ops
+ *   The maximum number of inferences to dequeue.
+ *
+ * @return
+ *   The number of operations actually dequeued, which is the number of pointers
+ * to *rte_ml_op* structures effectively supplied to the *ops* array.
+ */
+__rte_experimental
+uint16_t
+rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
+
+/**
+ * Verbose error structure definition.
+ */
+struct rte_ml_op_error {
+	char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */
+	uint64_t errcode; /**< Vendor specific error code. */
+};
+
+/**
+ * Get PMD specific error information for an ML op.
+ *
+ * When an ML operation completes with RTE_ML_OP_STATUS_ERROR as the status,
+ * this API allows retrieving PMD specific error details.
+ *
+ * @param[in] dev_id
+ *   Device identifier
+ * @param[in] op
+ *   Handle of ML operation
+ * @param[out] error
+ *   Address of structure rte_ml_op_error to be filled
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error);
+
+/* Statistics operations */
+
+/** Device statistics. */
+struct rte_ml_dev_stats {
+	uint64_t enqueued_count;
+	/**< Count of all operations enqueued */
+	uint64_t dequeued_count;
+	/**< Count of all operations dequeued */
+	uint64_t enqueue_err_count;
+	/**< Total error count on operations enqueued */
+	uint64_t dequeue_err_count;
+	/**< Total error count on operations dequeued */
+};
+
+/**
+ * Retrieve the general I/O statistics of a device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param stats
+ *   Pointer to structure to where statistics will be copied.
+ * On error, this location may or may not have been modified.
+ * @return
+ *   - 0 on success
+ *   - -EINVAL: If invalid parameter pointer is provided.
+ */
+__rte_experimental
+int
+rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats);
+
+/**
+ * Reset the statistics of a device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ */
+__rte_experimental
+void
+rte_ml_dev_stats_reset(int16_t dev_id);
+
+/**
+ * A name-key lookup element for extended statistics.
+ *
+ * This structure is used to map between names and ID numbers for extended ML device statistics.
+ */
+struct rte_ml_dev_xstats_map {
+	uint16_t id;
+	/**< xstat identifier */
+	char name[RTE_ML_STR_MAX];
+	/**< xstat name */
+};
+
+/**
+ * Retrieve names of extended statistics of an ML device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param[out] xstats_map
+ *   Block of memory to insert ids and names into.
+ * If set to NULL, the function returns the required capacity.
+ *
+ * @return
+ *   - Positive value on success:
+ *      - The return value is the number of entries filled in the stats map.
+ *      - If xstats_map is set to NULL, the required capacity for xstats_map.
+ *   - Negative value on error:
+ *      - -ENODEV: for invalid *dev_id*.
+ *      - -ENOTSUP: if the device doesn't support this function.
+ */
+__rte_experimental
+int
+rte_ml_dev_xstats_names_get(int16_t dev_id, struct rte_ml_dev_xstats_map *xstats_map);
+
+/**
+ * Retrieve the value of a single stat by requesting it by name.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param name
+ *   The stat name to retrieve.
+ * @param stat_id
+ *   If non-NULL, the numerical id of the stat will be returned, so that further requests for
+ * the stat can be made using rte_ml_dev_xstats_get(), which will be faster as it doesn't need to
+ * scan a list of names for the stat.
+ * @param[out] value
+ *   Must be non-NULL, retrieved xstat value will be stored in this address.
+ *
+ * @return
+ *   - 0: Successfully retrieved xstat value.
+ *   - -EINVAL: invalid parameters.
+ *   - -ENOTSUP: if not supported.
+ */
+__rte_experimental
+int
+rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id, uint64_t *value);
+
+/**
+ * Retrieve extended statistics of an ML device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param stat_ids
+ *   The id numbers of the stats to get. The ids can be fetched from the stat position in the
+ * stat list from rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get().
+ * @param values
+ *   The values for each stat requested by ID.
+ * @param nb_ids
+ *   The number of stats requested.
+ * @return
+ *   - Positive value: number of stat entries filled into the values array
+ *   - Negative value on error:
+ *      - -ENODEV: for invalid *dev_id*.
+ *      - -ENOTSUP: if the device doesn't support this function.
+ */
+__rte_experimental
+int
+rte_ml_dev_xstats_get(int16_t dev_id, const uint16_t *stat_ids, uint64_t *values, uint16_t nb_ids);
+
+/**
+ * Reset the values of the xstats of the selected component in the device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param stat_ids
+ *   Selects specific statistics to be reset. When NULL, all statistics will be reset.
+ * If non-NULL, must point to an array of at least *nb_ids* entries.
+ * @param nb_ids
+ *   The number of ids available from the *stat_ids* array. Ignored when *stat_ids* is NULL.
+ * @return
+ *   - 0: Successfully reset the statistics to zero.
+ *   - -EINVAL: invalid parameters.
+ *   - -ENOTSUP: if not supported.
+ */
+__rte_experimental
+int
+rte_ml_dev_xstats_reset(int16_t dev_id, const uint16_t *stat_ids, uint16_t nb_ids);
+
+/* Utility operations */
+
+/**
+ * Dump internal information about *dev_id* to the FILE* provided in *fd*.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param fd
+ *   A pointer to a file for output.
+ * @return
+ *   - 0: on success.
+ *   - <0: on failure.
+ */
+__rte_experimental
+int
+rte_ml_dev_dump(int16_t dev_id, FILE *fd);
+
+/**
+ * Trigger the ML device self test.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @return
+ *   - 0: Selftest successful.
+ *   - -ENOTSUP: if the device doesn't support selftest.
+ *   - other values < 0 on failure.
+ */
+__rte_experimental
+int
+rte_ml_dev_selftest(int16_t dev_id);
+
+/* Model operations */
+
+/** ML model load parameters
+ *
+ * Parameters required to load an ML model.
+ */
+struct rte_ml_model_params {
+	void *addr;
+	/**< Address of model buffer */
+	size_t size;
+	/**< Size of model buffer */
+};
+
+/**
+ * Load an ML model to the device.
+ *
+ * Load an ML model to the device with parameters requested in the structure rte_ml_model_params.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] params
+ *   Parameters for the model to be loaded.
+ * @param[out] model_id
+ *   Identifier of the model loaded.
+ *
+ * @return
+ *   - 0: Success, Model loaded.
+ *   - < 0: Failure, Error code of the model load driver function.
+ */
+__rte_experimental
+int
+rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, int16_t *model_id);
+
+/**
+ * Unload an ML model from the device.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier of the model to be unloaded.
+ *
+ * @return
+ *   - 0: Success, Model unloaded.
+ *   - < 0: Failure, Error code of the model unload driver function.
+ */
+__rte_experimental
+int
+rte_ml_model_unload(int16_t dev_id, int16_t model_id);
+
+/**
+ * Start an ML model for the given device ID.
+ *
+ * Start an ML model to accept inference requests.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier of the model to be started.
+ *
+ * @return
+ *   - 0: Success, Model started.
+ *   - < 0: Failure, Error code of the model start driver function.
+ */
+__rte_experimental
+int
+rte_ml_model_start(int16_t dev_id, int16_t model_id);
+
+/**
+ * Stop an ML model for the given device ID.
+ *
+ * Model stop disables the ML model from being used for inference jobs.
+ * All inference jobs must have been completed before model stop is attempted.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier of the model to be stopped.
+ *
+ * @return
+ *   - 0: Success, Model stopped.
+ *   - < 0: Failure, Error code of the model stop driver function.
+ */
+__rte_experimental
+int
+rte_ml_model_stop(int16_t dev_id, int16_t model_id);
+
+/**
+ * Input and output data types. ML models can operate on reduced precision
+ * datatypes to achieve better power efficiency, lower network latency and lower memory footprint.
+ * This enum is used to represent the lower precision integer and floating point types used
+ * by ML models.
+ */
+enum rte_ml_io_type {
+	RTE_ML_IO_TYPE_UNKNOWN = 0,
+	/**< Invalid or unknown type */
+	RTE_ML_IO_TYPE_INT8,
+	/**< 8-bit integer */
+	RTE_ML_IO_TYPE_UINT8,
+	/**< 8-bit unsigned integer */
+	RTE_ML_IO_TYPE_INT16,
+	/**< 16-bit integer */
+	RTE_ML_IO_TYPE_UINT16,
+	/**< 16-bit unsigned integer */
+	RTE_ML_IO_TYPE_INT32,
+	/**< 32-bit integer */
+	RTE_ML_IO_TYPE_UINT32,
+	/**< 32-bit unsigned integer */
+	RTE_ML_IO_TYPE_FP8,
+	/**< 8-bit floating point number */
+	RTE_ML_IO_TYPE_FP16,
+	/**< IEEE 754 16-bit floating point number */
+	RTE_ML_IO_TYPE_FP32,
+	/**< IEEE 754 32-bit floating point number */
+	RTE_ML_IO_TYPE_BFLOAT16
+	/**< 16-bit brain floating point number. */
+};
+
+/**
+ * Input and output format. This is used to represent the encoding type of the
+ * multi-dimensional data used by ML models.
+ */
+enum rte_ml_io_format {
+	RTE_ML_IO_FORMAT_NCHW = 1,
+	/**< Batch size (N) x channels (C) x height (H) x width (W) */
+	RTE_ML_IO_FORMAT_NHWC,
+	/**< Batch size (N) x height (H) x width (W) x channels (C) */
+	RTE_ML_IO_FORMAT_CHWN,
+	/**< Channels (C) x height (H) x width (W) x batch size (N) */
+	RTE_ML_IO_FORMAT_3D,
+	/**< Format to represent 3-dimensional data */
+	RTE_ML_IO_FORMAT_2D,
+	/**< Format to represent matrix data */
+	RTE_ML_IO_FORMAT_1D,
+	/**< Format to represent vector data */
+	RTE_ML_IO_FORMAT_SCALAR,
+	/**< Format to represent scalar data */
+};
+
+/**
+ * Input and output shape. This structure represents the encoding format and dimensions
+ * of the tensor or vector.
+ *
+ * The data can be a 4D / 3D tensor, matrix, vector or a scalar. The number of dimensions
+ * used for the data depends on the format. Unused dimensions are to be set to 1.
+ */
+struct rte_ml_io_shape {
+	enum rte_ml_io_format format;
+	/**< Format of the data */
+	uint32_t w;
+	/**< First dimension */
+	uint32_t x;
+	/**< Second dimension */
+	uint32_t y;
+	/**< Third dimension */
+	uint32_t z;
+	/**< Fourth dimension */
+};
+
+/** Input and output data information structure
+ *
+ * Specifies the type and shape of input and output data.
+ */
+struct rte_ml_io_info {
+	char name[RTE_ML_STR_MAX];
+	/**< Name of data */
+	struct rte_ml_io_shape shape;
+	/**< Shape of data */
+	enum rte_ml_io_type qtype;
+	/**< Type of quantized data */
+	enum rte_ml_io_type dtype;
+	/**< Type of de-quantized data */
+};
+
+/** Model information structure */
+struct rte_ml_model_info {
+	char name[RTE_ML_STR_MAX];
+	/**< Model name. */
+	char version[RTE_ML_STR_MAX];
+	/**< Model version */
+	int16_t model_id;
+	/**< Model ID */
+	uint16_t device_id;
+	/**< Device ID */
+	uint16_t batch_size;
+	/**< Maximum number of batches that the model can process simultaneously */
+	uint32_t nb_inputs;
+	/**< Number of inputs */
+	const struct rte_ml_io_info *input_info;
+	/**< Input info array. Array size is equal to nb_inputs */
+	uint32_t nb_outputs;
+	/**< Number of outputs */
+	const struct rte_ml_io_info *output_info;
+	/**< Output info array. Array size is equal to nb_outputs */
+	uint64_t wb_size;
+	/**< Size of model weights and bias */
+};
+
+/**
+ * Get ML model information.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier for the model created
+ * @param[out] model_info
+ *   Pointer to a model info structure
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_model_info_get(int16_t dev_id, int16_t model_id, struct rte_ml_model_info *model_info);
+
+/**
+ * Update the model parameters without unloading model.
+ *
+ * Update model parameters such as weights and bias without unloading the model.
+ * rte_ml_model_stop() must be called before invoking this API.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier for the model created
+ * @param[in] buffer
+ *   Pointer to the model weights and bias buffer.
+ * Size of the buffer is equal to wb_size returned in *rte_ml_model_info*.
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_model_params_update(int16_t dev_id, int16_t model_id, void *buffer);
+
+/* IO operations */
+
+/**
+ * Get size of quantized and dequantized input buffers.
+ *
+ * Calculate the size of buffers required for quantized and dequantized input data.
+ * This API returns the buffer sizes for the number of batches provided, taking into account
+ * the alignment requirements of the PMD. Input sizes computed by this API can
+ * be used by the application to allocate buffers.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier for the model created
+ * @param[in] nb_batches
+ *   Number of batches of input to be processed in a single inference job
+ * @param[out] input_qsize
+ *   Quantized input size pointer.
+ * NULL value is allowed, in which case input_qsize is not calculated by the driver.
+ * @param[out] input_dsize
+ *   Dequantized input size pointer.
+ * NULL value is allowed, in which case input_dsize is not calculated by the driver.
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_io_input_size_get(int16_t dev_id, int16_t model_id, uint32_t nb_batches,
+			 uint64_t *input_qsize, uint64_t *input_dsize);
+
+/**
+ * Get size of quantized and dequantized output buffers.
+ *
+ * Calculate the size of buffers required for quantized and dequantized output data.
+ * This API returns the buffer sizes for the number of batches provided, taking into account the
+ * alignment requirements of the PMD. Output sizes computed by this API can be used by the
+ * application to allocate buffers.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier for the model created
+ * @param[in] nb_batches
+ *   Number of batches of input to be processed in a single inference job
+ * @param[out] output_qsize
+ *   Quantized output size pointer.
+ * NULL value is allowed, in which case output_qsize is not calculated by the driver.
+ * @param[out] output_dsize
+ *   Dequantized output size pointer.
+ * NULL value is allowed, in which case output_dsize is not calculated by the driver.
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_io_output_size_get(int16_t dev_id, int16_t model_id, uint32_t nb_batches,
+			  uint64_t *output_qsize, uint64_t *output_dsize);
+
+/**
+ * Quantize input data.
+ *
+ * Quantization converts data from a higher precision type to a lower precision type to improve
+ * the throughput and efficiency of the model execution with minimal loss of accuracy.
+ * Types of dequantized data and quantized data are specified by the model.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier for the model
+ * @param[in] nb_batches
+ *   Number of batches in the dequantized input buffer
+ * @param[in] dbuffer
+ *   Address of dequantized input data
+ * @param[out] qbuffer
+ *   Address of quantized input data
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_io_quantize(int16_t dev_id, int16_t model_id, uint16_t nb_batches, void *dbuffer,
+		   void *qbuffer);
+
+/**
+ * Dequantize output data.
+ *
+ * Dequantization converts data from a lower precision type to a higher precision type.
+ * Types of quantized and dequantized data are specified by the model.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier for the model
+ * @param[in] nb_batches
+ *   Number of batches in the dequantized output buffer
+ * @param[in] qbuffer
+ *   Address of quantized output data
+ * @param[out] dbuffer
+ *   Address of dequantized output data
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_io_dequantize(int16_t dev_id, int16_t model_id, uint16_t nb_batches, void *qbuffer,
+		     void *dbuffer);
+
+/* ML op pool operations */
+
+/**
+ * Create an ML operation pool
+ *
+ * @param name
+ *   ML operations pool name
+ * @param nb_elts
+ *   Number of elements in pool
+ * @param cache_size
+ *   Number of elements to cache on lcore, see
+ *   *rte_mempool_create* for further details about cache size
+ * @param user_size
+ *   Size of private data to allocate for user with each operation
+ * @param socket_id
+ *   Socket identifier to allocate memory on
+ * @return
+ *  - On success pointer to mempool
+ *  - On failure NULL
+ */
+__rte_experimental
+struct rte_mempool *
+rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
+		      uint16_t user_size, int socket_id);
+
+/**
+ * Free an ML operation pool
+ *
+ * @param mempool
+ *   A pointer to the mempool structure.
+ *   If NULL then, the function does nothing.
+ */
+__rte_experimental
+void
+rte_ml_op_pool_free(struct rte_mempool *mempool);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_MLDEV_H */
diff --git a/lib/mldev/version.map b/lib/mldev/version.map
new file mode 100644
index 0000000000..5aeea7c827
--- /dev/null
+++ b/lib/mldev/version.map
@@ -0,0 +1,5 @@ 
+EXPERIMENTAL {
+	local: *;
+};