@@ -81,6 +81,9 @@
/* rawdev defines */
#define RTE_RAWDEV_MAX_DEVS 64
+/* mldev defines */
+#define RTE_MLDEV_MAX_DEVS 64
+
/* ip_fragmentation defines */
#define RTE_LIBRTE_IP_FRAG_MAX_FRAG 8
// RTE_LIBRTE_IP_FRAG_TBL_STAT is not set
@@ -22,6 +22,7 @@ The public API headers are grouped by topics:
[compress](@ref rte_comp.h),
[regexdev](@ref rte_regexdev.h),
[dmadev](@ref rte_dmadev.h),
+ [mldev](@ref rte_mldev.h),
[eventdev](@ref rte_eventdev.h),
[event_eth_rx_adapter](@ref rte_event_eth_rx_adapter.h),
[event_eth_tx_adapter](@ref rte_event_eth_tx_adapter.h),
@@ -59,6 +59,7 @@ INPUT = @TOPDIR@/doc/api/doxy-api-index.md \
@TOPDIR@/lib/mempool \
@TOPDIR@/lib/meter \
@TOPDIR@/lib/metrics \
+ @TOPDIR@/lib/mldev \
@TOPDIR@/lib/node \
@TOPDIR@/lib/net \
@TOPDIR@/lib/pcapng \
@@ -30,6 +30,7 @@ Programmer's Guide
regexdev
dmadev
gpudev
+ mldev
rte_security
rawdev
link_bonding_poll_mode_drv_lib
new file mode 100644
@@ -0,0 +1,164 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+ Copyright(C) 2022 Marvell International Ltd.
+
+Machine Learning Device Library
+===============================
+
+The MLDEV library provides a Machine Learning device framework for the management and
+provisioning of hardware and software ML poll mode drivers, defining APIs which
+support a number of ML operations including device handling and inference processing.
+ML model creation and training are outside the scope of this library.
+
+Design Principles
+-----------------
+
+The MLDEV library follows the same basic principles as those used in DPDK's
+Ethernet Device framework and the Crypto framework. The MLDEV framework provides
+a generic Machine Learning device framework which supports both physical (hardware)
+and virtual (software) ML devices as well as an ML API to manage and configure ML
+devices. The APIs also support performing ML inference operations through an ML
+poll mode driver.
+
+
+Device Operations
+-----------------
+
+Device Creation
+~~~~~~~~~~~~~~~
+
+Physical ML devices are discovered during the PCI probe/enumeration, through the
+EAL functions which are executed at DPDK initialization, based on their PCI device
+identifier, each unique PCI BDF (bus/device/function). ML physical devices,
+like other physical devices in DPDK, can be allowed or blocked
+using the EAL command line options.
+
+
+Device Identification
+~~~~~~~~~~~~~~~~~~~~~
+
+Each device, whether virtual or physical is uniquely designated by two
+identifiers:
+
+- A unique device index used to designate the ML device in all functions
+ exported by the MLDEV API.
+
+- A device name used to designate the ML device in console messages, for
+ administration or debugging purposes.
+
+Device Features and Capabilities
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ML devices may support different feature sets. In order to get the
+supported PMD features, use the ``rte_ml_dev_info_get`` API, which returns
+the info of the device and its supported features.
+
+Device Configuration
+~~~~~~~~~~~~~~~~~~~~
+
+The configuration of each ML device includes the following operations:
+
+- Allocation of resources, including hardware resources if a physical device.
+- Resetting the device into a well-known default state.
+- Initialization of statistics counters.
+
+The ``rte_ml_dev_configure`` API is used to configure an ML device.
+
+.. code-block:: c
+
+   int rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *cfg);
+
+The ``rte_ml_dev_config`` structure is used to pass the configuration parameters
+for the ML device, for example number of queue pairs, maximum number of models,
+maximum size of model and so on.
+
+Configuration of Queue Pairs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Each ML device can be configured with number of queue pairs.
+Each queue pair is configured using ``rte_ml_dev_queue_pair_setup``
+
+Logical Cores, Memory and Queues Pair Relationships
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Multiple logical cores should never share the same queue pair for enqueuing
+operations or dequeueing operations on the same ML device since this would
+require global locks and hinder performance.
+
+Configuration of Machine Learning models
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Pre-trained ML models that are built using external ML compiler / training frameworks
+are used to perform inference operations. These models are configured on an ML device
+in a two-stage process that includes loading the model on an ML device, and starting
+the model to accept inference operations. Inference operations can be queued for a
+model only when the model is in started state. Model load stage assigns a Model ID,
+which is unique for the model in a driver's context. Model ID is used during all
+subsequent slow-path and fast-path operations.
+
+Model loading and start is done through the ``rte_ml_model_load`` and
+``rte_ml_model_start`` functions.
+
+Similarly stop and unloading are done through ``rte_ml_model_stop`` and
+``rte_ml_model_unload`` functions.
+
+Stop and unload functions would release the resources allocated for the
+models. Inference tasks cannot be queued for a model that is stopped.
+
+Detailed information related to the model can be retrieved from the driver using the
+function ``rte_ml_model_info_get``. Model information is accessible to the application
+through the ``rte_ml_model_info`` structure. Information available to the user would
+include the details related to the inputs and outputs, and the maximum batch size
+supported by the model.
+
+User can optionally update the model params such as weights and bias, without unloading
+the model, through the ``rte_ml_model_params_update`` function. A model should be in
+stopped state to update the params. Model has to be started in order to enqueue inference
+requests after a params update.
+
+Enqueue / Dequeue
+~~~~~~~~~~~~~~~~~
+
+The burst enqueue API uses a ML device identifier and a queue pair identifier
+to specify the device queue pair to schedule the processing on. The ``nb_ops``
+parameter is the number of operations to process which are supplied in the
+``ops`` array of ``rte_ml_op`` structures. The enqueue function returns the
+number of operations it enqueued for processing, a return value equal to
+``nb_ops`` means that all packets have been enqueued.
+
+The dequeue API uses the same format as the enqueue API but
+the ``nb_ops`` and ``ops`` parameters are now used to specify the max processed
+operations the user wishes to retrieve and the location in which to store them.
+The API call returns the actual number of processed operations returned; this
+can never be larger than ``nb_ops``.
+
+``rte_ml_op`` provides the required information to the driver to queue an ML inference
+task. ML op specifies the model to be used and the number of batches to be executed in
+the inference task. Input and output buffer information is specified through the
+structure ``rte_ml_buff_seg``, which supports segmented data. Input is provided through
+the ``rte_ml_op::input`` and output through ``rte_ml_op::output``. Data pointed to by
+each op should not be released until the dequeue of that op is complete.
+
+
+Quantize and Dequantize
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Inference operations performed with lower precision types would improve the throughput
+and efficiency of the inference execution with a minimal loss of accuracy, which is within
+the tolerance limits. Quantization and dequantization is the process of converting data
+from a higher precision type to a lower precision type and vice-versa. ML library provides
+the functions ``rte_ml_io_quantize`` and ``rte_ml_io_dequantize`` to enable data type
+conversions. User needs to provide the address of the quantized and dequantized data
+buffers to the functions, along with the number of batches in the buffers.
+
+For quantization, the dequantized data is assumed to be of the type ``dtype`` provided by
+the ``rte_ml_model_info::input`` and the data is converted to ``qtype`` provided by the
+``rte_ml_model_info::input``.
+
+For dequantization, the quantized data is assumed to be of the type ``qtype`` provided by
+the ``rte_ml_model_info::output`` and the data is converted to ``dtype`` provided by the
+``rte_ml_model_info::output``.
+
+Size of the buffers required for the input and output can be calculated using the functions
+``rte_ml_io_input_size_get`` and ``rte_ml_io_output_size_get``. These functions would get the
+buffer sizes for both quantized and dequantized data for the given number of batches.
+
@@ -369,6 +369,7 @@ static const struct logtype logtype_strings[] = {
{RTE_LOGTYPE_EFD, "lib.efd"},
{RTE_LOGTYPE_EVENTDEV, "lib.eventdev"},
{RTE_LOGTYPE_GSO, "lib.gso"},
+ {RTE_LOGTYPE_MLDEV, "lib.mldev"},
{RTE_LOGTYPE_USER1, "user1"},
{RTE_LOGTYPE_USER2, "user2"},
{RTE_LOGTYPE_USER3, "user3"},
@@ -48,6 +48,7 @@ extern "C" {
#define RTE_LOGTYPE_EFD 18 /**< Log related to EFD. */
#define RTE_LOGTYPE_EVENTDEV 19 /**< Log related to eventdev. */
#define RTE_LOGTYPE_GSO 20 /**< Log related to GSO. */
+#define RTE_LOGTYPE_MLDEV 21 /**< Log related to mldev. */
/* these log types can be used in an application */
#define RTE_LOGTYPE_USER1 24 /**< User-defined log type 1. */
@@ -63,6 +63,7 @@ libraries = [
'flow_classify', # flow_classify lib depends on pkt framework table lib
'graph',
'node',
+ 'mldev'
]
optional_libs = [
new file mode 100644
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 Marvell.
+
+sources = files(
+ 'rte_mldev.c',
+)
+
+headers = files(
+ 'rte_mldev.h',
+)
+
+deps += ['mempool']
new file mode 100644
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Marvell.
+ */
+
+#include <rte_mldev.h>
new file mode 100644
@@ -0,0 +1,1081 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Marvell.
+ */
+
+#ifndef RTE_MLDEV_H
+#define RTE_MLDEV_H
+
+/**
+ * @file rte_mldev.h
+ *
+ * @warning
+ * @b EXPERIMENTAL:
+ * All functions in this file may be changed or removed without prior notice.
+ *
+ * ML (Machine Learning) device API.
+ *
+ * The ML framework is built on the following model:
+ *
+ *
+ * +-----------------+ rte_ml_[en|de]queue_burst()
+ * | | |
+ * | Machine o------+ +--------+ |
+ * | Learning | | | queue | | +------+
+ * | Inference o------+-----o |<===o===>|Core 0|
+ * | Engine | | | pair 0 | +------+
+ * | o----+ | +--------+
+ * | | | |
+ * +-----------------+ | | +--------+
+ * ^ | | | queue | +------+
+ * | | +-----o |<=======>|Core 1|
+ * | | | pair 1 | +------+
+ * | | +--------+
+ * +--------+--------+ |
+ * | +-------------+ | | +--------+
+ * | | Model 0 | | | | queue | +------+
+ * | +-------------+ | +-------o |<=======>|Core N|
+ * | +-------------+ | | pair N | +------+
+ * | | Model 1 | | +--------+
+ * | +-------------+ |
+ * | +-------------+ |<------- rte_ml_model_load()
+ * | | Model .. | |-------> rte_ml_model_info()
+ * | +-------------+ |<------- rte_ml_model_start()
+ * | +-------------+ |<------- rte_ml_model_stop()
+ * | | Model N | |<------- rte_ml_model_params_update()
+ * | +-------------+ |<------- rte_ml_model_unload()
+ * +-----------------+
+ *
+ * ML Device: A hardware or software-based implementation of ML device API for
+ * running inferences using a pre-trained ML model.
+ *
+ * ML Model: An ML model is an algorithm trained over a dataset. A model consists of
+ * procedure/algorithm and data/pattern required to make predictions on live data.
+ * Once the model is created and trained outside of the DPDK scope, the model can be loaded
+ * via rte_ml_model_load() and then start it using rte_ml_model_start() API.
+ * The rte_ml_model_params_update() can be used to update the model parameters such as weight
+ * and bias without unloading the model using rte_ml_model_unload().
+ *
+ * ML Inference: ML inference is the process of feeding data to the model via
+ * rte_ml_enqueue_burst() API and use rte_ml_dequeue_burst() API to get the calculated
+ * outputs/predictions from the started model.
+ *
+ * In all functions of the ML device API, the ML device is designated by an
+ * integer >= 0 named as device identifier *dev_id*.
+ *
+ * The functions exported by the ML device API to setup a device designated by
+ * its device identifier must be invoked in the following order:
+ *
+ * - rte_ml_dev_configure()
+ * - rte_ml_dev_queue_pair_setup()
+ * - rte_ml_dev_start()
+ *
+ * A model is required to run the inference operations with the user specified inputs.
+ * Application needs to invoke the ML model API in the following order before queueing
+ * inference jobs.
+ *
+ * - rte_ml_model_load()
+ * - rte_ml_model_start()
+ *
+ * The rte_ml_model_info_get() API is provided to retrieve the information related to the model.
+ * The information would include the shape and type of input and output required for the inference.
+ *
+ * Data quantization and dequantization is one of the main aspects in ML domain. This involves
+ * conversion of input data from a higher precision to a lower precision data type and vice-versa
+ * for the output. APIs are provided to enable quantization through rte_ml_io_quantize() and
+ * dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input
+ * and output buffers holding data for multiple batches.
+ *
+ * Two utility APIs rte_ml_io_input_size_get() and rte_ml_io_output_size_get() can be used
+ * to get the size of quantized and de-quantized multi-batch input and output buffers.
+ *
+ * User can optionally update the model parameters with rte_ml_model_params_update() after
+ * invoking rte_ml_model_stop() API on a given model ID.
+ *
+ * The application can invoke, in any order, the functions exported by the ML API to enqueue
+ * inference jobs and dequeue inference response.
+ *
+ * If the application wants to change the device configuration (i.e., call
+ * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then application must stop the
+ * device using rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then
+ * the application must call rte_ml_model_stop() followed by rte_ml_model_params_update() API
+ * for the given model. The application does not need to call rte_ml_dev_stop() API for
+ * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc.
+ *
+ * Once the device is in the start state after invoking rte_ml_dev_start() API and the model is in
+ * start state after invoking rte_ml_model_start() API, then the application can call
+ * rte_ml_enqueue_burst() and rte_ml_dequeue_burst() API on the destined device and model ID.
+ *
+ * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
+ *
+ * Typical application utilisation of the ML API will follow the following
+ * programming flow.
+ *
+ * - rte_ml_dev_configure()
+ * - rte_ml_dev_queue_pair_setup()
+ * - rte_ml_model_load()
+ * - rte_ml_model_start()
+ * - rte_ml_model_info_get()
+ * - rte_ml_dev_start()
+ * - rte_ml_enqueue_burst()
+ * - rte_ml_dequeue_burst()
+ * - rte_ml_model_stop()
+ * - rte_ml_model_unload()
+ * - rte_ml_dev_stop()
+ * - rte_ml_dev_close()
+ *
+ * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD
+ * are lock-free functions which assume to not be invoked in parallel on different logical cores
+ * on the same target object. For instance, the dequeue function of a poll mode driver cannot be
+ * invoked in parallel on two logical cores to operate on same queue pair. Of course, this function
+ * can be invoked in parallel by different logical core on different queue pair.
+ * It is the responsibility of the user application to enforce this rule.
+ */
+
+#include <rte_common.h>
+#include <rte_mempool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_ML_STR_MAX 128
+/**< Maximum length of name string */
+
+/* Device operations */
+
+/**
+ * Get the total number of ML devices that have been successfully initialised.
+ *
+ * @return
+ * - The total number of usable ML devices.
+ */
+__rte_experimental
+uint16_t
+rte_ml_dev_count(void);
+
+/**
+ * Check if the device is in ready state.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ *
+ * @return
+ * - 0 if device state is not in ready state.
+ * - 1 if device state is ready state.
+ */
+__rte_experimental
+int
+rte_ml_dev_is_valid_dev(int16_t dev_id);
+
+/**
+ * Return the NUMA socket to which a device is connected.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ *
+ * @return
+ * - The NUMA socket id to which the device is connected
+ * - 0 If the socket could not be determined.
+ * - -EINVAL: if the dev_id value is not valid.
+ */
+__rte_experimental
+int
+rte_ml_dev_socket_id(int16_t dev_id);
+
+/** ML device information */
+struct rte_ml_dev_info {
+ const char *driver_name;
+ /**< Driver name */
+ int16_t max_models;
+ /**< Maximum number of models supported by the device.
+ * @see struct rte_ml_dev_config::max_nb_models
+ */
+ uint16_t max_queue_pairs;
+ /**< Maximum number of queues pairs supported by the device.
+ * @see struct rte_ml_dev_config::nb_queue_pairs
+ */
+ uint16_t max_desc;
+ /**< Maximum allowed number of descriptors for queue pair by the device.
+ * @see struct rte_ml_dev_qp_conf::nb_desc
+ */
+ uint16_t max_segments;
+ /**< Maximum number of scatter-gather entry supported by the device.
+ * @see struct rte_ml_buff_seg struct rte_ml_buff_seg::next
+ */
+};
+
+/**
+ * Retrieve the information of the device.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param dev_info
+ * A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device.
+ *
+ * @return
+ * - 0: Success, driver updates the information of the ML device
+ * - < 0: Error code returned by the driver info get function.
+ */
+__rte_experimental
+int
+rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info);
+
+/** ML device configuration structure */
+struct rte_ml_dev_config {
+ int socket_id;
+ /**< Socket to allocate resources on. */
+ int16_t max_nb_models;
+ /**< Max number of models allowed to be loaded on the device.
+ * This value cannot exceed the max_models which is previously provided in
+ * struct rte_ml_dev_info::max_models
+ */
+	uint16_t nb_queue_pairs;
+	/**< Number of queue pairs to configure on this device.
+	 * This value cannot exceed the max_queue_pairs which is previously provided in
+	 * struct rte_ml_dev_info::max_queue_pairs
+	 */
+};
+
+/**
+ * Configure an ML device.
+ *
+ * This function must be invoked first before any other function in the API.
+ * This function can also be re-invoked when a device is in the stopped state.
+ *
+ * The caller may use rte_ml_dev_info_get() to get the capability of each resources available
+ * for this ML device.
+ *
+ * @param dev_id
+ * The identifier of the device to configure.
+ * @param config
+ * The ML device configuration structure.
+ *
+ * @return
+ * - 0: Success, device configured.
+ * - < 0: Error code returned by the driver configuration function.
+ */
+__rte_experimental
+int
+rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config);
+
+/* Forward declaration */
+struct rte_ml_op;
+
+/** Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op */
+typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op);
+
+/** ML device queue pair configuration structure. */
+struct rte_ml_dev_qp_conf {
+	uint32_t nb_desc;
+	/**< Number of descriptors per queue pair.
+	 * This value cannot exceed the max_desc which is previously provided in
+	 * struct rte_ml_dev_info::max_desc
+	 */
+ rte_ml_dev_stop_flush_t cb;
+ /**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op.
+ * Value NULL is allowed, in which case callback will not be invoked.
+ * This function can be used to properly dispose of outstanding ML ops from all
+ * queue pairs, for example ops containing memory pointers.
+ * @see rte_ml_dev_stop()
+ */
+};
+
+/**
+ * Set up a queue pair for a device. This should only be called when the device is stopped.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param queue_pair_id
+ * The index of the queue pairs to set up. The value must be in the range [0, nb_queue_pairs - 1]
+ * previously supplied to rte_ml_dev_configure().
+ * @param qp_conf
+ * The pointer to the configuration data to be used for the queue pair.
+ * @param socket_id
+ * The *socket_id* argument is the socket identifier in case of NUMA.
+ * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated
+ * for the queue pair.
+ *
+ * @return
+ * - 0: Success, queue pair correctly set up.
+ * - < 0: Queue pair configuration failed.
+ */
+__rte_experimental
+int
+rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id,
+ const struct rte_ml_dev_qp_conf *qp_conf, int socket_id);
+
+/**
+ * Start an ML device.
+ *
+ * The device start step consists of setting the configured features and enabling the ML device
+ * to accept inference jobs.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ *
+ * @return
+ * - 0: Success, device started.
+ * - <0: Error code of the driver device start function.
+ */
+__rte_experimental
+int
+rte_ml_dev_start(int16_t dev_id);
+
+/**
+ * Stop an ML device. A stopped device cannot accept inference jobs.
+ * The device can be restarted with a call to rte_ml_dev_start().
+ *
+ * @param dev_id
+ * The identifier of the device.
+ *
+ * @return
+ * - 0: Success, device stopped.
+ * - <0: Error code of the driver device stop function.
+ */
+__rte_experimental
+int
+rte_ml_dev_stop(int16_t dev_id);
+
+/**
+ * Close an ML device. The device cannot be restarted!
+ *
+ * @param dev_id
+ * The identifier of the device.
+ *
+ * @return
+ * - 0 on successfully closing device.
+ * - <0 on failure to close device.
+ */
+__rte_experimental
+int
+rte_ml_dev_close(int16_t dev_id);
+
+/** Status of ML operation */
+enum rte_ml_op_status {
+ RTE_ML_OP_STATUS_SUCCESS = 0,
+ /**< Operation completed successfully */
+ RTE_ML_OP_STATUS_NOT_PROCESSED,
+ /**< Operation has not yet been processed by the device.
+ * When an ML op is enqueued to the device, the driver sets the status as
+ * RTE_ML_OP_STATUS_NOT_PROCESSED. Upon the ML operation completion,
+ * the respective status will be updated by the driver.
+ */
+ RTE_ML_OP_STATUS_ERROR,
+ /**< Operation completed with error.
+ * Application can invoke rte_ml_op_error_get() to get PMD specific
+ * error code if needed.
+ */
+};
+
+/** ML operation's input and output buffer representation as scatter gather list
+ */
+struct rte_ml_buff_seg {
+ rte_iova_t iova_addr;
+ /**< IOVA address of segment buffer. */
+ void *addr;
+ /**< Virtual address of segment buffer. */
+ uint32_t length;
+ /**< Segment length. */
+ uint32_t reserved;
+ /**< Reserved for future use. */
+ struct rte_ml_buff_seg *next;
+ /**< Points to next segment. Value NULL represents the last segment. */
+};
+
+/**
+ * ML Operation.
+ *
+ * This structure contains data related to performing an ML operation on the buffers using
+ * the model specified through model_id.
+ */
+struct rte_ml_op {
+ int16_t model_id;
+ /**< Model ID to be used for the operation. */
+ uint16_t nb_batches;
+ /**< Number of batches. Minimum value must be one.
+ * Input buffer must hold inference data for each batch as contiguous.
+ */
+ uint32_t reserved;
+ /**< Reserved for future use. */
+ struct rte_mempool *mempool;
+ /**< Pool from which operation is allocated. */
+ struct rte_ml_buff_seg input;
+ /**< Input buffer to hold the inference data. */
+ struct rte_ml_buff_seg output;
+ /**< Output buffer to hold the inference output by the driver. */
+ RTE_STD_C11
+ union {
+ uint64_t user_u64;
+ /**< User data as uint64_t.*/
+ void *user_ptr;
+ /**< User data as void*.*/
+ };
+ enum rte_ml_op_status status;
+ /**< Operation status. */
+} __rte_cache_aligned;
+
+/* Enqueue/Dequeue operations */
+
+/**
+ * Enqueue a burst of ML inferences for processing on an ML device.
+ *
+ * The rte_ml_enqueue_burst() function is invoked to place ML inference
+ * operations on the queue *qp_id* of the device designated by its *dev_id*.
+ *
+ * The *nb_ops* parameter is the number of inferences to process which are
+ * supplied in the *ops* array of *rte_ml_op* structures.
+ *
+ * The rte_ml_enqueue_burst() function returns the number of inferences it
+ * actually enqueued for processing. A return value equal to *nb_ops* means that
+ * all packets have been enqueued.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param qp_id
+ * The index of the queue pair which inferences are to be enqueued for processing.
+ * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
+ * *rte_ml_dev_configure*.
+ * @param ops
+ * The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the
+ * ML inferences to be processed.
+ * @param nb_ops
+ * The number of operations to process.
+ *
+ * @return
+ * The number of inference operations actually enqueued to the ML device.
+ * The return value can be less than the value of the *nb_ops* parameter when the ML device queue
+ * is full or if invalid parameters are specified in a *rte_ml_op*.
+ */
+__rte_experimental
+uint16_t
+rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
+
+/**
+ * Dequeue a burst of processed ML inferences operations from a queue on the ML device.
+ * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied
+ * in the *ops* array.
+ *
+ * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued,
+ * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array.
+ *
+ * A return value equal to *nb_ops* indicates that the queue contained at least *nb_ops* operations,
+ * and this is likely to signify that other processed operations remain in the devices output queue.
+ * Application implementing a "retrieve as many processed operations as possible" policy can check
+ * this specific case and keep invoking the rte_ml_dequeue_burst() function until a value less than
+ * *nb_ops* is returned.
+ *
+ * The rte_ml_dequeue_burst() function does not provide any error notification to avoid
+ * the corresponding overhead.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param qp_id
+ * The index of the queue pair from which to retrieve processed packets.
+ * The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
+ * rte_ml_dev_configure().
+ * @param ops
+ * The address of an array of pointers to *rte_ml_op* structures that must be large enough to
+ * store *nb_ops* pointers in it.
+ * @param nb_ops
+ * The maximum number of inferences to dequeue.
+ *
+ * @return
+ * The number of operations actually dequeued, which is the number of pointers
+ * to *rte_ml_op* structures effectively supplied to the *ops* array.
+ */
+__rte_experimental
+uint16_t
+rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
+
+/**
+ * Verbose error structure definition.
+ */
+struct rte_ml_op_error {
+ const char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */
+ uint64_t errcode; /**< Vendor specific error code. */
+};
+
+/**
+ * Get PMD specific error information for an ML op.
+ *
+ * When an ML operation completed with RTE_ML_OP_STATUS_ERROR as status,
+ * This API allows to get PMD specific error details.
+ *
+ * @param[in] dev_id
+ * Device identifier
+ * @param[in] op
+ * Handle of ML operation
+ * @param[in] error
+ * Address of structure rte_ml_op_error to be filled
+ *
+ * @return
+ * - Returns 0 on success
+ * - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error);
+
+/* Statistics operations */
+
+/** Device statistics. */
+struct rte_ml_dev_stats {
+ uint64_t enqueued_count;
+ /**< Count of all operations enqueued */
+ uint64_t dequeued_count;
+ /**< Count of all operations dequeued */
+ uint64_t enqueue_err_count;
+ /**< Total error count on operations enqueued */
+ uint64_t dequeue_err_count;
+ /**< Total error count on operations dequeued */
+};
+
+/**
+ * Retrieve the general I/O statistics of a device.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param stats
+ * Pointer to structure to where statistics will be copied.
+ * On error, this location may or may not have been modified.
+ * @return
+ * - 0 on success
+ * - -EINVAL: If invalid parameter pointer is provided.
+ */
+__rte_experimental
+int
+rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats);
+
+/**
+ * Reset the statistics of a device.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ */
+__rte_experimental
+void
+rte_ml_dev_stats_reset(int16_t dev_id);
+
+/* Extended statistics counter names are bounded by RTE_ML_STR_MAX. */
+
+/**
+ * A name-key lookup element for extended statistics.
+ *
+ * This structure is used to map between names and ID numbers for extended ML device statistics.
+ */
+struct rte_ml_dev_xstats_map {
+ uint16_t id;
+ /**< xstat identifier */
+ char name[RTE_ML_STR_MAX];
+ /**< xstat name */
+};
+
+/**
+ * Retrieve names of extended statistics of an ML device.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param[out] xstats_map
+ * Block of memory to insert id and names into. Must be at least size in capacity.
+ * If set to NULL, function returns required capacity.
+ *
+ * @return
+ * - Positive value on success:
+ * - The return value is the number of entries filled in the stats map.
+ * - If xstats_map set to NULL then required capacity for xstats_map.
+ * - Negative value on error:
+ * - -ENODEV: for invalid *dev_id*.
+ * - -ENOTSUP: if the device doesn't support this function.
+ */
+__rte_experimental
+int
+rte_ml_dev_xstats_names_get(int16_t dev_id, struct rte_ml_dev_xstats_map *xstats_map);
+
+/**
+ * Retrieve the value of a single stat by requesting it by name.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param name
+ * The stat name to retrieve.
+ * @param stat_id
+ * If non-NULL, the numerical id of the stat will be returned, so that further requests for
+ * the stat can be got using rte_ml_dev_xstats_get, which will be faster as it doesn't need to
+ * scan a list of names for the stat.
+ * @param[out] value
+ * Must be non-NULL, retrieved xstat value will be stored in this address.
+ *
+ * @return
+ * - 0: Successfully retrieved xstat value.
+ * - -EINVAL: invalid parameters.
+ * - -ENOTSUP: if not supported.
+ */
+__rte_experimental
+int
+rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id, uint64_t *value);
+
+/**
+ * Retrieve extended statistics of an ML device.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param stat_ids
+ * The id numbers of the stats to get. The ids can be fetched from the stat position in the
+ * stat list from rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get().
+ * @param values
+ * The values for each stats request by ID.
+ * @param nb_ids
+ * The number of stats requested.
+ * @return
+ * - Positive value: number of stat entries filled into the values array
+ * - Negative value on error:
+ * - -ENODEV: for invalid *dev_id*.
+ * - -ENOTSUP: if the device doesn't support this function.
+ */
+__rte_experimental
+int
+rte_ml_dev_xstats_get(int16_t dev_id, const uint16_t *stat_ids, uint64_t *values, uint16_t nb_ids);
+
+/**
+ * Reset the values of the xstats of the selected component in the device.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param stat_ids
+ * Selects specific statistics to be reset. When NULL, all statistics will be reset.
+ * If non-NULL, must point to array of at least *nb_ids* size.
+ * @param nb_ids
+ * The number of ids available from the *ids* array. Ignored when ids is NULL.
+ * @return
+ * - 0: Successfully reset the statistics to zero.
+ * - -EINVAL: invalid parameters.
+ * - -ENOTSUP: if not supported.
+ */
+__rte_experimental
+int
+rte_ml_dev_xstats_reset(int16_t dev_id, const uint16_t *stat_ids, uint16_t nb_ids);
+
+/* Utility operations */
+
+/**
+ * Dump internal information about *dev_id* to the FILE* provided in *fd*.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param fd
+ * A pointer to a file for output.
+ * @return
+ * - 0: on success.
+ * - <0: on failure.
+ */
+__rte_experimental
+int
+rte_ml_dev_dump(int16_t dev_id, FILE *fd);
+
+/**
+ * Trigger the ML device self test.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @return
+ * - 0: Selftest successful.
+ * - -ENOTSUP: if the device doesn't support selftest.
+ * - other values < 0 on failure.
+ */
+__rte_experimental
+int
+rte_ml_dev_selftest(int16_t dev_id);
+
+/* Model operations */
+
+/** ML model load parameters
+ *
+ * Parameters required to load an ML model.
+ */
+struct rte_ml_model_params {
+ void *addr;
+ /**< Address of model buffer */
+ size_t size;
+ /**< Size of model buffer */
+};
+
+/**
+ * Load an ML model to the device.
+ *
+ * Load an ML model to the device with parameters requested in the structure rte_ml_model_params.
+ *
+ * @param[in] dev_id
+ * The identifier of the device.
+ * @param[in] params
+ * Parameters for the model to be loaded.
+ * @param[out] model_id
+ * Identifier of the model loaded.
+ *
+ * @return
+ * - 0: Success, Model created.
+ * - < 0: Failure, Error code of the model load driver function.
+ */
+__rte_experimental
+int
+rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id);
+
+/**
+ * Unload an ML model from the device.
+ *
+ * @param[in] dev_id
+ * The identifier of the device.
+ * @param[in] model_id
+ * Identifier of the model to be unloaded.
+ *
+ * @return
+ * - 0: Success, Model destroyed.
+ * - < 0: Failure, Error code of the model unload driver function.
+ */
+__rte_experimental
+int
+rte_ml_model_unload(int16_t dev_id, int16_t model_id);
+
+/**
+ * Start an ML model for the given device ID.
+ *
+ * Start an ML model to accept inference requests.
+ *
+ * @param[in] dev_id
+ * The identifier of the device.
+ * @param[in] model_id
+ * Identifier of the model to be started.
+ *
+ * @return
+ * - 0: Success, Model started.
+ * - < 0: Failure, Error code of the model start driver function.
+ */
+__rte_experimental
+int
+rte_ml_model_start(int16_t dev_id, int16_t model_id);
+
+/**
+ * Stop an ML model for the given device ID.
+ *
+ * Model stop disables the ML model from being used for inference jobs.
+ * All inference jobs must have been completed before model stop is attempted.
+ *
+ * @param[in] dev_id
+ * The identifier of the device.
+ * @param[in] model_id
+ * Identifier of the model to be stopped.
+ *
+ * @return
+ * - 0: Success, Model stopped.
+ * - < 0: Failure, Error code of the model stop driver function.
+ */
+__rte_experimental
+int
+rte_ml_model_stop(int16_t dev_id, int16_t model_id);
+
+/**
+ * Input and output data types. ML models can operate on reduced precision
+ * datatypes to achieve better power efficiency, lower network latency and lower memory footprint.
+ * This enum is used to represent the lower precision integer and floating point types used
+ * by ML models.
+ */
+enum rte_ml_io_type {
+ RTE_ML_IO_TYPE_UNKNOWN = 0,
+ /**< Invalid or unknown type */
+ RTE_ML_IO_TYPE_INT8,
+ /**< 8-bit integer */
+ RTE_ML_IO_TYPE_UINT8,
+ /**< 8-bit unsigned integer */
+ RTE_ML_IO_TYPE_INT16,
+ /**< 16-bit integer */
+ RTE_ML_IO_TYPE_UINT16,
+ /**< 16-bit unsigned integer */
+ RTE_ML_IO_TYPE_INT32,
+ /**< 32-bit integer */
+ RTE_ML_IO_TYPE_UINT32,
+ /**< 32-bit unsigned integer */
+ RTE_ML_IO_TYPE_FP8,
+ /**< 8-bit floating point number */
+ RTE_ML_IO_TYPE_FP16,
+ /**< IEEE 754 16-bit floating point number */
+ RTE_ML_IO_TYPE_FP32,
+ /**< IEEE 754 32-bit floating point number */
+ RTE_ML_IO_TYPE_BFLOAT16
+ /**< 16-bit brain floating point number. */
+};
+
+/**
+ * Input and output format. This is used to represent the encoding type of multi-dimensional
+ * data used by ML models.
+ */
+enum rte_ml_io_format {
+ RTE_ML_IO_FORMAT_NCHW = 1,
+ /**< Batch size (N) x channels (C) x height (H) x width (W) */
+ RTE_ML_IO_FORMAT_NHWC,
+ /**< Batch size (N) x height (H) x width (W) x channels (C) */
+ RTE_ML_IO_FORMAT_CHWN,
+ /**< Channels (C) x height (H) x width (W) x batch size (N) */
+ RTE_ML_IO_FORMAT_3D,
+ /**< Format to represent a 3 dimensional data */
+ RTE_ML_IO_FORMAT_2D,
+ /**< Format to represent matrix data */
+ RTE_ML_IO_FORMAT_1D,
+ /**< Format to represent vector data */
+ RTE_ML_IO_FORMAT_SCALAR,
+ /**< Format to represent scalar data */
+};
+
+/**
+ * Input and output shape. This structure represents the encoding format and dimensions
+ * of the tensor or vector.
+ *
+ * The data can be a 4D / 3D tensor, matrix, vector or a scalar. Number of dimensions used
+ * for the data would depend on the format. Unused dimensions to be set to 1.
+ */
+struct rte_ml_io_shape {
+ enum rte_ml_io_format format;
+ /**< Format of the data */
+ uint32_t w;
+ /**< First dimension */
+ uint32_t x;
+ /**< Second dimension */
+ uint32_t y;
+ /**< Third dimension */
+ uint32_t z;
+ /**< Fourth dimension */
+};
+
+/** Input and output data information structure
+ *
+ * Specifies the type and shape of input and output data.
+ */
+struct rte_ml_io_info {
+ char name[RTE_ML_STR_MAX];
+ /**< Name of data */
+ struct rte_ml_io_shape shape;
+ /**< Shape of data */
+ enum rte_ml_io_type qtype;
+ /**< Type of quantized data */
+ enum rte_ml_io_type dtype;
+ /**< Type of de-quantized data */
+};
+
+/** Model information structure */
+struct rte_ml_model_info {
+ char name[RTE_ML_STR_MAX];
+ /**< Model name. */
+ char version[RTE_ML_STR_MAX];
+ /**< Model version */
+ int16_t model_id;
+ /**< Model ID */
+ uint16_t device_id;
+ /**< Device ID */
+ uint16_t batch_size;
+ /**< Maximum number of batches that the model can process simultaneously */
+ uint32_t nb_inputs;
+ /**< Number of inputs */
+ const struct rte_ml_io_info *input_info;
+ /**< Input info array. Array size is equal to nb_inputs */
+ uint32_t nb_outputs;
+ /**< Number of outputs */
+ const struct rte_ml_io_info *output_info;
+	/**< Output info array. Array size is equal to nb_outputs */
+ uint64_t wb_size;
+ /**< Size of model weights and bias */
+};
+
+/**
+ * Get ML model information.
+ *
+ * @param[in] dev_id
+ * The identifier of the device.
+ * @param[in] model_id
+ * Identifier for the model created
+ * @param[out] model_info
+ * Pointer to a model info structure
+ *
+ * @return
+ * - Returns 0 on success
+ * - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_model_info_get(int16_t dev_id, int16_t model_id, struct rte_ml_model_info *model_info);
+
+/**
+ * Update the model parameters without unloading model.
+ *
+ * Update model parameters such as weights and bias without unloading the model.
+ * rte_ml_model_stop() must be called before invoking this API.
+ *
+ * @param[in] dev_id
+ * The identifier of the device.
+ * @param[in] model_id
+ * Identifier for the model created
+ * @param[in] buffer
+ * Pointer to the model weights and bias buffer.
+ * Size of the buffer is equal to wb_size returned in *rte_ml_model_info*.
+ *
+ * @return
+ * - Returns 0 on success
+ * - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_model_params_update(int16_t dev_id, int16_t model_id, void *buffer);
+
+/* IO operations */
+
+/**
+ * Get size of quantized and dequantized input buffers.
+ *
+ * Calculate the size of buffers required for quantized and dequantized input data.
+ * This API would return the buffer sizes for the number of batches provided and would
+ * consider the alignment requirements as per the PMD. Input sizes computed by this API can
+ * be used by the application to allocate buffers.
+ *
+ * @param[in] dev_id
+ * The identifier of the device.
+ * @param[in] model_id
+ * Identifier for the model created
+ * @param[in] nb_batches
+ * Number of batches of input to be processed in a single inference job
+ * @param[out] input_qsize
+ * Quantized input size pointer.
+ * NULL value is allowed, in which case input_qsize is not calculated by the driver.
+ * @param[out] input_dsize
+ * Dequantized input size pointer.
+ * NULL value is allowed, in which case input_dsize is not calculated by the driver.
+ *
+ * @return
+ * - Returns 0 on success
+ * - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_io_input_size_get(int16_t dev_id, int16_t model_id, uint32_t nb_batches,
+ uint64_t *input_qsize, uint64_t *input_dsize);
+
+/**
+ * Get size of quantized and dequantized output buffers.
+ *
+ * Calculate the size of buffers required for quantized and dequantized output data.
+ * This API would return the buffer sizes for the number of batches provided and would consider
+ * the alignment requirements as per the PMD. Output sizes computed by this API can be used by the
+ * application to allocate buffers.
+ *
+ * @param[in] dev_id
+ * The identifier of the device.
+ * @param[in] model_id
+ * Identifier for the model created
+ * @param[in] nb_batches
+ * Number of batches of input to be processed in a single inference job
+ * @param[out] output_qsize
+ * Quantized output size pointer.
+ * NULL value is allowed, in which case output_qsize is not calculated by the driver.
+ * @param[out] output_dsize
+ * Dequantized output size pointer.
+ * NULL value is allowed, in which case output_dsize is not calculated by the driver.
+ *
+ * @return
+ * - Returns 0 on success
+ * - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_io_output_size_get(int16_t dev_id, int16_t model_id, uint32_t nb_batches,
+ uint64_t *output_qsize, uint64_t *output_dsize);
+
+/**
+ * Quantize input data.
+ *
+ * Quantization converts data from a higher precision types to a lower precision types to improve
+ * the throughput and efficiency of the model execution with minimal loss of accuracy.
+ * Types of dequantized data and quantized data are specified by the model.
+ *
+ * @param[in] dev_id
+ * The identifier of the device.
+ * @param[in] model_id
+ * Identifier for the model
+ * @param[in] nb_batches
+ * Number of batches in the dequantized input buffer
+ * @param[in] dbuffer
+ * Address of dequantized input data
+ * @param[out] qbuffer
+ *   Address of quantized input data
+ *
+ * @return
+ * - Returns 0 on success
+ * - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_io_quantize(int16_t dev_id, int16_t model_id, uint16_t nb_batches, void *dbuffer,
+ void *qbuffer);
+
+/**
+ * Dequantize output data.
+ *
+ * Dequantization converts data from a lower precision type to a higher precision type.
+ * Types of quantized data and dequantized are specified by the model.
+ *
+ * @param[in] dev_id
+ * The identifier of the device.
+ * @param[in] model_id
+ * Identifier for the model
+ * @param[in] nb_batches
+ * Number of batches in the dequantized output buffer
+ * @param[in] qbuffer
+ * Address of quantized output data
+ * @param[out] dbuffer
+ *   Address of dequantized output data
+ *
+ * @return
+ * - Returns 0 on success
+ * - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_io_dequantize(int16_t dev_id, int16_t model_id, uint16_t nb_batches, void *qbuffer,
+ void *dbuffer);
+
+/* ML op pool operations */
+
+/**
+ * Create an ML operation pool
+ *
+ * @param name
+ * ML operations pool name
+ * @param nb_elts
+ * Number of elements in pool
+ * @param cache_size
+ * Number of elements to cache on lcore, see
+ * *rte_mempool_create* for further details about cache size
+ * @param user_size
+ * Size of private data to allocate for user with each operation
+ * @param socket_id
+ *   Socket identifier to allocate memory on
+ * @return
+ * - On success pointer to mempool
+ * - On failure NULL
+ */
+__rte_experimental
+struct rte_mempool *
+rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
+ uint16_t user_size, int socket_id);
+
+/**
+ * Free an ML operation pool
+ *
+ * @param mempool
+ * A pointer to the mempool structure.
+ * If NULL then, the function does nothing.
+ */
+__rte_experimental
+void
+rte_ml_op_pool_free(struct rte_mempool *mempool);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_MLDEV_H */
new file mode 100644
@@ -0,0 +1,5 @@
+EXPERIMENTAL {
+
+ local: *;
+};
+