[01/27] eventdev: dlb upstream prerequisites

Message ID 1593232671-5690-2-git-send-email-timothy.mcdaniel@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Series event/dlb Intel DLB PMD

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-nxp-Performance fail Performance Testing issues
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-testing fail Testing issues
ci/Intel-compilation fail Compilation issues

Commit Message

Timothy McDaniel June 27, 2020, 4:37 a.m. UTC
  From: "McDaniel, Timothy" <timothy.mcdaniel@intel.com>

The DLB hardware does not conform exactly to the eventdev interface.
1) It has a limit on the number of queues that may be linked to a port.
2) Some ports are further restricted to a maximum of one linked queue.
3) It does not (currently) have the ability to carry the flow_id as part
of the event (QE) payload.

Due to the above, we would like to propose the following enhancements.

1) Add new fields to the rte_event_dev_info struct. These fields allow
the device to advertise its capabilities so that applications can take
the appropriate actions based on those capabilities.

    struct rte_event_dev_info {
	uint32_t max_event_port_links;
	/**< Maximum number of queues that can be linked to a single event
	 * port by this device.
	 */

	uint8_t max_single_link_event_port_queue_pairs;
	/**< Maximum number of event ports and queues that are optimized for
	 * (and only capable of) single-link configurations supported by this
	 * device. These ports and queues are not accounted for in
	 * max_event_ports or max_event_queues.
	 */
    }
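
For illustration only (not part of this patch), a minimal sketch of how an
application might consume the proposed fields; the helper name and its
parameters are assumptions made for this example.

	#include <rte_eventdev.h>

	/* Clamp the requested number of queue links per port to what the
	 * device advertises via the proposed max_event_port_links field.
	 */
	static uint8_t
	clamp_port_links(uint8_t dev_id, uint8_t nb_links)
	{
		struct rte_event_dev_info info;

		if (rte_event_dev_info_get(dev_id, &info) < 0)
			return 0;
		if (nb_links > info.max_event_port_links)
			nb_links = (uint8_t)info.max_event_port_links;
		return nb_links;
	}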

2) Add a new field to the rte_event_dev_config struct. This field allows the
application to specify how many of its ports are limited to a single link,
or will be used in single link mode.

    /** Event device configuration structure */
    struct rte_event_dev_config {
	uint8_t nb_single_link_event_port_queues;
	/**< Number of event ports and queues that will be singly-linked to
	 * each other. These are a subset of the overall event ports and
	 * queues; this value cannot exceed *nb_event_ports* or
	 * *nb_event_queues*. If the device has ports and queues that are
	 * optimized for single-link usage, this field is a hint for how many
	 * to allocate; otherwise, regular event ports and queues can be used.
	 */
    }
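
For illustration only (not part of this patch), a sketch of a device
configuration that reserves two of eight ports/queues as single-link pairs;
all of the counts and the function name are assumptions for this example,
and a real application would clamp them against rte_event_dev_info.

	#include <rte_eventdev.h>

	static int
	configure_dev(uint8_t dev_id)
	{
		struct rte_event_dev_config cfg = {
			.nb_event_queues = 8,
			.nb_event_ports = 8,
			.nb_events_limit = 4096,
			.nb_event_queue_flows = 1024,
			.nb_event_port_dequeue_depth = 32,
			.nb_event_port_enqueue_depth = 32,
			/* two ports/queues used as single-link pairs */
			.nb_single_link_event_port_queues = 2,
		};

		return rte_event_dev_configure(dev_id, &cfg);
	}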

3) Replace the dedicated implicit_release_disabled field with a bit field
of explicit port capabilities. The implicit_release_disable functionality
is assigned to one bit, and a port-is-single-link-only attribute is
assigned to another, with the remaining bits available for future assignment.

	/** Event port configuration bitmap flags */
	#define RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL    (1ULL << 0)
	/**< Configure the port not to release outstanding events in
	 * rte_event_dev_dequeue_burst(). If set, all events received through
	 * the port must be explicitly released with RTE_EVENT_OP_RELEASE or
	 * RTE_EVENT_OP_FORWARD. Must be unset if the device is not
	 * RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE capable.
	 */
	#define RTE_EVENT_PORT_CFG_SINGLE_LINK         (1ULL << 1)
	/**< This event port links only to a single event queue.
	 *
	 *  @see rte_event_port_setup(), rte_event_port_link()
	 */

	#define RTE_EVENT_PORT_ATTR_IMPLICIT_RELEASE_DISABLE 3
	/**
	 * The implicit release disable attribute of the port
	 */

	struct rte_event_port_conf {
		uint32_t event_port_cfg; /**< Port cfg flags(EVENT_PORT_CFG_) */
	}
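
For illustration only (not part of this patch), a sketch of setting up a
single-link port with the proposed event_port_cfg field; dev_id, port_id and
queue_id are assumptions for this example.

	#include <rte_eventdev.h>

	static int
	setup_single_link_port(uint8_t dev_id, uint8_t port_id, uint8_t queue_id)
	{
		struct rte_event_port_conf conf;
		int ret;

		ret = rte_event_port_default_conf_get(dev_id, port_id, &conf);
		if (ret < 0)
			return ret;

		conf.event_port_cfg |= RTE_EVENT_PORT_CFG_SINGLE_LINK;

		ret = rte_event_port_setup(dev_id, port_id, &conf);
		if (ret < 0)
			return ret;

		/* a single-link port may be linked to exactly one queue */
		return rte_event_port_link(dev_id, port_id, &queue_id, NULL, 1);
	}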

4) Add a UMWAIT/UMONITOR bit to rte_cpuflags.
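
For illustration only (not part of this patch), a sketch of the runtime check
an application might do before using UMONITOR/UMWAIT-based waiting; the flag
name RTE_CPUFLAG_WAITPKG is an assumption here, not taken from this patch.

	#include <rte_cpuflags.h>

	static inline int
	umwait_supported(void)
	{
		/* returns 1 only when the CPU reports the WAITPKG feature */
		return rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG) > 0;
	}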

5) Add a new API that is useful for probing PCI devices.

	/**
	 * @internal
	 * Wrapper for use by PCI drivers as a .probe function to attach to an event
	 * interface. Same as rte_event_pmd_pci_probe, except the caller can specify
	 * the name.
	 */
	static inline int
	rte_event_pmd_pci_probe_named(struct rte_pci_driver *pci_drv,
				    struct rte_pci_device *pci_dev,
				    size_t private_data_size,
				    eventdev_pmd_pci_callback_t devinit,
				    const char *name);
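
For illustration only (not part of this patch), a sketch of how a PCI event
PMD might call the new helper from its .probe callback; the driver-side names
(my_evdev_*, "event_dlb") are placeholders, and the init callback is assumed
to match eventdev_pmd_pci_callback_t (taking a struct rte_eventdev pointer).

	#include <rte_common.h>
	#include <rte_bus_pci.h>
	#include <rte_eventdev_pmd_pci.h>

	struct my_evdev_private { int dummy; };	/* placeholder private data */

	static int
	my_evdev_init(struct rte_eventdev *dev)
	{
		RTE_SET_USED(dev);	/* device-specific init would go here */
		return 0;
	}

	static int
	my_evdev_pci_probe(struct rte_pci_driver *pci_drv,
			   struct rte_pci_device *pci_dev)
	{
		return rte_event_pmd_pci_probe_named(pci_drv, pci_dev,
					sizeof(struct my_evdev_private),
					my_evdev_init, "event_dlb");
	}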

Signed-off-by: McDaniel, Timothy <timothy.mcdaniel@intel.com>
---
 app/test-eventdev/evt_common.h                     |    1 +
 app/test-eventdev/test_order_atq.c                 |    4 +
 app/test-eventdev/test_order_common.c              |    6 +-
 app/test-eventdev/test_order_queue.c               |    4 +
 app/test-eventdev/test_perf_atq.c                  |    1 +
 app/test-eventdev/test_perf_queue.c                |    1 +
 app/test-eventdev/test_pipeline_atq.c              |    1 +
 app/test-eventdev/test_pipeline_queue.c            |    1 +
 app/test/test_eventdev.c                           |    4 +-
 drivers/event/dpaa2/dpaa2_eventdev.c               |    2 +-
 drivers/event/octeontx/ssovf_evdev.c               |    2 +-
 drivers/event/skeleton/skeleton_eventdev.c         |    2 +-
 drivers/event/sw/sw_evdev.c                        |    5 +-
 drivers/event/sw/sw_evdev_selftest.c               |    9 +-
 .../eventdev_pipeline/pipeline_worker_generic.c    |    8 +-
 examples/eventdev_pipeline/pipeline_worker_tx.c    |    3 +
 examples/l2fwd-event/l2fwd_event_generic.c         |    5 +-
 examples/l2fwd-event/l2fwd_event_internal_port.c   |    5 +-
 examples/l3fwd/l3fwd_event_generic.c               |    5 +-
 examples/l3fwd/l3fwd_event_internal_port.c         |    5 +-
 lib/librte_eal/x86/include/rte_cpuflags.h          |    1 +
 lib/librte_eal/x86/rte_cpuflags.c                  |    1 +
 lib/librte_eventdev/meson.build                    |    1 +
 lib/librte_eventdev/rte_event_eth_tx_adapter.c     |    2 +-
 lib/librte_eventdev/rte_eventdev.c                 |  198 ++++++++++++++++++--
 lib/librte_eventdev/rte_eventdev.h                 |  198 ++++++++++++++++++++
 lib/librte_eventdev/rte_eventdev_pmd_pci.h         |   54 ++++++
 lib/librte_eventdev/rte_eventdev_version.map       |   13 +-
 28 files changed, 507 insertions(+), 35 deletions(-)
  

Comments

Jerin Jacob June 27, 2020, 7:44 a.m. UTC | #1
> +
> +/** Event port configuration structure */
> +struct rte_event_port_conf_v20 {
> +       int32_t new_event_threshold;
> +       /**< A backpressure threshold for new event enqueues on this port.
> +        * Use for *closed system* event dev where event capacity is limited,
> +        * and cannot exceed the capacity of the event dev.
> +        * Configuring ports with different thresholds can make higher priority
> +        * traffic less likely to  be backpressured.
> +        * For example, a port used to inject NIC Rx packets into the event dev
> +        * can have a lower threshold so as not to overwhelm the device,
> +        * while ports used for worker pools can have a higher threshold.
> +        * This value cannot exceed the *nb_events_limit*
> +        * which was previously supplied to rte_event_dev_configure().
> +        * This should be set to '-1' for *open system*.
> +        */
> +       uint16_t dequeue_depth;
> +       /**< Configure number of bulk dequeues for this event port.
> +        * This value cannot exceed the *nb_event_port_dequeue_depth*
> +        * which previously supplied to rte_event_dev_configure().
> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
> +        */
> +       uint16_t enqueue_depth;
> +       /**< Configure number of bulk enqueues for this event port.
> +        * This value cannot exceed the *nb_event_port_enqueue_depth*
> +        * which previously supplied to rte_event_dev_configure().
> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
> +        */
>         uint8_t disable_implicit_release;
>         /**< Configure the port not to release outstanding events in
>          * rte_event_dev_dequeue_burst(). If true, all events received through
> @@ -733,6 +911,14 @@ struct rte_event_port_conf {
>  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
>                                 struct rte_event_port_conf *port_conf);
>
> +int
> +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
> +                               struct rte_event_port_conf_v20 *port_conf);
> +
> +int
> +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
> +                                     struct rte_event_port_conf *port_conf);

Hi Timothy,

+ ABI Maintainers (Ray, Neil)

# As per my understanding, the structures can not be versioned, only
function can be versioned.
i.e we can not make any change to " struct rte_event_port_conf"

# We have a similar case with ethdev and it deferred to next release v20.11
http://patches.dpdk.org/patch/69113/

Regarding the API changes:
# The slow path changes general looks good to me. I will review the
next level in the coming days
# The following fast path changes bothers to me. Could you share more
details on below change?

diff --git a/app/test-eventdev/test_order_atq.c
b/app/test-eventdev/test_order_atq.c
index 3366cfc..8246b96 100644
--- a/app/test-eventdev/test_order_atq.c
+++ b/app/test-eventdev/test_order_atq.c
@@ -34,6 +34,8 @@
                        continue;
                }

+               ev.flow_id = ev.mbuf->udata64;
+
# Since RC1 is near, I am not sure how to accommodate the API changes
now and sort out ABI stuffs.
# Other concern is eventdev spec get bloated with versioning files
just for ONE release as 20.11 will be OK to change the ABI.
# While we discuss the API change, Please send deprecation notice for
ABI change for 20.11,
so that there is no ambiguity of this patch for the 20.11 release.
  
Timothy McDaniel June 29, 2020, 7:30 p.m. UTC | #2
-----Original Message-----
From: Jerin Jacob <jerinjacobk@gmail.com> 
Sent: Saturday, June 27, 2020 2:45 AM
To: McDaniel, Timothy <timothy.mcdaniel@intel.com>; Ray Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
Cc: Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom <mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage <gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites

> +
> +/** Event port configuration structure */
> +struct rte_event_port_conf_v20 {
> +       int32_t new_event_threshold;
> +       /**< A backpressure threshold for new event enqueues on this port.
> +        * Use for *closed system* event dev where event capacity is limited,
> +        * and cannot exceed the capacity of the event dev.
> +        * Configuring ports with different thresholds can make higher priority
> +        * traffic less likely to  be backpressured.
> +        * For example, a port used to inject NIC Rx packets into the event dev
> +        * can have a lower threshold so as not to overwhelm the device,
> +        * while ports used for worker pools can have a higher threshold.
> +        * This value cannot exceed the *nb_events_limit*
> +        * which was previously supplied to rte_event_dev_configure().
> +        * This should be set to '-1' for *open system*.
> +        */
> +       uint16_t dequeue_depth;
> +       /**< Configure number of bulk dequeues for this event port.
> +        * This value cannot exceed the *nb_event_port_dequeue_depth*
> +        * which previously supplied to rte_event_dev_configure().
> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
> +        */
> +       uint16_t enqueue_depth;
> +       /**< Configure number of bulk enqueues for this event port.
> +        * This value cannot exceed the *nb_event_port_enqueue_depth*
> +        * which previously supplied to rte_event_dev_configure().
> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
> +        */
>         uint8_t disable_implicit_release;
>         /**< Configure the port not to release outstanding events in
>          * rte_event_dev_dequeue_burst(). If true, all events received through
> @@ -733,6 +911,14 @@ struct rte_event_port_conf {
>  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
>                                 struct rte_event_port_conf *port_conf);
>
> +int
> +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
> +                               struct rte_event_port_conf_v20 *port_conf);
> +
> +int
> +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
> +                                     struct rte_event_port_conf *port_conf);

Hi Timothy,

+ ABI Maintainers (Ray, Neil)

# As per my understanding, the structures can not be versioned, only
function can be versioned.
i.e we can not make any change to " struct rte_event_port_conf"

# We have a similar case with ethdev and it deferred to next release v20.11
http://patches.dpdk.org/patch/69113/

Regarding the API changes:
# The slow path changes general looks good to me. I will review the
next level in the coming days
# The following fast path changes bothers to me. Could you share more
details on below change?

diff --git a/app/test-eventdev/test_order_atq.c
b/app/test-eventdev/test_order_atq.c
index 3366cfc..8246b96 100644
--- a/app/test-eventdev/test_order_atq.c
+++ b/app/test-eventdev/test_order_atq.c
@@ -34,6 +34,8 @@
                        continue;
                }

+               ev.flow_id = ev.mbuf->udata64;
+
# Since RC1 is near, I am not sure how to accommodate the API changes
now and sort out ABI stuffs.
# Other concern is eventdev spec get bloated with versioning files
just for ONE release as 20.11 will be OK to change the ABI.
# While we discuss the API change, Please send deprecation notice for
ABI change for 20.11,
so that there is no ambiguity of this patch for the 20.11 release.

Hello Jerin,

Thank you for the review comments.

With regard to your comments regarding the fast path flow_id change, the Intel DLB hardware
is not capable of transferring the flow_id as part of the event itself. We therefore require a mechanism
to accomplish this. What we have done to work around this is to require the application to embed the flow_id
within the data payload. The new flag, #define RTE_EVENT_DEV_CAP_CARRY_FLOW_ID (1ULL << 9), can be used
by applications to determine if they need to embed the flow_id, or if its automatically propagated and present in the
received event.

What we should have done is to wrap the assignment with a conditional.  

if (!(device_capability_flags & RTE_EVENT_DEV_CAP_CARRY_FLOW_ID))
	ev.flow_id = ev.mbuf->udata64;

This would minimize/eliminate any performance impact due to the processor's branch prediction logic.
The assignment then becomes in essence a NOOP for all event devices that are capable of carrying the flow_id as part of the event payload itself.
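
Expanded into a self-contained sketch (illustrative only; dev_id and the
choice of udata64 as the carrier field are assumptions), the pattern would
look like:

	#include <rte_eventdev.h>
	#include <rte_mbuf.h>

	/* cache the capability once at init time */
	static int carry_flow_id;

	static void
	flow_id_cap_init(uint8_t dev_id)
	{
		struct rte_event_dev_info info;

		rte_event_dev_info_get(dev_id, &info);
		carry_flow_id =
			!!(info.event_dev_cap & RTE_EVENT_DEV_CAP_CARRY_FLOW_ID);
	}

	/* restore flow_id only when the device cannot carry it */
	static inline void
	restore_flow_id(struct rte_event *ev)
	{
		if (!carry_flow_id)
			ev->flow_id = ev->mbuf->udata64;
	}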

Thanks,
Tim



Thanks,
Tim
  
Jerin Jacob June 30, 2020, 4:21 a.m. UTC | #3
On Tue, Jun 30, 2020 at 1:01 AM McDaniel, Timothy
<timothy.mcdaniel@intel.com> wrote:
>
> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Saturday, June 27, 2020 2:45 AM
> To: McDaniel, Timothy <timothy.mcdaniel@intel.com>; Ray Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
> Cc: Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom <mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage <gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
> Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
>
> > +
> > +/** Event port configuration structure */
> > +struct rte_event_port_conf_v20 {
> > +       int32_t new_event_threshold;
> > +       /**< A backpressure threshold for new event enqueues on this port.
> > +        * Use for *closed system* event dev where event capacity is limited,
> > +        * and cannot exceed the capacity of the event dev.
> > +        * Configuring ports with different thresholds can make higher priority
> > +        * traffic less likely to  be backpressured.
> > +        * For example, a port used to inject NIC Rx packets into the event dev
> > +        * can have a lower threshold so as not to overwhelm the device,
> > +        * while ports used for worker pools can have a higher threshold.
> > +        * This value cannot exceed the *nb_events_limit*
> > +        * which was previously supplied to rte_event_dev_configure().
> > +        * This should be set to '-1' for *open system*.
> > +        */
> > +       uint16_t dequeue_depth;
> > +       /**< Configure number of bulk dequeues for this event port.
> > +        * This value cannot exceed the *nb_event_port_dequeue_depth*
> > +        * which previously supplied to rte_event_dev_configure().
> > +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
> > +        */
> > +       uint16_t enqueue_depth;
> > +       /**< Configure number of bulk enqueues for this event port.
> > +        * This value cannot exceed the *nb_event_port_enqueue_depth*
> > +        * which previously supplied to rte_event_dev_configure().
> > +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
> > +        */
> >         uint8_t disable_implicit_release;
> >         /**< Configure the port not to release outstanding events in
> >          * rte_event_dev_dequeue_burst(). If true, all events received through
> > @@ -733,6 +911,14 @@ struct rte_event_port_conf {
> >  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
> >                                 struct rte_event_port_conf *port_conf);
> >
> > +int
> > +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
> > +                               struct rte_event_port_conf_v20 *port_conf);
> > +
> > +int
> > +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
> > +                                     struct rte_event_port_conf *port_conf);
>
> Hi Timothy,
>
> + ABI Maintainers (Ray, Neil)
>
> # As per my understanding, the structures can not be versioned, only
> function can be versioned.
> i.e we can not make any change to " struct rte_event_port_conf"
>
> # We have a similar case with ethdev and it deferred to next release v20.11
> http://patches.dpdk.org/patch/69113/
>
> Regarding the API changes:
> # The slow path changes general looks good to me. I will review the
> next level in the coming days
> # The following fast path changes bothers to me. Could you share more
> details on below change?
>
> diff --git a/app/test-eventdev/test_order_atq.c
> b/app/test-eventdev/test_order_atq.c
> index 3366cfc..8246b96 100644
> --- a/app/test-eventdev/test_order_atq.c
> +++ b/app/test-eventdev/test_order_atq.c
> @@ -34,6 +34,8 @@
>                         continue;
>                 }
>
> +               ev.flow_id = ev.mbuf->udata64;
> +
> # Since RC1 is near, I am not sure how to accommodate the API changes
> now and sort out ABI stuffs.
> # Other concern is eventdev spec get bloated with versioning files
> just for ONE release as 20.11 will be OK to change the ABI.
> # While we discuss the API change, Please send deprecation notice for
> ABI change for 20.11,
> so that there is no ambiguity of this patch for the 20.11 release.
>
> Hello Jerin,
>
> Thank you for the review comments.
>
> With regard to your comments regarding the fast path flow_id change, the Intel DLB hardware
> is not capable of transferring the flow_id as part of the event itself. We therefore require a mechanism
> to accomplish this. What we have done to work around this is to require the application to embed the flow_id
> within the data payload. The new flag, #define RTE_EVENT_DEV_CAP_CARRY_FLOW_ID (1ULL << 9), can be used
> by applications to determine if they need to embed the flow_id, or if its automatically propagated and present in the
> received event.
>
> What we should have done is to wrap the assignment with a conditional.
>
> if (!(device_capability_flags & RTE_EVENT_DEV_CAP_CARRY_FLOW_ID))
>         ev.flow_id = ev.mbuf->udata64;

Two problems with this approach,
1) we are assuming mbuf udata64 field is available for DLB driver
2) It won't work with another adapter, eventdev has no dependency with mbuf

Question:
1) In the case of DLB hardware, on dequeue(),  what HW returns? is it
only event pointer and not have any other metadata like schedule_type
etc.


>
> This would minimize/eliminate any performance impact due to the processor's branch prediction logic.
> The assignment then becomes in essence a NOOP for all event devices that are capable of carrying the flow_id as part of the event payload itself.
>
> Thanks,
> Tim
>
>
>
> Thanks,
> Tim
  
Ray Kinsella June 30, 2020, 11:22 a.m. UTC | #4
On 27/06/2020 08:44, Jerin Jacob wrote:
>> +
>> +/** Event port configuration structure */
>> +struct rte_event_port_conf_v20 {
>> +       int32_t new_event_threshold;
>> +       /**< A backpressure threshold for new event enqueues on this port.
>> +        * Use for *closed system* event dev where event capacity is limited,
>> +        * and cannot exceed the capacity of the event dev.
>> +        * Configuring ports with different thresholds can make higher priority
>> +        * traffic less likely to  be backpressured.
>> +        * For example, a port used to inject NIC Rx packets into the event dev
>> +        * can have a lower threshold so as not to overwhelm the device,
>> +        * while ports used for worker pools can have a higher threshold.
>> +        * This value cannot exceed the *nb_events_limit*
>> +        * which was previously supplied to rte_event_dev_configure().
>> +        * This should be set to '-1' for *open system*.
>> +        */
>> +       uint16_t dequeue_depth;
>> +       /**< Configure number of bulk dequeues for this event port.
>> +        * This value cannot exceed the *nb_event_port_dequeue_depth*
>> +        * which previously supplied to rte_event_dev_configure().
>> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
>> +        */
>> +       uint16_t enqueue_depth;
>> +       /**< Configure number of bulk enqueues for this event port.
>> +        * This value cannot exceed the *nb_event_port_enqueue_depth*
>> +        * which previously supplied to rte_event_dev_configure().
>> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
>> +        */
>>         uint8_t disable_implicit_release;
>>         /**< Configure the port not to release outstanding events in
>>          * rte_event_dev_dequeue_burst(). If true, all events received through
>> @@ -733,6 +911,14 @@ struct rte_event_port_conf {
>>  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
>>                                 struct rte_event_port_conf *port_conf);
>>
>> +int
>> +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
>> +                               struct rte_event_port_conf_v20 *port_conf);
>> +
>> +int
>> +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
>> +                                     struct rte_event_port_conf *port_conf);
> 
> Hi Timothy,
> 
> + ABI Maintainers (Ray, Neil)
> 
> # As per my understanding, the structures can not be versioned, only
> function can be versioned.
> i.e we can not make any change to " struct rte_event_port_conf"

So the answer is (as always): depends

If the structure is being use in inline functions is when you run into trouble 
- as knowledge of the structure is embedded in the linked application. 

However if the structure is _strictly_ being used as a non-inlined function parameter,
It can be safe to version in this way. 

So just to be clear, it is still the function that is actually being versioned here.

> 
> # We have a similar case with ethdev and it deferred to next release v20.11
> http://patches.dpdk.org/patch/69113/

Yes - I spent a why looking at this one, but I am struggling to recall,
why when I looked it we didn't suggest function versioning as a potential solution in this case. 

Looking back at it now, looks like it would have been ok. 

> 
> Regarding the API changes:
> # The slow path changes general looks good to me. I will review the
> next level in the coming days
> # The following fast path changes bothers to me. Could you share more
> details on below change?
> 
> diff --git a/app/test-eventdev/test_order_atq.c
> b/app/test-eventdev/test_order_atq.c
> index 3366cfc..8246b96 100644
> --- a/app/test-eventdev/test_order_atq.c
> +++ b/app/test-eventdev/test_order_atq.c
> @@ -34,6 +34,8 @@
>                         continue;
>                 }
> 
> +               ev.flow_id = ev.mbuf->udata64;
> +
> # Since RC1 is near, I am not sure how to accommodate the API changes
> now and sort out ABI stuffs.
> # Other concern is eventdev spec get bloated with versioning files
> just for ONE release as 20.11 will be OK to change the ABI.
> # While we discuss the API change, Please send deprecation notice for
> ABI change for 20.11,
> so that there is no ambiguity of this patch for the 20.11 release.
>
  
Jerin Jacob June 30, 2020, 11:30 a.m. UTC | #5
On Tue, Jun 30, 2020 at 4:52 PM Kinsella, Ray <mdr@ashroe.eu> wrote:
>
>
>
> On 27/06/2020 08:44, Jerin Jacob wrote:
> >> +
> >> +/** Event port configuration structure */
> >> +struct rte_event_port_conf_v20 {
> >> +       int32_t new_event_threshold;
> >> +       /**< A backpressure threshold for new event enqueues on this port.
> >> +        * Use for *closed system* event dev where event capacity is limited,
> >> +        * and cannot exceed the capacity of the event dev.
> >> +        * Configuring ports with different thresholds can make higher priority
> >> +        * traffic less likely to  be backpressured.
> >> +        * For example, a port used to inject NIC Rx packets into the event dev
> >> +        * can have a lower threshold so as not to overwhelm the device,
> >> +        * while ports used for worker pools can have a higher threshold.
> >> +        * This value cannot exceed the *nb_events_limit*
> >> +        * which was previously supplied to rte_event_dev_configure().
> >> +        * This should be set to '-1' for *open system*.
> >> +        */
> >> +       uint16_t dequeue_depth;
> >> +       /**< Configure number of bulk dequeues for this event port.
> >> +        * This value cannot exceed the *nb_event_port_dequeue_depth*
> >> +        * which previously supplied to rte_event_dev_configure().
> >> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
> >> +        */
> >> +       uint16_t enqueue_depth;
> >> +       /**< Configure number of bulk enqueues for this event port.
> >> +        * This value cannot exceed the *nb_event_port_enqueue_depth*
> >> +        * which previously supplied to rte_event_dev_configure().
> >> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
> >> +        */
> >>         uint8_t disable_implicit_release;
> >>         /**< Configure the port not to release outstanding events in
> >>          * rte_event_dev_dequeue_burst(). If true, all events received through
> >> @@ -733,6 +911,14 @@ struct rte_event_port_conf {
> >>  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
> >>                                 struct rte_event_port_conf *port_conf);
> >>
> >> +int
> >> +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
> >> +                               struct rte_event_port_conf_v20 *port_conf);
> >> +
> >> +int
> >> +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
> >> +                                     struct rte_event_port_conf *port_conf);
> >
> > Hi Timothy,
> >
> > + ABI Maintainers (Ray, Neil)
> >
> > # As per my understanding, the structures can not be versioned, only
> > function can be versioned.
> > i.e we can not make any change to " struct rte_event_port_conf"
>
> So the answer is (as always): depends
>
> If the structure is being use in inline functions is when you run into trouble
> - as knowledge of the structure is embedded in the linked application.
>
> However if the structure is _strictly_ being used as a non-inlined function parameter,
> It can be safe to version in this way.

But based on the optimization applied when building the consumer code
matters. Right?
i.e compiler can "inline" it, based on the optimization even the
source code explicitly mentions it.


>
> So just to be clear, it is still the function that is actually being versioned here.
>
> >
> > # We have a similar case with ethdev and it deferred to next release v20.11
> > http://patches.dpdk.org/patch/69113/
>
> Yes - I spent a why looking at this one, but I am struggling to recall,
> why when I looked it we didn't suggest function versioning as a potential solution in this case.
>
> Looking back at it now, looks like it would have been ok.

Ok.

>
> >
> > Regarding the API changes:
> > # The slow path changes general looks good to me. I will review the
> > next level in the coming days
> > # The following fast path changes bothers to me. Could you share more
> > details on below change?
> >
> > diff --git a/app/test-eventdev/test_order_atq.c
> > b/app/test-eventdev/test_order_atq.c
> > index 3366cfc..8246b96 100644
> > --- a/app/test-eventdev/test_order_atq.c
> > +++ b/app/test-eventdev/test_order_atq.c
> > @@ -34,6 +34,8 @@
> >                         continue;
> >                 }
> >
> > +               ev.flow_id = ev.mbuf->udata64;
> > +
> > # Since RC1 is near, I am not sure how to accommodate the API changes
> > now and sort out ABI stuffs.
> > # Other concern is eventdev spec get bloated with versioning files
> > just for ONE release as 20.11 will be OK to change the ABI.
> > # While we discuss the API change, Please send deprecation notice for
> > ABI change for 20.11,
> > so that there is no ambiguity of this patch for the 20.11 release.
> >
  
Ray Kinsella June 30, 2020, 11:36 a.m. UTC | #6
On 30/06/2020 12:30, Jerin Jacob wrote:
> On Tue, Jun 30, 2020 at 4:52 PM Kinsella, Ray <mdr@ashroe.eu> wrote:
>>
>>
>>
>> On 27/06/2020 08:44, Jerin Jacob wrote:
>>>> +
>>>> +/** Event port configuration structure */
>>>> +struct rte_event_port_conf_v20 {
>>>> +       int32_t new_event_threshold;
>>>> +       /**< A backpressure threshold for new event enqueues on this port.
>>>> +        * Use for *closed system* event dev where event capacity is limited,
>>>> +        * and cannot exceed the capacity of the event dev.
>>>> +        * Configuring ports with different thresholds can make higher priority
>>>> +        * traffic less likely to  be backpressured.
>>>> +        * For example, a port used to inject NIC Rx packets into the event dev
>>>> +        * can have a lower threshold so as not to overwhelm the device,
>>>> +        * while ports used for worker pools can have a higher threshold.
>>>> +        * This value cannot exceed the *nb_events_limit*
>>>> +        * which was previously supplied to rte_event_dev_configure().
>>>> +        * This should be set to '-1' for *open system*.
>>>> +        */
>>>> +       uint16_t dequeue_depth;
>>>> +       /**< Configure number of bulk dequeues for this event port.
>>>> +        * This value cannot exceed the *nb_event_port_dequeue_depth*
>>>> +        * which previously supplied to rte_event_dev_configure().
>>>> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
>>>> +        */
>>>> +       uint16_t enqueue_depth;
>>>> +       /**< Configure number of bulk enqueues for this event port.
>>>> +        * This value cannot exceed the *nb_event_port_enqueue_depth*
>>>> +        * which previously supplied to rte_event_dev_configure().
>>>> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
>>>> +        */
>>>>         uint8_t disable_implicit_release;
>>>>         /**< Configure the port not to release outstanding events in
>>>>          * rte_event_dev_dequeue_burst(). If true, all events received through
>>>> @@ -733,6 +911,14 @@ struct rte_event_port_conf {
>>>>  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
>>>>                                 struct rte_event_port_conf *port_conf);
>>>>
>>>> +int
>>>> +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
>>>> +                               struct rte_event_port_conf_v20 *port_conf);
>>>> +
>>>> +int
>>>> +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
>>>> +                                     struct rte_event_port_conf *port_conf);
>>>
>>> Hi Timothy,
>>>
>>> + ABI Maintainers (Ray, Neil)
>>>
>>> # As per my understanding, the structures can not be versioned, only
>>> function can be versioned.
>>> i.e we can not make any change to " struct rte_event_port_conf"
>>
>> So the answer is (as always): depends
>>
>> If the structure is being use in inline functions is when you run into trouble
>> - as knowledge of the structure is embedded in the linked application.
>>
>> However if the structure is _strictly_ being used as a non-inlined function parameter,
>> It can be safe to version in this way.
> 
> But based on the optimization applied when building the consumer code
> matters. Right?
> i.e compiler can "inline" it, based on the optimization even the
> source code explicitly mentions it.

Well a compiler will typically only inline within the confines of a given object file, or 
binary, if LTO is enabled. 

If a function symbol is exported from library however, it won't be inlined in a linked application. 
The compiler doesn't have enough information to inline it. 
All the compiler will know about it is it's offset in memory, and it's signature. 

> 
> 
>>
>> So just to be clear, it is still the function that is actually being versioned here.
>>
>>>
>>> # We have a similar case with ethdev and it deferred to next release v20.11
>>> http://patches.dpdk.org/patch/69113/
>>
>> Yes - I spent a why looking at this one, but I am struggling to recall,
>> why when I looked it we didn't suggest function versioning as a potential solution in this case.
>>
>> Looking back at it now, looks like it would have been ok.
> 
> Ok.
> 
>>
>>>
>>> Regarding the API changes:
>>> # The slow path changes general looks good to me. I will review the
>>> next level in the coming days
>>> # The following fast path changes bothers to me. Could you share more
>>> details on below change?
>>>
>>> diff --git a/app/test-eventdev/test_order_atq.c
>>> b/app/test-eventdev/test_order_atq.c
>>> index 3366cfc..8246b96 100644
>>> --- a/app/test-eventdev/test_order_atq.c
>>> +++ b/app/test-eventdev/test_order_atq.c
>>> @@ -34,6 +34,8 @@
>>>                         continue;
>>>                 }
>>>
>>> +               ev.flow_id = ev.mbuf->udata64;
>>> +
>>> # Since RC1 is near, I am not sure how to accommodate the API changes
>>> now and sort out ABI stuffs.
>>> # Other concern is eventdev spec get bloated with versioning files
>>> just for ONE release as 20.11 will be OK to change the ABI.
>>> # While we discuss the API change, Please send deprecation notice for
>>> ABI change for 20.11,
>>> so that there is no ambiguity of this patch for the 20.11 release.
>>>
  
Jerin Jacob June 30, 2020, 12:14 p.m. UTC | #7
On Tue, Jun 30, 2020 at 5:06 PM Kinsella, Ray <mdr@ashroe.eu> wrote:
>
>
>
> On 30/06/2020 12:30, Jerin Jacob wrote:
> > On Tue, Jun 30, 2020 at 4:52 PM Kinsella, Ray <mdr@ashroe.eu> wrote:
> >>
> >>
> >>
> >> On 27/06/2020 08:44, Jerin Jacob wrote:
> >>>> +
> >>>> +/** Event port configuration structure */
> >>>> +struct rte_event_port_conf_v20 {
> >>>> +       int32_t new_event_threshold;
> >>>> +       /**< A backpressure threshold for new event enqueues on this port.
> >>>> +        * Use for *closed system* event dev where event capacity is limited,
> >>>> +        * and cannot exceed the capacity of the event dev.
> >>>> +        * Configuring ports with different thresholds can make higher priority
> >>>> +        * traffic less likely to  be backpressured.
> >>>> +        * For example, a port used to inject NIC Rx packets into the event dev
> >>>> +        * can have a lower threshold so as not to overwhelm the device,
> >>>> +        * while ports used for worker pools can have a higher threshold.
> >>>> +        * This value cannot exceed the *nb_events_limit*
> >>>> +        * which was previously supplied to rte_event_dev_configure().
> >>>> +        * This should be set to '-1' for *open system*.
> >>>> +        */
> >>>> +       uint16_t dequeue_depth;
> >>>> +       /**< Configure number of bulk dequeues for this event port.
> >>>> +        * This value cannot exceed the *nb_event_port_dequeue_depth*
> >>>> +        * which previously supplied to rte_event_dev_configure().
> >>>> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
> >>>> +        */
> >>>> +       uint16_t enqueue_depth;
> >>>> +       /**< Configure number of bulk enqueues for this event port.
> >>>> +        * This value cannot exceed the *nb_event_port_enqueue_depth*
> >>>> +        * which previously supplied to rte_event_dev_configure().
> >>>> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
> >>>> +        */
> >>>>         uint8_t disable_implicit_release;
> >>>>         /**< Configure the port not to release outstanding events in
> >>>>          * rte_event_dev_dequeue_burst(). If true, all events received through
> >>>> @@ -733,6 +911,14 @@ struct rte_event_port_conf {
> >>>>  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
> >>>>                                 struct rte_event_port_conf *port_conf);
> >>>>
> >>>> +int
> >>>> +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
> >>>> +                               struct rte_event_port_conf_v20 *port_conf);
> >>>> +
> >>>> +int
> >>>> +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
> >>>> +                                     struct rte_event_port_conf *port_conf);
> >>>
> >>> Hi Timothy,
> >>>
> >>> + ABI Maintainers (Ray, Neil)
> >>>
> >>> # As per my understanding, the structures can not be versioned, only
> >>> function can be versioned.
> >>> i.e we can not make any change to " struct rte_event_port_conf"
> >>
> >> So the answer is (as always): depends
> >>
> >> If the structure is being use in inline functions is when you run into trouble
> >> - as knowledge of the structure is embedded in the linked application.
> >>
> >> However if the structure is _strictly_ being used as a non-inlined function parameter,
> >> It can be safe to version in this way.
> >
> > But based on the optimization applied when building the consumer code
> > matters. Right?
> > i.e compiler can "inline" it, based on the optimization even the
> > source code explicitly mentions it.
>
> Well a compiler will typically only inline within the confines of a given object file, or
> binary, if LTO is enabled.

>
> If a function symbol is exported from library however, it won't be inlined in a linked application.

Yes, With respect to that function.
But the application can use struct rte_event_port_conf in their code
and it can be part of other structures.
Right?


> The compiler doesn't have enough information to inline it.
> All the compiler will know about it is it's offset in memory, and it's signature.
>
> >
> >
> >>
> >> So just to be clear, it is still the function that is actually being versioned here.
> >>
> >>>
> >>> # We have a similar case with ethdev and it deferred to next release v20.11
> >>> http://patches.dpdk.org/patch/69113/
> >>
> >> Yes - I spent a why looking at this one, but I am struggling to recall,
> >> why when I looked it we didn't suggest function versioning as a potential solution in this case.
> >>
> >> Looking back at it now, looks like it would have been ok.
> >
> > Ok.
> >
> >>
> >>>
> >>> Regarding the API changes:
> >>> # The slow path changes general looks good to me. I will review the
> >>> next level in the coming days
> >>> # The following fast path changes bothers to me. Could you share more
> >>> details on below change?
> >>>
> >>> diff --git a/app/test-eventdev/test_order_atq.c
> >>> b/app/test-eventdev/test_order_atq.c
> >>> index 3366cfc..8246b96 100644
> >>> --- a/app/test-eventdev/test_order_atq.c
> >>> +++ b/app/test-eventdev/test_order_atq.c
> >>> @@ -34,6 +34,8 @@
> >>>                         continue;
> >>>                 }
> >>>
> >>> +               ev.flow_id = ev.mbuf->udata64;
> >>> +
> >>> # Since RC1 is near, I am not sure how to accommodate the API changes
> >>> now and sort out ABI stuffs.
> >>> # Other concern is eventdev spec get bloated with versioning files
> >>> just for ONE release as 20.11 will be OK to change the ABI.
> >>> # While we discuss the API change, Please send deprecation notice for
> >>> ABI change for 20.11,
> >>> so that there is no ambiguity of this patch for the 20.11 release.
> >>>
  
Timothy McDaniel June 30, 2020, 3:37 p.m. UTC | #8
>-----Original Message-----
>From: Jerin Jacob <jerinjacobk@gmail.com>
>Sent: Monday, June 29, 2020 11:21 PM
>To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
>Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>;
>Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage
><gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
>Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
>
>On Tue, Jun 30, 2020 at 1:01 AM McDaniel, Timothy
><timothy.mcdaniel@intel.com> wrote:
>>
>> -----Original Message-----
>> From: Jerin Jacob <jerinjacobk@gmail.com>
>> Sent: Saturday, June 27, 2020 2:45 AM
>> To: McDaniel, Timothy <timothy.mcdaniel@intel.com>; Ray Kinsella
><mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
>> Cc: Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage
><gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
>> Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
>>
>> > +
>> > +/** Event port configuration structure */
>> > +struct rte_event_port_conf_v20 {
>> > +       int32_t new_event_threshold;
>> > +       /**< A backpressure threshold for new event enqueues on this port.
>> > +        * Use for *closed system* event dev where event capacity is limited,
>> > +        * and cannot exceed the capacity of the event dev.
>> > +        * Configuring ports with different thresholds can make higher priority
>> > +        * traffic less likely to  be backpressured.
>> > +        * For example, a port used to inject NIC Rx packets into the event dev
>> > +        * can have a lower threshold so as not to overwhelm the device,
>> > +        * while ports used for worker pools can have a higher threshold.
>> > +        * This value cannot exceed the *nb_events_limit*
>> > +        * which was previously supplied to rte_event_dev_configure().
>> > +        * This should be set to '-1' for *open system*.
>> > +        */
>> > +       uint16_t dequeue_depth;
>> > +       /**< Configure number of bulk dequeues for this event port.
>> > +        * This value cannot exceed the *nb_event_port_dequeue_depth*
>> > +        * which previously supplied to rte_event_dev_configure().
>> > +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE
>capable.
>> > +        */
>> > +       uint16_t enqueue_depth;
>> > +       /**< Configure number of bulk enqueues for this event port.
>> > +        * This value cannot exceed the *nb_event_port_enqueue_depth*
>> > +        * which previously supplied to rte_event_dev_configure().
>> > +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE
>capable.
>> > +        */
>> >         uint8_t disable_implicit_release;
>> >         /**< Configure the port not to release outstanding events in
>> >          * rte_event_dev_dequeue_burst(). If true, all events received through
>> > @@ -733,6 +911,14 @@ struct rte_event_port_conf {
>> >  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
>> >                                 struct rte_event_port_conf *port_conf);
>> >
>> > +int
>> > +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
>> > +                               struct rte_event_port_conf_v20 *port_conf);
>> > +
>> > +int
>> > +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
>> > +                                     struct rte_event_port_conf *port_conf);
>>
>> Hi Timothy,
>>
>> + ABI Maintainers (Ray, Neil)
>>
>> # As per my understanding, the structures can not be versioned, only
>> function can be versioned.
>> i.e we can not make any change to " struct rte_event_port_conf"
>>
>> # We have a similar case with ethdev and it deferred to next release v20.11
>> http://patches.dpdk.org/patch/69113/
>>
>> Regarding the API changes:
>> # The slow path changes general looks good to me. I will review the
>> next level in the coming days
>> # The following fast path changes bothers to me. Could you share more
>> details on below change?
>>
>> diff --git a/app/test-eventdev/test_order_atq.c
>> b/app/test-eventdev/test_order_atq.c
>> index 3366cfc..8246b96 100644
>> --- a/app/test-eventdev/test_order_atq.c
>> +++ b/app/test-eventdev/test_order_atq.c
>> @@ -34,6 +34,8 @@
>>                         continue;
>>                 }
>>
>> +               ev.flow_id = ev.mbuf->udata64;
>> +
>> # Since RC1 is near, I am not sure how to accommodate the API changes
>> now and sort out ABI stuffs.
>> # Other concern is eventdev spec get bloated with versioning files
>> just for ONE release as 20.11 will be OK to change the ABI.
>> # While we discuss the API change, Please send deprecation notice for
>> ABI change for 20.11,
>> so that there is no ambiguity of this patch for the 20.11 release.
>>
>> Hello Jerin,
>>
>> Thank you for the review comments.
>>
>> With regard to your comments regarding the fast path flow_id change, the Intel
>DLB hardware
>> is not capable of transferring the flow_id as part of the event itself. We
>therefore require a mechanism
>> to accomplish this. What we have done to work around this is to require the
>application to embed the flow_id
>> within the data payload. The new flag, #define
>RTE_EVENT_DEV_CAP_CARRY_FLOW_ID (1ULL << 9), can be used
>> by applications to determine if they need to embed the flow_id, or if its
>automatically propagated and present in the
>> received event.
>>
>> What we should have done is to wrap the assignment with a conditional.
>>
>> if (!(device_capability_flags & RTE_EVENT_DEV_CAP_CARRY_FLOW_ID))
>>         ev.flow_id = ev.mbuf->udata64;
>
>Two problems with this approach,
>1) we are assuming mbuf udata64 field is available for DLB driver
>2) It won't work with another adapter, eventdev has no dependency with mbuf
>

This snippet is not intended to suggest that udata64 always be used to store the flow ID, but as an example of how an application could do it. Some applications won’t need to carry the flow ID through; others can select an unused field in the event data (e.g. hash.rss or udata64 if using mbufs), or (worst-case) re-generate the flow ID in pipeline stages that require it.

>Question:
>1) In the case of DLB hardware, on dequeue(),  what HW returns? is it
>only event pointer and not have any other metadata like schedule_type
>etc.
>

The DLB device provides a 16B “queue entry” that consists of:

*	8B event data
*	Queue ID
*	Priority
*	Scheduling type
*	19 bits of carried-through data
*	Assorted error/debug/reserved bits that are set by the device (not carried-through)

 For the carried-through 19b, we use 12b for event_type and sub_event_type.

>
>>
>> This would minimize/eliminate any performance impact due to the processor's
>branch prediction logic.
>> The assignment then becomes in essence a NOOP for all event devices that are
>capable of carrying the flow_id as part of the event payload itself.
>>
>> Thanks,
>> Tim
>>
>>
>>
>> Thanks,
>> Tim
  
Jerin Jacob June 30, 2020, 3:57 p.m. UTC | #9
On Tue, Jun 30, 2020 at 9:12 PM McDaniel, Timothy
<timothy.mcdaniel@intel.com> wrote:
>
> >-----Original Message-----
> >From: Jerin Jacob <jerinjacobk@gmail.com>
> >Sent: Monday, June 29, 2020 11:21 PM
> >To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
> >Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>;
> >Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage
> ><gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
> >Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
> >
> >On Tue, Jun 30, 2020 at 1:01 AM McDaniel, Timothy
> ><timothy.mcdaniel@intel.com> wrote:
> >>
> >> -----Original Message-----
> >> From: Jerin Jacob <jerinjacobk@gmail.com>
> >> Sent: Saturday, June 27, 2020 2:45 AM
> >> To: McDaniel, Timothy <timothy.mcdaniel@intel.com>; Ray Kinsella
> ><mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
> >> Cc: Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage
> ><gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
> >> Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
> >>
> >> > +
> >> > +/** Event port configuration structure */
> >> > +struct rte_event_port_conf_v20 {
> >> > +       int32_t new_event_threshold;
> >> > +       /**< A backpressure threshold for new event enqueues on this port.
> >> > +        * Use for *closed system* event dev where event capacity is limited,
> >> > +        * and cannot exceed the capacity of the event dev.
> >> > +        * Configuring ports with different thresholds can make higher priority
> >> > +        * traffic less likely to  be backpressured.
> >> > +        * For example, a port used to inject NIC Rx packets into the event dev
> >> > +        * can have a lower threshold so as not to overwhelm the device,
> >> > +        * while ports used for worker pools can have a higher threshold.
> >> > +        * This value cannot exceed the *nb_events_limit*
> >> > +        * which was previously supplied to rte_event_dev_configure().
> >> > +        * This should be set to '-1' for *open system*.
> >> > +        */
> >> > +       uint16_t dequeue_depth;
> >> > +       /**< Configure number of bulk dequeues for this event port.
> >> > +        * This value cannot exceed the *nb_event_port_dequeue_depth*
> >> > +        * which previously supplied to rte_event_dev_configure().
> >> > +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE
> >capable.
> >> > +        */
> >> > +       uint16_t enqueue_depth;
> >> > +       /**< Configure number of bulk enqueues for this event port.
> >> > +        * This value cannot exceed the *nb_event_port_enqueue_depth*
> >> > +        * which previously supplied to rte_event_dev_configure().
> >> > +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE
> >capable.
> >> > +        */
> >> >         uint8_t disable_implicit_release;
> >> >         /**< Configure the port not to release outstanding events in
> >> >          * rte_event_dev_dequeue_burst(). If true, all events received through
> >> > @@ -733,6 +911,14 @@ struct rte_event_port_conf {
> >> >  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
> >> >                                 struct rte_event_port_conf *port_conf);
> >> >
> >> > +int
> >> > +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
> >> > +                               struct rte_event_port_conf_v20 *port_conf);
> >> > +
> >> > +int
> >> > +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
> >> > +                                     struct rte_event_port_conf *port_conf);
> >>
> >> Hi Timothy,
> >>
> >> + ABI Maintainers (Ray, Neil)
> >>
> >> # As per my understanding, the structures can not be versioned, only
> >> function can be versioned.
> >> i.e we can not make any change to " struct rte_event_port_conf"
> >>
> >> # We have a similar case with ethdev and it deferred to next release v20.11
> >> http://patches.dpdk.org/patch/69113/
> >>
> >> Regarding the API changes:
> >> # The slow path changes general looks good to me. I will review the
> >> next level in the coming days
> >> # The following fast path changes bothers to me. Could you share more
> >> details on below change?
> >>
> >> diff --git a/app/test-eventdev/test_order_atq.c
> >> b/app/test-eventdev/test_order_atq.c
> >> index 3366cfc..8246b96 100644
> >> --- a/app/test-eventdev/test_order_atq.c
> >> +++ b/app/test-eventdev/test_order_atq.c
> >> @@ -34,6 +34,8 @@
> >>                         continue;
> >>                 }
> >>
> >> +               ev.flow_id = ev.mbuf->udata64;
> >> +
> >> # Since RC1 is near, I am not sure how to accommodate the API changes
> >> now and sort out ABI stuffs.
> >> # Other concern is eventdev spec get bloated with versioning files
> >> just for ONE release as 20.11 will be OK to change the ABI.
> >> # While we discuss the API change, Please send deprecation notice for
> >> ABI change for 20.11,
> >> so that there is no ambiguity of this patch for the 20.11 release.
> >>
> >> Hello Jerin,
> >>
> >> Thank you for the review comments.
> >>
> >> With regard to your comments regarding the fast path flow_id change, the Intel
> >DLB hardware
> >> is not capable of transferring the flow_id as part of the event itself. We
> >therefore require a mechanism
> >> to accomplish this. What we have done to work around this is to require the
> >application to embed the flow_id
> >> within the data payload. The new flag, #define
> >RTE_EVENT_DEV_CAP_CARRY_FLOW_ID (1ULL << 9), can be used
> >> by applications to determine if they need to embed the flow_id, or if its
> >automatically propagated and present in the
> >> received event.
> >>
> >> What we should have done is to wrap the assignment with a conditional.
> >>
> >> if (!(device_capability_flags & RTE_EVENT_DEV_CAP_CARRY_FLOW_ID))
> >>         ev.flow_id = ev.mbuf->udata64;
> >
> >Two problems with this approach,
> >1) we are assuming mbuf udata64 field is available for DLB driver
> >2) It won't work with another adapter, eventdev has no dependency with mbuf
> >
>
> This snippet is not intended to suggest that udata64 always be used to store the flow ID, but as an example of how an application could do it. Some applications won’t need to carry the flow ID through; others can select an unused field in the event data (e.g. hash.rss or udata64 if using mbufs), or (worst-case) re-generate the flow ID in pipeline stages that require it.

OK.
>
> >Question:
> >1) In the case of DLB hardware, on dequeue(),  what HW returns? is it
> >only event pointer and not have any other metadata like schedule_type
> >etc.
> >
>
> The DLB device provides a 16B “queue entry” that consists of:
>
> *       8B event data
> *       Queue ID
> *       Priority
> *       Scheduling type
> *       19 bits of carried-through data
> *       Assorted error/debug/reserved bits that are set by the device (not carried-through)
>
>  For the carried-through 19b, we use 12b for event_type and sub_event_type.

I can only think of TWO options to help:
1) Since the event pointer is always cache aligned, you could grab its 6
LSBs (2^6 = 64B) plus the 7 spare bits of the (19b - 12b) carried-through
structure.
2) Have a separate mempool driver built on the existing drivers, i.e.
"event pointer" + or - some offset can hold any amount of custom data.


>
> >
> >>
> >> This would minimize/eliminate any performance impact due to the processor's
> >branch prediction logic.

I think, if we need to change the common fastpath, we should rather make
it a template that generates the code at compile time, so there is
absolutely zero overhead, and select the variant at runtime.
See app/test-eventdev/test_order_atq.c, function worker_wrapper(), which
_creates_ the worker at compile time based on runtime capability.
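
A rough sketch of that specialization pattern (purely illustrative; all
names, the global dev/port ids and the udata64 carrier are assumptions):

	#include <rte_eventdev.h>
	#include <rte_mbuf.h>

	static uint8_t g_dev_id;
	static uint8_t g_port_id;

	/* the const parameter is constant-propagated into each inlined copy,
	 * so the flow_id restore compiles away when flow_id_cap is 1 */
	static __rte_always_inline int
	order_worker(void *arg, const int flow_id_cap)
	{
		struct rte_event ev;

		RTE_SET_USED(arg);
		while (rte_event_dequeue_burst(g_dev_id, g_port_id, &ev, 1, 0)) {
			if (!flow_id_cap)
				ev.flow_id = ev.mbuf->udata64;
			ev.op = RTE_EVENT_OP_FORWARD;
			rte_event_enqueue_burst(g_dev_id, g_port_id, &ev, 1);
		}
		return 0;	/* sketch: exit once the queue is drained */
	}

	static int
	worker_wrapper(void *arg)
	{
		struct rte_event_dev_info info;
		int cap;

		rte_event_dev_info_get(g_dev_id, &info);
		cap = !!(info.event_dev_cap & RTE_EVENT_DEV_CAP_CARRY_FLOW_ID);

		return cap ? order_worker(arg, 1) : order_worker(arg, 0);
	}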



> >> The assignment then becomes in essence a NOOP for all event devices that are
> >capable of carrying the flow_id as part of the event payload itself.
> >>
> >> Thanks,
> >> Tim
> >>
> >>
> >>
> >> Thanks,
> >> Tim
  
Timothy McDaniel June 30, 2020, 7:26 p.m. UTC | #10
>-----Original Message-----
>From: Jerin Jacob <jerinjacobk@gmail.com>
>Sent: Tuesday, June 30, 2020 10:58 AM
>To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
>Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>;
>Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage
><gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
>Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
>
>On Tue, Jun 30, 2020 at 9:12 PM McDaniel, Timothy
><timothy.mcdaniel@intel.com> wrote:
>>
>> >-----Original Message-----
>> >From: Jerin Jacob <jerinjacobk@gmail.com>
>> >Sent: Monday, June 29, 2020 11:21 PM
>> >To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
>> >Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>;
>> >Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
>> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage
>> ><gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
>> >Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
>> >
>> >On Tue, Jun 30, 2020 at 1:01 AM McDaniel, Timothy
>> ><timothy.mcdaniel@intel.com> wrote:
>> >>
>> >> -----Original Message-----
>> >> From: Jerin Jacob <jerinjacobk@gmail.com>
>> >> Sent: Saturday, June 27, 2020 2:45 AM
>> >> To: McDaniel, Timothy <timothy.mcdaniel@intel.com>; Ray Kinsella
>> ><mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
>> >> Cc: Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
>> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage
>> ><gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
>> >> Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
>> >>
>> >> > +
>> >> > +/** Event port configuration structure */
>> >> > +struct rte_event_port_conf_v20 {
>> >> > +       int32_t new_event_threshold;
>> >> > +       /**< A backpressure threshold for new event enqueues on this port.
>> >> > +        * Use for *closed system* event dev where event capacity is limited,
>> >> > +        * and cannot exceed the capacity of the event dev.
>> >> > +        * Configuring ports with different thresholds can make higher priority
>> >> > +        * traffic less likely to  be backpressured.
>> >> > +        * For example, a port used to inject NIC Rx packets into the event dev
>> >> > +        * can have a lower threshold so as not to overwhelm the device,
>> >> > +        * while ports used for worker pools can have a higher threshold.
>> >> > +        * This value cannot exceed the *nb_events_limit*
>> >> > +        * which was previously supplied to rte_event_dev_configure().
>> >> > +        * This should be set to '-1' for *open system*.
>> >> > +        */
>> >> > +       uint16_t dequeue_depth;
>> >> > +       /**< Configure number of bulk dequeues for this event port.
>> >> > +        * This value cannot exceed the *nb_event_port_dequeue_depth*
>> >> > +        * which previously supplied to rte_event_dev_configure().
>> >> > +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE
>> >capable.
>> >> > +        */
>> >> > +       uint16_t enqueue_depth;
>> >> > +       /**< Configure number of bulk enqueues for this event port.
>> >> > +        * This value cannot exceed the *nb_event_port_enqueue_depth*
>> >> > +        * which previously supplied to rte_event_dev_configure().
>> >> > +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE
>> >capable.
>> >> > +        */
>> >> >         uint8_t disable_implicit_release;
>> >> >         /**< Configure the port not to release outstanding events in
>> >> >          * rte_event_dev_dequeue_burst(). If true, all events received through
>> >> > @@ -733,6 +911,14 @@ struct rte_event_port_conf {
>> >> >  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
>> >> >                                 struct rte_event_port_conf *port_conf);
>> >> >
>> >> > +int
>> >> > +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
>> >> > +                               struct rte_event_port_conf_v20 *port_conf);
>> >> > +
>> >> > +int
>> >> > +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
>> >> > +                                     struct rte_event_port_conf *port_conf);
>> >>
>> >> Hi Timothy,
>> >>
>> >> + ABI Maintainers (Ray, Neil)
>> >>
>> >> # As per my understanding, the structures can not be versioned, only
>> >> function can be versioned.
>> >> i.e we can not make any change to " struct rte_event_port_conf"
>> >>
>> >> # We have a similar case with ethdev and it deferred to next release v20.11
>> >> http://patches.dpdk.org/patch/69113/
>> >>
>> >> Regarding the API changes:
>> >> # The slow path changes general looks good to me. I will review the
>> >> next level in the coming days
>> >> # The following fast path changes bothers to me. Could you share more
>> >> details on below change?
>> >>
>> >> diff --git a/app/test-eventdev/test_order_atq.c
>> >> b/app/test-eventdev/test_order_atq.c
>> >> index 3366cfc..8246b96 100644
>> >> --- a/app/test-eventdev/test_order_atq.c
>> >> +++ b/app/test-eventdev/test_order_atq.c
>> >> @@ -34,6 +34,8 @@
>> >>                         continue;
>> >>                 }
>> >>
>> >> +               ev.flow_id = ev.mbuf->udata64;
>> >> +
>> >> # Since RC1 is near, I am not sure how to accommodate the API changes
>> >> now and sort out ABI stuffs.
>> >> # Other concern is eventdev spec get bloated with versioning files
>> >> just for ONE release as 20.11 will be OK to change the ABI.
>> >> # While we discuss the API change, Please send deprecation notice for
>> >> ABI change for 20.11,
>> >> so that there is no ambiguity of this patch for the 20.11 release.
>> >>
>> >> Hello Jerin,
>> >>
>> >> Thank you for the review comments.
>> >>
>> >> With regard to your comments regarding the fast path flow_id change, the
>Intel
>> >DLB hardware
>> >> is not capable of transferring the flow_id as part of the event itself. We
>> >therefore require a mechanism
>> >> to accomplish this. What we have done to work around this is to require the
>> >application to embed the flow_id
>> >> within the data payload. The new flag, #define
>> >RTE_EVENT_DEV_CAP_CARRY_FLOW_ID (1ULL << 9), can be used
>> >> by applications to determine if they need to embed the flow_id, or if its
>> >automatically propagated and present in the
>> >> received event.
>> >>
>> >> What we should have done is to wrap the assignment with a conditional.
>> >>
>> >> if (!(device_capability_flags & RTE_EVENT_DEV_CAP_CARRY_FLOW_ID))
>> >>         ev.flow_id = ev.mbuf->udata64;
>> >
>> >Two problems with this approach,
>> >1) we are assuming mbuf udata64 field is available for DLB driver
>> >2) It won't work with another adapter, eventdev has no dependency with mbuf
>> >
>>
>> This snippet is not intended to suggest that udata64 always be used to store the
>flow ID, but as an example of how an application could do it. Some applications
>won’t need to carry the flow ID through; others can select an unused field in the
>event data (e.g. hash.rss or udata64 if using mbufs), or (worst-case) re-generate
>the flow ID in pipeline stages that require it.
>
>OK.
>>
>> >Question:
>> >1) In the case of DLB hardware, on dequeue(),  what HW returns? is it
>> >only event pointer and not have any other metadata like schedule_type
>> >etc.
>> >
>>
>> The DLB device provides a 16B “queue entry” that consists of:
>>
>> *       8B event data
>> *       Queue ID
>> *       Priority
>> *       Scheduling type
>> *       19 bits of carried-through data
>> *       Assorted error/debug/reserved bits that are set by the device (not carried-
>through)
>>
>>  For the carried-through 19b, we use 12b for event_type and sub_event_type.
>
>I can only think of TWO options to help
>1) Since event pointer always cache aligned, You could grab LSB
>6bits(2^6 = 64B ) and 7 bits from (19b - 12b) carried through
>structure
>2) Have separate mempool driver using existing drivers, ie "event
>pointer" + or - some offset have any amount of custom data.
>

We can't guarantee that the event will contain a pointer -- it's possible that the 8B is inline data (i.e. struct rte_event's u64 field).

It's really an application decision -- for example an app could allocate space in the 'mbuf private data' to store the flow ID, if the event device lacks the carry-flow-ID capability and the other mbuf fields can't be used for whatever reason.
We modified the tests and sample apps to show how this might be done, not necessarily how it must be done.
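
As a rough illustration of the 'mbuf private data' option (the struct and
helper names are invented for this example, and assume the mbuf pool was
created with enough priv_size via rte_pktmbuf_pool_create()):

	#include <rte_mbuf.h>
	#include <rte_eventdev.h>

	/* Per-packet private area reserved at mempool creation time. */
	struct app_mbuf_priv {
		uint32_t flow_id;
	};

	static inline void
	app_store_flow_id(struct rte_mbuf *m, uint32_t flow_id)
	{
		struct app_mbuf_priv *priv = rte_mbuf_to_priv(m);

		priv->flow_id = flow_id;
	}

	static inline uint32_t
	app_load_flow_id(const struct rte_event *ev)
	{
		struct app_mbuf_priv *priv = rte_mbuf_to_priv(ev->mbuf);

		return priv->flow_id;
	}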

>
>>
>> >
>> >>
>> >> This would minimize/eliminate any performance impact due to the
>processor's
>> >branch prediction logic.
>
>I think, If we need to change common fastpath, better we need to make
>it template to create code for compile-time to have absolute zero
>overhead
>and use runtime.
>See app/test-eventdev/test_order_atq.c: function: worker_wrapper()
>_create_ worker at compile time based on runtime capability.
>

Yes, that would be perfect.  Thanks for the example!

>
>
>> >> The assignment then becomes in essence a NOOP for all event devices that
>are
>> >capable of carrying the flow_id as part of the event payload itself.
>> >>
>> >> Thanks,
>> >> Tim
>> >>
>> >>
>> >>
>> >> Thanks,
>> >> Tim
  
Pavan Nikhilesh Bhagavatula June 30, 2020, 8:40 p.m. UTC | #11
>-----Original Message-----
>From: dev <dev-bounces@dpdk.org> On Behalf Of McDaniel, Timothy
>Sent: Wednesday, July 1, 2020 12:57 AM
>To: Jerin Jacob <jerinjacobk@gmail.com>
>Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman
><nhorman@tuxdriver.com>; Jerin Jacob Kollanukkaran
><jerinj@marvell.com>; Mattias Rönnblom
><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads,
>Gage <gage.eads@intel.com>; Van Haaren, Harry
><harry.van.haaren@intel.com>
>Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream
>prerequisites
>
>>-----Original Message-----
>>From: Jerin Jacob <jerinjacobk@gmail.com>
>>Sent: Tuesday, June 30, 2020 10:58 AM
>>To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
>>Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman
><nhorman@tuxdriver.com>;
>>Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
>><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads,
>Gage
>><gage.eads@intel.com>; Van Haaren, Harry
><harry.van.haaren@intel.com>
>>Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream
>prerequisites
>>
>>On Tue, Jun 30, 2020 at 9:12 PM McDaniel, Timothy
>><timothy.mcdaniel@intel.com> wrote:
>>>
>>> >-----Original Message-----
>>> >From: Jerin Jacob <jerinjacobk@gmail.com>
>>> >Sent: Monday, June 29, 2020 11:21 PM
>>> >To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
>>> >Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman
><nhorman@tuxdriver.com>;
>>> >Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
>>> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>;
>Eads, Gage
>>> ><gage.eads@intel.com>; Van Haaren, Harry
><harry.van.haaren@intel.com>
>>> >Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream
>prerequisites
>>> >
>>> >On Tue, Jun 30, 2020 at 1:01 AM McDaniel, Timothy
>>> ><timothy.mcdaniel@intel.com> wrote:
>>> >>
>>> >> -----Original Message-----
>>> >> From: Jerin Jacob <jerinjacobk@gmail.com>
>>> >> Sent: Saturday, June 27, 2020 2:45 AM
>>> >> To: McDaniel, Timothy <timothy.mcdaniel@intel.com>; Ray
>Kinsella
>>> ><mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
>>> >> Cc: Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
>>> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>;
>Eads, Gage
>>> ><gage.eads@intel.com>; Van Haaren, Harry
><harry.van.haaren@intel.com>
>>> >> Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream
>prerequisites
>>> >>
>>> >> > +
>>> >> > +/** Event port configuration structure */
>>> >> > +struct rte_event_port_conf_v20 {
>>> >> > +       int32_t new_event_threshold;
>>> >> > +       /**< A backpressure threshold for new event enqueues on
>this port.
>>> >> > +        * Use for *closed system* event dev where event capacity
>is limited,
>>> >> > +        * and cannot exceed the capacity of the event dev.
>>> >> > +        * Configuring ports with different thresholds can make
>higher priority
>>> >> > +        * traffic less likely to  be backpressured.
>>> >> > +        * For example, a port used to inject NIC Rx packets into
>the event dev
>>> >> > +        * can have a lower threshold so as not to overwhelm the
>device,
>>> >> > +        * while ports used for worker pools can have a higher
>threshold.
>>> >> > +        * This value cannot exceed the *nb_events_limit*
>>> >> > +        * which was previously supplied to
>rte_event_dev_configure().
>>> >> > +        * This should be set to '-1' for *open system*.
>>> >> > +        */
>>> >> > +       uint16_t dequeue_depth;
>>> >> > +       /**< Configure number of bulk dequeues for this event
>port.
>>> >> > +        * This value cannot exceed the
>*nb_event_port_dequeue_depth*
>>> >> > +        * which previously supplied to rte_event_dev_configure().
>>> >> > +        * Ignored when device is not
>RTE_EVENT_DEV_CAP_BURST_MODE
>>> >capable.
>>> >> > +        */
>>> >> > +       uint16_t enqueue_depth;
>>> >> > +       /**< Configure number of bulk enqueues for this event
>port.
>>> >> > +        * This value cannot exceed the
>*nb_event_port_enqueue_depth*
>>> >> > +        * which previously supplied to rte_event_dev_configure().
>>> >> > +        * Ignored when device is not
>RTE_EVENT_DEV_CAP_BURST_MODE
>>> >capable.
>>> >> > +        */
>>> >> >         uint8_t disable_implicit_release;
>>> >> >         /**< Configure the port not to release outstanding events
>in
>>> >> >          * rte_event_dev_dequeue_burst(). If true, all events
>received through
>>> >> > @@ -733,6 +911,14 @@ struct rte_event_port_conf {
>>> >> >  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t
>port_id,
>>> >> >                                 struct rte_event_port_conf *port_conf);
>>> >> >
>>> >> > +int
>>> >> > +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t
>port_id,
>>> >> > +                               struct rte_event_port_conf_v20 *port_conf);
>>> >> > +
>>> >> > +int
>>> >> > +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t
>port_id,
>>> >> > +                                     struct rte_event_port_conf *port_conf);
>>> >>
>>> >> Hi Timothy,
>>> >>
>>> >> + ABI Maintainers (Ray, Neil)
>>> >>
>>> >> # As per my understanding, the structures can not be versioned,
>only
>>> >> function can be versioned.
>>> >> i.e we can not make any change to " struct rte_event_port_conf"
>>> >>
>>> >> # We have a similar case with ethdev and it deferred to next
>release v20.11
>>> >> http://patches.dpdk.org/patch/69113/
>>> >>
>>> >> Regarding the API changes:
>>> >> # The slow path changes general looks good to me. I will review
>the
>>> >> next level in the coming days
>>> >> # The following fast path changes bothers to me. Could you share
>more
>>> >> details on below change?
>>> >>
>>> >> diff --git a/app/test-eventdev/test_order_atq.c
>>> >> b/app/test-eventdev/test_order_atq.c
>>> >> index 3366cfc..8246b96 100644
>>> >> --- a/app/test-eventdev/test_order_atq.c
>>> >> +++ b/app/test-eventdev/test_order_atq.c
>>> >> @@ -34,6 +34,8 @@
>>> >>                         continue;
>>> >>                 }
>>> >>
>>> >> +               ev.flow_id = ev.mbuf->udata64;
>>> >> +
>>> >> # Since RC1 is near, I am not sure how to accommodate the API
>changes
>>> >> now and sort out ABI stuffs.
>>> >> # Other concern is eventdev spec get bloated with versioning files
>>> >> just for ONE release as 20.11 will be OK to change the ABI.
>>> >> # While we discuss the API change, Please send deprecation
>notice for
>>> >> ABI change for 20.11,
>>> >> so that there is no ambiguity of this patch for the 20.11 release.
>>> >>
>>> >> Hello Jerin,
>>> >>
>>> >> Thank you for the review comments.
>>> >>
>>> >> With regard to your comments regarding the fast path flow_id
>change, the
>>Intel
>>> >DLB hardware
>>> >> is not capable of transferring the flow_id as part of the event
>itself. We
>>> >therefore require a mechanism
>>> >> to accomplish this. What we have done to work around this is to
>require the
>>> >application to embed the flow_id
>>> >> within the data payload. The new flag, #define
>>> >RTE_EVENT_DEV_CAP_CARRY_FLOW_ID (1ULL << 9), can be used
>>> >> by applications to determine if they need to embed the flow_id,
>or if its
>>> >automatically propagated and present in the
>>> >> received event.
>>> >>
>>> >> What we should have done is to wrap the assignment with a
>conditional.
>>> >>
>>> >> if (!(device_capability_flags &
>RTE_EVENT_DEV_CAP_CARRY_FLOW_ID))
>>> >>         ev.flow_id = ev.mbuf->udata64;
>>> >
>>> >Two problems with this approach,
>>> >1) we are assuming mbuf udata64 field is available for DLB driver
>>> >2) It won't work with another adapter, eventdev has no
>dependency with mbuf
>>> >
>>>
>>> This snippet is not intended to suggest that udata64 always be used
>to store the
>>flow ID, but as an example of how an application could do it. Some
>applications
>>won’t need to carry the flow ID through; others can select an unused
>field in the
>>event data (e.g. hash.rss or udata64 if using mbufs), or (worst-case)
>re-generate
>>the flow ID in pipeline stages that require it.
>>
>>OK.
>>>
>>> >Question:
>>> >1) In the case of DLB hardware, on dequeue(),  what HW returns? is
>it
>>> >only event pointer and not have any other metadata like
>schedule_type
>>> >etc.
>>> >
>>>
>>> The DLB device provides a 16B “queue entry” that consists of:
>>>
>>> *       8B event data
>>> *       Queue ID
>>> *       Priority
>>> *       Scheduling type
>>> *       19 bits of carried-through data
>>> *       Assorted error/debug/reserved bits that are set by the device
>(not carried-
>>through)
>>>
>>>  For the carried-through 19b, we use 12b for event_type and
>sub_event_type.
>>
>>I can only think of TWO options to help
>>1) Since event pointer always cache aligned, You could grab LSB
>>6bits(2^6 = 64B ) and 7 bits from (19b - 12b) carried through
>>structure
>>2) Have separate mempool driver using existing drivers, ie "event
>>pointer" + or - some offset have any amount of custom data.
>>
>
>We can't guarantee that the event will contain a pointer -- it's possible
>that 8B is inline data (i.e. struct rte_event's u64 field).
>
>It's really an application decision -- for example an app could allocate
>space in the 'mbuf private data' to store the flow ID, if the event device
>lacks that carry-flow-ID capability and the other mbuf fields can't be
>used for whatever reason.
>We modified the tests, sample apps to show how this might be done,
>not necessarily how it must be done.
>
>>
>>>
>>> >
>>> >>
>>> >> This would minimize/eliminate any performance impact due to
>the
>>processor's
>>> >branch prediction logic.
>>
>>I think, If we need to change common fastpath, better we need to
>make
>>it template to create code for compile-time to have absolute zero
>>overhead
>>and use runtime.
>>See app/test-eventdev/test_order_atq.c: function: worker_wrapper()
>>_create_ worker at compile time based on runtime capability.
>>
>
>Yes, that would be perfect.  Thanks for the example!

Just to add: instead of having if and else, using a jump table would be much cleaner.
Ex.
	const pipeline_atq_worker_t pipeline_atq_worker_single_stage[2][2] = {
		[0][0] = pipeline_atq_worker_single_stage_fwd,
		[0][1] = pipeline_atq_worker_single_stage_tx,
		[1][0] = pipeline_atq_worker_single_stage_burst_fwd,
		[1][1] = pipeline_atq_worker_single_stage_burst_tx,
	};

		return (pipeline_atq_worker_single_stage[burst][internal_port])(arg);

>
>>
>>
>>> >> The assignment then becomes in essence a NOOP for all event
>devices that
>>are
>>> >capable of carrying the flow_id as part of the event payload itself.
>>> >>
>>> >> Thanks,
>>> >> Tim
>>> >>
>>> >>
>>> >>
>>> >> Thanks,
>>> >> Tim
  
Timothy McDaniel June 30, 2020, 9:07 p.m. UTC | #12
>-----Original Message-----
>From: Pavan Nikhilesh Bhagavatula <pbhagavatula@marvell.com>
>Sent: Tuesday, June 30, 2020 3:40 PM
>To: McDaniel, Timothy <timothy.mcdaniel@intel.com>; Jerin Jacob
><jerinjacobk@gmail.com>
>Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>;
>Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Mattias Rönnblom
><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage
><gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
>Subject: RE: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
>
>
>
>>-----Original Message-----
>>From: dev <dev-bounces@dpdk.org> On Behalf Of McDaniel, Timothy
>>Sent: Wednesday, July 1, 2020 12:57 AM
>>To: Jerin Jacob <jerinjacobk@gmail.com>
>>Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman
>><nhorman@tuxdriver.com>; Jerin Jacob Kollanukkaran
>><jerinj@marvell.com>; Mattias Rönnblom
>><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads,
>>Gage <gage.eads@intel.com>; Van Haaren, Harry
>><harry.van.haaren@intel.com>
>>Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream
>>prerequisites
>>
>>>-----Original Message-----
>>>From: Jerin Jacob <jerinjacobk@gmail.com>
>>>Sent: Tuesday, June 30, 2020 10:58 AM
>>>To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
>>>Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman
>><nhorman@tuxdriver.com>;
>>>Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
>>><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads,
>>Gage
>>><gage.eads@intel.com>; Van Haaren, Harry
>><harry.van.haaren@intel.com>
>>>Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream
>>prerequisites
>>>
>>>On Tue, Jun 30, 2020 at 9:12 PM McDaniel, Timothy
>>><timothy.mcdaniel@intel.com> wrote:
>>>>
>>>> >-----Original Message-----
>>>> >From: Jerin Jacob <jerinjacobk@gmail.com>
>>>> >Sent: Monday, June 29, 2020 11:21 PM
>>>> >To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
>>>> >Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman
>><nhorman@tuxdriver.com>;
>>>> >Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
>>>> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>;
>>Eads, Gage
>>>> ><gage.eads@intel.com>; Van Haaren, Harry
>><harry.van.haaren@intel.com>
>>>> >Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream
>>prerequisites
>>>> >
>>>> >On Tue, Jun 30, 2020 at 1:01 AM McDaniel, Timothy
>>>> ><timothy.mcdaniel@intel.com> wrote:
>>>> >>
>>>> >> -----Original Message-----
>>>> >> From: Jerin Jacob <jerinjacobk@gmail.com>
>>>> >> Sent: Saturday, June 27, 2020 2:45 AM
>>>> >> To: McDaniel, Timothy <timothy.mcdaniel@intel.com>; Ray
>>Kinsella
>>>> ><mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
>>>> >> Cc: Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
>>>> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>;
>>Eads, Gage
>>>> ><gage.eads@intel.com>; Van Haaren, Harry
>><harry.van.haaren@intel.com>
>>>> >> Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream
>>prerequisites
>>>> >>
>>>> >> > +
>>>> >> > +/** Event port configuration structure */
>>>> >> > +struct rte_event_port_conf_v20 {
>>>> >> > +       int32_t new_event_threshold;
>>>> >> > +       /**< A backpressure threshold for new event enqueues on
>>this port.
>>>> >> > +        * Use for *closed system* event dev where event capacity
>>is limited,
>>>> >> > +        * and cannot exceed the capacity of the event dev.
>>>> >> > +        * Configuring ports with different thresholds can make
>>higher priority
>>>> >> > +        * traffic less likely to  be backpressured.
>>>> >> > +        * For example, a port used to inject NIC Rx packets into
>>the event dev
>>>> >> > +        * can have a lower threshold so as not to overwhelm the
>>device,
>>>> >> > +        * while ports used for worker pools can have a higher
>>threshold.
>>>> >> > +        * This value cannot exceed the *nb_events_limit*
>>>> >> > +        * which was previously supplied to
>>rte_event_dev_configure().
>>>> >> > +        * This should be set to '-1' for *open system*.
>>>> >> > +        */
>>>> >> > +       uint16_t dequeue_depth;
>>>> >> > +       /**< Configure number of bulk dequeues for this event
>>port.
>>>> >> > +        * This value cannot exceed the
>>*nb_event_port_dequeue_depth*
>>>> >> > +        * which previously supplied to rte_event_dev_configure().
>>>> >> > +        * Ignored when device is not
>>RTE_EVENT_DEV_CAP_BURST_MODE
>>>> >capable.
>>>> >> > +        */
>>>> >> > +       uint16_t enqueue_depth;
>>>> >> > +       /**< Configure number of bulk enqueues for this event
>>port.
>>>> >> > +        * This value cannot exceed the
>>*nb_event_port_enqueue_depth*
>>>> >> > +        * which previously supplied to rte_event_dev_configure().
>>>> >> > +        * Ignored when device is not
>>RTE_EVENT_DEV_CAP_BURST_MODE
>>>> >capable.
>>>> >> > +        */
>>>> >> >         uint8_t disable_implicit_release;
>>>> >> >         /**< Configure the port not to release outstanding events
>>in
>>>> >> >          * rte_event_dev_dequeue_burst(). If true, all events
>>received through
>>>> >> > @@ -733,6 +911,14 @@ struct rte_event_port_conf {
>>>> >> >  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t
>>port_id,
>>>> >> >                                 struct rte_event_port_conf *port_conf);
>>>> >> >
>>>> >> > +int
>>>> >> > +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t
>>port_id,
>>>> >> > +                               struct rte_event_port_conf_v20 *port_conf);
>>>> >> > +
>>>> >> > +int
>>>> >> > +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t
>>port_id,
>>>> >> > +                                     struct rte_event_port_conf *port_conf);
>>>> >>
>>>> >> Hi Timothy,
>>>> >>
>>>> >> + ABI Maintainers (Ray, Neil)
>>>> >>
>>>> >> # As per my understanding, the structures can not be versioned,
>>only
>>>> >> function can be versioned.
>>>> >> i.e we can not make any change to " struct rte_event_port_conf"
>>>> >>
>>>> >> # We have a similar case with ethdev and it deferred to next
>>release v20.11
>>>> >> http://patches.dpdk.org/patch/69113/
>>>> >>
>>>> >> Regarding the API changes:
>>>> >> # The slow path changes general looks good to me. I will review
>>the
>>>> >> next level in the coming days
>>>> >> # The following fast path changes bothers to me. Could you share
>>more
>>>> >> details on below change?
>>>> >>
>>>> >> diff --git a/app/test-eventdev/test_order_atq.c
>>>> >> b/app/test-eventdev/test_order_atq.c
>>>> >> index 3366cfc..8246b96 100644
>>>> >> --- a/app/test-eventdev/test_order_atq.c
>>>> >> +++ b/app/test-eventdev/test_order_atq.c
>>>> >> @@ -34,6 +34,8 @@
>>>> >>                         continue;
>>>> >>                 }
>>>> >>
>>>> >> +               ev.flow_id = ev.mbuf->udata64;
>>>> >> +
>>>> >> # Since RC1 is near, I am not sure how to accommodate the API
>>changes
>>>> >> now and sort out ABI stuffs.
>>>> >> # Other concern is eventdev spec get bloated with versioning files
>>>> >> just for ONE release as 20.11 will be OK to change the ABI.
>>>> >> # While we discuss the API change, Please send deprecation
>>notice for
>>>> >> ABI change for 20.11,
>>>> >> so that there is no ambiguity of this patch for the 20.11 release.
>>>> >>
>>>> >> Hello Jerin,
>>>> >>
>>>> >> Thank you for the review comments.
>>>> >>
>>>> >> With regard to your comments regarding the fast path flow_id
>>change, the
>>>Intel
>>>> >DLB hardware
>>>> >> is not capable of transferring the flow_id as part of the event
>>itself. We
>>>> >therefore require a mechanism
>>>> >> to accomplish this. What we have done to work around this is to
>>require the
>>>> >application to embed the flow_id
>>>> >> within the data payload. The new flag, #define
>>>> >RTE_EVENT_DEV_CAP_CARRY_FLOW_ID (1ULL << 9), can be used
>>>> >> by applications to determine if they need to embed the flow_id,
>>or if its
>>>> >automatically propagated and present in the
>>>> >> received event.
>>>> >>
>>>> >> What we should have done is to wrap the assignment with a
>>conditional.
>>>> >>
>>>> >> if (!(device_capability_flags &
>>RTE_EVENT_DEV_CAP_CARRY_FLOW_ID))
>>>> >>         ev.flow_id = ev.mbuf->udata64;
>>>> >
>>>> >Two problems with this approach,
>>>> >1) we are assuming mbuf udata64 field is available for DLB driver
>>>> >2) It won't work with another adapter, eventdev has no
>>dependency with mbuf
>>>> >
>>>>
>>>> This snippet is not intended to suggest that udata64 always be used
>>to store the
>>>flow ID, but as an example of how an application could do it. Some
>>applications
>>>won’t need to carry the flow ID through; others can select an unused
>>field in the
>>>event data (e.g. hash.rss or udata64 if using mbufs), or (worst-case)
>>re-generate
>>>the flow ID in pipeline stages that require it.
>>>
>>>OK.
>>>>
>>>> >Question:
>>>> >1) In the case of DLB hardware, on dequeue(),  what HW returns? is
>>it
>>>> >only event pointer and not have any other metadata like
>>schedule_type
>>>> >etc.
>>>> >
>>>>
>>>> The DLB device provides a 16B “queue entry” that consists of:
>>>>
>>>> *       8B event data
>>>> *       Queue ID
>>>> *       Priority
>>>> *       Scheduling type
>>>> *       19 bits of carried-through data
>>>> *       Assorted error/debug/reserved bits that are set by the device
>>(not carried-
>>>through)
>>>>
>>>>  For the carried-through 19b, we use 12b for event_type and
>>sub_event_type.
>>>
>>>I can only think of TWO options to help
>>>1) Since event pointer always cache aligned, You could grab LSB
>>>6bits(2^6 = 64B ) and 7 bits from (19b - 12b) carried through
>>>structure
>>>2) Have separate mempool driver using existing drivers, ie "event
>>>pointer" + or - some offset have any amount of custom data.
>>>
>>
>>We can't guarantee that the event will contain a pointer -- it's possible
>>that 8B is inline data (i.e. struct rte_event's u64 field).
>>
>>It's really an application decision -- for example an app could allocate
>>space in the 'mbuf private data' to store the flow ID, if the event device
>>lacks that carry-flow-ID capability and the other mbuf fields can't be
>>used for whatever reason.
>>We modified the tests, sample apps to show how this might be done,
>>not necessarily how it must be done.
>>
>>>
>>>>
>>>> >
>>>> >>
>>>> >> This would minimize/eliminate any performance impact due to
>>the
>>>processor's
>>>> >branch prediction logic.
>>>
>>>I think, If we need to change common fastpath, better we need to
>>make
>>>it template to create code for compile-time to have absolute zero
>>>overhead
>>>and use runtime.
>>>See app/test-eventdev/test_order_atq.c: function: worker_wrapper()
>>>_create_ worker at compile time based on runtime capability.
>>>
>>
>>Yes, that would be perfect.  Thanks for the example!
>
>Just to  add instead of having if and else using a jumptbl would be much cleaner
>Ex.
>	const pipeline_atq_worker_t pipeline_atq_worker_single_stage[2][2][2]
>= {
>		[0][0] = pipeline_atq_worker_single_stage_fwd,
>		[0][1] = pipeline_atq_worker_single_stage_tx,
>		[1][0] = pipeline_atq_worker_single_stage_burst_fwd,
>		[1][1] = pipeline_atq_worker_single_stage_burst_tx,
>	};
>
>		return
>(pipeline_atq_worker_single_stage[burst][internal_port])(arg);
>


Thank you for the suggestion.


>>
>>>
>>>
>>>> >> The assignment then becomes in essence a NOOP for all event
>>devices that
>>>are
>>>> >capable of carrying the flow_id as part of the event payload itself.
>>>> >>
>>>> >> Thanks,
>>>> >> Tim
>>>> >>
>>>> >>
>>>> >>
>>>> >> Thanks,
>>>> >> Tim
  
Jerin Jacob July 1, 2020, 4:50 a.m. UTC | #13
On Wed, Jul 1, 2020 at 12:57 AM McDaniel, Timothy
<timothy.mcdaniel@intel.com> wrote:
>
> >-----Original Message-----
> >From: Jerin Jacob <jerinjacobk@gmail.com>
> >Sent: Tuesday, June 30, 2020 10:58 AM
> >To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
> >Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>;
> >Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage
> ><gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
> >Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
> >
> >On Tue, Jun 30, 2020 at 9:12 PM McDaniel, Timothy
> ><timothy.mcdaniel@intel.com> wrote:
> >>
> >> >-----Original Message-----
> >> >From: Jerin Jacob <jerinjacobk@gmail.com>
> >> >Sent: Monday, June 29, 2020 11:21 PM
> >> >To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
> >> >Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>;
> >> >Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
> >> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage
> >> ><gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
> >> >Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
> >> >
> >> >On Tue, Jun 30, 2020 at 1:01 AM McDaniel, Timothy
> >> ><timothy.mcdaniel@intel.com> wrote:
> >> >>
> >> >> -----Original Message-----
> >> >> From: Jerin Jacob <jerinjacobk@gmail.com>
> >> >> Sent: Saturday, June 27, 2020 2:45 AM
> >> >> To: McDaniel, Timothy <timothy.mcdaniel@intel.com>; Ray Kinsella
> >> ><mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
> >> >> Cc: Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
> >> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage
> >> ><gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
> >> >> Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
> >> >>
> >> >> > +
> >> >> > +/** Event port configuration structure */
> >> >> > +struct rte_event_port_conf_v20 {
> >> >> > +       int32_t new_event_threshold;
> >> >> > +       /**< A backpressure threshold for new event enqueues on this port.
> >> >> > +        * Use for *closed system* event dev where event capacity is limited,
> >> >> > +        * and cannot exceed the capacity of the event dev.
> >> >> > +        * Configuring ports with different thresholds can make higher priority
> >> >> > +        * traffic less likely to  be backpressured.
> >> >> > +        * For example, a port used to inject NIC Rx packets into the event dev
> >> >> > +        * can have a lower threshold so as not to overwhelm the device,
> >> >> > +        * while ports used for worker pools can have a higher threshold.
> >> >> > +        * This value cannot exceed the *nb_events_limit*
> >> >> > +        * which was previously supplied to rte_event_dev_configure().
> >> >> > +        * This should be set to '-1' for *open system*.
> >> >> > +        */
> >> >> > +       uint16_t dequeue_depth;
> >> >> > +       /**< Configure number of bulk dequeues for this event port.
> >> >> > +        * This value cannot exceed the *nb_event_port_dequeue_depth*
> >> >> > +        * which previously supplied to rte_event_dev_configure().
> >> >> > +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE
> >> >capable.
> >> >> > +        */
> >> >> > +       uint16_t enqueue_depth;
> >> >> > +       /**< Configure number of bulk enqueues for this event port.
> >> >> > +        * This value cannot exceed the *nb_event_port_enqueue_depth*
> >> >> > +        * which previously supplied to rte_event_dev_configure().
> >> >> > +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE
> >> >capable.
> >> >> > +        */
> >> >> >         uint8_t disable_implicit_release;
> >> >> >         /**< Configure the port not to release outstanding events in
> >> >> >          * rte_event_dev_dequeue_burst(). If true, all events received through
> >> >> > @@ -733,6 +911,14 @@ struct rte_event_port_conf {
> >> >> >  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
> >> >> >                                 struct rte_event_port_conf *port_conf);
> >> >> >
> >> >> > +int
> >> >> > +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
> >> >> > +                               struct rte_event_port_conf_v20 *port_conf);
> >> >> > +
> >> >> > +int
> >> >> > +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
> >> >> > +                                     struct rte_event_port_conf *port_conf);
> >> >>
> >> >> Hi Timothy,
> >> >>
> >> >> + ABI Maintainers (Ray, Neil)
> >> >>
> >> >> # As per my understanding, the structures can not be versioned, only
> >> >> function can be versioned.
> >> >> i.e we can not make any change to " struct rte_event_port_conf"
> >> >>
> >> >> # We have a similar case with ethdev and it deferred to next release v20.11
> >> >> http://patches.dpdk.org/patch/69113/
> >> >>
> >> >> Regarding the API changes:
> >> >> # The slow path changes general looks good to me. I will review the
> >> >> next level in the coming days
> >> >> # The following fast path changes bothers to me. Could you share more
> >> >> details on below change?
> >> >>
> >> >> diff --git a/app/test-eventdev/test_order_atq.c
> >> >> b/app/test-eventdev/test_order_atq.c
> >> >> index 3366cfc..8246b96 100644
> >> >> --- a/app/test-eventdev/test_order_atq.c
> >> >> +++ b/app/test-eventdev/test_order_atq.c
> >> >> @@ -34,6 +34,8 @@
> >> >>                         continue;
> >> >>                 }
> >> >>
> >> >> +               ev.flow_id = ev.mbuf->udata64;
> >> >> +
> >> >> # Since RC1 is near, I am not sure how to accommodate the API changes
> >> >> now and sort out ABI stuffs.
> >> >> # Other concern is eventdev spec get bloated with versioning files
> >> >> just for ONE release as 20.11 will be OK to change the ABI.
> >> >> # While we discuss the API change, Please send deprecation notice for
> >> >> ABI change for 20.11,
> >> >> so that there is no ambiguity of this patch for the 20.11 release.
> >> >>
> >> >> Hello Jerin,
> >> >>
> >> >> Thank you for the review comments.
> >> >>
> >> >> With regard to your comments regarding the fast path flow_id change, the
> >Intel
> >> >DLB hardware
> >> >> is not capable of transferring the flow_id as part of the event itself. We
> >> >therefore require a mechanism
> >> >> to accomplish this. What we have done to work around this is to require the
> >> >application to embed the flow_id
> >> >> within the data payload. The new flag, #define
> >> >RTE_EVENT_DEV_CAP_CARRY_FLOW_ID (1ULL << 9), can be used
> >> >> by applications to determine if they need to embed the flow_id, or if its
> >> >automatically propagated and present in the
> >> >> received event.
> >> >>
> >> >> What we should have done is to wrap the assignment with a conditional.
> >> >>
> >> >> if (!(device_capability_flags & RTE_EVENT_DEV_CAP_CARRY_FLOW_ID))
> >> >>         ev.flow_id = ev.mbuf->udata64;
> >> >
> >> >Two problems with this approach,
> >> >1) we are assuming mbuf udata64 field is available for DLB driver
> >> >2) It won't work with another adapter, eventdev has no dependency with mbuf
> >> >
> >>
> >> This snippet is not intended to suggest that udata64 always be used to store the
> >flow ID, but as an example of how an application could do it. Some applications
> >won’t need to carry the flow ID through; others can select an unused field in the
> >event data (e.g. hash.rss or udata64 if using mbufs), or (worst-case) re-generate
> >the flow ID in pipeline stages that require it.
> >
> >OK.
> >>
> >> >Question:
> >> >1) In the case of DLB hardware, on dequeue(),  what HW returns? is it
> >> >only event pointer and not have any other metadata like schedule_type
> >> >etc.
> >> >
> >>
> >> The DLB device provides a 16B “queue entry” that consists of:
> >>
> >> *       8B event data
> >> *       Queue ID
> >> *       Priority
> >> *       Scheduling type
> >> *       19 bits of carried-through data
> >> *       Assorted error/debug/reserved bits that are set by the device (not carried-
> >through)
> >>
> >>  For the carried-through 19b, we use 12b for event_type and sub_event_type.
> >
> >I can only think of TWO options to help
> >1) Since event pointer always cache aligned, You could grab LSB
> >6bits(2^6 = 64B ) and 7 bits from (19b - 12b) carried through
> >structure
> >2) Have separate mempool driver using existing drivers, ie "event
> >pointer" + or - some offset have any amount of custom data.
> >
>
> We can't guarantee that the event will contain a pointer -- it's possible that 8B is inline data (i.e. struct rte_event's u64 field).
>
> It's really an application decision -- for example an app could allocate space in the 'mbuf private data' to store the flow ID, if the event device lacks that carry-flow-ID capability and the other mbuf fields can't be used for whatever reason.
> We modified the tests, sample apps to show how this might be done, not necessarily how it must be done.


Yeah. If the HW has that limitation, we can't do much. It is OK to change
the eventdev spec to support new HW limitations, i.e. adding
RTE_EVENT_DEV_CAP_CARRY_FLOW_ID is OK.
Please update the existing drivers to advertise the
RTE_EVENT_DEV_CAP_CARRY_FLOW_ID capability, which I believe is missing
in the patch.
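
For example, each PMD that does deliver the flow ID would set the flag in
its info_get callback, roughly like this (the xxx_ prefix is a placeholder,
not a real driver function):

	static void
	xxx_eventdev_info_get(struct rte_eventdev *dev,
			      struct rte_event_dev_info *dev_info)
	{
		RTE_SET_USED(dev);

		/* ... existing capability flags ... */
		dev_info->event_dev_cap |= RTE_EVENT_DEV_CAP_CARRY_FLOW_ID;
	}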

>
> >
> >>
> >> >
> >> >>
> >> >> This would minimize/eliminate any performance impact due to the
> >processor's
> >> >branch prediction logic.
> >
> >I think, If we need to change common fastpath, better we need to make
> >it template to create code for compile-time to have absolute zero
> >overhead
> >and use runtime.
> >See app/test-eventdev/test_order_atq.c: function: worker_wrapper()
> >_create_ worker at compile time based on runtime capability.
> >
>
> Yes, that would be perfect.  Thanks for the example!

Wherever you are making a fastpath change, please follow this scheme
and send the next version.
In order to have clean and reusable code, you could have a template
function with an "if" that is optimized out at _compile_ time.
i.e.

no_inline generic_worker(..., _const_ uint64_t flags)
{
..
..

if (!(flags & CAP_CARRY_FLOW_ID))
    ....

}

worker_with_out_carry_flow_id()
{
          generic_worker(.., 0)
}

normal_worker()
{
          generic_worker(.., CAP_CARRY_FLOW_ID)
}

No other controversial top-level comments with this patch series.
Once we sorted out the ABI issues then I can review and merge.


>
> >
> >
> >> >> The assignment then becomes in essence a NOOP for all event devices that
> >are
> >> >capable of carrying the flow_id as part of the event payload itself.
> >> >>
> >> >> Thanks,
> >> >> Tim
> >> >>
> >> >>
> >> >>
> >> >> Thanks,
> >> >> Tim
  
Timothy McDaniel July 1, 2020, 4:48 p.m. UTC | #14
>-----Original Message-----
>From: Jerin Jacob <jerinjacobk@gmail.com>
>Sent: Tuesday, June 30, 2020 11:50 PM
>To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
>Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>;
>Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage
><gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
>Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
>
>On Wed, Jul 1, 2020 at 12:57 AM McDaniel, Timothy
><timothy.mcdaniel@intel.com> wrote:
>>
>> >-----Original Message-----
>> >From: Jerin Jacob <jerinjacobk@gmail.com>
>> >Sent: Tuesday, June 30, 2020 10:58 AM
>> >To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
>> >Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>;
>> >Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
>> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads, Gage
>> ><gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
>> >Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
>> >
>> >On Tue, Jun 30, 2020 at 9:12 PM McDaniel, Timothy
>> ><timothy.mcdaniel@intel.com> wrote:
>> >>
>> >> >-----Original Message-----
>> >> >From: Jerin Jacob <jerinjacobk@gmail.com>
>> >> >Sent: Monday, June 29, 2020 11:21 PM
>> >> >To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
>> >> >Cc: Ray Kinsella <mdr@ashroe.eu>; Neil Horman
><nhorman@tuxdriver.com>;
>> >> >Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
>> >> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads,
>Gage
>> >> ><gage.eads@intel.com>; Van Haaren, Harry
><harry.van.haaren@intel.com>
>> >> >Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream
>prerequisites
>> >> >
>> >> >On Tue, Jun 30, 2020 at 1:01 AM McDaniel, Timothy
>> >> ><timothy.mcdaniel@intel.com> wrote:
>> >> >>
>> >> >> -----Original Message-----
>> >> >> From: Jerin Jacob <jerinjacobk@gmail.com>
>> >> >> Sent: Saturday, June 27, 2020 2:45 AM
>> >> >> To: McDaniel, Timothy <timothy.mcdaniel@intel.com>; Ray Kinsella
>> >> ><mdr@ashroe.eu>; Neil Horman <nhorman@tuxdriver.com>
>> >> >> Cc: Jerin Jacob <jerinj@marvell.com>; Mattias Rönnblom
>> >> ><mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads,
>Gage
>> >> ><gage.eads@intel.com>; Van Haaren, Harry
><harry.van.haaren@intel.com>
>> >> >> Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream
>prerequisites
>> >> >>
>> >> >> > +
>> >> >> > +/** Event port configuration structure */
>> >> >> > +struct rte_event_port_conf_v20 {
>> >> >> > +       int32_t new_event_threshold;
>> >> >> > +       /**< A backpressure threshold for new event enqueues on this
>port.
>> >> >> > +        * Use for *closed system* event dev where event capacity is
>limited,
>> >> >> > +        * and cannot exceed the capacity of the event dev.
>> >> >> > +        * Configuring ports with different thresholds can make higher
>priority
>> >> >> > +        * traffic less likely to  be backpressured.
>> >> >> > +        * For example, a port used to inject NIC Rx packets into the event
>dev
>> >> >> > +        * can have a lower threshold so as not to overwhelm the device,
>> >> >> > +        * while ports used for worker pools can have a higher threshold.
>> >> >> > +        * This value cannot exceed the *nb_events_limit*
>> >> >> > +        * which was previously supplied to rte_event_dev_configure().
>> >> >> > +        * This should be set to '-1' for *open system*.
>> >> >> > +        */
>> >> >> > +       uint16_t dequeue_depth;
>> >> >> > +       /**< Configure number of bulk dequeues for this event port.
>> >> >> > +        * This value cannot exceed the *nb_event_port_dequeue_depth*
>> >> >> > +        * which previously supplied to rte_event_dev_configure().
>> >> >> > +        * Ignored when device is not
>RTE_EVENT_DEV_CAP_BURST_MODE
>> >> >capable.
>> >> >> > +        */
>> >> >> > +       uint16_t enqueue_depth;
>> >> >> > +       /**< Configure number of bulk enqueues for this event port.
>> >> >> > +        * This value cannot exceed the *nb_event_port_enqueue_depth*
>> >> >> > +        * which previously supplied to rte_event_dev_configure().
>> >> >> > +        * Ignored when device is not
>RTE_EVENT_DEV_CAP_BURST_MODE
>> >> >capable.
>> >> >> > +        */
>> >> >> >         uint8_t disable_implicit_release;
>> >> >> >         /**< Configure the port not to release outstanding events in
>> >> >> >          * rte_event_dev_dequeue_burst(). If true, all events received
>through
>> >> >> > @@ -733,6 +911,14 @@ struct rte_event_port_conf {
>> >> >> >  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
>> >> >> >                                 struct rte_event_port_conf *port_conf);
>> >> >> >
>> >> >> > +int
>> >> >> > +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
>> >> >> > +                               struct rte_event_port_conf_v20 *port_conf);
>> >> >> > +
>> >> >> > +int
>> >> >> > +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
>> >> >> > +                                     struct rte_event_port_conf *port_conf);
>> >> >>
>> >> >> Hi Timothy,
>> >> >>
>> >> >> + ABI Maintainers (Ray, Neil)
>> >> >>
>> >> >> # As per my understanding, the structures can not be versioned, only
>> >> >> function can be versioned.
>> >> >> i.e we can not make any change to " struct rte_event_port_conf"
>> >> >>
>> >> >> # We have a similar case with ethdev and it deferred to next release
>v20.11
>> >> >> http://patches.dpdk.org/patch/69113/
>> >> >>
>> >> >> Regarding the API changes:
>> >> >> # The slow path changes general looks good to me. I will review the
>> >> >> next level in the coming days
>> >> >> # The following fast path changes bothers to me. Could you share more
>> >> >> details on below change?
>> >> >>
>> >> >> diff --git a/app/test-eventdev/test_order_atq.c
>> >> >> b/app/test-eventdev/test_order_atq.c
>> >> >> index 3366cfc..8246b96 100644
>> >> >> --- a/app/test-eventdev/test_order_atq.c
>> >> >> +++ b/app/test-eventdev/test_order_atq.c
>> >> >> @@ -34,6 +34,8 @@
>> >> >>                         continue;
>> >> >>                 }
>> >> >>
>> >> >> +               ev.flow_id = ev.mbuf->udata64;
>> >> >> +
>> >> >> # Since RC1 is near, I am not sure how to accommodate the API changes
>> >> >> now and sort out ABI stuffs.
>> >> >> # Other concern is eventdev spec get bloated with versioning files
>> >> >> just for ONE release as 20.11 will be OK to change the ABI.
>> >> >> # While we discuss the API change, Please send deprecation notice for
>> >> >> ABI change for 20.11,
>> >> >> so that there is no ambiguity of this patch for the 20.11 release.
>> >> >>
>> >> >> Hello Jerin,
>> >> >>
>> >> >> Thank you for the review comments.
>> >> >>
>> >> >> With regard to your comments regarding the fast path flow_id change,
>the
>> >Intel
>> >> >DLB hardware
>> >> >> is not capable of transferring the flow_id as part of the event itself. We
>> >> >therefore require a mechanism
>> >> >> to accomplish this. What we have done to work around this is to require
>the
>> >> >application to embed the flow_id
>> >> >> within the data payload. The new flag, #define
>> >> >RTE_EVENT_DEV_CAP_CARRY_FLOW_ID (1ULL << 9), can be used
>> >> >> by applications to determine if they need to embed the flow_id, or if its
>> >> >automatically propagated and present in the
>> >> >> received event.
>> >> >>
>> >> >> What we should have done is to wrap the assignment with a conditional.
>> >> >>
>> >> >> if (!(device_capability_flags & RTE_EVENT_DEV_CAP_CARRY_FLOW_ID))
>> >> >>         ev.flow_id = ev.mbuf->udata64;
>> >> >
>> >> >Two problems with this approach,
>> >> >1) we are assuming mbuf udata64 field is available for DLB driver
>> >> >2) It won't work with another adapter, eventdev has no dependency with
>mbuf
>> >> >
>> >>
>> >> This snippet is not intended to suggest that udata64 always be used to store
>the
>> >flow ID, but as an example of how an application could do it. Some
>applications
>> >won’t need to carry the flow ID through; others can select an unused field in
>the
>> >event data (e.g. hash.rss or udata64 if using mbufs), or (worst-case) re-
>generate
>> >the flow ID in pipeline stages that require it.
>> >
>> >OK.
>> >>
>> >> >Question:
>> >> >1) In the case of DLB hardware, on dequeue(),  what HW returns? is it
>> >> >only event pointer and not have any other metadata like schedule_type
>> >> >etc.
>> >> >
>> >>
>> >> The DLB device provides a 16B “queue entry” that consists of:
>> >>
>> >> *       8B event data
>> >> *       Queue ID
>> >> *       Priority
>> >> *       Scheduling type
>> >> *       19 bits of carried-through data
>> >> *       Assorted error/debug/reserved bits that are set by the device (not
>carried-
>> >through)
>> >>
>> >>  For the carried-through 19b, we use 12b for event_type and
>sub_event_type.
>> >
>> >I can only think of TWO options to help
>> >1) Since event pointer always cache aligned, You could grab LSB
>> >6bits(2^6 = 64B ) and 7 bits from (19b - 12b) carried through
>> >structure
>> >2) Have separate mempool driver using existing drivers, ie "event
>> >pointer" + or - some offset have any amount of custom data.
>> >
>>
>> We can't guarantee that the event will contain a pointer -- it's possible that 8B
>is inline data (i.e. struct rte_event's u64 field).
>>
>> It's really an application decision -- for example an app could allocate space in
>the 'mbuf private data' to store the flow ID, if the event device lacks that carry-
>flow-ID capability and the other mbuf fields can't be used for whatever reason.
>> We modified the tests, sample apps to show how this might be done, not
>necessarily how it must be done.
>
>
>Yeah. If HW has limitation we can't do much. It is OK to change
>eventdev spec to support new HW limitations. aka,
>RTE_EVENT_DEV_CAP_CARRY_FLOW_ID is OK.
>Please update existing drivers has this
>RTE_EVENT_DEV_CAP_CARRY_FLOW_ID capability which is missing in the
>patch(I believe)
>
>>
>> >
>> >>
>> >> >
>> >> >>
>> >> >> This would minimize/eliminate any performance impact due to the
>> >processor's
>> >> >branch prediction logic.
>> >
>> >I think, If we need to change common fastpath, better we need to make
>> >it template to create code for compile-time to have absolute zero
>> >overhead
>> >and use runtime.
>> >See app/test-eventdev/test_order_atq.c: function: worker_wrapper()
>> >_create_ worker at compile time based on runtime capability.
>> >
>>
>> Yes, that would be perfect.  Thanks for the example!
>
>Where ever you are making fastpath change, Please follow this scheme
>and send the next version.
>In order to have clean and reusable code, you could have template
>function and with "if" and it can opt-out in _compile_ time.
>i.e
>
>no_inline generic_worker(..., _const_ uint64_t flags)
>{
>..
>..
>
>if (! flags & CAP_CARRY_FLOW_ID)
>    ....
>
>}
>
>worker_with_out_carry_flow_id()
>{
>          generic_worker(.., CAP_CARRY_FLOW_ID)
>}
>
>normal_worker()
>{
>          generic_worker(.., 0)
>}
>
>No other controversial top-level comments with this patch series.
>Once we sorted out the ABI issues then I can review and merge.
>

Thanks Jerin. I'll get these changes into the v3 patch set.

>
>>
>> >
>> >
>> >> >> The assignment then becomes in essence a NOOP for all event devices
>that
>> >are
>> >> >capable of carrying the flow_id as part of the event payload itself.
>> >> >>
>> >> >> Thanks,
>> >> >> Tim
>> >> >>
>> >> >>
>> >> >>
>> >> >> Thanks,
>> >> >> Tim
  
Ray Kinsella July 2, 2020, 3:21 p.m. UTC | #15
On 30/06/2020 13:14, Jerin Jacob wrote:
> On Tue, Jun 30, 2020 at 5:06 PM Kinsella, Ray <mdr@ashroe.eu> wrote:
>>
>>
>>
>> On 30/06/2020 12:30, Jerin Jacob wrote:
>>> On Tue, Jun 30, 2020 at 4:52 PM Kinsella, Ray <mdr@ashroe.eu> wrote:
>>>>
>>>>
>>>>
>>>> On 27/06/2020 08:44, Jerin Jacob wrote:
>>>>>> +
>>>>>> +/** Event port configuration structure */
>>>>>> +struct rte_event_port_conf_v20 {
>>>>>> +       int32_t new_event_threshold;
>>>>>> +       /**< A backpressure threshold for new event enqueues on this port.
>>>>>> +        * Use for *closed system* event dev where event capacity is limited,
>>>>>> +        * and cannot exceed the capacity of the event dev.
>>>>>> +        * Configuring ports with different thresholds can make higher priority
>>>>>> +        * traffic less likely to  be backpressured.
>>>>>> +        * For example, a port used to inject NIC Rx packets into the event dev
>>>>>> +        * can have a lower threshold so as not to overwhelm the device,
>>>>>> +        * while ports used for worker pools can have a higher threshold.
>>>>>> +        * This value cannot exceed the *nb_events_limit*
>>>>>> +        * which was previously supplied to rte_event_dev_configure().
>>>>>> +        * This should be set to '-1' for *open system*.
>>>>>> +        */
>>>>>> +       uint16_t dequeue_depth;
>>>>>> +       /**< Configure number of bulk dequeues for this event port.
>>>>>> +        * This value cannot exceed the *nb_event_port_dequeue_depth*
>>>>>> +        * which previously supplied to rte_event_dev_configure().
>>>>>> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
>>>>>> +        */
>>>>>> +       uint16_t enqueue_depth;
>>>>>> +       /**< Configure number of bulk enqueues for this event port.
>>>>>> +        * This value cannot exceed the *nb_event_port_enqueue_depth*
>>>>>> +        * which previously supplied to rte_event_dev_configure().
>>>>>> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
>>>>>> +        */
>>>>>>         uint8_t disable_implicit_release;
>>>>>>         /**< Configure the port not to release outstanding events in
>>>>>>          * rte_event_dev_dequeue_burst(). If true, all events received through
>>>>>> @@ -733,6 +911,14 @@ struct rte_event_port_conf {
>>>>>>  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
>>>>>>                                 struct rte_event_port_conf *port_conf);
>>>>>>
>>>>>> +int
>>>>>> +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
>>>>>> +                               struct rte_event_port_conf_v20 *port_conf);
>>>>>> +
>>>>>> +int
>>>>>> +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
>>>>>> +                                     struct rte_event_port_conf *port_conf);
>>>>>
>>>>> Hi Timothy,
>>>>>
>>>>> + ABI Maintainers (Ray, Neil)
>>>>>
>>>>> # As per my understanding, the structures can not be versioned, only
>>>>> function can be versioned.
>>>>> i.e we can not make any change to " struct rte_event_port_conf"
>>>>
>>>> So the answer is (as always): depends
>>>>
>>>> If the structure is being use in inline functions is when you run into trouble
>>>> - as knowledge of the structure is embedded in the linked application.
>>>>
>>>> However if the structure is _strictly_ being used as a non-inlined function parameter,
>>>> it can be safe to version in this way.
>>>
>>> But based on the optimization applied when building the consumer code
>>> matters. Right?
>>> i.e compiler can "inline" it, based on the optimization even the
>>> source code explicitly mentions it.
>>
>> Well a compiler will typically only inline within the confines of a given object file, or
>> binary, if LTO is enabled.
> 
>>
>> If a function symbol is exported from a library, however, it won't be inlined in a linked application.
> 
> Yes, With respect to that function.
> But the application can use struct rte_event_port_conf in their code
> and it can be part of other structures.
> Right?
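
A minimal sketch of that concern (illustrative only, not part of the patch): once an application embeds the struct by value, or reads it through inlined code, its size and field offsets are fixed in the application binary at build time, which is why only the functions, and not the struct itself, can be versioned.

#include <stdint.h>
#include <rte_eventdev.h>

/* Application-side code built against the old headers: sizeof() and the
 * field offsets of struct rte_event_port_conf are compiled into this
 * object, so appending a field in the library cannot be hidden behind a
 * versioned function symbol.
 */
struct app_port_ctx {
	struct rte_event_port_conf conf; /* embedded by value */
	uint64_t stats[8];               /* offset depends on sizeof(conf) */
};

static inline uint16_t
app_dequeue_depth(const struct app_port_ctx *ctx)
{
	/* Inlined into the application: reads conf.dequeue_depth at an
	 * offset known only from the headers used at build time.
	 */
	return ctx->conf.dequeue_depth;
}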

Tim, it looks like you might be inadvertently breaking other symbols also.
For example ... 

int
rte_event_crypto_adapter_create(uint8_t id, uint8_t dev_id,
                                struct rte_event_port_conf *port_config,
                                enum rte_event_crypto_adapter_mode mode);

int
rte_event_port_setup(uint8_t dev_id, uint8_t port_id,
                     const struct rte_event_port_conf *port_conf);

These and other symbols are also using rte_event_port_conf and would need to be updated to use the v20 struct,
if we aren't to break them.
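
A minimal sketch of what that could look like, assuming the same v20-to-v21 wrapper pattern used for rte_event_port_setup() further down in this patch (the _v20 symbol below is purely illustrative and is not part of the posted series):

/* Would live next to the existing implementation in
 * rte_event_crypto_adapter.c.
 */
int
rte_event_crypto_adapter_create_v20(uint8_t id, uint8_t dev_id,
				struct rte_event_port_conf_v20 *port_config,
				enum rte_event_crypto_adapter_mode mode)
{
	struct rte_event_port_conf new_port_conf;

	if (port_config == NULL)
		return -EINVAL;

	/* Translate the old layout into the current one, then call the
	 * default (v21) implementation.
	 */
	new_port_conf.new_event_threshold = port_config->new_event_threshold;
	new_port_conf.dequeue_depth = port_config->dequeue_depth;
	new_port_conf.enqueue_depth = port_config->enqueue_depth;
	new_port_conf.event_port_cfg = port_config->disable_implicit_release ?
		RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL : 0;

	return rte_event_crypto_adapter_create(id, dev_id, &new_port_conf, mode);
}
VERSION_SYMBOL(rte_event_crypto_adapter_create, _v20, 20);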

> 
> 
>> The compiler doesn't have enough information to inline it.
>> All the compiler will know about it is its offset in memory, and its signature.
>>
>>>
>>>
>>>>
>>>> So just to be clear, it is still the function that is actually being versioned here.
>>>>
>>>>>
>>>>> # We have a similar case with ethdev and it deferred to next release v20.11
>>>>> http://patches.dpdk.org/patch/69113/
>>>>
>>>> Yes - I spent a while looking at this one, but I am struggling to recall
>>>> why, when I looked at it, we didn't suggest function versioning as a potential solution in this case.
>>>>
>>>> Looking back at it now, looks like it would have been ok.
>>>
>>> Ok.
>>>
>>>>
>>>>>
>>>>> Regarding the API changes:
>>>>> # The slow path changes generally look good to me. I will review the
>>>>> next level in the coming days
>>>>> # The following fast path changes bother me. Could you share more
>>>>> details on below change?
>>>>>
>>>>> diff --git a/app/test-eventdev/test_order_atq.c
>>>>> b/app/test-eventdev/test_order_atq.c
>>>>> index 3366cfc..8246b96 100644
>>>>> --- a/app/test-eventdev/test_order_atq.c
>>>>> +++ b/app/test-eventdev/test_order_atq.c
>>>>> @@ -34,6 +34,8 @@
>>>>>                         continue;
>>>>>                 }
>>>>>
>>>>> +               ev.flow_id = ev.mbuf->udata64;
>>>>> +
>>>>> # Since RC1 is near, I am not sure how to accommodate the API changes
>>>>> now and sort out the ABI stuff.
>>>>> # Another concern is that the eventdev spec gets bloated with versioning files
>>>>> just for ONE release, as 20.11 will be OK to change the ABI.
>>>>> # While we discuss the API change, please send a deprecation notice for
>>>>> the ABI change for 20.11,
>>>>> so that there is no ambiguity about this patch for the 20.11 release.
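
For reference, the fast-path change questioned above amounts to stashing the flow ID in the mbuf so a device that does not advertise RTE_EVENT_DEV_CAP_CARRY_FLOW_ID cannot lose it across scheduling. A minimal sketch of the two halves follows (the helper names are illustrative; the patch open-codes these assignments in the test apps):

#include <stdint.h>
#include <rte_eventdev.h>
#include <rte_mbuf.h>

/* Producer side, as in test_order_common.c: remember the flow in the mbuf. */
static inline void
order_stash_flow(struct rte_event *ev, struct rte_mbuf *m, uint32_t flow)
{
	m->udata64 = flow;      /* survives scheduling inside the mbuf */
	ev->flow_id = flow;
	ev->mbuf = m;
}

/* Worker side, as in test_order_atq.c / test_order_queue.c: restore it after
 * dequeue, since the device may not have carried ev->flow_id through.
 */
static inline void
order_restore_flow(struct rte_event *ev)
{
	ev->flow_id = ev->mbuf->udata64;
}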
>>>>>
  
Timothy McDaniel July 2, 2020, 4:35 p.m. UTC | #16
>-----Original Message-----
>From: Kinsella, Ray <mdr@ashroe.eu>
>Sent: Thursday, July 2, 2020 10:21 AM
>To: Jerin Jacob <jerinjacobk@gmail.com>
>Cc: McDaniel, Timothy <timothy.mcdaniel@intel.com>; Neil Horman
><nhorman@tuxdriver.com>; Jerin Jacob <jerinj@marvell.com>; Mattias
>Rönnblom <mattias.ronnblom@ericsson.com>; dpdk-dev <dev@dpdk.org>; Eads,
>Gage <gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
>Subject: Re: [dpdk-dev] [PATCH 01/27] eventdev: dlb upstream prerequisites
>
>
>
>On 30/06/2020 13:14, Jerin Jacob wrote:
>> On Tue, Jun 30, 2020 at 5:06 PM Kinsella, Ray <mdr@ashroe.eu> wrote:
>>>
>>>
>>>
>>> On 30/06/2020 12:30, Jerin Jacob wrote:
>>>> On Tue, Jun 30, 2020 at 4:52 PM Kinsella, Ray <mdr@ashroe.eu> wrote:
>>>>>
>>>>>
>>>>>
>>>>> On 27/06/2020 08:44, Jerin Jacob wrote:
>>>>>>> +
>>>>>>> +/** Event port configuration structure */
>>>>>>> +struct rte_event_port_conf_v20 {
>>>>>>> +       int32_t new_event_threshold;
>>>>>>> +       /**< A backpressure threshold for new event enqueues on this port.
>>>>>>> +        * Use for *closed system* event dev where event capacity is
>limited,
>>>>>>> +        * and cannot exceed the capacity of the event dev.
>>>>>>> +        * Configuring ports with different thresholds can make higher
>priority
>>>>>>> +        * traffic less likely to  be backpressured.
>>>>>>> +        * For example, a port used to inject NIC Rx packets into the event
>dev
>>>>>>> +        * can have a lower threshold so as not to overwhelm the device,
>>>>>>> +        * while ports used for worker pools can have a higher threshold.
>>>>>>> +        * This value cannot exceed the *nb_events_limit*
>>>>>>> +        * which was previously supplied to rte_event_dev_configure().
>>>>>>> +        * This should be set to '-1' for *open system*.
>>>>>>> +        */
>>>>>>> +       uint16_t dequeue_depth;
>>>>>>> +       /**< Configure number of bulk dequeues for this event port.
>>>>>>> +        * This value cannot exceed the *nb_event_port_dequeue_depth*
>>>>>>> +        * which previously supplied to rte_event_dev_configure().
>>>>>>> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE
>capable.
>>>>>>> +        */
>>>>>>> +       uint16_t enqueue_depth;
>>>>>>> +       /**< Configure number of bulk enqueues for this event port.
>>>>>>> +        * This value cannot exceed the *nb_event_port_enqueue_depth*
>>>>>>> +        * which previously supplied to rte_event_dev_configure().
>>>>>>> +        * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE
>capable.
>>>>>>> +        */
>>>>>>>         uint8_t disable_implicit_release;
>>>>>>>         /**< Configure the port not to release outstanding events in
>>>>>>>          * rte_event_dev_dequeue_burst(). If true, all events received
>through
>>>>>>> @@ -733,6 +911,14 @@ struct rte_event_port_conf {
>>>>>>>  rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
>>>>>>>                                 struct rte_event_port_conf *port_conf);
>>>>>>>
>>>>>>> +int
>>>>>>> +rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
>>>>>>> +                               struct rte_event_port_conf_v20 *port_conf);
>>>>>>> +
>>>>>>> +int
>>>>>>> +rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
>>>>>>> +                                     struct rte_event_port_conf *port_conf);
>>>>>>
>>>>>> Hi Timothy,
>>>>>>
>>>>>> + ABI Maintainers (Ray, Neil)
>>>>>>
>>>>>> # As per my understanding, the structures can not be versioned, only
>>>>>> function can be versioned.
>>>>>> i.e we can not make any change to " struct rte_event_port_conf"
>>>>>
>>>>> So the answer is (as always): depends
>>>>>
>>>>> If the structure is being use in inline functions is when you run into trouble
>>>>> - as knowledge of the structure is embedded in the linked application.
>>>>>
>>>>> However if the structure is _strictly_ being used as a non-inlined function
>parameter,
>>>>> It can be safe to version in this way.
>>>>
>>>> But based on the optimization applied when building the consumer code
>>>> matters. Right?
>>>> i.e compiler can "inline" it, based on the optimization even the
>>>> source code explicitly mentions it.
>>>
>>> Well a compiler will typically only inline within the confines of a given object
>file, or
>>> binary, if LTO is enabled.
>>
>>>
>>> If a function symbol is exported from library however, it won't be inlined in a
>linked application.
>>
>> Yes, With respect to that function.
>> But the application can use struct rte_event_port_conf in their code
>> and it can be part of other structures.
>> Right?
>
>Tim, it looks like you might be inadvertently breaking other symbols also.
>For example ...
>
>int
>rte_event_crypto_adapter_create(uint8_t id, uint8_t dev_id,
>                                struct rte_event_port_conf *port_config,
>                                enum rte_event_crypto_adapter_mode mode);
>
>int
>rte_event_port_setup(uint8_t dev_id, uint8_t port_id,
>                     const struct rte_event_port_conf *port_conf);
>
>These and others symbols are also using rte_event_port_conf and would need to
>be updated to use the v20 struct,
>if we aren't to break them .
>

Yes, we just discovered that after successfully running the ABI checker. I will address those in the v3
patch set.  Thanks.

>>
>>
>>> The compiler doesn't have enough information to inline it.
>>> All the compiler will know about it is it's offset in memory, and it's signature.
>>>
>>>>
>>>>
>>>>>
>>>>> So just to be clear, it is still the function that is actually being versioned
>here.
>>>>>
>>>>>>
>>>>>> # We have a similar case with ethdev and it deferred to next release v20.11
>>>>>> http://patches.dpdk.org/patch/69113/
>>>>>
>>>>> Yes - I spent a why looking at this one, but I am struggling to recall,
>>>>> why when I looked it we didn't suggest function versioning as a potential
>solution in this case.
>>>>>
>>>>> Looking back at it now, looks like it would have been ok.
>>>>
>>>> Ok.
>>>>
>>>>>
>>>>>>
>>>>>> Regarding the API changes:
>>>>>> # The slow path changes general looks good to me. I will review the
>>>>>> next level in the coming days
>>>>>> # The following fast path changes bothers to me. Could you share more
>>>>>> details on below change?
>>>>>>
>>>>>> diff --git a/app/test-eventdev/test_order_atq.c
>>>>>> b/app/test-eventdev/test_order_atq.c
>>>>>> index 3366cfc..8246b96 100644
>>>>>> --- a/app/test-eventdev/test_order_atq.c
>>>>>> +++ b/app/test-eventdev/test_order_atq.c
>>>>>> @@ -34,6 +34,8 @@
>>>>>>                         continue;
>>>>>>                 }
>>>>>>
>>>>>> +               ev.flow_id = ev.mbuf->udata64;
>>>>>> +
>>>>>> # Since RC1 is near, I am not sure how to accommodate the API changes
>>>>>> now and sort out ABI stuffs.
>>>>>> # Other concern is eventdev spec get bloated with versioning files
>>>>>> just for ONE release as 20.11 will be OK to change the ABI.
>>>>>> # While we discuss the API change, Please send deprecation notice for
>>>>>> ABI change for 20.11,
>>>>>> so that there is no ambiguity of this patch for the 20.11 release.
>>>>>>
  

Patch

diff --git a/app/test-eventdev/evt_common.h b/app/test-eventdev/evt_common.h
index f9d7378..120c27b 100644
--- a/app/test-eventdev/evt_common.h
+++ b/app/test-eventdev/evt_common.h
@@ -169,6 +169,7 @@  struct evt_options {
 			.dequeue_timeout_ns = opt->deq_tmo_nsec,
 			.nb_event_queues = nb_queues,
 			.nb_event_ports = nb_ports,
+			.nb_single_link_event_port_queues = 0,
 			.nb_events_limit  = info.max_num_events,
 			.nb_event_queue_flows = opt->nb_flows,
 			.nb_event_port_dequeue_depth =
diff --git a/app/test-eventdev/test_order_atq.c b/app/test-eventdev/test_order_atq.c
index 3366cfc..8246b96 100644
--- a/app/test-eventdev/test_order_atq.c
+++ b/app/test-eventdev/test_order_atq.c
@@ -34,6 +34,8 @@ 
 			continue;
 		}
 
+		ev.flow_id = ev.mbuf->udata64;
+
 		if (ev.sub_event_type == 0) { /* stage 0 from producer */
 			order_atq_process_stage_0(&ev);
 			while (rte_event_enqueue_burst(dev_id, port, &ev, 1)
@@ -68,6 +70,8 @@ 
 		}
 
 		for (i = 0; i < nb_rx; i++) {
+			ev[i].flow_id = ev[i].mbuf->udata64;
+
 			if (ev[i].sub_event_type == 0) { /*stage 0 */
 				order_atq_process_stage_0(&ev[i]);
 			} else if (ev[i].sub_event_type == 1) { /* stage 1 */
diff --git a/app/test-eventdev/test_order_common.c b/app/test-eventdev/test_order_common.c
index 4190f9a..c6fcd05 100644
--- a/app/test-eventdev/test_order_common.c
+++ b/app/test-eventdev/test_order_common.c
@@ -49,6 +49,7 @@ 
 		const uint32_t flow = (uintptr_t)m % nb_flows;
 		/* Maintain seq number per flow */
 		m->seqn = producer_flow_seq[flow]++;
+		m->udata64 = flow;
 
 		ev.flow_id = flow;
 		ev.mbuf = m;
@@ -318,10 +319,11 @@ 
 		opt->wkr_deq_dep = dev_info.max_event_port_dequeue_depth;
 
 	/* port configuration */
-	const struct rte_event_port_conf p_conf = {
+	struct rte_event_port_conf p_conf = {
 			.dequeue_depth = opt->wkr_deq_dep,
 			.enqueue_depth = dev_info.max_event_port_dequeue_depth,
 			.new_event_threshold = dev_info.max_num_events,
+			.event_port_cfg = 0,
 	};
 
 	/* setup one port per worker, linking to all queues */
@@ -351,6 +353,8 @@ 
 	p->queue_id = 0;
 	p->t = t;
 
+	p_conf.new_event_threshold /= 2;
+
 	ret = rte_event_port_setup(opt->dev_id, port, &p_conf);
 	if (ret) {
 		evt_err("failed to setup producer port %d", port);
diff --git a/app/test-eventdev/test_order_queue.c b/app/test-eventdev/test_order_queue.c
index 495efd9..a0a2187 100644
--- a/app/test-eventdev/test_order_queue.c
+++ b/app/test-eventdev/test_order_queue.c
@@ -34,6 +34,8 @@ 
 			continue;
 		}
 
+		ev.flow_id = ev.mbuf->udata64;
+
 		if (ev.queue_id == 0) { /* from ordered queue */
 			order_queue_process_stage_0(&ev);
 			while (rte_event_enqueue_burst(dev_id, port, &ev, 1)
@@ -68,6 +70,8 @@ 
 		}
 
 		for (i = 0; i < nb_rx; i++) {
+			ev[i].flow_id = ev[i].mbuf->udata64;
+
 			if (ev[i].queue_id == 0) { /* from ordered queue */
 				order_queue_process_stage_0(&ev[i]);
 			} else if (ev[i].queue_id == 1) {/* from atomic queue */
diff --git a/app/test-eventdev/test_perf_atq.c b/app/test-eventdev/test_perf_atq.c
index 8fd5100..10846f2 100644
--- a/app/test-eventdev/test_perf_atq.c
+++ b/app/test-eventdev/test_perf_atq.c
@@ -204,6 +204,7 @@ 
 			.dequeue_depth = opt->wkr_deq_dep,
 			.enqueue_depth = dev_info.max_event_port_dequeue_depth,
 			.new_event_threshold = dev_info.max_num_events,
+			.event_port_cfg = 0,
 	};
 
 	ret = perf_event_dev_port_setup(test, opt, 1 /* stride */, nb_queues,
diff --git a/app/test-eventdev/test_perf_queue.c b/app/test-eventdev/test_perf_queue.c
index f4ea3a7..a0119da 100644
--- a/app/test-eventdev/test_perf_queue.c
+++ b/app/test-eventdev/test_perf_queue.c
@@ -219,6 +219,7 @@ 
 			.dequeue_depth = opt->wkr_deq_dep,
 			.enqueue_depth = dev_info.max_event_port_dequeue_depth,
 			.new_event_threshold = dev_info.max_num_events,
+			.event_port_cfg = 0,
 	};
 
 	ret = perf_event_dev_port_setup(test, opt, nb_stages /* stride */,
diff --git a/app/test-eventdev/test_pipeline_atq.c b/app/test-eventdev/test_pipeline_atq.c
index 8e8686c..a95ec0a 100644
--- a/app/test-eventdev/test_pipeline_atq.c
+++ b/app/test-eventdev/test_pipeline_atq.c
@@ -356,6 +356,7 @@ 
 		.dequeue_depth = opt->wkr_deq_dep,
 		.enqueue_depth = info.max_event_port_dequeue_depth,
 		.new_event_threshold = info.max_num_events,
+		.event_port_cfg = 0,
 	};
 
 	if (!t->internal_port)
diff --git a/app/test-eventdev/test_pipeline_queue.c b/app/test-eventdev/test_pipeline_queue.c
index 7bebac3..30817dc 100644
--- a/app/test-eventdev/test_pipeline_queue.c
+++ b/app/test-eventdev/test_pipeline_queue.c
@@ -379,6 +379,7 @@ 
 			.dequeue_depth = opt->wkr_deq_dep,
 			.enqueue_depth = info.max_event_port_dequeue_depth,
 			.new_event_threshold = info.max_num_events,
+			.event_port_cfg = 0,
 	};
 
 	if (!t->internal_port) {
diff --git a/app/test/test_eventdev.c b/app/test/test_eventdev.c
index 43ccb1c..62019c1 100644
--- a/app/test/test_eventdev.c
+++ b/app/test/test_eventdev.c
@@ -559,10 +559,10 @@ 
 	if (!(info.event_dev_cap &
 	      RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE)) {
 		pconf.enqueue_depth = info.max_event_port_enqueue_depth;
-		pconf.disable_implicit_release = 1;
+		pconf.event_port_cfg = RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL;
 		ret = rte_event_port_setup(TEST_DEV_ID, 0, &pconf);
 		TEST_ASSERT(ret == -EINVAL, "Expected -EINVAL, %d", ret);
-		pconf.disable_implicit_release = 0;
+		pconf.event_port_cfg = 0;
 	}
 
 	ret = rte_event_port_setup(TEST_DEV_ID, info.max_event_ports,
diff --git a/drivers/event/dpaa2/dpaa2_eventdev.c b/drivers/event/dpaa2/dpaa2_eventdev.c
index a196ad4..8568bfc 100644
--- a/drivers/event/dpaa2/dpaa2_eventdev.c
+++ b/drivers/event/dpaa2/dpaa2_eventdev.c
@@ -537,7 +537,7 @@  static void dpaa2_eventdev_process_atomic(struct qbman_swp *swp,
 		DPAA2_EVENT_MAX_PORT_DEQUEUE_DEPTH;
 	port_conf->enqueue_depth =
 		DPAA2_EVENT_MAX_PORT_ENQUEUE_DEPTH;
-	port_conf->disable_implicit_release = 0;
+	port_conf->event_port_cfg = 0;
 }
 
 static int
diff --git a/drivers/event/octeontx/ssovf_evdev.c b/drivers/event/octeontx/ssovf_evdev.c
index 1b1a5d9..99c0b2e 100644
--- a/drivers/event/octeontx/ssovf_evdev.c
+++ b/drivers/event/octeontx/ssovf_evdev.c
@@ -224,7 +224,7 @@  struct ssovf_mbox_convert_ns_getworks_iter {
 	port_conf->new_event_threshold = edev->max_num_events;
 	port_conf->dequeue_depth = 1;
 	port_conf->enqueue_depth = 1;
-	port_conf->disable_implicit_release = 0;
+	port_conf->event_port_cfg = 0;
 }
 
 static void
diff --git a/drivers/event/skeleton/skeleton_eventdev.c b/drivers/event/skeleton/skeleton_eventdev.c
index c889220..37d569b 100644
--- a/drivers/event/skeleton/skeleton_eventdev.c
+++ b/drivers/event/skeleton/skeleton_eventdev.c
@@ -209,7 +209,7 @@ 
 	port_conf->new_event_threshold = 32 * 1024;
 	port_conf->dequeue_depth = 16;
 	port_conf->enqueue_depth = 16;
-	port_conf->disable_implicit_release = 0;
+	port_conf->event_port_cfg = 0;
 }
 
 static void
diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c
index fb8e8be..0b3dd9c 100644
--- a/drivers/event/sw/sw_evdev.c
+++ b/drivers/event/sw/sw_evdev.c
@@ -175,7 +175,8 @@ 
 	}
 
 	p->inflight_max = conf->new_event_threshold;
-	p->implicit_release = !conf->disable_implicit_release;
+	p->implicit_release = !(conf->event_port_cfg &
+				RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
 
 	/* check if ring exists, same as rx_worker above */
 	snprintf(buf, sizeof(buf), "sw%d_p%u, %s", dev->data->dev_id,
@@ -508,7 +509,7 @@ 
 	port_conf->new_event_threshold = 1024;
 	port_conf->dequeue_depth = 16;
 	port_conf->enqueue_depth = 16;
-	port_conf->disable_implicit_release = 0;
+	port_conf->event_port_cfg = 0;
 }
 
 static int
diff --git a/drivers/event/sw/sw_evdev_selftest.c b/drivers/event/sw/sw_evdev_selftest.c
index 38c21fa..a78d6cd 100644
--- a/drivers/event/sw/sw_evdev_selftest.c
+++ b/drivers/event/sw/sw_evdev_selftest.c
@@ -172,7 +172,7 @@  struct test {
 			.new_event_threshold = 1024,
 			.dequeue_depth = 32,
 			.enqueue_depth = 64,
-			.disable_implicit_release = 0,
+			.event_port_cfg = 0,
 	};
 	if (num_ports > MAX_PORTS)
 		return -1;
@@ -1227,7 +1227,7 @@  struct test_event_dev_stats {
 				.new_event_threshold = 128,
 				.dequeue_depth = 32,
 				.enqueue_depth = 64,
-				.disable_implicit_release = 0,
+				.event_port_cfg = 0,
 		};
 		if (rte_event_port_setup(evdev, 0, &port_conf) < 0) {
 			printf("%d Error setting up port\n", __LINE__);
@@ -1317,7 +1317,7 @@  struct test_event_dev_stats {
 		.new_event_threshold = 128,
 		.dequeue_depth = 32,
 		.enqueue_depth = 64,
-		.disable_implicit_release = 0,
+		.event_port_cfg = 0,
 	};
 	if (rte_event_port_setup(evdev, 0, &port_conf) < 0) {
 		printf("%d Error setting up port\n", __LINE__);
@@ -3079,7 +3079,8 @@  struct test_event_dev_stats {
 	 * only be initialized once - and this needs to be set for multiple runs
 	 */
 	conf.new_event_threshold = 512;
-	conf.disable_implicit_release = disable_implicit_release;
+	conf.event_port_cfg = disable_implicit_release ?
+		RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL : 0;
 
 	if (rte_event_port_setup(evdev, 0, &conf) < 0) {
 		printf("Error setting up RX port\n");
diff --git a/examples/eventdev_pipeline/pipeline_worker_generic.c b/examples/eventdev_pipeline/pipeline_worker_generic.c
index 42ff4ee..a091da3 100644
--- a/examples/eventdev_pipeline/pipeline_worker_generic.c
+++ b/examples/eventdev_pipeline/pipeline_worker_generic.c
@@ -129,6 +129,7 @@ 
 	struct rte_event_dev_config config = {
 			.nb_event_queues = nb_queues,
 			.nb_event_ports = nb_ports,
+			.nb_single_link_event_port_queues = 1,
 			.nb_events_limit  = 4096,
 			.nb_event_queue_flows = 1024,
 			.nb_event_port_dequeue_depth = 128,
@@ -138,12 +139,13 @@ 
 			.dequeue_depth = cdata.worker_cq_depth,
 			.enqueue_depth = 64,
 			.new_event_threshold = 4096,
+			.event_port_cfg = 0,
 	};
 	struct rte_event_queue_conf wkr_q_conf = {
 			.schedule_type = cdata.queue_type,
 			.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
 			.nb_atomic_flows = 1024,
-		.nb_atomic_order_sequences = 1024,
+			.nb_atomic_order_sequences = 1024,
 	};
 	struct rte_event_queue_conf tx_q_conf = {
 			.priority = RTE_EVENT_DEV_PRIORITY_HIGHEST,
@@ -167,7 +169,8 @@ 
 	disable_implicit_release = (dev_info.event_dev_cap &
 			RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE);
 
-	wkr_p_conf.disable_implicit_release = disable_implicit_release;
+	wkr_p_conf.event_port_cfg = disable_implicit_release ?
+		RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL : 0;
 
 	if (dev_info.max_num_events < config.nb_events_limit)
 		config.nb_events_limit = dev_info.max_num_events;
@@ -417,6 +420,7 @@ 
 		.dequeue_depth = cdata.worker_cq_depth,
 		.enqueue_depth = 64,
 		.new_event_threshold = 4096,
+		.event_port_cfg = 0,
 	};
 
 	if (adptr_p_conf.new_event_threshold > dev_info.max_num_events)
diff --git a/examples/eventdev_pipeline/pipeline_worker_tx.c b/examples/eventdev_pipeline/pipeline_worker_tx.c
index 55bb2f7..e8a9652 100644
--- a/examples/eventdev_pipeline/pipeline_worker_tx.c
+++ b/examples/eventdev_pipeline/pipeline_worker_tx.c
@@ -436,6 +436,7 @@ 
 	struct rte_event_dev_config config = {
 			.nb_event_queues = nb_queues,
 			.nb_event_ports = nb_ports,
+			.nb_single_link_event_port_queues = 0,
 			.nb_events_limit  = 4096,
 			.nb_event_queue_flows = 1024,
 			.nb_event_port_dequeue_depth = 128,
@@ -445,6 +446,7 @@ 
 			.dequeue_depth = cdata.worker_cq_depth,
 			.enqueue_depth = 64,
 			.new_event_threshold = 4096,
+			.event_port_cfg = 0,
 	};
 	struct rte_event_queue_conf wkr_q_conf = {
 			.schedule_type = cdata.queue_type,
@@ -746,6 +748,7 @@  struct rx_adptr_services {
 		.dequeue_depth = cdata.worker_cq_depth,
 		.enqueue_depth = 64,
 		.new_event_threshold = 4096,
+		.event_port_cfg = 0,
 	};
 
 	init_ports(nb_ports);
diff --git a/examples/l2fwd-event/l2fwd_event_generic.c b/examples/l2fwd-event/l2fwd_event_generic.c
index 2dc95e5..e01df04 100644
--- a/examples/l2fwd-event/l2fwd_event_generic.c
+++ b/examples/l2fwd-event/l2fwd_event_generic.c
@@ -126,8 +126,9 @@ 
 	if (def_p_conf.enqueue_depth < event_p_conf.enqueue_depth)
 		event_p_conf.enqueue_depth = def_p_conf.enqueue_depth;
 
-	event_p_conf.disable_implicit_release =
-		evt_rsrc->disable_implicit_release;
+	event_p_conf.event_port_cfg = 0;
+	if (evt_rsrc->disable_implicit_release)
+		event_p_conf.event_port_cfg |= RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL;
 	evt_rsrc->deq_depth = def_p_conf.dequeue_depth;
 
 	for (event_p_id = 0; event_p_id < evt_rsrc->evp.nb_ports;
diff --git a/examples/l2fwd-event/l2fwd_event_internal_port.c b/examples/l2fwd-event/l2fwd_event_internal_port.c
index 63d57b4..f54327b 100644
--- a/examples/l2fwd-event/l2fwd_event_internal_port.c
+++ b/examples/l2fwd-event/l2fwd_event_internal_port.c
@@ -123,8 +123,9 @@ 
 	if (def_p_conf.enqueue_depth < event_p_conf.enqueue_depth)
 		event_p_conf.enqueue_depth = def_p_conf.enqueue_depth;
 
-	event_p_conf.disable_implicit_release =
-		evt_rsrc->disable_implicit_release;
+	event_p_conf.event_port_cfg = 0;
+	if (evt_rsrc->disable_implicit_release)
+		event_p_conf.event_port_cfg |= RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL;
 
 	for (event_p_id = 0; event_p_id < evt_rsrc->evp.nb_ports;
 								event_p_id++) {
diff --git a/examples/l3fwd/l3fwd_event_generic.c b/examples/l3fwd/l3fwd_event_generic.c
index f8c9843..409a410 100644
--- a/examples/l3fwd/l3fwd_event_generic.c
+++ b/examples/l3fwd/l3fwd_event_generic.c
@@ -115,8 +115,9 @@ 
 	if (def_p_conf.enqueue_depth < event_p_conf.enqueue_depth)
 		event_p_conf.enqueue_depth = def_p_conf.enqueue_depth;
 
-	event_p_conf.disable_implicit_release =
-		evt_rsrc->disable_implicit_release;
+	event_p_conf.event_port_cfg = 0;
+	if (evt_rsrc->disable_implicit_release)
+		event_p_conf.event_port_cfg |= RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL;
 	evt_rsrc->deq_depth = def_p_conf.dequeue_depth;
 
 	for (event_p_id = 0; event_p_id < evt_rsrc->evp.nb_ports;
diff --git a/examples/l3fwd/l3fwd_event_internal_port.c b/examples/l3fwd/l3fwd_event_internal_port.c
index 03ac581..df410f1 100644
--- a/examples/l3fwd/l3fwd_event_internal_port.c
+++ b/examples/l3fwd/l3fwd_event_internal_port.c
@@ -113,8 +113,9 @@ 
 	if (def_p_conf.enqueue_depth < event_p_conf.enqueue_depth)
 		event_p_conf.enqueue_depth = def_p_conf.enqueue_depth;
 
-	event_p_conf.disable_implicit_release =
-		evt_rsrc->disable_implicit_release;
+	event_p_conf.event_port_cfg = 0;
+	if (evt_rsrc->disable_implicit_release)
+		event_p_conf.event_port_cfg |= RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL;
 
 	for (event_p_id = 0; event_p_id < evt_rsrc->evp.nb_ports;
 								event_p_id++) {
diff --git a/lib/librte_eal/x86/include/rte_cpuflags.h b/lib/librte_eal/x86/include/rte_cpuflags.h
index c1d2036..ab2c3b3 100644
--- a/lib/librte_eal/x86/include/rte_cpuflags.h
+++ b/lib/librte_eal/x86/include/rte_cpuflags.h
@@ -130,6 +130,7 @@  enum rte_cpu_flag_t {
 	RTE_CPUFLAG_CLDEMOTE,               /**< Cache Line Demote */
 	RTE_CPUFLAG_MOVDIRI,                /**< Direct Store Instructions */
 	RTE_CPUFLAG_MOVDIR64B,              /**< Direct Store Instructions 64B */
+	RTE_CPUFLAG_UMWAIT,                 /**< UMONITOR/UMWAIT */
 	RTE_CPUFLAG_AVX512VP2INTERSECT,     /**< AVX512 Two Register Intersection */
 
 	/* The last item */
diff --git a/lib/librte_eal/x86/rte_cpuflags.c b/lib/librte_eal/x86/rte_cpuflags.c
index 30439e7..69ac0db 100644
--- a/lib/librte_eal/x86/rte_cpuflags.c
+++ b/lib/librte_eal/x86/rte_cpuflags.c
@@ -137,6 +137,7 @@  struct feature_entry {
 	FEAT_DEF(CLDEMOTE, 0x00000007, 0, RTE_REG_ECX, 25)
 	FEAT_DEF(MOVDIRI, 0x00000007, 0, RTE_REG_ECX, 27)
 	FEAT_DEF(MOVDIR64B, 0x00000007, 0, RTE_REG_ECX, 28)
+	FEAT_DEF(UMWAIT, 0x00000007, 0, RTE_REG_ECX, 5)
 	FEAT_DEF(AVX512VP2INTERSECT, 0x00000007, 0, RTE_REG_EDX, 8)
 };
 
diff --git a/lib/librte_eventdev/meson.build b/lib/librte_eventdev/meson.build
index d1f25ee..17f7f40 100644
--- a/lib/librte_eventdev/meson.build
+++ b/lib/librte_eventdev/meson.build
@@ -7,6 +7,7 @@  else
 	cflags += '-DBSD'
 endif
 
+use_function_versioning = true
 sources = files('rte_eventdev.c',
 		'rte_event_ring.c',
 		'eventdev_trace_points.c',
diff --git a/lib/librte_eventdev/rte_event_eth_tx_adapter.c b/lib/librte_eventdev/rte_event_eth_tx_adapter.c
index bb21dc4..8a72256 100644
--- a/lib/librte_eventdev/rte_event_eth_tx_adapter.c
+++ b/lib/librte_eventdev/rte_event_eth_tx_adapter.c
@@ -286,7 +286,7 @@  static int txa_service_queue_del(uint8_t id,
 		return ret;
 	}
 
-	pc->disable_implicit_release = 0;
+	pc->event_port_cfg = 0;
 	ret = rte_event_port_setup(dev_id, port_id, pc);
 	if (ret) {
 		RTE_EDEV_LOG_ERR("failed to setup event port %u\n",
diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c
index 82c177c..e8d7c0d 100644
--- a/lib/librte_eventdev/rte_eventdev.c
+++ b/lib/librte_eventdev/rte_eventdev.c
@@ -32,6 +32,8 @@ 
 #include <rte_ethdev.h>
 #include <rte_cryptodev.h>
 #include <rte_cryptodev_pmd.h>
+#include <rte_compat.h>
+#include <rte_function_versioning.h>
 
 #include "rte_eventdev.h"
 #include "rte_eventdev_pmd.h"
@@ -87,7 +89,47 @@ 
 }
 
 int
-rte_event_dev_info_get(uint8_t dev_id, struct rte_event_dev_info *dev_info)
+rte_event_dev_info_get_v20(uint8_t dev_id,
+			     struct rte_event_dev_info_v20 *dev_info)
+{
+	struct rte_event_dev_info new_dev_info;
+	int err;
+
+	if (dev_info == NULL)
+		return -EINVAL;
+
+	memset(&new_dev_info, 0, sizeof(struct rte_event_dev_info));
+
+	err = rte_event_dev_info_get(dev_id, &new_dev_info);
+	if (err)
+		return err;
+
+	dev_info->driver_name = new_dev_info.driver_name;
+	dev_info->dev = new_dev_info.dev;
+	dev_info->min_dequeue_timeout_ns = new_dev_info.min_dequeue_timeout_ns;
+	dev_info->max_dequeue_timeout_ns = new_dev_info.max_dequeue_timeout_ns;
+	dev_info->max_event_queues = new_dev_info.max_event_queues;
+	dev_info->max_event_queue_flows = new_dev_info.max_event_queue_flows;
+	dev_info->max_event_queue_priority_levels =
+		new_dev_info.max_event_queue_priority_levels;
+	dev_info->max_event_priority_levels =
+		new_dev_info.max_event_priority_levels;
+	dev_info->max_event_ports = new_dev_info.max_event_ports;
+	dev_info->max_event_port_dequeue_depth =
+		new_dev_info.max_event_port_dequeue_depth;
+	dev_info->max_event_port_enqueue_depth =
+		new_dev_info.max_event_port_enqueue_depth;
+	dev_info->max_num_events = new_dev_info.max_num_events;
+	dev_info->event_dev_cap = new_dev_info.event_dev_cap;
+	dev_info->dequeue_timeout_ns = new_dev_info.dequeue_timeout_ns;
+
+	return 0;
+}
+VERSION_SYMBOL(rte_event_dev_info_get, _v20, 20);
+
+int
+rte_event_dev_info_get_v21(uint8_t dev_id,
+			     struct rte_event_dev_info *dev_info)
 {
 	struct rte_eventdev *dev;
 
@@ -107,6 +149,10 @@ 
 	dev_info->dev = dev->dev;
 	return 0;
 }
+BIND_DEFAULT_SYMBOL(rte_event_dev_info_get, _v21, 21);
+MAP_STATIC_SYMBOL(int rte_event_dev_info_get(uint8_t dev_id,
+			struct rte_event_dev_info *dev_info),
+			rte_event_dev_info_get_v21);
 
 int
 rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint16_t eth_port_id,
@@ -385,7 +431,29 @@ 
 }
 
 int
-rte_event_dev_configure(uint8_t dev_id,
+rte_event_dev_configure_v20(uint8_t dev_id,
+			      const struct rte_event_dev_config_v20 *dev_conf)
+{
+	struct rte_event_dev_config new_dev_conf;
+
+	new_dev_conf.dequeue_timeout_ns = dev_conf->dequeue_timeout_ns;
+	new_dev_conf.nb_events_limit = dev_conf->nb_events_limit;
+	new_dev_conf.nb_event_queues = dev_conf->nb_event_queues;
+	new_dev_conf.nb_event_ports = dev_conf->nb_event_ports;
+	new_dev_conf.nb_event_queue_flows = dev_conf->nb_event_queue_flows;
+	new_dev_conf.nb_event_port_dequeue_depth =
+		dev_conf->nb_event_port_dequeue_depth;
+	new_dev_conf.nb_event_port_enqueue_depth =
+		dev_conf->nb_event_port_enqueue_depth;
+	new_dev_conf.event_dev_cfg = dev_conf->event_dev_cfg;
+	new_dev_conf.nb_single_link_event_port_queues = 0;
+
+	return rte_event_dev_configure(dev_id, &new_dev_conf);
+}
+VERSION_SYMBOL(rte_event_dev_configure, _v20, 20);
+
+int
+rte_event_dev_configure_v21(uint8_t dev_id,
 			const struct rte_event_dev_config *dev_conf)
 {
 	struct rte_eventdev *dev;
@@ -437,9 +505,29 @@ 
 					dev_id);
 		return -EINVAL;
 	}
-	if (dev_conf->nb_event_queues > info.max_event_queues) {
-		RTE_EDEV_LOG_ERR("%d nb_event_queues=%d > max_event_queues=%d",
-		dev_id, dev_conf->nb_event_queues, info.max_event_queues);
+	if (dev_conf->nb_event_queues > info.max_event_queues +
+			info.max_single_link_event_port_queue_pairs) {
+		RTE_EDEV_LOG_ERR("%d nb_event_queues=%d > max_event_queues=%d + max_single_link_event_port_queue_pairs=%d",
+				 dev_id, dev_conf->nb_event_queues,
+				 info.max_event_queues,
+				 info.max_single_link_event_port_queue_pairs);
+		return -EINVAL;
+	}
+	if (dev_conf->nb_event_queues -
+			dev_conf->nb_single_link_event_port_queues >
+			info.max_event_queues) {
+		RTE_EDEV_LOG_ERR("id%d nb_event_queues=%d - nb_single_link_event_port_queues=%d > max_event_queues=%d",
+				 dev_id, dev_conf->nb_event_queues,
+				 dev_conf->nb_single_link_event_port_queues,
+				 info.max_event_queues);
+		return -EINVAL;
+	}
+	if (dev_conf->nb_single_link_event_port_queues >
+			dev_conf->nb_event_queues) {
+		RTE_EDEV_LOG_ERR("dev%d nb_single_link_event_port_queues=%d > nb_event_queues=%d",
+				 dev_id,
+				 dev_conf->nb_single_link_event_port_queues,
+				 dev_conf->nb_event_queues);
 		return -EINVAL;
 	}
 
@@ -448,9 +536,31 @@ 
 		RTE_EDEV_LOG_ERR("dev%d nb_event_ports cannot be zero", dev_id);
 		return -EINVAL;
 	}
-	if (dev_conf->nb_event_ports > info.max_event_ports) {
-		RTE_EDEV_LOG_ERR("id%d nb_event_ports=%d > max_event_ports= %d",
-		dev_id, dev_conf->nb_event_ports, info.max_event_ports);
+	if (dev_conf->nb_event_ports > info.max_event_ports +
+			info.max_single_link_event_port_queue_pairs) {
+		RTE_EDEV_LOG_ERR("id%d nb_event_ports=%d > max_event_ports=%d + max_single_link_event_port_queue_pairs=%d",
+				 dev_id, dev_conf->nb_event_ports,
+				 info.max_event_ports,
+				 info.max_single_link_event_port_queue_pairs);
+		return -EINVAL;
+	}
+	if (dev_conf->nb_event_ports -
+			dev_conf->nb_single_link_event_port_queues
+			> info.max_event_ports) {
+		RTE_EDEV_LOG_ERR("id%d nb_event_ports=%d - nb_single_link_event_port_queues=%d > max_event_ports=%d",
+				 dev_id, dev_conf->nb_event_ports,
+				 dev_conf->nb_single_link_event_port_queues,
+				 info.max_event_ports);
+		return -EINVAL;
+	}
+
+	if (dev_conf->nb_single_link_event_port_queues >
+	    dev_conf->nb_event_ports) {
+		RTE_EDEV_LOG_ERR(
+				 "dev%d nb_single_link_event_port_queues=%d > nb_event_ports=%d",
+				 dev_id,
+				 dev_conf->nb_single_link_event_port_queues,
+				 dev_conf->nb_event_ports);
 		return -EINVAL;
 	}
 
@@ -528,6 +638,10 @@ 
 	rte_eventdev_trace_configure(dev_id, dev_conf, diag);
 	return diag;
 }
+BIND_DEFAULT_SYMBOL(rte_event_dev_configure, _v21, 21);
+MAP_STATIC_SYMBOL(int rte_event_dev_configure(uint8_t dev_id,
+			const struct rte_event_dev_config *dev_conf),
+			rte_event_dev_configure_v21);
 
 static inline int
 is_valid_queue(struct rte_eventdev *dev, uint8_t queue_id)
@@ -666,7 +780,33 @@ 
 }
 
 int
-rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
+rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
+				 struct rte_event_port_conf_v20 *port_conf)
+{
+	struct rte_event_port_conf new_port_conf;
+	int err;
+
+	if (port_conf == NULL)
+		return -EINVAL;
+
+	memset(&new_port_conf, 0, sizeof(new_port_conf));
+
+	err = rte_event_port_default_conf_get(dev_id, port_id, &new_port_conf);
+	if (err)
+		return err;
+
+	port_conf->new_event_threshold = new_port_conf.new_event_threshold;
+	port_conf->dequeue_depth = new_port_conf.dequeue_depth;
+	port_conf->enqueue_depth = new_port_conf.enqueue_depth;
+	port_conf->disable_implicit_release = !!(new_port_conf.event_port_cfg &
+		RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
+
+	return 0;
+}
+VERSION_SYMBOL(rte_event_port_default_conf_get, _v20, 20);
+
+int
+rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
 				 struct rte_event_port_conf *port_conf)
 {
 	struct rte_eventdev *dev;
@@ -687,9 +827,35 @@ 
 	(*dev->dev_ops->port_def_conf)(dev, port_id, port_conf);
 	return 0;
 }
+BIND_DEFAULT_SYMBOL(rte_event_port_default_conf_get, _v21, 21);
+MAP_STATIC_SYMBOL(int rte_event_port_default_conf_get(uint8_t dev_id,
+			uint8_t port_id, struct rte_event_port_conf *port_conf),
+			rte_event_port_default_conf_get_v21);
 
 int
-rte_event_port_setup(uint8_t dev_id, uint8_t port_id,
+rte_event_port_setup_v20(uint8_t dev_id, uint8_t port_id,
+		     const struct rte_event_port_conf_v20 *port_conf)
+{
+	struct rte_event_port_conf new_port_conf;
+
+	if (port_conf == NULL)
+		return -EINVAL;
+
+	new_port_conf.new_event_threshold = port_conf->new_event_threshold;
+	new_port_conf.dequeue_depth = port_conf->dequeue_depth;
+	new_port_conf.enqueue_depth = port_conf->enqueue_depth;
+	new_port_conf.event_port_cfg = 0;
+	if (port_conf->disable_implicit_release)
+		new_port_conf.event_port_cfg =
+			RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL;
+
+	return rte_event_port_setup(dev_id, port_id, &new_port_conf);
+
+}
+VERSION_SYMBOL(rte_event_port_setup, _v20, 20);
+
+int
+rte_event_port_setup_v21(uint8_t dev_id, uint8_t port_id,
 		     const struct rte_event_port_conf *port_conf)
 {
 	struct rte_eventdev *dev;
@@ -737,7 +903,8 @@ 
 		return -EINVAL;
 	}
 
-	if (port_conf && port_conf->disable_implicit_release &&
+	if (port_conf &&
+	    (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL) &&
 	    !(dev->data->event_dev_cap &
 	      RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE)) {
 		RTE_EDEV_LOG_ERR(
@@ -775,6 +942,10 @@ 
 
 	return 0;
 }
+BIND_DEFAULT_SYMBOL(rte_event_port_setup, _v21, 21);
+MAP_STATIC_SYMBOL(int rte_event_port_setup(uint8_t dev_id, uint8_t port_id,
+		  const struct rte_event_port_conf *port_conf),
+		  rte_event_port_setup_v21);
 
 int
 rte_event_dev_attr_get(uint8_t dev_id, uint32_t attr_id,
@@ -809,6 +980,7 @@ 
 			uint32_t *attr_value)
 {
 	struct rte_eventdev *dev;
+	uint32_t config;
 
 	if (!attr_value)
 		return -EINVAL;
@@ -830,6 +1002,10 @@ 
 	case RTE_EVENT_PORT_ATTR_NEW_EVENT_THRESHOLD:
 		*attr_value = dev->data->ports_cfg[port_id].new_event_threshold;
 		break;
+	case RTE_EVENT_PORT_ATTR_IMPLICIT_RELEASE_DISABLE:
+		config = dev->data->ports_cfg[port_id].event_port_cfg;
+		*attr_value = !!(config & RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
+		break;
 	default:
 		return -EINVAL;
 	};
diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h
index 7dc8323..e7155e6 100644
--- a/lib/librte_eventdev/rte_eventdev.h
+++ b/lib/librte_eventdev/rte_eventdev.h
@@ -291,6 +291,12 @@ 
  * single queue to each port or map a single queue to many port.
  */
 
+#define RTE_EVENT_DEV_CAP_CARRY_FLOW_ID (1ULL << 9)
+/**< Event device preserves the flow ID from the enqueued
+ * event to the dequeued event if the flag is set. Otherwise,
+ * the content of this field is implementation dependent.
+ */
+
 /* Event device priority levels */
 #define RTE_EVENT_DEV_PRIORITY_HIGHEST   0
 /**< Highest priority expressed across eventdev subsystem
@@ -380,6 +386,58 @@  struct rte_event_dev_info {
 	 * event port by this device.
 	 * A device that does not support bulk enqueue will set this as 1.
 	 */
+	uint8_t max_event_port_links;
+	/**< Maximum number of queues that can be linked to a single event
+	 * port by this device.
+	 */
+	int32_t max_num_events;
+	/**< A *closed system* event dev has a limit on the number of events it
+	 * can manage at a time. An *open system* event dev does not have a
+	 * limit and will specify this as -1.
+	 */
+	uint32_t event_dev_cap;
+	/**< Event device capabilities(RTE_EVENT_DEV_CAP_)*/
+	uint8_t max_single_link_event_port_queue_pairs;
+	/**< Maximum number of event ports and queues that are optimized for
+	 * (and only capable of) single-link configurations supported by this
+	 * device. These ports and queues are not accounted for in
+	 * max_event_ports or max_event_queues.
+	 */
+};
+
+struct rte_event_dev_info_v20 {
+	const char *driver_name;	/**< Event driver name */
+	struct rte_device *dev;	/**< Device information */
+	uint32_t min_dequeue_timeout_ns;
+	/**< Minimum supported global dequeue timeout(ns) by this device */
+	uint32_t max_dequeue_timeout_ns;
+	/**< Maximum supported global dequeue timeout(ns) by this device */
+	uint32_t dequeue_timeout_ns;
+	/**< Configured global dequeue timeout(ns) for this device */
+	uint8_t max_event_queues;
+	/**< Maximum event_queues supported by this device */
+	uint32_t max_event_queue_flows;
+	/**< Maximum supported flows in an event queue by this device*/
+	uint8_t max_event_queue_priority_levels;
+	/**< Maximum number of event queue priority levels by this device.
+	 * Valid when the device has RTE_EVENT_DEV_CAP_QUEUE_QOS capability
+	 */
+	uint8_t max_event_priority_levels;
+	/**< Maximum number of event priority levels by this device.
+	 * Valid when the device has RTE_EVENT_DEV_CAP_EVENT_QOS capability
+	 */
+	uint8_t max_event_ports;
+	/**< Maximum number of event ports supported by this device */
+	uint8_t max_event_port_dequeue_depth;
+	/**< Maximum number of events can be dequeued at a time from an
+	 * event port by this device.
+	 * A device that does not support bulk dequeue will set this as 1.
+	 */
+	uint32_t max_event_port_enqueue_depth;
+	/**< Maximum number of events can be enqueued at a time from an
+	 * event port by this device.
+	 * A device that does not support bulk enqueue will set this as 1.
+	 */
 	int32_t max_num_events;
 	/**< A *closed system* event dev has a limit on the number of events it
 	 * can manage at a time. An *open system* event dev does not have a
@@ -407,6 +465,14 @@  struct rte_event_dev_info {
 int
 rte_event_dev_info_get(uint8_t dev_id, struct rte_event_dev_info *dev_info);
 
+int
+rte_event_dev_info_get_v20(uint8_t dev_id,
+			     struct rte_event_dev_info_v20 *dev_info);
+
+int
+rte_event_dev_info_get_v21(uint8_t dev_id,
+			     struct rte_event_dev_info *dev_info);
+
 /**
  * The count of ports.
  */
@@ -494,6 +560,67 @@  struct rte_event_dev_config {
 	 */
 	uint32_t event_dev_cfg;
 	/**< Event device config flags(RTE_EVENT_DEV_CFG_)*/
+	uint8_t nb_single_link_event_port_queues;
+	/**< Number of event ports and queues that will be singly-linked to
+	 * each other. These are a subset of the overall event ports and
+	 * queues; this value cannot exceed *nb_event_ports* or
+	 * *nb_event_queues*. If the device has ports and queues that are
+	 * optimized for single-link usage, this field is a hint for how many
+	 * to allocate; otherwise, regular event ports and queues can be used.
+	 */
+};
+
+/** Event device configuration structure */
+struct rte_event_dev_config_v20 {
+	uint32_t dequeue_timeout_ns;
+	/**< rte_event_dequeue_burst() timeout on this device.
+	 * This value should be in the range of *min_dequeue_timeout_ns* and
+	 * *max_dequeue_timeout_ns* which previously provided in
+	 * rte_event_dev_info_get()
+	 * The value 0 is allowed, in which case, default dequeue timeout used.
+	 * @see RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT
+	 */
+	int32_t nb_events_limit;
+	/**< In a *closed system* this field is the limit on maximum number of
+	 * events that can be inflight in the eventdev at a given time. The
+	 * limit is required to ensure that the finite space in a closed system
+	 * is not overwhelmed. The value cannot exceed the *max_num_events*
+	 * as provided by rte_event_dev_info_get().
+	 * This value should be set to -1 for *open system*.
+	 */
+	uint8_t nb_event_queues;
+	/**< Number of event queues to configure on this device.
+	 * This value cannot exceed the *max_event_queues* which previously
+	 * provided in rte_event_dev_info_get()
+	 */
+	uint8_t nb_event_ports;
+	/**< Number of event ports to configure on this device.
+	 * This value cannot exceed the *max_event_ports* which previously
+	 * provided in rte_event_dev_info_get()
+	 */
+	uint32_t nb_event_queue_flows;
+	/**< Number of flows for any event queue on this device.
+	 * This value cannot exceed the *max_event_queue_flows* which previously
+	 * provided in rte_event_dev_info_get()
+	 */
+	uint32_t nb_event_port_dequeue_depth;
+	/**< Maximum number of events can be dequeued at a time from an
+	 * event port by this device.
+	 * This value cannot exceed the *max_event_port_dequeue_depth*
+	 * which previously provided in rte_event_dev_info_get().
+	 * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
+	 * @see rte_event_port_setup()
+	 */
+	uint32_t nb_event_port_enqueue_depth;
+	/**< Maximum number of events can be enqueued at a time from an
+	 * event port by this device.
+	 * This value cannot exceed the *max_event_port_enqueue_depth*
+	 * which previously provided in rte_event_dev_info_get().
+	 * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
+	 * @see rte_event_port_setup()
+	 */
+	uint32_t event_dev_cfg;
+	/**< Event device config flags(RTE_EVENT_DEV_CFG_)*/
 };
 
 /**
@@ -519,6 +646,13 @@  struct rte_event_dev_config {
 rte_event_dev_configure(uint8_t dev_id,
 			const struct rte_event_dev_config *dev_conf);
 
+int
+rte_event_dev_configure_v20(uint8_t dev_id,
+			const struct rte_event_dev_config_v20 *dev_conf);
+
+int
+rte_event_dev_configure_v21(uint8_t dev_id,
+			const struct rte_event_dev_config *dev_conf);
 
 /* Event queue specific APIs */
 
@@ -671,6 +805,20 @@  struct rte_event_queue_conf {
 
 /* Event port specific APIs */
 
+/* Event port configuration bitmap flags */
+#define RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL    (1ULL << 0)
+/**< Configure the port not to release outstanding events in
+ * rte_event_dev_dequeue_burst(). If set, all events received through
+ * the port must be explicitly released with RTE_EVENT_OP_RELEASE or
+ * RTE_EVENT_OP_FORWARD. Must be unset if the device is not
+ * RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE capable.
+ */
+#define RTE_EVENT_PORT_CFG_SINGLE_LINK         (1ULL << 1)
+/**< This event port links only to a single event queue.
+ *
+ *  @see rte_event_port_setup(), rte_event_port_link()
+ */
+
 /** Event port configuration structure */
 struct rte_event_port_conf {
 	int32_t new_event_threshold;
@@ -698,6 +846,36 @@  struct rte_event_port_conf {
 	 * which previously supplied to rte_event_dev_configure().
 	 * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
 	 */
+	uint32_t event_port_cfg; /**< Port cfg flags(EVENT_PORT_CFG_) */
+};
+
+/** Event port configuration structure */
+struct rte_event_port_conf_v20 {
+	int32_t new_event_threshold;
+	/**< A backpressure threshold for new event enqueues on this port.
+	 * Use for *closed system* event dev where event capacity is limited,
+	 * and cannot exceed the capacity of the event dev.
+	 * Configuring ports with different thresholds can make higher priority
+	 * traffic less likely to  be backpressured.
+	 * For example, a port used to inject NIC Rx packets into the event dev
+	 * can have a lower threshold so as not to overwhelm the device,
+	 * while ports used for worker pools can have a higher threshold.
+	 * This value cannot exceed the *nb_events_limit*
+	 * which was previously supplied to rte_event_dev_configure().
+	 * This should be set to '-1' for *open system*.
+	 */
+	uint16_t dequeue_depth;
+	/**< Configure number of bulk dequeues for this event port.
+	 * This value cannot exceed the *nb_event_port_dequeue_depth*
+	 * which previously supplied to rte_event_dev_configure().
+	 * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
+	 */
+	uint16_t enqueue_depth;
+	/**< Configure number of bulk enqueues for this event port.
+	 * This value cannot exceed the *nb_event_port_enqueue_depth*
+	 * which previously supplied to rte_event_dev_configure().
+	 * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
+	 */
 	uint8_t disable_implicit_release;
 	/**< Configure the port not to release outstanding events in
 	 * rte_event_dev_dequeue_burst(). If true, all events received through
@@ -733,6 +911,14 @@  struct rte_event_port_conf {
 rte_event_port_default_conf_get(uint8_t dev_id, uint8_t port_id,
 				struct rte_event_port_conf *port_conf);
 
+int
+rte_event_port_default_conf_get_v20(uint8_t dev_id, uint8_t port_id,
+				struct rte_event_port_conf_v20 *port_conf);
+
+int
+rte_event_port_default_conf_get_v21(uint8_t dev_id, uint8_t port_id,
+				      struct rte_event_port_conf *port_conf);
+
 /**
  * Allocate and set up an event port for an event device.
  *
@@ -757,6 +943,14 @@  struct rte_event_port_conf {
 rte_event_port_setup(uint8_t dev_id, uint8_t port_id,
 		     const struct rte_event_port_conf *port_conf);
 
+int
+rte_event_port_setup_v20(uint8_t dev_id, uint8_t port_id,
+			   const struct rte_event_port_conf_v20 *port_conf);
+
+int
+rte_event_port_setup_v21(uint8_t dev_id, uint8_t port_id,
+			   const struct rte_event_port_conf *port_conf);
+
 /**
  * The queue depth of the port on the enqueue side
  */
@@ -769,6 +963,10 @@  struct rte_event_port_conf {
  * The new event threshold of the port
  */
 #define RTE_EVENT_PORT_ATTR_NEW_EVENT_THRESHOLD 2
+/**
+ * The implicit release disable attribute of the port
+ */
+#define RTE_EVENT_PORT_ATTR_IMPLICIT_RELEASE_DISABLE 3
 
 /**
  * Get an attribute from a port.
diff --git a/lib/librte_eventdev/rte_eventdev_pmd_pci.h b/lib/librte_eventdev/rte_eventdev_pmd_pci.h
index 443cd38..1572999 100644
--- a/lib/librte_eventdev/rte_eventdev_pmd_pci.h
+++ b/lib/librte_eventdev/rte_eventdev_pmd_pci.h
@@ -88,6 +88,60 @@ 
 	return -ENXIO;
 }
 
+/**
+ * @internal
+ * Wrapper for use by pci drivers as a .probe function to attach to a event
+ * interface.  Same as rte_event_pmd_pci_probe, except caller can specify
+ * the name.
+ */
+static inline int
+rte_event_pmd_pci_probe_named(struct rte_pci_driver *pci_drv,
+			    struct rte_pci_device *pci_dev,
+			    size_t private_data_size,
+			    eventdev_pmd_pci_callback_t devinit,
+			    const char *name)
+{
+	struct rte_eventdev *eventdev;
+
+	int retval;
+
+	if (devinit == NULL)
+		return -EINVAL;
+
+	eventdev = rte_event_pmd_allocate(name,
+			 pci_dev->device.numa_node);
+	if (eventdev == NULL)
+		return -ENOMEM;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		eventdev->data->dev_private =
+				rte_zmalloc_socket(
+						"eventdev private structure",
+						private_data_size,
+						RTE_CACHE_LINE_SIZE,
+						rte_socket_id());
+
+		if (eventdev->data->dev_private == NULL)
+			rte_panic("Cannot allocate memzone for private "
+					"device data");
+	}
+
+	eventdev->dev = &pci_dev->device;
+
+	/* Invoke PMD device initialization function */
+	retval = devinit(eventdev);
+	if (retval == 0)
+		return 0;
+
+	RTE_EDEV_LOG_ERR("driver %s: (vendor_id=0x%x device_id=0x%x)"
+			" failed", pci_drv->driver.name,
+			(unsigned int) pci_dev->id.vendor_id,
+			(unsigned int) pci_dev->id.device_id);
+
+	rte_event_pmd_release(eventdev);
+
+	return -ENXIO;
+}
 
 /**
  * @internal
diff --git a/lib/librte_eventdev/rte_eventdev_version.map b/lib/librte_eventdev/rte_eventdev_version.map
index 91a62cd..081fcb6 100644
--- a/lib/librte_eventdev/rte_eventdev_version.map
+++ b/lib/librte_eventdev/rte_eventdev_version.map
@@ -94,13 +94,21 @@  DPDK_20.0 {
 	local: *;
 };
 
+DPDK_21 {
+	global:
+
+	rte_event_dev_info_get;
+	rte_event_dev_configure;
+	rte_event_port_default_conf_get;
+	rte_event_port_setup;
+} DPDK_20.0;
+
 EXPERIMENTAL {
 	global:
 
 	# added in 20.05
 	__rte_eventdev_trace_configure;
 	__rte_eventdev_trace_queue_setup;
-	__rte_eventdev_trace_port_setup;
 	__rte_eventdev_trace_port_link;
 	__rte_eventdev_trace_port_unlink;
 	__rte_eventdev_trace_start;
@@ -134,4 +142,7 @@  EXPERIMENTAL {
 	__rte_eventdev_trace_crypto_adapter_queue_pair_del;
 	__rte_eventdev_trace_crypto_adapter_start;
 	__rte_eventdev_trace_crypto_adapter_stop;
+
+	# changed in 20.08
+	__rte_eventdev_trace_port_setup;
 };