[RFC,v2,2/2] eal: add high-performance timer facility

Message ID 20230315170342.214127-3-mattias.ronnblom@ericsson.com (mailing list archive)
State New
Delegated to: Thomas Monjalon
Headers
Series Add high-performance timer facility |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation fail Compilation issues
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS

Commit Message

Mattias Rönnblom March 15, 2023, 5:03 p.m. UTC
  The htimer library aims to provide a timer facility with roughly the
same functionality as the DPDK timer library, but with less overhead
and better scalability.

The htimer library employs per-lcore hierarchical timer wheels and a
message-based synchronization/MT-safety scheme.
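
A minimal single-lcore usage sketch, based on the unit tests in this
series (RFC v2 function signatures; times below are in nanoseconds,
the default time unit, and the exact API may still change):

	static void
	expiry_cb(struct rte_htimer *timer __rte_unused, void *arg)
	{
		unsigned int *expiries = arg;

		(*expiries)++;
	}

	unsigned int expiries = 0;
	struct rte_htimer timer;

	rte_htimer_mgr_init(10 * 1000);	/* 10 us tick */

	/* one-shot timer, expiring 250 us from now, on the local lcore */
	rte_htimer_mgr_add(&timer, 250 * 1000, 0, expiry_cb, &expiries, 0);

	while (expiries == 0)
		rte_htimer_mgr_manage();

	rte_htimer_mgr_deinit();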

RFC v2:
 * Fix spelling.
 * Fix signed/unsigned comparisons and discontinue the use of name-less
   function parameters, both of which may result in compiler warnings.
 * Undo the accidental removal of the bitset tests from the 'fast_tests'.
 * Add a number of missing include files, whose absence caused build
   failures (e.g., on AArch64 builds).
 * Add perf test attempting to compare rte_timer, rte_htimer and rte_htw.
 * Use nanoseconds (instead of TSC) as the default time unit.
 * add() and manage() have flags which allow the caller to specify the
   time unit (nanoseconds, TSC, or ticks) for the times provided.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 app/test/meson.build                  |   8 +
 app/test/test_htimer_mgr.c            | 674 +++++++++++++++++++++++++
 app/test/test_htimer_mgr_perf.c       | 322 ++++++++++++
 app/test/test_htw.c                   | 478 ++++++++++++++++++
 app/test/test_htw_perf.c              | 181 +++++++
 app/test/test_timer_htimer_htw_perf.c | 693 ++++++++++++++++++++++++++
 doc/api/doxy-api-index.md             |   5 +-
 doc/api/doxy-api.conf.in              |   1 +
 lib/htimer/meson.build                |   7 +
 lib/htimer/rte_htimer.h               |  68 +++
 lib/htimer/rte_htimer_mgr.c           | 547 ++++++++++++++++++++
 lib/htimer/rte_htimer_mgr.h           | 516 +++++++++++++++++++
 lib/htimer/rte_htimer_msg.h           |  44 ++
 lib/htimer/rte_htimer_msg_ring.c      |  18 +
 lib/htimer/rte_htimer_msg_ring.h      |  55 ++
 lib/htimer/rte_htw.c                  | 445 +++++++++++++++++
 lib/htimer/rte_htw.h                  |  49 ++
 lib/htimer/version.map                |  17 +
 lib/meson.build                       |   1 +
 19 files changed, 4128 insertions(+), 1 deletion(-)
 create mode 100644 app/test/test_htimer_mgr.c
 create mode 100644 app/test/test_htimer_mgr_perf.c
 create mode 100644 app/test/test_htw.c
 create mode 100644 app/test/test_htw_perf.c
 create mode 100644 app/test/test_timer_htimer_htw_perf.c
 create mode 100644 lib/htimer/meson.build
 create mode 100644 lib/htimer/rte_htimer.h
 create mode 100644 lib/htimer/rte_htimer_mgr.c
 create mode 100644 lib/htimer/rte_htimer_mgr.h
 create mode 100644 lib/htimer/rte_htimer_msg.h
 create mode 100644 lib/htimer/rte_htimer_msg_ring.c
 create mode 100644 lib/htimer/rte_htimer_msg_ring.h
 create mode 100644 lib/htimer/rte_htw.c
 create mode 100644 lib/htimer/rte_htw.h
 create mode 100644 lib/htimer/version.map
  

Comments

Tyler Retzlaff March 16, 2023, 3:55 a.m. UTC | #1
On Wed, Mar 15, 2023 at 06:03:42PM +0100, Mattias Rönnblom wrote:
> The htimer library attempts at providing a timer facility with roughly
> the same functionality, but less overhead and better scalability than
> DPDK timer library.
> 
> The htimer library employs per-lcore hierarchical timer wheels and a
> message-based synchronization/MT-safety scheme.
> 
> RFC v2:
>  * Fix spelling.
>  * Fix signed/unsigned comparisons and discontinue the use of name-less
>    function parameters, both of which may result in compiler warnings.
>  * Undo the accidental removal of the bitset tests from the 'fast_tests'.
>  * Add a number of missing include files, causing build failures
>    (e.g., on AArch64 builds).
>  * Add perf test attempting to compare rte_timer, rte_htimer and rte_htw.
>  * Use nanoseconds (instead of TSC) as the default time unit.
>  * add() and manage() has flags which allows the caller to specify the
>    time unit (nanoseconds, TSC, or ticks) for the times provided.
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> ---
>  app/test/meson.build                  |   8 +
>  app/test/test_htimer_mgr.c            | 674 +++++++++++++++++++++++++
>  app/test/test_htimer_mgr_perf.c       | 322 ++++++++++++
>  app/test/test_htw.c                   | 478 ++++++++++++++++++
>  app/test/test_htw_perf.c              | 181 +++++++
>  app/test/test_timer_htimer_htw_perf.c | 693 ++++++++++++++++++++++++++
>  doc/api/doxy-api-index.md             |   5 +-
>  doc/api/doxy-api.conf.in              |   1 +
>  lib/htimer/meson.build                |   7 +
>  lib/htimer/rte_htimer.h               |  68 +++
>  lib/htimer/rte_htimer_mgr.c           | 547 ++++++++++++++++++++
>  lib/htimer/rte_htimer_mgr.h           | 516 +++++++++++++++++++
>  lib/htimer/rte_htimer_msg.h           |  44 ++
>  lib/htimer/rte_htimer_msg_ring.c      |  18 +
>  lib/htimer/rte_htimer_msg_ring.h      |  55 ++
>  lib/htimer/rte_htw.c                  | 445 +++++++++++++++++
>  lib/htimer/rte_htw.h                  |  49 ++
>  lib/htimer/version.map                |  17 +
>  lib/meson.build                       |   1 +
>  19 files changed, 4128 insertions(+), 1 deletion(-)
>  create mode 100644 app/test/test_htimer_mgr.c
>  create mode 100644 app/test/test_htimer_mgr_perf.c
>  create mode 100644 app/test/test_htw.c
>  create mode 100644 app/test/test_htw_perf.c
>  create mode 100644 app/test/test_timer_htimer_htw_perf.c
>  create mode 100644 lib/htimer/meson.build
>  create mode 100644 lib/htimer/rte_htimer.h
>  create mode 100644 lib/htimer/rte_htimer_mgr.c
>  create mode 100644 lib/htimer/rte_htimer_mgr.h
>  create mode 100644 lib/htimer/rte_htimer_msg.h
>  create mode 100644 lib/htimer/rte_htimer_msg_ring.c
>  create mode 100644 lib/htimer/rte_htimer_msg_ring.h
>  create mode 100644 lib/htimer/rte_htw.c
>  create mode 100644 lib/htimer/rte_htw.h
>  create mode 100644 lib/htimer/version.map
> 
> diff --git a/app/test/meson.build b/app/test/meson.build
> index 03811ff692..d0308ac09d 100644
> --- a/app/test/meson.build
> +++ b/app/test/meson.build
> @@ -140,9 +140,14 @@ test_sources = files(
>          'test_thash_perf.c',
>          'test_threads.c',
>          'test_timer.c',
> +        'test_timer_htimer_htw_perf.c',
>          'test_timer_perf.c',
>          'test_timer_racecond.c',
>          'test_timer_secondary.c',
> +        'test_htimer_mgr.c',
> +        'test_htimer_mgr_perf.c',
> +        'test_htw.c',
> +        'test_htw_perf.c',
>          'test_ticketlock.c',
>          'test_trace.c',
>          'test_trace_register.c',
> @@ -193,6 +198,7 @@ fast_tests = [
>          ['fib6_autotest', true, true],
>          ['func_reentrancy_autotest', false, true],
>          ['hash_autotest', true, true],
> +        ['htimer_mgr_autotest', true, true],
>          ['interrupt_autotest', true, true],
>          ['ipfrag_autotest', false, true],
>          ['lcores_autotest', true, true],
> @@ -265,6 +271,8 @@ perf_test_names = [
>          'memcpy_perf_autotest',
>          'hash_perf_autotest',
>          'timer_perf_autotest',
> +        'htimer_mgr_perf_autotest',
> +        'htw_perf_autotest',
>          'reciprocal_division',
>          'reciprocal_division_perf',
>          'lpm_perf_autotest',
> diff --git a/app/test/test_htimer_mgr.c b/app/test/test_htimer_mgr.c
> new file mode 100644
> index 0000000000..9e46dec53e
> --- /dev/null
> +++ b/app/test/test_htimer_mgr.c
> @@ -0,0 +1,674 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2023 Ericsson AB
> + */
> +
> +#include "test.h"
> +
> +#include <sys/queue.h>
> +#include <stdlib.h>
> +#include <inttypes.h>
> +
> +#include <rte_common.h>
> +#include <rte_cycles.h>
> +#include <rte_htimer_mgr.h>
> +#include <rte_launch.h>
> +#include <rte_lcore.h>
> +#include <rte_random.h>
> +
> +static int
> +timer_lcore(void *arg)
> +{
> +	bool *stop = arg;
> +
> +	while (!__atomic_load_n(stop, __ATOMIC_RELAXED))
> +		rte_htimer_mgr_manage();
> +
> +	return 0;
> +}
> +
> +static void
> +count_timer_cb(struct rte_htimer *timer __rte_unused, void *arg)
> +{
> +	unsigned int *count = arg;
> +
> +	__atomic_fetch_add(count, 1, __ATOMIC_RELAXED);
> +}
> +
> +static void
> +count_async_cb(struct rte_htimer *timer __rte_unused, int result,
> +	       void *cb_arg)
> +{
> +	unsigned int *count = cb_arg;
> +
> +	if (result == RTE_HTIMER_MGR_ASYNC_RESULT_ADDED)
> +		__atomic_fetch_add(count, 1, __ATOMIC_RELAXED);
> +}
> +
> +static uint64_t
> +s_to_tsc(double s)
> +{
> +	return s * rte_get_tsc_hz();
> +}
> +
> +#define ASYNC_ADD_TEST_EXPIRATION_TIME (250*1000) /* ns */
> +#define ASYNC_TEST_TICK (10*1000) /* ns */
> +
> +static int
> +test_htimer_mgr_async_add(unsigned int num_timers_per_lcore)
> +{
> +	struct rte_htimer *timers;
> +	unsigned int timer_idx;
> +	unsigned int lcore_id;
> +	bool stop = false;
> +	unsigned int timeout_count = 0;
> +	unsigned int async_count = 0;
> +	unsigned int num_workers = 0;
> +	uint64_t expiration_time;
> +	unsigned int num_total_timers;
> +
> +	rte_htimer_mgr_init(ASYNC_TEST_TICK);
> +
> +	RTE_LCORE_FOREACH_WORKER(lcore_id) {
> +		if (rte_eal_remote_launch(timer_lcore, &stop, lcore_id) != 0)
> +			rte_panic("Unable to launch timer lcore\n");
> +		num_workers++;
> +	}
> +
> +	num_total_timers = num_workers * num_timers_per_lcore;
> +
> +	timers = malloc(num_total_timers * sizeof(struct rte_htimer));
> +	timer_idx = 0;
> +
> +	if (timers == NULL)
> +		rte_panic("Unable to allocate heap memory\n");
> +
> +	expiration_time = ASYNC_ADD_TEST_EXPIRATION_TIME;
> +
> +	RTE_LCORE_FOREACH_WORKER(lcore_id) {
> +		unsigned int i;
> +
> +		for (i = 0; i < num_timers_per_lcore; i++) {
> +			struct rte_htimer *timer = &timers[timer_idx++];
> +
> +			for (;;) {
> +				int rc;
> +
> +				rc = rte_htimer_mgr_async_add(timer, lcore_id,
> +						      expiration_time,
> +						      RTE_HTIMER_FLAG_TIME_TSC,
> +						      count_timer_cb,
> +						      &timeout_count, 0,
> +						      count_async_cb,
> +						      &async_count);
> +				if (unlikely(rc == -EBUSY))
> +					rte_htimer_mgr_process();
> +				else
> +					break;
> +			}
> +		}
> +	}
> +
> +	while (__atomic_load_n(&async_count, __ATOMIC_RELAXED) !=
> +	       num_total_timers ||
> +	       __atomic_load_n(&timeout_count, __ATOMIC_RELAXED) !=
> +	       num_total_timers)
> +		rte_htimer_mgr_manage();
> +
> +	__atomic_store_n(&stop, true, __ATOMIC_RELAXED);
> +
> +	rte_eal_mp_wait_lcore();
> +
> +	rte_htimer_mgr_deinit();
> +
> +	free(timers);
> +
> +	return TEST_SUCCESS;
> +}
> +
> +struct async_recorder_state {
> +	bool timer_cb_run;
> +	bool async_add_cb_run;
> +	bool async_cancel_cb_run;
> +	bool failed;
> +};
> +
> +static void
> +record_async_add_cb(struct rte_htimer *timer __rte_unused,
> +		    int result, void *cb_arg)
> +{
> +	struct async_recorder_state *state = cb_arg;
> +
> +	if (state->failed)
> +		return;
> +
> +	if (state->async_add_cb_run ||
> +	    result != RTE_HTIMER_MGR_ASYNC_RESULT_ADDED) {
> +		puts("async add run already");
> +		state->failed = true;
> +	}
> +
> +	state->async_add_cb_run = true;
> +}
> +
> +static void
> +record_async_cancel_cb(struct rte_htimer *timer __rte_unused,
> +		       int result, void *cb_arg)
> +{
> +	struct async_recorder_state *state = cb_arg;
> +
> +	if (state->failed)
> +		return;
> +
> +	if (state->async_cancel_cb_run) {
> +		state->failed = true;
> +		return;
> +	}
> +
> +	switch (result) {
> +	case RTE_HTIMER_MGR_ASYNC_RESULT_EXPIRED:
> +		if (!state->timer_cb_run)
> +			state->failed = true;
> +		break;
> +	case RTE_HTIMER_MGR_ASYNC_RESULT_CANCELED:
> +		if (state->timer_cb_run)
> +			state->failed = true;
> +		break;
> +	case RTE_HTIMER_MGR_ASYNC_RESULT_ALREADY_CANCELED:
> +		state->failed = true;
> +	}
> +
> +	state->async_cancel_cb_run = true;
> +}
> +
> +static int
> +record_check_consistency(struct async_recorder_state *state)
> +{
> +	if (state->failed)
> +		return -1;
> +
> +	return state->async_cancel_cb_run ? 1 : 0;
> +}
> +
> +static int
> +records_check_consistency(struct async_recorder_state *states,
> +			  unsigned int num_states)
> +{
> +	unsigned int i;
> +	int canceled = 0;
> +
> +	for (i = 0; i < num_states; i++) {
> +		int rc;
> +
> +		rc = record_check_consistency(&states[i]);
> +
> +		if (rc < 0)
> +			return -1;
> +		canceled += rc;
> +	}
> +
> +	return canceled;
> +}
> +
> +static void
> +log_timer_expiry_cb(struct rte_htimer *timer __rte_unused,
> +		    void *arg)
> +{
> +	bool *timer_run = arg;
> +
> +	*timer_run = true;
> +}
> +
> +
> +#define ASYNC_ADD_CANCEL_TEST_EXPIRATION_TIME_MAX 10e-3 /* s */
> +
> +static int
> +test_htimer_mgr_async_add_cancel(unsigned int num_timers_per_lcore)
> +{
> +	struct rte_htimer *timers;
> +	struct async_recorder_state *recorder_states;
> +	unsigned int timer_idx = 0;
> +	unsigned int lcore_id;
> +	uint64_t now;
> +	unsigned int num_workers = 0;
> +	bool stop = false;
> +	uint64_t max_expiration_time =
> +		s_to_tsc(ASYNC_ADD_CANCEL_TEST_EXPIRATION_TIME_MAX);
> +	unsigned int num_total_timers;
> +	int canceled = 0;
> +
> +	rte_htimer_mgr_init(ASYNC_TEST_TICK);
> +
> +	RTE_LCORE_FOREACH_WORKER(lcore_id) {
> +		if (rte_eal_remote_launch(timer_lcore, &stop, lcore_id) != 0)
> +			rte_panic("Unable to launch timer lcore\n");
> +		num_workers++;
> +	}
> +
> +	num_total_timers = num_workers * num_timers_per_lcore;
> +
> +	timers = malloc(num_total_timers * sizeof(struct rte_htimer));
> +	recorder_states =
> +		malloc(num_total_timers * sizeof(struct async_recorder_state));
> +
> +	if (timers == NULL || recorder_states == NULL)
> +		rte_panic("Unable to allocate heap memory\n");
> +
> +	now = rte_get_tsc_cycles();
> +
> +	RTE_LCORE_FOREACH_WORKER(lcore_id) {
> +		unsigned int i;
> +
> +		for (i = 0; i < num_timers_per_lcore; i++) {
> +			struct rte_htimer *timer = &timers[timer_idx];
> +			struct async_recorder_state *state =
> +				&recorder_states[timer_idx];
> +
> +			timer_idx++;
> +
> +			*state = (struct async_recorder_state) {};
> +
> +			uint64_t expiration_time =
> +				now + rte_rand_max(max_expiration_time);
> +
> +			for (;;) {
> +				int rc;
> +
> +				rc = rte_htimer_mgr_async_add(timer, lcore_id,
> +							 expiration_time,
> +							 0,
> +							 log_timer_expiry_cb,
> +							 &state->timer_cb_run,
> +							 0,
> +							 record_async_add_cb,
> +							 state);
> +
> +				if (unlikely(rc == -EBUSY))
> +					rte_htimer_mgr_process();
> +				else
> +					break;
> +			}
> +		}
> +	}
> +
> +	timer_idx = 0;
> +
> +	RTE_LCORE_FOREACH_WORKER(lcore_id) {
> +		unsigned int i;
> +
> +		for (i = 0; i < num_timers_per_lcore; i++) {
> +			struct rte_htimer *timer = &timers[timer_idx];
> +			struct async_recorder_state *state =
> +				&recorder_states[timer_idx];
> +
> +			timer_idx++;
> +
> +			/* cancel roughly half of the timers */
> +			if (rte_rand_max(2) == 0)
> +				continue;
> +
> +			for (;;) {
> +				int rc;
> +
> +				rc = rte_htimer_mgr_async_cancel(timer,
> +							record_async_cancel_cb,
> +							state);
> +
> +				if (unlikely(rc == -EBUSY)) {
> +					puts("busy");
> +					rte_htimer_mgr_process();
> +				} else
> +					break;
> +			}
> +
> +			canceled++;
> +		}
> +	}
> +
> +	for (;;) {
> +		int cancel_completed;
> +
> +		cancel_completed = records_check_consistency(recorder_states,
> +							     num_total_timers);
> +
> +		if (cancel_completed < 0) {
> +			puts("Inconsistency found");
> +			return TEST_FAILED;
> +		}
> +
> +		if (cancel_completed == canceled)
> +			break;
> +
> +		rte_htimer_mgr_process();
> +	}
> +
> +	__atomic_store_n(&stop, true, __ATOMIC_RELAXED);
> +
> +	rte_eal_mp_wait_lcore();
> +
> +	rte_htimer_mgr_deinit();
> +
> +	free(timers);
> +	free(recorder_states);
> +
> +	return TEST_SUCCESS;
> +}
> +
> +/*
> + * This is a test case where one thread asynchronously adds two timers,
> + * with the same expiration time; one on the local lcore and one on a
> + * remote lcore. This creates a tricky situation for the timer
> + * manager, and for the application as well, if the htimer struct is
> + * dynamically allocated.
> + */
> +
> +struct test_timer {
> +	uint32_t ref_cnt;
> +	uint64_t expiration_time; /* in TSC, not tick */
> +	uint32_t *timeout_count;
> +	bool *failure_occurred;
> +	struct rte_htimer htimer;
> +};
> +
> +
> +static struct test_timer *
> +test_timer_create(uint64_t expiration_time, uint32_t *timeout_count,
> +		  bool *failure_occurred)
> +{
> +	struct test_timer *timer;
> +
> +	timer = malloc(sizeof(struct test_timer));
> +
> +	if (timer == NULL)
> +		rte_panic("Unable to allocate timer memory\n");
> +
> +	timer->ref_cnt = 1;
> +	timer->expiration_time = expiration_time;
> +	timer->timeout_count = timeout_count;
> +	timer->failure_occurred = failure_occurred;
> +
> +	return timer;
> +}
> +
> +static void
> +test_timer_inc_ref_cnt(struct test_timer *timer)
> +{
> +	__atomic_add_fetch(&timer->ref_cnt, 1, __ATOMIC_RELEASE);

__atomic_fetch_add instead please

there's future work to align with C11 atomics; using
__atomic_fetch_<op> is preferred because the conversion then just
becomes s/__atomic/atomic/ (well, mostly...)
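
For illustration, the suggested form and, assuming ref_cnt eventually
becomes an _Atomic object, its C11 <stdatomic.h> counterpart would look
roughly like:

	/* GCC/Clang builtin, preferred spelling */
	__atomic_fetch_add(&timer->ref_cnt, 1, __ATOMIC_RELEASE);

	/* eventual C11 equivalent */
	atomic_fetch_add_explicit(&timer->ref_cnt, 1, memory_order_release);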


> +}
> +
> +static void
> +test_timer_dec_ref_cnt(struct test_timer *timer)
> +{
> +	if (timer != NULL) {
> +		uint32_t cnt = __atomic_sub_fetch(&timer->ref_cnt, 1,
> +						  __ATOMIC_RELEASE);

same here

i'll try to get a patch up for checkpatches warning soon.

thanks!
  
Stephen Hemminger March 17, 2023, 1:58 a.m. UTC | #2
On Wed, 15 Mar 2023 18:03:42 +0100
Mattias Rönnblom <mattias.ronnblom@ericsson.com> wrote:

> The htimer library attempts at providing a timer facility with roughly
> the same functionality, but less overhead and better scalability than
> DPDK timer library.
> 
> The htimer library employs per-lcore hierarchical timer wheels and a
> message-based synchronization/MT-safety scheme.
> 
> RFC v2:
>  * Fix spelling.
>  * Fix signed/unsigned comparisons and discontinue the use of name-less
>    function parameters, both of which may result in compiler warnings.
>  * Undo the accidental removal of the bitset tests from the 'fast_tests'.
>  * Add a number of missing include files, causing build failures
>    (e.g., on AArch64 builds).
>  * Add perf test attempting to compare rte_timer, rte_htimer and rte_htw.
>  * Use nanoseconds (instead of TSC) as the default time unit.
>  * add() and manage() has flags which allows the caller to specify the
>    time unit (nanoseconds, TSC, or ticks) for the times provided.
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>

Initial reactions.

Good:
  - timer API does need work
  - the units and API's model seems good, would have to look at real applications
  - tests look good as well.

Bad:
  - why do we need a new timer infrastructure? Could this not be done
    by extending and embracing the existing rte_timer() APIs?
  - having a fast rte_timer() would make existing apps faster.
  

PS:
  - ok to drop all the rte_alt_timer stuff; don't think any application depends on it.
    My survey of github projects found only one usage (OpenDataplane).



  
Morten Brørup March 22, 2023, 12:18 p.m. UTC | #3
> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
> Sent: Wednesday, 15 March 2023 18.04

> +++ b/lib/htimer/rte_htimer.h
> @@ -0,0 +1,68 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2023 Ericsson AB
> + */
> +
> +#ifndef _RTE_HTIMER_H_
> +#define _RTE_HTIMER_H_
> +
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <sys/queue.h>
> +
> +#include <rte_bitops.h>
> +
> +struct rte_htimer;
> +
> +typedef void (*rte_htimer_cb_t)(struct rte_htimer *, void *);
> +
> +struct rte_htimer {
> +	/**
> +	 * Absolute timer expiration time (in ticks).
> +	 */
> +	uint64_t expiration_time;
> +	/**
> +	 * Time between expirations (in ticks). Zero for one-shot timers.
> +	 */
> +	uint64_t period;
> +	/**
> +	 * Owning lcore. May safely be read from any thread.
> +	 */
> +	uint32_t owner_lcore_id;
> +	/**
> +	 * The current state of the timer.
> +	 */
> +	uint32_t state:4;
> +	/**
> +	 * Flags set on this timer.
> +	 */
> +	uint32_t flags:28;
> +	/**
> +	 * User-specified callback function pointer.
> +	 */
> +	rte_htimer_cb_t cb;
> +	/**
> +	 * Argument for user callback.
> +	 */
> +	void *cb_arg;
> +	/**
> +	 * Pointers used to add timer to various internal lists.
> +	 */
> +	LIST_ENTRY(rte_htimer) entry;
> +};

If the rte_htimer structure is supposed to be used in some other data structure, e.g. in a TCP/IP flow structure, it seems unnecessarily bloated.

Generally, if there is no significant performance benefit to the "period" feature, please remove it.

Let's say that this library is used for handling the timers of flows in an IP stack, then the vast majority of timers will be timers related to flows. I would prefer if this high-performance timer library is optimized for such high-volume use cases, rather than offering generic features for low-volume use cases.

And if one HTW instance is used for a single purpose (e.g. the IP stack state machine), both "cb" and "cb_arg" can be removed: The application can derive the pointer to the flow by using container_of() with the pointer to the rte_htimer, and the cb_arg will effectively be a shadow variable of the flow's state anyway (if not just a pointer to the flow).
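
For illustration, a minimal sketch of that embedded-timer pattern (the
flow structure and flow_expire() here are hypothetical, not part of the
patch):

	struct flow {
		/* ...flow state... */
		struct rte_htimer timeout; /* embedded; no extra allocation */
	};

	static void
	flow_timeout_cb(struct rte_htimer *timer, void *arg __rte_unused)
	{
		/* recover the enclosing flow without using cb_arg */
		struct flow *flow = container_of(timer, struct flow, timeout);

		flow_expire(flow);
	}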

Here's an idea, which will offer both: For the high-volume single-purpose use cases you could provide a struct rte_htimer_core without the generic fields, and for the generic use cases, you could provide a struct rte_htimer containing a struct rte_htimer_core and the additional fields for generic use.

> +
> +#define RTE_HTIMER_FLAG_ABSOLUTE_TIME RTE_BIT32(0)
> +#define RTE_HTIMER_FLAG_PERIODICAL RTE_BIT32(1)
> +#define RTE_HTIMER_FLAG_TIME_TICK RTE_BIT32(2)
> +#define RTE_HTIMER_FLAG_TIME_TSC RTE_BIT32(3)
> +
> +#define RTE_HTIMER_STATE_PENDING 1
> +#define RTE_HTIMER_STATE_EXPIRED 2
> +#define RTE_HTIMER_STATE_CANCELED 3
> +
> +LIST_HEAD(rte_htimer_list, rte_htimer);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_HTIMER_H_ */
  
Morten Brørup March 24, 2023, 4 p.m. UTC | #4
> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
> Sent: Wednesday, 15 March 2023 18.04
> 
> The htimer library attempts at providing a timer facility with roughly
> the same functionality, but less overhead and better scalability than
> DPDK timer library.
> 
> The htimer library employs per-lcore hierarchical timer wheels and a
> message-based synchronization/MT-safety scheme.
> 
> RFC v2:
>  * Fix spelling.
>  * Fix signed/unsigned comparisons and discontinue the use of name-less
>    function parameters, both of which may result in compiler warnings.
>  * Undo the accidental removal of the bitset tests from the 'fast_tests'.
>  * Add a number of missing include files, causing build failures
>    (e.g., on AArch64 builds).
>  * Add perf test attempting to compare rte_timer, rte_htimer and rte_htw.
>  * Use nanoseconds (instead of TSC) as the default time unit.
>  * add() and manage() has flags which allows the caller to specify the
>    time unit (nanoseconds, TSC, or ticks) for the times provided.
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> ---

Two more series of comments, see inline below:
1. Arguing for using "tick" as the default unit of time.
2. Some bugs in the time conversion functions.

[...]

> diff --git a/lib/htimer/meson.build b/lib/htimer/meson.build
> new file mode 100644
> index 0000000000..2dd5d6a24b
> --- /dev/null
> +++ b/lib/htimer/meson.build
> @@ -0,0 +1,7 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2023 Ericsson AB
> +
> +sources = files('rte_htw.c', 'rte_htimer_msg_ring.c', 'rte_htimer_mgr.c')
> +headers = files('rte_htimer_mgr.h', 'rte_htimer.h')
> +
> +deps += ['ring']
> diff --git a/lib/htimer/rte_htimer.h b/lib/htimer/rte_htimer.h
> new file mode 100644
> index 0000000000..6ac86292b5
> --- /dev/null
> +++ b/lib/htimer/rte_htimer.h
> @@ -0,0 +1,68 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2023 Ericsson AB
> + */
> +
> +#ifndef _RTE_HTIMER_H_
> +#define _RTE_HTIMER_H_
> +
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <sys/queue.h>
> +
> +#include <rte_bitops.h>
> +
> +struct rte_htimer;
> +
> +typedef void (*rte_htimer_cb_t)(struct rte_htimer *, void *);
> +
> +struct rte_htimer {
> +	/**
> +	 * Absolute timer expiration time (in ticks).
> +	 */
> +	uint64_t expiration_time;
> +	/**
> +	 * Time between expirations (in ticks). Zero for one-shot timers.
> +	 */
> +	uint64_t period;
> +	/**
> +	 * Owning lcore. May safely be read from any thread.
> +	 */
> +	uint32_t owner_lcore_id;
> +	/**
> +	 * The current state of the timer.
> +	 */
> +	uint32_t state:4;
> +	/**
> +	 * Flags set on this timer.
> +	 */
> +	uint32_t flags:28;
> +	/**
> +	 * User-specified callback function pointer.
> +	 */
> +	rte_htimer_cb_t cb;
> +	/**
> +	 * Argument for user callback.
> +	 */
> +	void *cb_arg;
> +	/**
> +	 * Pointers used to add timer to various internal lists.
> +	 */
> +	LIST_ENTRY(rte_htimer) entry;
> +};
> +
> +#define RTE_HTIMER_FLAG_ABSOLUTE_TIME RTE_BIT32(0)
> +#define RTE_HTIMER_FLAG_PERIODICAL RTE_BIT32(1)
> +#define RTE_HTIMER_FLAG_TIME_TICK RTE_BIT32(2)
> +#define RTE_HTIMER_FLAG_TIME_TSC RTE_BIT32(3)

After further consideration, and taking the time conversion functions into account, I think the default unit of time should be "tick", not nanoseconds. It seems more natural, and might offer more flexibility in the future.

So instead of:

+#define RTE_HTIMER_FLAG_TIME_TICK RTE_BIT32(2)
+#define RTE_HTIMER_FLAG_TIME_TSC RTE_BIT32(3)

then:

+#define RTE_HTIMER_FLAG_TIME_TSC RTE_BIT32(2)
+#define RTE_HTIMER_FLAG_TIME_NS RTE_BIT32(3)

and perhaps in the future:

+#define RTE_HTIMER_FLAG_TIME_US RTE_BIT32(4)

> +
> +#define RTE_HTIMER_STATE_PENDING 1
> +#define RTE_HTIMER_STATE_EXPIRED 2
> +#define RTE_HTIMER_STATE_CANCELED 3
> +
> +LIST_HEAD(rte_htimer_list, rte_htimer);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_HTIMER_H_ */
> diff --git a/lib/htimer/rte_htimer_mgr.c b/lib/htimer/rte_htimer_mgr.c
> new file mode 100644
> index 0000000000..efdfcf0985
> --- /dev/null
> +++ b/lib/htimer/rte_htimer_mgr.c
> @@ -0,0 +1,547 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2023 Ericsson AB
> + */
> +
> +#include <inttypes.h>
> +#include <math.h>
> +#include <stdbool.h>
> +#include <sys/queue.h>
> +#include <unistd.h>
> +
> +#include <rte_branch_prediction.h>
> +#include <rte_common.h>
> +#include <rte_cycles.h>
> +#include <rte_errno.h>
> +#include <rte_htw.h>
> +#include <rte_prefetch.h>
> +#include <rte_ring_elem.h>
> +
> +#include "rte_htimer_mgr.h"
> +#include "rte_htimer_msg.h"
> +#include "rte_htimer_msg_ring.h"
> +
> +#define MAX_MSG_BATCH_SIZE 16
> +
> +struct htimer_mgr {
> +	struct rte_htimer_msg_ring *msg_ring;
> +	struct rte_htw *htw;
> +
> +	unsigned int async_msgs_idx __rte_cache_aligned;
> +	unsigned int num_async_msgs;
> +	struct rte_htimer_msg async_msgs[MAX_MSG_BATCH_SIZE];
> +} __rte_cache_aligned;
> +
> +static uint64_t ns_per_tick;
> +static double tsc_per_tick;
> +
> +static struct htimer_mgr mgrs[RTE_MAX_LCORE + 1];
> +
> +#define MAX_ASYNC_TRANSACTIONS 1024
> +#define MSG_RING_SIZE MAX_ASYNC_TRANSACTIONS
> +
> +static inline uint64_t
> +tsc_to_tick(uint64_t tsc)
> +{
> +	return tsc / tsc_per_tick;
> +}
> +
> +static inline uint64_t
> +tsc_to_tick_round_up(uint64_t tsc)
> +{
> +	uint64_t tick;
> +
> +	tick = (tsc + tsc_per_tick / 2) / tsc_per_tick;

This does not round up; it rounds to the nearest tick.

E.g. tsc_per_tick=10.0, tsc=1 becomes (1 + 5.0) / 10.0 = 0.6, which becomes 0 (when converted to integer).

E.g. tsc_per_tick=10.0, tsc=5 becomes (5 + 5.0) / 10.0 = 1.0, which becomes 1.
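
A round-up variant could instead look something like this (a sketch
only, keeping tsc_per_tick as a double, as in the patch):

	/* e.g. tsc=1, tsc_per_tick=10.0 -> 1 tick */
	tick = ceil(tsc / tsc_per_tick);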

> +
> +	return tick;
> +}
> +
> +static inline uint64_t
> +ns_to_tick(uint64_t ns)
> +{
> +	return ns / ns_per_tick;
> +}
> +
> +static inline uint64_t
> +ns_to_tick_round_up(uint64_t ns)
> +{
> +	uint64_t tick;
> +
> +	tick = ceil(ns / ns_per_tick);

ns_per_tick is integer, not floating point, so the division is performed as integer division, and ceil() has no effect; i.e. the above is the same as:

tick = ns / ns_per_tick;

Which also means that it does not round up.
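
An integer ceiling division would give the intended round-up, e.g.:

	/* assumes ns + ns_per_tick - 1 does not overflow uint64_t */
	tick = (ns + ns_per_tick - 1) / ns_per_tick;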

> +
> +	return tick;
> +}
> +
> +static inline uint64_t
> +tick_to_ns(uint64_t tick)
> +{
> +	return tick * ns_per_tick;
> +}
> +
> +static struct htimer_mgr *
> +mgr_get(unsigned int lcore_id)
> +{
> +	return &mgrs[lcore_id];
> +}
> +
> +static int
> +mgr_init(unsigned int lcore_id)
> +{
> +	char ring_name[RTE_RING_NAMESIZE];
> +	unsigned int socket_id;
> +	struct htimer_mgr *mgr = &mgrs[lcore_id];
> +
> +	socket_id = rte_lcore_to_socket_id(lcore_id);
> +
> +	snprintf(ring_name, sizeof(ring_name), "htimer_%d", lcore_id);
> +
> +	mgr->msg_ring =
> +		rte_htimer_msg_ring_create(ring_name, MSG_RING_SIZE, socket_id,
> +					   RING_F_SC_DEQ);
> +
> +	if (mgr->msg_ring == NULL)
> +		goto err;
> +
> +	mgr->htw = rte_htw_create();
> +
> +	if (mgr->htw == NULL)
> +		goto err_free_ring;
> +
> +	mgr->async_msgs_idx = 0;
> +	mgr->num_async_msgs = 0;
> +
> +	return 0;
> +
> +err_free_ring:
> +	rte_htimer_msg_ring_free(mgr->msg_ring);
> +err:
> +	return -ENOMEM;
> +}
> +
> +static void
> +mgr_deinit(unsigned int lcore_id)
> +{
> +	struct htimer_mgr *mgr = &mgrs[lcore_id];
> +
> +	rte_htw_destroy(mgr->htw);
> +
> +	rte_htimer_msg_ring_free(mgr->msg_ring);
> +}
> +
> +static volatile bool initialized;
> +
> +static void
> +assure_initialized(void)
> +{
> +	RTE_ASSERT(initialized);
> +}
> +
> +int
> +rte_htimer_mgr_init(uint64_t _ns_per_tick)
> +{
> +	unsigned int lcore_id;
> +
> +	RTE_VERIFY(!initialized);
> +
> +	ns_per_tick = _ns_per_tick;
> +
> +	tsc_per_tick = (ns_per_tick / 1e9) * rte_get_tsc_hz();
> +
> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> +		int rc;
> +
> +		rc = mgr_init(lcore_id);
> +
> +		if (rc < 0) {
> +			unsigned int deinit_lcore_id;
> +
> +			for (deinit_lcore_id = 0; deinit_lcore_id < lcore_id;
> +			     deinit_lcore_id++)
> +				mgr_deinit(deinit_lcore_id);
> +
> +			return rc;
> +		}
> +	}
> +
> +	initialized = true;
> +
> +	return 0;
> +}
> +
> +void
> +rte_htimer_mgr_deinit(void)
> +{
> +	unsigned int lcore_id;
> +
> +	assure_initialized();
> +
> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
> +		mgr_deinit(lcore_id);
> +
> +	initialized = false;
> +}
> +
> +static void
> +assure_valid_time_conversion_flags(uint32_t flags __rte_unused)
> +{
> +	RTE_ASSERT(!((flags & RTE_HTIMER_FLAG_TIME_TSC) &&
> +		     (flags & RTE_HTIMER_FLAG_TIME_TICK)));

With my above suggestion of using tick as default time unit, this would be changed to:

+	RTE_ASSERT(!((flags & RTE_HTIMER_FLAG_TIME_TSC) &&
+		     (flags & RTE_HTIMER_FLAG_TIME_NS) &&
+		     (flags & RTE_HTIMER_FLAG_TIME_US)));

> +}
> +
> +static void
> +assure_valid_add_flags(uint32_t flags)
> +{
> +	assure_valid_time_conversion_flags(flags);
> +
> +	RTE_ASSERT(!(flags & ~(RTE_HTIMER_FLAG_PERIODICAL |
> +			       RTE_HTIMER_FLAG_ABSOLUTE_TIME |
> +			       RTE_HTIMER_FLAG_TIME_TSC |
> +			       RTE_HTIMER_FLAG_TIME_TICK)));

With my above suggestion of using tick as default time unit, this would be changed to:

+	RTE_ASSERT(!(flags & ~(RTE_HTIMER_FLAG_PERIODICAL |
+			       RTE_HTIMER_FLAG_ABSOLUTE_TIME |
+			       RTE_HTIMER_FLAG_TIME_TSC |
+			       RTE_HTIMER_FLAG_TIME_NS |
+			       RTE_HTIMER_FLAG_TIME_US)));

> +}
> +
> +static uint64_t
> +convert_time(uint64_t t, uint32_t flags)
> +{
> +	if (flags & RTE_HTIMER_FLAG_TIME_TSC)
> +		return tsc_to_tick(t);
> +	else if (flags & RTE_HTIMER_FLAG_TIME_TICK)
> +		return t;
> +	else
> +		return ns_to_tick(t);

With my above suggestion of using tick as default time unit, this would be changed to:

+	if (flags & RTE_HTIMER_FLAG_TIME_TSC)
+		return tsc_to_tick(t);
+	else if (flags & RTE_HTIMER_FLAG_TIME_NS)
+		return ns_to_tick(t);
+	else if (flags & RTE_HTIMER_FLAG_TIME_US)
+		return us_to_tick(t);
+	else
+		return t;

> +}
> +
> +void
> +rte_htimer_mgr_add(struct rte_htimer *timer, uint64_t expiration_time,
> +		   uint64_t period, rte_htimer_cb_t timer_cb,
> +		   void *timer_cb_arg, uint32_t flags)
> +{
> +	unsigned int lcore_id = rte_lcore_id();
> +	struct htimer_mgr *mgr = mgr_get(lcore_id);
> +	uint64_t expiration_time_tick;
> +	uint64_t period_tick;
> +
> +	assure_initialized();
> +
> +	assure_valid_add_flags(flags);
> +
> +	expiration_time_tick = convert_time(expiration_time, flags);
> +
> +	period_tick = convert_time(period, flags);
> +
> +	rte_htw_add(mgr->htw, timer, expiration_time_tick, period_tick,
> +		    timer_cb, timer_cb_arg, flags);
> +
> +	timer->owner_lcore_id = lcore_id;
> +}
  
Mattias Rönnblom April 3, 2023, 12:04 p.m. UTC | #5
On 2023-03-22 13:18, Morten Brørup wrote:
>> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
>> Sent: Wednesday, 15 March 2023 18.04
> 
>> +++ b/lib/htimer/rte_htimer.h
>> @@ -0,0 +1,68 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2023 Ericsson AB
>> + */
>> +
>> +#ifndef _RTE_HTIMER_H_
>> +#define _RTE_HTIMER_H_
>> +
>> +#include <stdbool.h>
>> +#include <stdint.h>
>> +#include <sys/queue.h>
>> +
>> +#include <rte_bitops.h>
>> +
>> +struct rte_htimer;
>> +
>> +typedef void (*rte_htimer_cb_t)(struct rte_htimer *, void *);
>> +
>> +struct rte_htimer {
>> +	/**
>> +	 * Absolute timer expiration time (in ticks).
>> +	 */
>> +	uint64_t expiration_time;
>> +	/**
>> +	 * Time between expirations (in ticks). Zero for one-shot timers.
>> +	 */
>> +	uint64_t period;
>> +	/**
>> +	 * Owning lcore. May safely be read from any thread.
>> +	 */
>> +	uint32_t owner_lcore_id;
>> +	/**
>> +	 * The current state of the timer.
>> +	 */
>> +	uint32_t state:4;
>> +	/**
>> +	 * Flags set on this timer.
>> +	 */
>> +	uint32_t flags:28;
>> +	/**
>> +	 * User-specified callback function pointer.
>> +	 */
>> +	rte_htimer_cb_t cb;
>> +	/**
>> +	 * Argument for user callback.
>> +	 */
>> +	void *cb_arg;
>> +	/**
>> +	 * Pointers used to add timer to various internal lists.
>> +	 */
>> +	LIST_ENTRY(rte_htimer) entry;
>> +};
> 
> If the rte_htimer structure is supposed to be used in some other data structure, e.g. in a TCP/IP flow structure, it seems unnecessarily bloated.
> 
> Generally, if there is no significant performance benefit to the "period" feature, please remove it.
> 
> Let's say that this library is used for handling the timers of flows in an IP stack, then the vast majority of timers will be timers related to flows. I would prefer if this high-performance timer library is optimized for such high-volume use cases, rather than offering generic features for low-volume use cases.
> 
> And if one HTW instance is used for a single purpose (e.g. the IP stack state machine), both "cb" and "cb_arg" can be removed: The application can derive the pointer to the flow by the using container_of() with the pointer to the rte_htimer, and the cb_arg will effectively be a shadow variable of the flow's state anyway (if not just a pointer to the flow).
> 
> Here's an idea, which will offer both: For the high-volume single-purpose use cases you could provide a struct rte_htimer_core without the generic fields, and for the generic use cases, you could provide a struct rte_htimer containing a struct rte_htimer_core and the additional fields for generic use.
> 
>> 

Good points.

I will look into:
a) making <rte_htw.h> public
b) splitting rte_htimer into two timer structs (where the now-public 
rte_htw_timer struct may be used from the rte_htimer_timer struct).
c) ...where the htw timer struct won't have any callbacks
d) merging rte_htimer_timer.h into rte_htimer.h.
e) removing the periodic feature, at least from the core timer wheel

>> +
>> +#define RTE_HTIMER_FLAG_ABSOLUTE_TIME RTE_BIT32(0)
>> +#define RTE_HTIMER_FLAG_PERIODICAL RTE_BIT32(1)
>> +#define RTE_HTIMER_FLAG_TIME_TICK RTE_BIT32(2)
>> +#define RTE_HTIMER_FLAG_TIME_TSC RTE_BIT32(3)
>> +
>> +#define RTE_HTIMER_STATE_PENDING 1
>> +#define RTE_HTIMER_STATE_EXPIRED 2
>> +#define RTE_HTIMER_STATE_CANCELED 3
>> +
>> +LIST_HEAD(rte_htimer_list, rte_htimer);
>> +
>> +#ifdef __cplusplus
>> +}
>> +#endif
>> +
>> +#endif /* _RTE_HTIMER_H_ */
  
Morten Brørup April 4, 2023, 7:32 a.m. UTC | #6
> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
> Sent: Monday, 3 April 2023 14.04
> 
> On 2023-03-22 13:18, Morten Brørup wrote:
> >> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
> >> Sent: Wednesday, 15 March 2023 18.04
> >
> >> +++ b/lib/htimer/rte_htimer.h
> >> @@ -0,0 +1,68 @@
> >> +/* SPDX-License-Identifier: BSD-3-Clause
> >> + * Copyright(c) 2023 Ericsson AB
> >> + */
> >> +
> >> +#ifndef _RTE_HTIMER_H_
> >> +#define _RTE_HTIMER_H_
> >> +
> >> +#include <stdbool.h>
> >> +#include <stdint.h>
> >> +#include <sys/queue.h>
> >> +
> >> +#include <rte_bitops.h>
> >> +
> >> +struct rte_htimer;
> >> +
> >> +typedef void (*rte_htimer_cb_t)(struct rte_htimer *, void *);
> >> +
> >> +struct rte_htimer {
> >> +	/**
> >> +	 * Absolute timer expiration time (in ticks).
> >> +	 */
> >> +	uint64_t expiration_time;
> >> +	/**
> >> +	 * Time between expirations (in ticks). Zero for one-shot timers.
> >> +	 */
> >> +	uint64_t period;
> >> +	/**
> >> +	 * Owning lcore. May safely be read from any thread.
> >> +	 */
> >> +	uint32_t owner_lcore_id;
> >> +	/**
> >> +	 * The current state of the timer.
> >> +	 */
> >> +	uint32_t state:4;
> >> +	/**
> >> +	 * Flags set on this timer.
> >> +	 */
> >> +	uint32_t flags:28;
> >> +	/**
> >> +	 * User-specified callback function pointer.
> >> +	 */
> >> +	rte_htimer_cb_t cb;
> >> +	/**
> >> +	 * Argument for user callback.
> >> +	 */
> >> +	void *cb_arg;
> >> +	/**
> >> +	 * Pointers used to add timer to various internal lists.
> >> +	 */
> >> +	LIST_ENTRY(rte_htimer) entry;
> >> +};
> >
> > If the rte_htimer structure is supposed to be used in some other data
> structure, e.g. in a TCP/IP flow structure, it seems unnecessarily
> bloated.
> >
> > Generally, if there is no significant performance benefit to the
> "period" feature, please remove it.
> >
> > Let's say that this library is used for handling the timers of flows
> in an IP stack, then the vast majority of timers will be timers related
> to flows. I would prefer if this high-performance timer library is
> optimized for such high-volume use cases, rather than offering generic
> features for low-volume use cases.
> >
> > And if one HTW instance is used for a single purpose (e.g. the IP
> stack state machine), both "cb" and "cb_arg" can be removed: The
> application can derive the pointer to the flow by the using
> container_of() with the pointer to the rte_htimer, and the cb_arg will
> effectively be a shadow variable of the flow's state anyway (if not just
> a pointer to the flow).
> >
> > Here's an idea, which will offer both: For the high-volume single-
> purpose use cases you could provide a struct rte_htimer_core without the
> generic fields, and for the generic use cases, you could provide a
> struct rte_htimer containing a struct rte_htimer_core and the additional
> fields for generic use.
> >
> >>
> 
> Good points.
> 
> I will look into:
> a) making <rte_htw.h> public
> b) split rte_htimer into two timer structs (where the now-public
> rte_htw_timer struct may be used from the rte_htimer_timer struct).
> c) ...where the htw timer struct won't have any callbacks
> d) merge rte_htimer_timer.h into rte_htimer.h.
> e) remove the periodic feature, at least from the core timer wheel

Sounds good, Mattias. Looking forward to reviewing the next version. :-)
  
Stephen Hemminger July 6, 2023, 10:41 p.m. UTC | #7
On Wed, 15 Mar 2023 18:03:42 +0100
Mattias Rönnblom <mattias.ronnblom@ericsson.com> wrote:

> The htimer library attempts at providing a timer facility with roughly
> the same functionality, but less overhead and better scalability than
> DPDK timer library.

I don't understand. Why not just fix and extend existing timers.
Sure you will need to add some API's and maybe drop some of the existing
experimental ones (ie alt_timer). Even change the ABI.

It would be better to have one high performance, scaleable timer than
spend the next 3 years telling users which one to use and why!

So please make rte_timer work better in 23.11 release rather
than reinventing a new variant.
  
Mattias Rönnblom July 12, 2023, 8:58 a.m. UTC | #8
On 2023-07-07 00:41, Stephen Hemminger wrote:
> On Wed, 15 Mar 2023 18:03:42 +0100
> Mattias Rönnblom <mattias.ronnblom@ericsson.com> wrote:
> 
>> The htimer library attempts at providing a timer facility with roughly
>> the same functionality, but less overhead and better scalability than
>> DPDK timer library.
> 
> I don't understand. Why not just fix and extend existing timers.
> Sure you will need to add some API's and maybe drop some of the existing
> experimental ones (ie alt_timer). Even change the ABI.
> 
> It would be better to have one high performance, scaleable timer than
> spend the next 3 years telling users which one to use and why!
> 
> So please make rte_timer work better in 23.11 release rather
> than reinventing a new variant.

I wanted to explore what a data plane timer API should look like.
Something like a "first principles" type approach. As it happens, it
seems like I will converge on something that's pretty similar to how
the rte_timer API (and most kernel timers) works, for example in regard
to timer memory allocation.

Clearly, there should not be two DPDK timer APIs that provide the same
functionality. That was never the intention. Since so much DPDK code,
and more importantly application code, depends on <rte_timer.h>, it
wasn't obvious that the best option was to make extensive changes to
the rte_timer API and implementation. One way that seemed like a
plausible option (how much so depending on the extent of the rte_timer
vs rte_htimer API differences) was to have a new API, and deprecate
<rte_timer.h> in the release in which htimer was introduced.

That said, at this point, it's not clear to me which of the two options
is better: "making extensive changes to rte_timer" or "having rte_htimer
on the side for a couple of releases".

An imaginary alternative where the <rte_timer.h> API/ABI can be 
maintained, and you get all the performance and scalability and improved 
API semantics of htimer, would obviously be the best option. But I don't 
think that is possible. Especially not if you want to end up with a 
nice, orthogonal API and a clean implementation.

I think changes in both ABI and API are inevitable, and a good thing, 
considering some of the quirks for the current API.

A side note: It seems to me at this point there should be two public 
timer APIs, but providing different functionality, at slightly different 
levels of abstraction. One is the <rte_timer.h> lookalike, and the other 
what in the current patchset is represented by <rte_htw.h>, but minus 
the callbacks, as per Morten Brørup's suggestion. The latter would be a 
low-level HTW only, with no MT safety, no lcore knowledge, no opinions 
on time source, etc.
  

Patch

diff --git a/app/test/meson.build b/app/test/meson.build
index 03811ff692..d0308ac09d 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -140,9 +140,14 @@  test_sources = files(
         'test_thash_perf.c',
         'test_threads.c',
         'test_timer.c',
+        'test_timer_htimer_htw_perf.c',
         'test_timer_perf.c',
         'test_timer_racecond.c',
         'test_timer_secondary.c',
+        'test_htimer_mgr.c',
+        'test_htimer_mgr_perf.c',
+        'test_htw.c',
+        'test_htw_perf.c',
         'test_ticketlock.c',
         'test_trace.c',
         'test_trace_register.c',
@@ -193,6 +198,7 @@  fast_tests = [
         ['fib6_autotest', true, true],
         ['func_reentrancy_autotest', false, true],
         ['hash_autotest', true, true],
+        ['htimer_mgr_autotest', true, true],
         ['interrupt_autotest', true, true],
         ['ipfrag_autotest', false, true],
         ['lcores_autotest', true, true],
@@ -265,6 +271,8 @@  perf_test_names = [
         'memcpy_perf_autotest',
         'hash_perf_autotest',
         'timer_perf_autotest',
+        'htimer_mgr_perf_autotest',
+        'htw_perf_autotest',
         'reciprocal_division',
         'reciprocal_division_perf',
         'lpm_perf_autotest',
diff --git a/app/test/test_htimer_mgr.c b/app/test/test_htimer_mgr.c
new file mode 100644
index 0000000000..9e46dec53e
--- /dev/null
+++ b/app/test/test_htimer_mgr.c
@@ -0,0 +1,674 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include "test.h"
+
+#include <sys/queue.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_htimer_mgr.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_random.h>
+
+static int
+timer_lcore(void *arg)
+{
+	bool *stop = arg;
+
+	while (!__atomic_load_n(stop, __ATOMIC_RELAXED))
+		rte_htimer_mgr_manage();
+
+	return 0;
+}
+
+static void
+count_timer_cb(struct rte_htimer *timer __rte_unused, void *arg)
+{
+	unsigned int *count = arg;
+
+	__atomic_fetch_add(count, 1, __ATOMIC_RELAXED);
+}
+
+static void
+count_async_cb(struct rte_htimer *timer __rte_unused, int result,
+	       void *cb_arg)
+{
+	unsigned int *count = cb_arg;
+
+	if (result == RTE_HTIMER_MGR_ASYNC_RESULT_ADDED)
+		__atomic_fetch_add(count, 1, __ATOMIC_RELAXED);
+}
+
+static uint64_t
+s_to_tsc(double s)
+{
+	return s * rte_get_tsc_hz();
+}
+
+#define ASYNC_ADD_TEST_EXPIRATION_TIME (250*1000) /* ns */
+#define ASYNC_TEST_TICK (10*1000) /* ns */
+
+static int
+test_htimer_mgr_async_add(unsigned int num_timers_per_lcore)
+{
+	struct rte_htimer *timers;
+	unsigned int timer_idx;
+	unsigned int lcore_id;
+	bool stop = false;
+	unsigned int timeout_count = 0;
+	unsigned int async_count = 0;
+	unsigned int num_workers = 0;
+	uint64_t expiration_time;
+	unsigned int num_total_timers;
+
+	rte_htimer_mgr_init(ASYNC_TEST_TICK);
+
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		if (rte_eal_remote_launch(timer_lcore, &stop, lcore_id) != 0)
+			rte_panic("Unable to launch timer lcore\n");
+		num_workers++;
+	}
+
+	num_total_timers = num_workers * num_timers_per_lcore;
+
+	timers = malloc(num_total_timers * sizeof(struct rte_htimer));
+	timer_idx = 0;
+
+	if (timers == NULL)
+		rte_panic("Unable to allocate heap memory\n");
+
+	expiration_time = ASYNC_ADD_TEST_EXPIRATION_TIME;
+
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		unsigned int i;
+
+		for (i = 0; i < num_timers_per_lcore; i++) {
+			struct rte_htimer *timer = &timers[timer_idx++];
+
+			for (;;) {
+				int rc;
+
+				rc = rte_htimer_mgr_async_add(timer, lcore_id,
+						      expiration_time,
+						      RTE_HTIMER_FLAG_TIME_TSC,
+						      count_timer_cb,
+						      &timeout_count, 0,
+						      count_async_cb,
+						      &async_count);
+				if (unlikely(rc == -EBUSY))
+					rte_htimer_mgr_process();
+				else
+					break;
+			}
+		}
+	}
+
+	while (__atomic_load_n(&async_count, __ATOMIC_RELAXED) !=
+	       num_total_timers ||
+	       __atomic_load_n(&timeout_count, __ATOMIC_RELAXED) !=
+	       num_total_timers)
+		rte_htimer_mgr_manage();
+
+	__atomic_store_n(&stop, true, __ATOMIC_RELAXED);
+
+	rte_eal_mp_wait_lcore();
+
+	rte_htimer_mgr_deinit();
+
+	free(timers);
+
+	return TEST_SUCCESS;
+}
+
+struct async_recorder_state {
+	bool timer_cb_run;
+	bool async_add_cb_run;
+	bool async_cancel_cb_run;
+	bool failed;
+};
+
+static void
+record_async_add_cb(struct rte_htimer *timer __rte_unused,
+		    int result, void *cb_arg)
+{
+	struct async_recorder_state *state = cb_arg;
+
+	if (state->failed)
+		return;
+
+	if (state->async_add_cb_run ||
+	    result != RTE_HTIMER_MGR_ASYNC_RESULT_ADDED) {
+		puts("async add run already");
+		state->failed = true;
+	}
+
+	state->async_add_cb_run = true;
+}
+
+static void
+record_async_cancel_cb(struct rte_htimer *timer __rte_unused,
+		       int result, void *cb_arg)
+{
+	struct async_recorder_state *state = cb_arg;
+
+	if (state->failed)
+		return;
+
+	if (state->async_cancel_cb_run) {
+		state->failed = true;
+		return;
+	}
+
+	switch (result) {
+	case RTE_HTIMER_MGR_ASYNC_RESULT_EXPIRED:
+		if (!state->timer_cb_run)
+			state->failed = true;
+		break;
+	case RTE_HTIMER_MGR_ASYNC_RESULT_CANCELED:
+		if (state->timer_cb_run)
+			state->failed = true;
+		break;
+	case RTE_HTIMER_MGR_ASYNC_RESULT_ALREADY_CANCELED:
+		state->failed = true;
+	}
+
+	state->async_cancel_cb_run = true;
+}
+
+static int
+record_check_consistency(struct async_recorder_state *state)
+{
+	if (state->failed)
+		return -1;
+
+	return state->async_cancel_cb_run ? 1 : 0;
+}
+
+static int
+records_check_consistency(struct async_recorder_state *states,
+			  unsigned int num_states)
+{
+	unsigned int i;
+	int canceled = 0;
+
+	for (i = 0; i < num_states; i++) {
+		int rc;
+
+		rc = record_check_consistency(&states[i]);
+
+		if (rc < 0)
+			return -1;
+		canceled += rc;
+	}
+
+	return canceled;
+}
+
+static void
+log_timer_expiry_cb(struct rte_htimer *timer __rte_unused,
+		    void *arg)
+{
+	bool *timer_run = arg;
+
+	*timer_run = true;
+}
+
+
+#define ASYNC_ADD_CANCEL_TEST_EXPIRATION_TIME_MAX 10e-3 /* s */
+
+static int
+test_htimer_mgr_async_add_cancel(unsigned int num_timers_per_lcore)
+{
+	struct rte_htimer *timers;
+	struct async_recorder_state *recorder_states;
+	unsigned int timer_idx = 0;
+	unsigned int lcore_id;
+	uint64_t now;
+	unsigned int num_workers = 0;
+	bool stop = false;
+	uint64_t max_expiration_time =
+		s_to_tsc(ASYNC_ADD_CANCEL_TEST_EXPIRATION_TIME_MAX);
+	unsigned int num_total_timers;
+	int canceled = 0;
+
+	rte_htimer_mgr_init(ASYNC_TEST_TICK);
+
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		if (rte_eal_remote_launch(timer_lcore, &stop, lcore_id) != 0)
+			rte_panic("Unable to launch timer lcore\n");
+		num_workers++;
+	}
+
+	num_total_timers = num_workers * num_timers_per_lcore;
+
+	timers = malloc(num_total_timers * sizeof(struct rte_htimer));
+	recorder_states =
+		malloc(num_total_timers * sizeof(struct async_recorder_state));
+
+	if (timers == NULL || recorder_states == NULL)
+		rte_panic("Unable to allocate heap memory\n");
+
+	now = rte_get_tsc_cycles();
+
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		unsigned int i;
+
+		for (i = 0; i < num_timers_per_lcore; i++) {
+			struct rte_htimer *timer = &timers[timer_idx];
+			struct async_recorder_state *state =
+				&recorder_states[timer_idx];
+
+			timer_idx++;
+
+			*state = (struct async_recorder_state) {};
+
+			uint64_t expiration_time =
+				now + rte_rand_max(max_expiration_time);
+
+			for (;;) {
+				int rc;
+
+				rc = rte_htimer_mgr_async_add(timer, lcore_id,
+							 expiration_time,
+							 0,
+							 log_timer_expiry_cb,
+							 &state->timer_cb_run,
+							 0,
+							 record_async_add_cb,
+							 state);
+
+				if (unlikely(rc == -EBUSY))
+					rte_htimer_mgr_process();
+				else
+					break;
+			}
+		}
+	}
+
+	timer_idx = 0;
+
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		unsigned int i;
+
+		for (i = 0; i < num_timers_per_lcore; i++) {
+			struct rte_htimer *timer = &timers[timer_idx];
+			struct async_recorder_state *state =
+				&recorder_states[timer_idx];
+
+			timer_idx++;
+
+			/* cancel roughly half of the timers */
+			if (rte_rand_max(2) == 0)
+				continue;
+
+			for (;;) {
+				int rc;
+
+				rc = rte_htimer_mgr_async_cancel(timer,
+							record_async_cancel_cb,
+							state);
+
+				if (unlikely(rc == -EBUSY)) {
+					puts("busy");
+					rte_htimer_mgr_process();
+				} else
+					break;
+			}
+
+			canceled++;
+		}
+	}
+
+	for (;;) {
+		int cancel_completed;
+
+		cancel_completed = records_check_consistency(recorder_states,
+							     num_total_timers);
+
+		if (cancel_completed < 0) {
+			puts("Inconsistency found");
+			return TEST_FAILED;
+		}
+
+		if (cancel_completed == canceled)
+			break;
+
+		rte_htimer_mgr_process();
+	}
+
+	__atomic_store_n(&stop, true, __ATOMIC_RELAXED);
+
+	rte_eal_mp_wait_lcore();
+
+	rte_htimer_mgr_deinit();
+
+	free(timers);
+	free(recorder_states);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * This is a test case where one thread asynchronously adds two timers,
+ * with the same expiration time; one on the local lcore and one on a
+ * remote lcore. This creates a tricky situation for the timer
+ * manager, and for the application as well, if the htimer struct is
+ * dynamically allocated.
+ */
+
+struct test_timer {
+	uint32_t ref_cnt;
+	uint64_t expiration_time; /* in TSC, not tick */
+	uint32_t *timeout_count;
+	bool *failure_occurred;
+	struct rte_htimer htimer;
+};
+
+
+static struct test_timer *
+test_timer_create(uint64_t expiration_time, uint32_t *timeout_count,
+		  bool *failure_occurred)
+{
+	struct test_timer *timer;
+
+	timer = malloc(sizeof(struct test_timer));
+
+	if (timer == NULL)
+		rte_panic("Unable to allocate timer memory\n");
+
+	timer->ref_cnt = 1;
+	timer->expiration_time = expiration_time;
+	timer->timeout_count = timeout_count;
+	timer->failure_occurred = failure_occurred;
+
+	return timer;
+}
+
+static void
+test_timer_inc_ref_cnt(struct test_timer *timer)
+{
+	__atomic_add_fetch(&timer->ref_cnt, 1, __ATOMIC_RELEASE);
+}
+
+static void
+test_timer_dec_ref_cnt(struct test_timer *timer)
+{
+	if (timer != NULL) {
+		uint32_t cnt = __atomic_sub_fetch(&timer->ref_cnt, 1,
+						  __ATOMIC_RELEASE);
+		if (cnt == 0)
+			free(timer);
+	}
+}
+
+static void
+test_timer_cb(struct rte_htimer *timer, void *arg __rte_unused)
+{
+	struct test_timer *test_timer =
+		container_of(timer, struct test_timer, htimer);
+	uint64_t now = rte_get_tsc_cycles();
+
+	if (now < test_timer->expiration_time)
+		*(test_timer->failure_occurred) = true;
+
+	__atomic_fetch_add(test_timer->timeout_count, 1, __ATOMIC_RELAXED);
+
+	test_timer_dec_ref_cnt(test_timer);
+}
+
+static int
+worker_lcore(void *arg)
+{
+	bool *stop = arg;
+
+	while (!__atomic_load_n(stop, __ATOMIC_RELAXED))
+		rte_htimer_mgr_manage();
+
+	return 0;
+}
+
+struct cancel_timer {
+	bool cancel;
+	struct rte_htimer *target_timer;
+	uint32_t *cancel_count;
+	uint32_t *expired_count;
+	bool *failure_occurred;
+	struct rte_htimer htimer;
+};
+
+static struct cancel_timer *
+cancel_timer_create(bool cancel, struct rte_htimer *target_timer,
+		    uint32_t *cancel_count, uint32_t *expired_count,
+		    bool *failure_occurred)
+{
+	struct cancel_timer *timer;
+
+	timer = malloc(sizeof(struct cancel_timer));
+
+	if (timer == NULL)
+		rte_panic("Unable to allocate timer memory\n");
+
+	timer->cancel = cancel;
+	timer->target_timer = target_timer;
+	timer->cancel_count = cancel_count;
+	timer->expired_count = expired_count;
+	timer->failure_occurred = failure_occurred;
+
+	return timer;
+}
+
+static void
+async_cancel_cb(struct rte_htimer *timer, int result, void *cb_arg)
+{
+	struct test_timer *test_timer =
+		container_of(timer, struct test_timer, htimer);
+	struct cancel_timer *cancel_timer = cb_arg;
+	bool *failure_occurred = cancel_timer->failure_occurred;
+
+	if (!cancel_timer->cancel || cancel_timer->target_timer != timer)
+		*failure_occurred = true;
+
+	if (result == RTE_HTIMER_MGR_ASYNC_RESULT_CANCELED) {
+		uint32_t *cancel_count = cancel_timer->cancel_count;
+
+		/* decrease target lcore's ref count */
+		test_timer_dec_ref_cnt(test_timer);
+		(*cancel_count)++;
+	} else if (result == RTE_HTIMER_MGR_ASYNC_RESULT_EXPIRED) {
+		uint32_t *expired_count = cancel_timer->expired_count;
+
+		(*expired_count)++;
+	} else
+		*failure_occurred = true;
+
+	/* source lcore's ref count */
+	test_timer_dec_ref_cnt(test_timer);
+
+	free(cancel_timer);
+}
+
+static void
+cancel_timer_cb(struct rte_htimer *timer, void *arg __rte_unused)
+{
+	struct cancel_timer *cancel_timer =
+		container_of(timer, struct cancel_timer, htimer);
+
+	if (cancel_timer->cancel) {
+		int rc;
+
+		rc = rte_htimer_mgr_async_cancel(cancel_timer->target_timer,
+						 async_cancel_cb, cancel_timer);
+
+		if (rc == -EBUSY)
+			rte_htimer_mgr_add(timer, 0, 0, cancel_timer_cb,
+					   NULL, 0);
+	} else
+		free(cancel_timer);
+}
+
+#define REF_CNT_TEST_TICK 10 /* ns */
+#define REF_CNT_AVG_EXPIRATION_TIME (50 * 1000) /* ns */
+#define REF_CNT_MAX_EXPIRATION_TIME (2 * REF_CNT_AVG_EXPIRATION_TIME)
+#define REF_CNT_CANCEL_FUZZ(expiration_time) \
+	((uint64_t)((expiration_time) * (rte_drand()/10 + 0.95)))
+
+static int
+test_htimer_mgr_ref_cnt_timers(unsigned int num_timers_per_lcore)
+{
+	unsigned int lcore_id;
+	bool stop = false;
+	unsigned int num_workers = 0;
+	struct test_timer **timers;
+	struct cancel_timer **cancel_timers;
+	unsigned int num_timers;
+	uint32_t timeout_count = 0;
+	uint32_t cancel_count = 0;
+	uint32_t expired_count = 0;
+	bool failure_occurred = false;
+	unsigned int timer_idx;
+	unsigned int expected_cancel_attempts;
+	uint64_t deadline;
+	uint64_t now;
+
+	rte_htimer_mgr_init(REF_CNT_TEST_TICK);
+
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		if (rte_eal_remote_launch(worker_lcore, &stop, lcore_id) != 0)
+			rte_panic("Unable to launch timer lcore\n");
+		num_workers++;
+	}
+
+	/* give the workers a chance to get going */
+	rte_delay_us_block(10*1000);
+
+	num_timers = num_timers_per_lcore * num_workers;
+
+	timers = malloc(sizeof(struct test_timer *) * num_timers);
+	cancel_timers = malloc(sizeof(struct cancel_timer *) * num_timers);
+
+	if (timers == NULL || cancel_timers == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	timer_idx = 0;
+	expected_cancel_attempts = 0;
+
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		unsigned int i;
+
+		for (i = 0; i < num_timers_per_lcore; i++) {
+			uint64_t expiration_time;
+			struct test_timer *timer;
+			struct rte_htimer *htimer;
+			bool cancel;
+			struct cancel_timer *cancel_timer;
+			uint64_t cancel_expiration_time;
+
+			expiration_time =
+				REF_CNT_MAX_EXPIRATION_TIME * rte_drand();
+
+			timer = test_timer_create(expiration_time,
+						  &timeout_count,
+						  &failure_occurred);
+			htimer = &timer->htimer;
+
+			timers[timer_idx++] = timer;
+
+			/* for the target lcore's usage of this timer */
+			test_timer_inc_ref_cnt(timer);
+
+			for (;;) {
+				int rc;
+
+				rc = rte_htimer_mgr_async_add(htimer, lcore_id,
+							      expiration_time,
+							      0, test_timer_cb,
+							      NULL, 0, NULL,
+							      NULL);
+				if (unlikely(rc == -EBUSY))
+					rte_htimer_mgr_process();
+				else
+					break;
+			}
+
+			cancel = rte_rand_max(2);
+
+			cancel_timer =
+				cancel_timer_create(cancel, &timer->htimer,
+						    &cancel_count,
+						    &expired_count,
+						    &failure_occurred);
+
+			cancel_expiration_time =
+				REF_CNT_CANCEL_FUZZ(expiration_time);
+
+			rte_htimer_mgr_add(&cancel_timer->htimer,
+					   cancel_expiration_time, 0,
+					   cancel_timer_cb, NULL, 0);
+
+			if (cancel)
+				expected_cancel_attempts++;
+		}
+	}
+
+	deadline = rte_get_tsc_cycles() + REF_CNT_MAX_EXPIRATION_TIME +
+		s_to_tsc(0.25);
+
+	do {
+		now = rte_get_tsc_cycles();
+
+		rte_htimer_mgr_manage_time(now, RTE_HTIMER_FLAG_TIME_TSC);
+
+	} while (now < deadline);
+
+	__atomic_store_n(&stop, true, __ATOMIC_RELAXED);
+
+	rte_eal_mp_wait_lcore();
+
+	if (failure_occurred)
+		return TEST_FAILED;
+
+	if ((cancel_count + expired_count) != expected_cancel_attempts)
+		return TEST_FAILED;
+
+	if (timeout_count != (num_timers - cancel_count))
+		return TEST_FAILED;
+
+	rte_htimer_mgr_deinit();
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_htimer_mgr(void)
+{
+	int rc;
+
+	rc = test_htimer_mgr_async_add(1);
+	if (rc != TEST_SUCCESS)
+		return rc;
+
+	rc = test_htimer_mgr_async_add(100000);
+	if (rc != TEST_SUCCESS)
+		return rc;
+
+	rc = test_htimer_mgr_async_add_cancel(100);
+	if (rc != TEST_SUCCESS)
+		return rc;
+
+	rc = test_htimer_mgr_ref_cnt_timers(10);
+	if (rc != TEST_SUCCESS)
+		return rc;
+
+	rc = test_htimer_mgr_ref_cnt_timers(10000);
+	if (rc != TEST_SUCCESS)
+		return rc;
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(htimer_mgr_autotest, test_htimer_mgr);
diff --git a/app/test/test_htimer_mgr_perf.c b/app/test/test_htimer_mgr_perf.c
new file mode 100644
index 0000000000..cdc513228f
--- /dev/null
+++ b/app/test/test_htimer_mgr_perf.c
@@ -0,0 +1,322 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include "test.h"
+
+#include <sys/queue.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include <rte_cycles.h>
+#include <rte_htimer_mgr.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_malloc.h>
+#include <rte_random.h>
+
+static void
+nop_cb(struct rte_htimer *timer __rte_unused, void *cb_arg __rte_unused)
+{
+}
+
+static uint64_t
+add_rand_timers(struct rte_htimer *timers, uint64_t num,
+		uint64_t timeout_start, uint64_t max_timeout)
+{
+	uint64_t i;
+	uint64_t expiration_times[num];
+	uint64_t start_ts;
+	uint64_t end_ts;
+
+	for (i = 0; i < num; i++)
+		expiration_times[i] =
+			1 + timeout_start + rte_rand_max(max_timeout - 1);
+
+	start_ts = rte_get_tsc_cycles();
+
+	for (i = 0; i < num; i++)
+		rte_htimer_mgr_add(&timers[i], expiration_times[i], 0, nop_cb,
+				   NULL, RTE_HTIMER_FLAG_ABSOLUTE_TIME);
+
+	/* make sure the timers are actually scheduled in the wheel */
+	rte_htimer_mgr_process();
+
+	end_ts = rte_get_tsc_cycles();
+
+	return end_ts - start_ts;
+}
+
+#define TIME_STEP 16
+
+static void
+test_add_manage_perf(const char *scenario_name, uint64_t num_timers,
+		     uint64_t timespan)
+{
+	uint64_t manage_calls;
+	struct rte_htimer *timers;
+	uint64_t start;
+	uint64_t now;
+	uint64_t start_ts;
+	uint64_t end_ts;
+	uint64_t add_latency;
+	uint64_t manage_latency;
+
+	rte_htimer_mgr_init(1);
+
+	manage_calls = timespan / TIME_STEP;
+
+	printf("Scenario: %s\n", scenario_name);
+	printf("    Configuration:\n");
+	printf("        Timers: %"PRIu64"\n", num_timers);
+	printf("        Max timeout: %"PRIu64" ticks\n", timespan);
+	printf("        Average timeouts/manage call: %.3f\n",
+	       num_timers / (double)manage_calls);
+	printf("        Time advance per manage call: %d\n", TIME_STEP);
+
+	printf("    Results:\n");
+
+	timers = rte_malloc(NULL, sizeof(struct rte_htimer) * num_timers, 0);
+
+	if (timers == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	start = 1 + rte_rand_max(UINT64_MAX / 2);
+
+	rte_htimer_mgr_manage_time(start - 1, 0);
+
+	add_latency = add_rand_timers(timers, num_timers, start, timespan);
+
+	start_ts = rte_get_tsc_cycles();
+
+	for (now = start; now < (start + timespan); now += TIME_STEP)
+		rte_htimer_mgr_manage_time(now, 0);
+
+	end_ts = rte_get_tsc_cycles();
+
+	manage_latency = end_ts - start_ts;
+
+	printf("        %.0f TSC cycles / add op\n",
+	       (double)add_latency / num_timers);
+	printf("        %.0f TSC cycles / manage call\n",
+	       (double)manage_latency / manage_calls);
+	printf("        %.1f TSC cycles / tick\n",
+	       (double)manage_latency / timespan);
+
+	rte_htimer_mgr_deinit();
+
+	rte_free(timers);
+}
+
+static uint64_t
+s_to_tsc(double s)
+{
+	return s * rte_get_tsc_hz();
+}
+
+static double
+tsc_to_s(uint64_t tsc)
+{
+	return (double)tsc / (double)rte_get_tsc_hz();
+}
+
+#define ITERATIONS 500
+
+static int
+test_del_perf(uint64_t num_timers, uint64_t timespan)
+{
+	struct rte_htimer *timers;
+	uint64_t start;
+	uint64_t i, j;
+	uint64_t start_ts;
+	uint64_t end_ts;
+	uint64_t latency = 0;
+
+	rte_htimer_mgr_init(1);
+
+	timers =
+	    rte_malloc(NULL, sizeof(struct rte_htimer) * num_timers, 0);
+
+	if (timers == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	start = 1 + rte_rand_max(UINT64_MAX / 2);
+
+	for (i = 0; i < ITERATIONS; i++) {
+		rte_htimer_mgr_manage_time(start - 1, 0);
+
+		add_rand_timers(timers, num_timers, start, timespan);
+
+		/* A manage (or process) call is required to get all
+		 * timers scheduled, which may in turn make them a
+		 * little more expensive to remove.
+		 */
+		rte_htimer_mgr_manage_time(start, 0);
+
+		start_ts = rte_get_tsc_cycles();
+
+		for (j = 0; j < num_timers; j++)
+			if (rte_htimer_mgr_cancel(&timers[j]) < 0)
+				return TEST_FAILED;
+
+		end_ts = rte_get_tsc_cycles();
+
+		latency += (end_ts - start_ts);
+
+		start += (timespan + 1);
+	}
+
+	printf("Timer delete: %.0f TSC cycles / call\n",
+	       (double)latency / (double)ITERATIONS / (double)num_timers);
+
+	rte_htimer_mgr_deinit();
+
+	rte_free(timers);
+
+	return TEST_SUCCESS;
+}
+
+static int
+target_lcore(void *arg)
+{
+	bool *stop = arg;
+
+	while (!__atomic_load_n(stop, __ATOMIC_RELAXED))
+		rte_htimer_mgr_manage();
+
+	return 0;
+}
+
+static void
+count_async_cb(struct rte_htimer *timer __rte_unused, int result,
+	       void *cb_arg)
+{
+	unsigned int *count = cb_arg;
+
+	if (result == RTE_HTIMER_MGR_ASYNC_RESULT_ADDED)
+		(*count)++;
+}
+
+#define ASYNC_ADD_TEST_TICK s_to_tsc(500e-9)
+/*
+ * The number of test timers must be kept smaller than the size of the
+ * htimer-internal message ring for this test case to work.
+ */
+#define ASYNC_ADD_TEST_NUM_TIMERS 1000
+#define ASYNC_ADD_TEST_MIN_TIMEOUT (ASYNC_ADD_TEST_NUM_TIMERS * s_to_tsc(1e-6))
+#define ASYNC_ADD_TEST_MAX_TIMEOUT (2 * ASYNC_ADD_TEST_MIN_TIMEOUT)
+
+static void
+test_async_add_perf(void)
+{
+	uint64_t max_timeout = ASYNC_ADD_TEST_MAX_TIMEOUT;
+	uint64_t min_timeout = ASYNC_ADD_TEST_MIN_TIMEOUT;
+	unsigned int num_timers = ASYNC_ADD_TEST_NUM_TIMERS;
+	struct rte_htimer *timers;
+	bool *stop;
+	unsigned int lcore_id = rte_lcore_id();
+	unsigned int target_lcore_id =
+		rte_get_next_lcore(lcore_id, true, true);
+	uint64_t now;
+	uint64_t request_latency = 0;
+	uint64_t response_latency = 0;
+	unsigned int i;
+
+	rte_htimer_mgr_init(ASYNC_ADD_TEST_TICK);
+
+	timers = rte_malloc(NULL, sizeof(struct rte_htimer) * num_timers,
+			    RTE_CACHE_LINE_SIZE);
+	stop = rte_malloc(NULL, sizeof(bool), RTE_CACHE_LINE_SIZE);
+
+	if (timers == NULL || stop == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	*stop = false;
+
+	if (rte_eal_remote_launch(target_lcore, stop, target_lcore_id) != 0)
+		rte_panic("Unable to launch worker lcore\n");
+
+	/* wait for launch to complete */
+	rte_delay_us_block(100);
+
+	for (i = 0; i < ITERATIONS; i++) {
+		uint64_t expiration_times[num_timers];
+		unsigned int j;
+		uint64_t start_ts;
+		uint64_t end_ts;
+		unsigned int count = 0;
+
+		now = rte_get_tsc_cycles();
+
+		for (j = 0; j < num_timers; j++)
+			expiration_times[j] = now + min_timeout +
+				rte_rand_max(max_timeout - min_timeout);
+
+		start_ts = rte_get_tsc_cycles();
+
+		for (j = 0; j < num_timers; j++)
+			rte_htimer_mgr_async_add(&timers[j], target_lcore_id,
+					     expiration_times[j], 0,
+					     nop_cb, NULL,
+					     RTE_HTIMER_FLAG_ABSOLUTE_TIME,
+					     count_async_cb, &count);
+
+		end_ts = rte_get_tsc_cycles();
+
+		request_latency += (end_ts - start_ts);
+
+		/* wait long enough for the target lcore to have answered */
+		rte_delay_us_block(1 * num_timers);
+
+		start_ts = rte_get_tsc_cycles();
+
+		while (count != num_timers)
+			rte_htimer_mgr_process();
+
+		end_ts = rte_get_tsc_cycles();
+
+		response_latency += (end_ts - start_ts);
+
+		/* wait until all timeouts have fired */
+		rte_delay_us_block(tsc_to_s(max_timeout) * 1e6);
+	}
+
+	__atomic_store_n(stop, true, __ATOMIC_RELAXED);
+
+	rte_eal_mp_wait_lcore();
+
+	rte_free(timers);
+
+	rte_htimer_mgr_deinit();
+
+	printf("Timer async add:\n");
+	printf("    Configuration:\n");
+	printf("        Timers: %d\n", ASYNC_ADD_TEST_NUM_TIMERS);
+	printf("    Results:\n");
+	printf("        Source lcore cost: %.0f TSC cycles / add request\n",
+	       (double)request_latency / (double)ITERATIONS / num_timers);
+	printf("                           %.0f TSC cycles / add response\n",
+	       (double)response_latency / (double)ITERATIONS / num_timers);
+}
+
+static int
+test_htimer_mgr_perf(void)
+{
+	/* warm up */
+	rte_delay_us_block(10000);
+
+	test_add_manage_perf("Sparse", 100000, 10000000);
+
+	test_add_manage_perf("Dense", 100000, 200000);
+
+	test_add_manage_perf("Idle", 10, 100000);
+
+	if (test_del_perf(100000, 100000) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	test_async_add_perf();
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(htimer_mgr_perf_autotest, test_htimer_mgr_perf);
diff --git a/app/test/test_htw.c b/app/test/test_htw.c
new file mode 100644
index 0000000000..3cddfaed7f
--- /dev/null
+++ b/app/test/test_htw.c
@@ -0,0 +1,478 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include "test.h"
+
+#include <sys/queue.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include <rte_cycles.h>
+#include <rte_htw.h>
+#include <rte_random.h>
+
+struct recorder {
+	struct rte_htimer_list timeout_list;
+	uint64_t num_timeouts;
+};
+
+static void
+recorder_init(struct recorder *recorder)
+{
+	recorder->num_timeouts = 0;
+	LIST_INIT(&recorder->timeout_list);
+}
+
+static void
+recorder_cb(struct rte_htimer *timer, void *arg)
+{
+	struct recorder *recorder = arg;
+
+	recorder->num_timeouts++;
+
+	LIST_INSERT_HEAD(&recorder->timeout_list, timer, entry);
+}
+
+static int
+recorder_verify(struct recorder *recorder, uint64_t min_expiry,
+		uint64_t max_expiry)
+{
+	struct rte_htimer *timer;
+
+	LIST_FOREACH(timer, &recorder->timeout_list, entry) {
+		if (timer->expiration_time > max_expiry)
+			return TEST_FAILED;
+
+		if (timer->expiration_time < min_expiry)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+static void
+add_rand_timers(struct rte_htw *htw, struct rte_htimer *timers,
+		uint64_t num, uint64_t timeout_start, uint64_t max_timeout,
+		rte_htimer_cb_t cb, void *cb_arg)
+{
+	uint64_t i;
+
+	for (i = 0; i < num; i++) {
+		struct rte_htimer *timer = &timers[i];
+		bool use_absolute = rte_rand() & 1;
+		unsigned int flags = 0;
+		uint64_t expiration_time;
+
+		expiration_time = timeout_start + rte_rand_max(max_timeout);
+
+		if (use_absolute)
+			flags |= RTE_HTIMER_FLAG_ABSOLUTE_TIME;
+		else {
+			uint64_t htw_current_time;
+
+			htw_current_time = rte_htw_current_time(htw);
+
+			if (expiration_time < htw_current_time)
+				expiration_time = 0;
+			else
+				expiration_time -= htw_current_time;
+		}
+
+		rte_htw_add(htw, timer, expiration_time, 0, cb, cb_arg, flags);
+	}
+}
+
+#define ADVANCE_TIME_MAX_STEP 16
+
+static int
+test_rand_timers(uint64_t in_flight_timers, uint64_t max_timeout,
+		 uint64_t runtime)
+{
+	struct recorder recorder;
+	struct rte_htimer *timers;
+	uint64_t fired = 0;
+	uint64_t start;
+	uint64_t now;
+	struct rte_htw *htw;
+	uint64_t added;
+
+	recorder_init(&recorder);
+
+	timers = malloc(sizeof(struct rte_htimer) * in_flight_timers);
+
+	if (timers == NULL)
+		rte_panic("Unable to allocate heap memory\n");
+
+	start = rte_rand_max(UINT64_MAX - max_timeout);
+
+	htw = rte_htw_create();
+
+	if (htw == NULL)
+		return TEST_FAILED;
+
+	added = in_flight_timers;
+	add_rand_timers(htw, timers, added, start + 1, max_timeout,
+			recorder_cb, &recorder);
+
+	for (now = start; now < (start + runtime); ) {
+		uint64_t advance;
+
+		advance = rte_rand_max(ADVANCE_TIME_MAX_STEP);
+
+		now += advance;
+
+		rte_htw_manage(htw, now);
+
+		if (recorder.num_timeouts > 0) {
+			struct rte_htimer *timer;
+
+			if (advance == 0)
+				return TEST_FAILED;
+
+			if (recorder_verify(&recorder, now - advance + 1, now)
+			    != TEST_SUCCESS)
+				return TEST_FAILED;
+
+			while ((timer = LIST_FIRST(&recorder.timeout_list))
+			       != NULL) {
+				LIST_REMOVE(timer, entry);
+
+				add_rand_timers(htw, timer, 1,
+						now + 1, max_timeout,
+						recorder_cb, &recorder);
+				added++;
+				fired++;
+			}
+
+			recorder.num_timeouts = 0;
+		}
+	}
+
+	/* finish the remaining timeouts */
+
+	rte_htw_manage(htw, now + max_timeout);
+
+	if (recorder_verify(&recorder, now, now + max_timeout) != TEST_SUCCESS)
+		return TEST_FAILED;
+	fired += recorder.num_timeouts;
+
+	if (fired != added)
+		return TEST_FAILED;
+
+	rte_htw_destroy(htw);
+
+	free(timers);
+
+	return TEST_SUCCESS;
+}
+
+struct counter_state {
+	int calls;
+	struct rte_htw *htw;
+	bool cancel;
+};
+
+static void
+count_timeouts_cb(struct rte_htimer *timer __rte_unused, void *arg)
+{
+	struct counter_state *state = arg;
+
+	state->calls++;
+
+	if (state->cancel)
+		rte_htw_cancel(state->htw, timer);
+}
+
+static int
+test_single_timeout_type(uint64_t now, uint64_t distance, bool use_absolute)
+{
+	struct rte_htw *htw;
+	struct counter_state cstate = {};
+	struct rte_htimer timer;
+	uint64_t expiration_time;
+	unsigned int flags = 0;
+
+	htw = rte_htw_create();
+
+	rte_htw_manage(htw, now);
+
+	if (use_absolute) {
+		expiration_time = now + distance;
+		flags |= RTE_HTIMER_FLAG_ABSOLUTE_TIME;
+	} else
+		expiration_time = distance;
+
+	rte_htw_add(htw, &timer, expiration_time, 0, count_timeouts_cb,
+		    &cstate, flags);
+
+	rte_htw_manage(htw, now);
+
+	if (cstate.calls != 0)
+		return TEST_FAILED;
+
+	rte_htw_manage(htw, now + distance - 1);
+
+	if (cstate.calls != 0)
+		return TEST_FAILED;
+
+	rte_htw_manage(htw, now + distance);
+
+	if (cstate.calls != 1)
+		return TEST_FAILED;
+
+	rte_htw_manage(htw, now + distance);
+
+	if (cstate.calls != 1)
+		return TEST_FAILED;
+
+	rte_htw_manage(htw, now + distance + 1);
+
+	if (cstate.calls != 1)
+		return TEST_FAILED;
+
+	rte_htw_destroy(htw);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_single_timeout(uint64_t now, uint64_t distance)
+{
+	int rc;
+
+	rc = test_single_timeout_type(now, distance, true);
+	if (rc < 0)
+		return rc;
+
+	rc = test_single_timeout_type(now, distance, false);
+	if (rc < 0)
+		return rc;
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_periodical_timer(uint64_t now, uint64_t start, uint64_t period)
+{
+	struct rte_htw *htw;
+	struct counter_state cstate;
+	struct rte_htimer timer;
+
+	htw = rte_htw_create();
+
+	cstate = (struct counter_state) {
+		.htw = htw
+	};
+
+	rte_htw_manage(htw, now);
+
+	rte_htw_add(htw, &timer, start, period, count_timeouts_cb,
+		    &cstate, RTE_HTIMER_FLAG_PERIODICAL);
+
+	rte_htw_manage(htw, now);
+
+	if (cstate.calls != 0)
+		return TEST_FAILED;
+
+	rte_htw_manage(htw, now + start - 1);
+
+	if (cstate.calls != 0)
+		return TEST_FAILED;
+
+	rte_htw_manage(htw, now + start);
+
+	if (cstate.calls != 1)
+		return TEST_FAILED;
+
+	rte_htw_manage(htw, now + start + 1);
+
+	if (cstate.calls != 1)
+		return TEST_FAILED;
+
+	rte_htw_manage(htw, now + start + period);
+
+	if (cstate.calls != 2)
+		return TEST_FAILED;
+
+	cstate.cancel = true;
+
+	rte_htw_manage(htw, now + start + 2 * period);
+
+	if (cstate.calls != 3)
+		return TEST_FAILED;
+
+	rte_htw_manage(htw, now + start + 3 * period);
+
+	if (cstate.calls != 3)
+		return TEST_FAILED;
+
+	rte_htw_destroy(htw);
+
+	return TEST_SUCCESS;
+}
+
+#define CANCEL_ITERATIONS 1000
+#define CANCEL_NUM_TIMERS 1000
+#define CANCEL_MAX_DISTANCE 10000
+
+static int
+test_cancel_timer(void)
+{
+	uint64_t now;
+	struct rte_htw *htw;
+	int i;
+	struct rte_htimer timers[CANCEL_NUM_TIMERS];
+	struct counter_state timeouts[CANCEL_NUM_TIMERS];
+
+	now = rte_rand_max(UINT64_MAX / 2);
+
+	htw = rte_htw_create();
+
+	for (i = 0; i < CANCEL_ITERATIONS; i++) {
+		int j;
+		int target;
+
+		for (j = 0; j < CANCEL_NUM_TIMERS; j++) {
+			struct rte_htimer *timer = &timers[j];
+			uint64_t expiration_time;
+
+			timeouts[j] = (struct counter_state) {};
+
+			expiration_time = now + 1 +
+				rte_rand_max(CANCEL_MAX_DISTANCE);
+
+			rte_htw_add(htw, timer, expiration_time, 0,
+				    count_timeouts_cb, &timeouts[j],
+				    RTE_HTIMER_FLAG_ABSOLUTE_TIME);
+		}
+
+		target = rte_rand_max(CANCEL_NUM_TIMERS);
+
+		rte_htw_cancel(htw, &timers[target]);
+
+		now += CANCEL_MAX_DISTANCE;
+
+		rte_htw_manage(htw, now);
+
+		for (j = 0; j < CANCEL_NUM_TIMERS; j++) {
+			if (j != target) {
+				if (timeouts[j].calls != 1)
+					return TEST_FAILED;
+			} else {
+				if (timeouts[j].calls > 0)
+					return TEST_FAILED;
+			}
+		}
+	}
+
+	rte_htw_destroy(htw);
+
+	return TEST_SUCCESS;
+}
+
+static void
+nop_cb(struct rte_htimer *timer __rte_unused, void *arg __rte_unused)
+{
+}
+
+#define NEXT_NUM_TIMERS 1000
+#define NEXT_MAX_DISTANCE 10000
+
+static int
+test_next_timeout(void)
+{
+	uint64_t now;
+	struct rte_htw *htw;
+	int i;
+	struct rte_htimer timers[NEXT_NUM_TIMERS];
+	uint64_t last_expiration;
+
+	now = rte_rand_max(NEXT_MAX_DISTANCE);
+
+	htw = rte_htw_create();
+
+	if (rte_htw_next_timeout(htw, UINT64_MAX) != UINT64_MAX)
+		return TEST_FAILED;
+	if (rte_htw_next_timeout(htw, now + 1) != (now + 1))
+		return TEST_FAILED;
+
+	rte_htw_manage(htw, now);
+
+	last_expiration = now + NEXT_MAX_DISTANCE * NEXT_NUM_TIMERS;
+
+	for (i = 0; i < NEXT_NUM_TIMERS; i++) {
+		struct rte_htimer *timer = &timers[i];
+		uint64_t expiration;
+		uint64_t upper_bound;
+
+		/* add timers, each new one closer than the last */
+
+		expiration = last_expiration - rte_rand_max(NEXT_MAX_DISTANCE);
+
+		rte_htw_add(htw, timer, expiration, 0, nop_cb, NULL,
+			    RTE_HTIMER_FLAG_ABSOLUTE_TIME);
+
+		if (rte_htw_next_timeout(htw, UINT64_MAX) != expiration)
+			return TEST_FAILED;
+
+		upper_bound = expiration + rte_rand_max(100000);
+
+		if (rte_htw_next_timeout(htw, upper_bound) != expiration)
+			return TEST_FAILED;
+
+		upper_bound = expiration - rte_rand_max(expiration);
+
+		if (rte_htw_next_timeout(htw, upper_bound) != upper_bound)
+			return TEST_FAILED;
+
+		last_expiration = expiration;
+	}
+
+	rte_htw_destroy(htw);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_htw(void)
+{
+	if (test_single_timeout(0, 10) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_single_timeout(0, 254) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_single_timeout(0, 255) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_single_timeout(255, 1) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_single_timeout(254, 2) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_periodical_timer(10000, 500, 2) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_periodical_timer(1234567, 12345, 100000) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_cancel_timer() != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_rand_timers(1000, 100000, 100000000) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_rand_timers(100000, 100000, 1000000) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_next_timeout() != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(htw_autotest, test_htw);
diff --git a/app/test/test_htw_perf.c b/app/test/test_htw_perf.c
new file mode 100644
index 0000000000..65901f0874
--- /dev/null
+++ b/app/test/test_htw_perf.c
@@ -0,0 +1,181 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include "test.h"
+
+#include <sys/queue.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include <rte_cycles.h>
+#include <rte_htw.h>
+#include <rte_malloc.h>
+#include <rte_random.h>
+
+static void
+nop_cb(struct rte_htimer *timer __rte_unused, void *arg __rte_unused)
+{
+}
+
+static void
+add_rand_timers(struct rte_htw *htw, struct rte_htimer *timers,
+		uint64_t num, uint64_t timeout_start, uint64_t max_timeout)
+{
+	uint64_t i;
+	uint64_t expiration_times[num];
+	uint64_t start_ts;
+	uint64_t end_ts;
+
+	for (i = 0; i < num; i++)
+		expiration_times[i] = timeout_start + rte_rand_max(max_timeout);
+
+	start_ts = rte_get_tsc_cycles();
+
+	for (i = 0; i < num; i++) {
+		struct rte_htimer *timer = &timers[i];
+
+		rte_htw_add(htw, timer, expiration_times[i], 0, nop_cb, NULL,
+			    RTE_HTIMER_FLAG_ABSOLUTE_TIME);
+	}
+
+	/* actually install the timers */
+	rte_htw_process(htw);
+
+	end_ts = rte_get_tsc_cycles();
+
+	printf("        %.0f TSC cycles / add op\n",
+	       (double)(end_ts - start_ts) / num);
+}
+
+#define TIME_STEP 16
+
+static int
+test_add_manage_perf(const char *scenario_name, uint64_t num_timers,
+		     uint64_t timespan)
+{
+	uint64_t manage_calls;
+	struct rte_htimer *timers;
+	uint64_t start;
+	uint64_t now;
+	struct rte_htw *htw;
+	uint64_t start_ts;
+	uint64_t end_ts;
+	double latency;
+
+	manage_calls = timespan / TIME_STEP;
+
+	printf("Scenario: %s\n", scenario_name);
+	printf("    Configuration:\n");
+	printf("        Timers: %"PRIu64"\n", num_timers);
+	printf("        Max timeout: %"PRIu64" ticks\n", timespan);
+	printf("        Average timeouts/manage call: %.3f\n",
+	       num_timers / (double)manage_calls);
+	printf("        Time advance per manage call: %d\n", TIME_STEP);
+
+	printf("    Results:\n");
+
+	timers = rte_malloc(NULL, sizeof(struct rte_htimer) *
+			    num_timers, 0);
+
+	if (timers == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	htw = rte_htw_create();
+
+	if (htw == NULL)
+		return TEST_FAILED;
+
+	start = 1 + rte_rand_max(UINT64_MAX / 2);
+
+	rte_htw_manage(htw, start - 1);
+
+	add_rand_timers(htw, timers, num_timers, start, timespan);
+
+	start_ts = rte_get_tsc_cycles();
+
+	for (now = start; now < (start + timespan); now += TIME_STEP)
+		rte_htw_manage(htw, now);
+
+	end_ts = rte_get_tsc_cycles();
+
+	latency = end_ts - start_ts;
+
+	printf("        %.0f TSC cycles / manage call\n",
+	       latency / manage_calls);
+	printf("        %.1f TSC cycles / tick\n", latency / timespan);
+
+	rte_htw_destroy(htw);
+
+	rte_free(timers);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_cancel_perf(uint64_t num_timers, uint64_t timespan)
+{
+	struct rte_htimer *timers;
+	uint64_t start;
+	struct rte_htw *htw;
+	uint64_t i;
+	uint64_t start_ts;
+	uint64_t end_ts;
+	double latency;
+
+	timers = rte_malloc(NULL, sizeof(struct rte_htimer) * num_timers, 0);
+
+	if (timers == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	htw = rte_htw_create();
+
+	if (htw == NULL)
+		return TEST_FAILED;
+
+	start = 1 + rte_rand_max(UINT64_MAX / 2);
+
+	rte_htw_manage(htw, start - 1);
+
+	add_rand_timers(htw, timers, num_timers, start, timespan);
+
+	start_ts = rte_get_tsc_cycles();
+
+	for (i = 0; i < num_timers; i++)
+		rte_htw_cancel(htw, &timers[i]);
+
+	end_ts = rte_get_tsc_cycles();
+
+	latency = end_ts - start_ts;
+
+	printf("Timer delete: %.0f TSC cycles / call\n",
+	       latency / num_timers);
+
+	rte_htw_destroy(htw);
+
+	rte_free(timers);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_htw_perf(void)
+{
+	rte_delay_us_block(100);
+
+	if (test_add_manage_perf("Sparse", 100000, 10000000) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_add_manage_perf("Dense", 100000, 200000) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_add_manage_perf("Idle", 10, 100000) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	if (test_cancel_perf(100000, 100000) != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(htw_perf_autotest, test_htw_perf);
diff --git a/app/test/test_timer_htimer_htw_perf.c b/app/test/test_timer_htimer_htw_perf.c
new file mode 100644
index 0000000000..e51fc7282f
--- /dev/null
+++ b/app/test/test_timer_htimer_htw_perf.c
@@ -0,0 +1,693 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include "test.h"
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_cycles.h>
+#include <rte_htimer_mgr.h>
+#include <rte_htw.h>
+#include <rte_lcore.h>
+#include <rte_malloc.h>
+#include <rte_random.h>
+#include <rte_timer.h>
+
+static uint64_t
+s_to_tsc(double s)
+{
+	return s * rte_get_tsc_hz();
+}
+
+static double
+tsc_to_s(uint64_t tsc)
+{
+	return (double)tsc / (double)rte_get_tsc_hz();
+}
+
+struct timer_conf {
+	uint64_t start;
+	uint64_t interval;
+};
+
+static void
+get_timer_confs(double aggregate_expiration_rate,
+		struct timer_conf *timer_confs,
+		size_t num_timers)
+{
+	double avg_expiration_rate;
+	size_t i;
+
+	avg_expiration_rate = aggregate_expiration_rate / num_timers;
+
+	for (i = 0; i < num_timers; i++) {
+		struct timer_conf *conf = &timer_confs[i];
+		double expiration_rate;
+
+		expiration_rate = avg_expiration_rate * (rte_drand() + 0.5);
+
+		conf->interval = rte_get_tsc_hz() / expiration_rate;
+		conf->start = rte_rand_max(conf->interval);
+	}
+}
+
+struct timer_lib_ops {
+	const char *name;
+
+	void *(*create)(const struct timer_conf *timer_confs,
+			size_t num_timers, bool cancel, uint64_t *fired);
+	void (*manage_time)(void *data, uint64_t current_time);
+	void (*manage)(void *data);
+	void (*destroy)(void *data);
+};
+
+static void *
+nop_create(const struct timer_conf *timer_confs __rte_unused,
+	   size_t num_timers __rte_unused, bool cancel __rte_unused,
+	   uint64_t *fired __rte_unused)
+{
+	return NULL;
+}
+
+static __rte_noinline void
+nop_manage(void *data __rte_unused)
+{
+}
+
+static __rte_noinline void
+nop_manage_time(void *data __rte_unused, uint64_t current_time __rte_unused)
+{
+}
+
+static void
+nop_destroy(void *data __rte_unused)
+{
+}
+
+static struct timer_lib_ops nop_ops = {
+	.name = "nop",
+	.create = nop_create,
+	.manage = nop_manage,
+	.manage_time = nop_manage_time,
+	.destroy = nop_destroy
+};
+
+struct ctimer {
+	uint64_t interval;
+	struct rte_timer timer;
+	uint64_t cancel_offset;
+	struct rte_timer canceled_timer;
+};
+
+static void
+crash_cb(struct rte_timer *timer __rte_unused, void *cb_arg __rte_unused)
+{
+	abort();
+}
+
+#define CANCELED_OFFSET (0.5) /* s */
+
+static void
+test_cb(struct rte_timer *timer, void *cb_arg)
+{
+	struct ctimer *ctimer =
+		container_of(timer, struct ctimer, timer);
+	uint64_t *fired = cb_arg;
+
+	rte_timer_reset(timer, ctimer->interval, SINGLE,
+			rte_lcore_id(), test_cb, cb_arg);
+
+	if (ctimer->cancel_offset > 0)
+		rte_timer_reset(&ctimer->canceled_timer,
+				ctimer->interval + ctimer->cancel_offset,
+				SINGLE, rte_lcore_id(), crash_cb, NULL);
+
+	(*fired)++;
+}
+
+static void *
+timer_create1(const struct timer_conf *timer_confs, size_t num_timers,
+	      bool cancel, uint64_t *fired)
+{
+	struct ctimer *ctimers;
+	unsigned int i;
+
+	ctimers = rte_malloc(NULL, sizeof(struct ctimer) * num_timers, 0);
+
+	if (num_timers > 0 && ctimers == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	rte_timer_subsystem_init();
+
+	for (i = 0; i < num_timers; i++) {
+		const struct timer_conf *timer_conf = &timer_confs[i];
+		struct ctimer *ctimer = &ctimers[i];
+		struct rte_timer *timer = &ctimer->timer;
+
+		rte_timer_init(timer);
+
+		ctimer->interval = timer_conf->interval;
+
+		rte_timer_reset(timer, timer_conf->start, SINGLE,
+				rte_lcore_id(),	test_cb, fired);
+
+		if (cancel) {
+			ctimer->cancel_offset = s_to_tsc(CANCELED_OFFSET);
+
+			rte_timer_reset(&ctimer->canceled_timer,
+				    timer_conf->start + ctimer->cancel_offset,
+				    SINGLE, rte_lcore_id(),
+				    crash_cb, NULL);
+		} else
+			ctimer->cancel_offset = 0;
+	}
+
+	return ctimers;
+}
+
+static void
+timer_manage(void *data __rte_unused)
+{
+	rte_timer_manage();
+}
+
+static void
+timer_manage_time(void *data __rte_unused, uint64_t current_time __rte_unused)
+{
+	rte_timer_manage();
+}
+
+static void
+timer_destroy(void *data)
+{
+	rte_free(data);
+
+	rte_timer_subsystem_finalize();
+}
+
+static struct timer_lib_ops timer_ops = {
+	.name = "timer",
+	.create = timer_create1,
+	.manage = timer_manage,
+	.manage_time = timer_manage_time,
+	.destroy = timer_destroy
+};
+
+struct chtimer {
+	uint64_t interval;
+	struct rte_htimer htimer;
+	uint64_t cancel_offset;
+	struct rte_htimer canceled_htimer;
+};
+
+static void
+hcrash_cb(struct rte_htimer *timer __rte_unused, void *cb_arg __rte_unused)
+{
+	abort();
+}
+
+static void
+htest_cb(struct rte_htimer *timer, void *cb_arg)
+{
+	struct chtimer *chtimer =
+		container_of(timer, struct chtimer, htimer);
+	uint64_t *fired = cb_arg;
+
+	rte_htimer_mgr_add(timer, chtimer->interval, 0, htest_cb, cb_arg,
+			   RTE_HTIMER_FLAG_TIME_TSC);
+
+	if (chtimer->cancel_offset > 0) {
+		struct rte_htimer *canceled_htimer =
+			&chtimer->canceled_htimer;
+		uint64_t cancel_expiration_time = chtimer->interval +
+			chtimer->cancel_offset;
+
+		rte_htimer_mgr_cancel(canceled_htimer);
+
+		rte_htimer_mgr_add(canceled_htimer, cancel_expiration_time, 0,
+				   hcrash_cb, NULL, RTE_HTIMER_FLAG_TIME_TSC);
+	}
+
+	(*fired)++;
+}
+
+#define TICK_LENGTH (1e-6)
+
+static void *
+htimer_create(const struct timer_conf *timer_confs, size_t num_timers,
+	      bool cancel, uint64_t *fired)
+{
+	struct chtimer *chtimers;
+	unsigned int i;
+
+	chtimers = rte_malloc(NULL, sizeof(struct chtimer) * num_timers, 0);
+
+	if (num_timers > 0 && chtimers == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	rte_htimer_mgr_init(TICK_LENGTH * NS_PER_S);
+
+	rte_htimer_mgr_manage();
+
+	for (i = 0; i < num_timers; i++) {
+		const struct timer_conf *timer_conf = &timer_confs[i];
+		struct chtimer *chtimer = &chtimers[i];
+
+		chtimer->interval = timer_conf->interval;
+
+		rte_htimer_mgr_add(&chtimer->htimer, timer_conf->start, 0,
+				   htest_cb, fired, RTE_HTIMER_FLAG_TIME_TSC);
+
+		if (cancel) {
+			uint64_t cancel_start;
+
+			chtimer->cancel_offset = s_to_tsc(CANCELED_OFFSET);
+
+			cancel_start =
+				timer_conf->start + chtimer->cancel_offset;
+
+			rte_htimer_mgr_add(&chtimer->canceled_htimer,
+					   cancel_start, 0,
+					   hcrash_cb, NULL,
+					   RTE_HTIMER_FLAG_TIME_TSC);
+		} else
+			chtimer->cancel_offset = 0;
+	}
+
+	rte_htimer_mgr_process();
+
+	return chtimers;
+}
+
+static void
+htimer_manage(void *data __rte_unused)
+{
+	rte_htimer_mgr_manage();
+}
+
+static void
+htimer_manage_time(void *data __rte_unused, uint64_t current_time)
+{
+	rte_htimer_mgr_manage_time(current_time, RTE_HTIMER_FLAG_TIME_TSC);
+}
+
+static void
+htimer_destroy(void *data)
+{
+	rte_free(data);
+
+	rte_htimer_mgr_deinit();
+}
+
+static struct timer_lib_ops htimer_ops = {
+	.name = "htimer",
+	.create = htimer_create,
+	.manage = htimer_manage,
+	.manage_time = htimer_manage_time,
+	.destroy = htimer_destroy,
+};
+
+struct htw {
+	struct rte_htw *htw;
+	struct chtimer *chtimers;
+	uint64_t tsc_per_tick;
+	uint64_t *fired;
+};
+
+static void
+htw_manage_time(void *timer_data, uint64_t current_time)
+{
+	struct htw *htw = timer_data;
+	uint64_t tick;
+
+	tick = current_time / htw->tsc_per_tick;
+
+	rte_htw_manage(htw->htw, tick);
+}
+
+static void
+htw_manage(void *timer_data)
+{
+	uint64_t now;
+
+	now = rte_get_tsc_cycles();
+
+	htw_manage_time(timer_data, now);
+}
+
+static void
+htwcrash_cb(struct rte_htimer *timer __rte_unused, void *cb_arg __rte_unused)
+{
+	abort();
+}
+
+static void
+htwtest_cb(struct rte_htimer *timer, void *cb_arg)
+{
+	struct chtimer *chtimer =
+		container_of(timer, struct chtimer, htimer);
+	struct htw *htw = cb_arg;
+
+	rte_htw_add(htw->htw, timer, chtimer->interval, 0, htwtest_cb,
+		    cb_arg, 0);
+
+	if (chtimer->cancel_offset > 0) {
+		struct rte_htimer *canceled_htimer =
+			&chtimer->canceled_htimer;
+		uint64_t cancel_expiration_time = chtimer->interval +
+			chtimer->cancel_offset;
+
+		rte_htw_cancel(htw->htw, canceled_htimer);
+
+		rte_htw_add(htw->htw, canceled_htimer,
+			    cancel_expiration_time, 0,
+			    htwcrash_cb, cb_arg, 0);
+	}
+
+	(*htw->fired)++;
+}
+
+static void *
+htw_create(const struct timer_conf *timer_confs, size_t num_timers,
+	   bool cancel, uint64_t *fired)
+{
+	unsigned int i;
+	struct htw *htw;
+
+	htw = rte_malloc(NULL, sizeof(struct htw), 0);
+	if (htw == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	htw->htw = rte_htw_create();
+	if (htw->htw == NULL)
+		rte_panic("Unable to create HTW\n");
+
+	htw->chtimers =
+		rte_malloc(NULL, sizeof(struct chtimer) * num_timers, 0);
+	if (num_timers > 0 && htw->chtimers == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	htw->tsc_per_tick = s_to_tsc(TICK_LENGTH);
+
+	htw->fired = fired;
+
+	htw_manage(htw);
+
+	for (i = 0; i < num_timers; i++) {
+		const struct timer_conf *timer_conf = &timer_confs[i];
+		struct chtimer *chtimer = &htw->chtimers[i];
+		uint64_t start;
+
+		chtimer->interval = timer_conf->interval / htw->tsc_per_tick;
+
+		start = timer_conf->start / htw->tsc_per_tick;
+
+		rte_htw_add(htw->htw, &chtimer->htimer,
+			    start, 0, htwtest_cb, htw, 0);
+
+		if (cancel) {
+			uint64_t cancel_start;
+
+			chtimer->cancel_offset =
+				s_to_tsc(CANCELED_OFFSET) / htw->tsc_per_tick;
+
+			cancel_start = start + chtimer->cancel_offset;
+
+			rte_htw_add(htw->htw, &chtimer->canceled_htimer,
+				    cancel_start, 0, htwcrash_cb, NULL, 0);
+		} else
+			chtimer->cancel_offset = 0;
+	}
+
+	rte_htw_process(htw->htw);
+
+	return htw;
+}
+
+static void
+htw_destroy(void *data)
+{
+	struct htw *htw = data;
+
+	rte_htw_destroy(htw->htw);
+
+	rte_free(htw->chtimers);
+
+	rte_free(htw);
+}
+
+static struct timer_lib_ops htw_ops = {
+	.name = "htw",
+	.create = htw_create,
+	.manage = htw_manage,
+	.manage_time = htw_manage_time,
+	.destroy = htw_destroy,
+};
+
+static const struct timer_lib_ops *lib_ops[] = {
+	&timer_ops, &htimer_ops, &htw_ops
+};
+
+#define DUMMY_TASK_SIZE (2500)
+
+static __rte_noinline uint64_t
+do_dummy_task(void)
+{
+	uint64_t result = 0;
+	unsigned int i;
+
+	for (i = 0; i < DUMMY_TASK_SIZE; i++)
+		result += rte_rand();
+
+	return result;
+}
+
+struct work_log {
+	uint64_t tasks_completed;
+	uint64_t runtime;
+};
+
+#define TARGET_RUNTIME (4.0) /* s */
+
+struct run_result {
+	uint64_t tasks_completed;
+	uint64_t timer_fired;
+	uint64_t latency;
+};
+
+static void
+run_with_lib(const struct timer_lib_ops *timer_ops,
+	     const struct timer_conf *timer_confs, size_t num_timers,
+	     bool cancel, struct run_result *result)
+{
+	void *timer_data;
+	uint64_t deadline;
+	uint64_t start;
+	uint64_t now;
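+	/* volatile, and verified below, so the dummy work isn't optimized away */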
+	volatile uint64_t sum = 0;
+
+	result->tasks_completed = 0;
+	result->timer_fired = 0;
+
+	timer_data = timer_ops->create(timer_confs, num_timers, cancel,
+				       &result->timer_fired);
+
+	start = rte_get_tsc_cycles();
+
+	deadline = start + s_to_tsc(TARGET_RUNTIME);
+
+	do {
+		sum += do_dummy_task();
+
+		result->tasks_completed++;
+
+		now = rte_get_tsc_cycles();
+
+		timer_ops->manage_time(timer_data, now);
+	} while (now < deadline);
+
+	RTE_VERIFY(sum != 0);
+
+	result->latency = rte_get_tsc_cycles() - start;
+
+	timer_ops->destroy(timer_data);
+}
+
+static void
+benchmark_timer_libs(double aggregate_expiration_rate, uint64_t num_timers,
+		     bool cancel)
+{
+	struct timer_conf timer_confs[num_timers];
+	struct run_result nop_result;
+	double nop_per_task_latency;
+	struct run_result lib_results[RTE_DIM(lib_ops)];
+	uint64_t lib_overhead[RTE_DIM(lib_ops)];
+
+	unsigned int i;
+
+	printf("Configuration:\n");
+	printf("    Aggregate timer expiration rate: %.3e Hz\n",
+	       aggregate_expiration_rate);
+	if (cancel)
+		printf("    Aggregate timer cancellation rate: %.3e Hz\n",
+		       aggregate_expiration_rate);
+	printf("    Concurrent timers: %zd\n", num_timers);
+	printf("    Tick length: %.1e s\n", TICK_LENGTH);
+
+	rte_srand(4711);
+
+	get_timer_confs(aggregate_expiration_rate, timer_confs, num_timers);
+
+	run_with_lib(&nop_ops, NULL, 0, false, &nop_result);
+	nop_per_task_latency =
+		(double)nop_result.latency / nop_result.tasks_completed;
+
+	for (i = 0; i < RTE_DIM(lib_ops); i++) {
+		struct run_result *lib_result = &lib_results[i];
+		double per_task_latency;
+
+		run_with_lib(lib_ops[i], timer_confs, num_timers, cancel,
+			     lib_result);
+
+		per_task_latency = (double)lib_result->latency /
+			lib_result->tasks_completed;
+
+		if (per_task_latency > nop_per_task_latency)
+			lib_overhead[i] =
+				(per_task_latency - nop_per_task_latency) *
+				lib_result->tasks_completed;
+		else
+			lib_overhead[i] = 0;
+	}
+
+	printf("Results:\n");
+
+	printf("    Work between manage calls: %.0f TSC cycles\n",
+	       (double)nop_result.latency / nop_result.tasks_completed);
+
+	printf("\n");
+	printf("%-24s", "");
+	for (i = 0; i < RTE_DIM(lib_ops); i++)
+		printf("%12s", lib_ops[i]->name);
+	printf("\n");
+
+	printf("%-24s", "    Runtime [s]");
+	for (i = 0; i < RTE_DIM(lib_ops); i++)
+		printf("%12.3e", tsc_to_s(lib_results[i].latency));
+	printf("\n");
+
+	printf("%-24s", "    Expiration rate [Hz]");
+	for (i = 0; i < RTE_DIM(lib_ops); i++)
+		printf("%12.3e", lib_results[i].timer_fired /
+		       tsc_to_s(lib_results[i].latency));
+	printf("\n");
+
+	printf("%-24s", "    Overhead [%]");
+	for (i = 0; i < RTE_DIM(lib_ops); i++)
+		printf("%12.3f", 100 * (double)lib_overhead[i] /
+		       (double)lib_results[i].latency);
+	printf("\n");
+
+	printf("%-24s", "    Per expiration [TSC]");
+	for (i = 0; i < RTE_DIM(lib_ops); i++)
+		printf("%12"PRIu64, lib_overhead[i] /
+		       lib_results[i].timer_fired);
+	printf("\n");
+
+	printf("%-24s", "    Per manage() [TSC]");
+	for (i = 0; i < RTE_DIM(lib_ops); i++)
+		printf("%12"PRIu64, lib_overhead[i] /
+		       lib_results[i].tasks_completed);
+	printf("\n");
+}
+
+static void
+benchmark_timer_libs_mode(double aggregate_expiration_rate, bool cancel)
+{
+	benchmark_timer_libs(aggregate_expiration_rate, 100, cancel);
+	benchmark_timer_libs(aggregate_expiration_rate, 100000, cancel);
+}
+
+static void
+benchmark_timer_libs_rate(double aggregate_expiration_rate)
+{
+	benchmark_timer_libs_mode(aggregate_expiration_rate, false);
+	benchmark_timer_libs_mode(aggregate_expiration_rate, true);
+}
+
+#define MANAGE_ITERATIONS (10000000)
+
+static uint64_t
+run_manage(const struct timer_lib_ops *timer_ops, bool user_provided_time)
+{
+	uint64_t start;
+	uint64_t latency;
+	void *timer_data;
+
+	timer_data = timer_ops->create(NULL, 0, false, NULL);
+
+	start = rte_get_tsc_cycles();
+
+	unsigned int i;
+	for (i = 0; i < MANAGE_ITERATIONS; i++)
+		if (user_provided_time && timer_ops->manage_time != NULL) {
+			uint64_t now;
+
+			now = rte_get_tsc_cycles();
+
+			timer_ops->manage_time(timer_data, now);
+		} else
+			timer_ops->manage(timer_data);
+
+	latency = rte_get_tsc_cycles() - start;
+
+	timer_ops->destroy(timer_data);
+
+	return latency / MANAGE_ITERATIONS;
+}
+
+static void
+benchmark_timer_libs_timeless_manage(bool user_provided_time)
+{
+	unsigned int i;
+	uint64_t nop_latency;
+
+	nop_latency = run_manage(&nop_ops, user_provided_time);
+
+	printf("Zero-timers manage() overhead%s:\n", user_provided_time ?
+	       " (w/ user-provided time)" : "");
+
+	for (i = 0; i < RTE_DIM(lib_ops); i++) {
+		const struct timer_lib_ops *ops = lib_ops[i];
+		uint64_t latency;
+
+		latency = run_manage(ops, user_provided_time);
+
+		if (latency > nop_latency)
+			latency -= nop_latency;
+		else
+			latency = 0;
+
+		printf("    %s: %"PRIu64" TSC cycles\n", ops->name, latency);
+	}
+}
+
+static int
+test_timer_htimer_htw_perf(void)
+{
+	/* warm up */
+	rte_delay_us_block(10000);
+
+	benchmark_timer_libs_rate(1e6);
+
+	benchmark_timer_libs_timeless_manage(false);
+	benchmark_timer_libs_timeless_manage(true);
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(timer_htimer_htw_perf_autotest,
+		      test_timer_htimer_htw_perf);
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index 2deec7ea19..5ea1dfa262 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -67,6 +67,8 @@  The public API headers are grouped by topics:
 - **timers**:
   [cycles](@ref rte_cycles.h),
   [timer](@ref rte_timer.h),
+  [htimer_mgr](@ref rte_htimer_mgr.h),
+  [htimer](@ref rte_htimer.h),
   [alarm](@ref rte_alarm.h)
 
 - **locks**:
@@ -163,7 +165,8 @@  The public API headers are grouped by topics:
   [ring](@ref rte_ring.h),
   [stack](@ref rte_stack.h),
   [tailq](@ref rte_tailq.h),
-  [bitmap](@ref rte_bitmap.h)
+  [bitmap](@ref rte_bitmap.h),
+  [bitset](@ref rte_bitset.h)
 
 - **packet framework**:
   * [port](@ref rte_port.h):
diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in
index e859426099..c0cd64db34 100644
--- a/doc/api/doxy-api.conf.in
+++ b/doc/api/doxy-api.conf.in
@@ -45,6 +45,7 @@  INPUT                   = @TOPDIR@/doc/api/doxy-api-index.md \
                           @TOPDIR@/lib/gro \
                           @TOPDIR@/lib/gso \
                           @TOPDIR@/lib/hash \
+                          @TOPDIR@/lib/htimer \
                           @TOPDIR@/lib/ip_frag \
                           @TOPDIR@/lib/ipsec \
                           @TOPDIR@/lib/jobstats \
diff --git a/lib/htimer/meson.build b/lib/htimer/meson.build
new file mode 100644
index 0000000000..2dd5d6a24b
--- /dev/null
+++ b/lib/htimer/meson.build
@@ -0,0 +1,7 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2023 Ericsson AB
+
+sources = files('rte_htw.c', 'rte_htimer_msg_ring.c', 'rte_htimer_mgr.c')
+headers = files('rte_htimer_mgr.h', 'rte_htimer.h')
+
+deps += ['ring']
diff --git a/lib/htimer/rte_htimer.h b/lib/htimer/rte_htimer.h
new file mode 100644
index 0000000000..6ac86292b5
--- /dev/null
+++ b/lib/htimer/rte_htimer.h
@@ -0,0 +1,68 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_HTIMER_H_
+#define _RTE_HTIMER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+#include <rte_bitops.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rte_htimer;
+
+typedef void (*rte_htimer_cb_t)(struct rte_htimer *, void *);
+
+struct rte_htimer {
+	/**
+	 * Absolute timer expiration time (in ticks).
+	 */
+	uint64_t expiration_time;
+	/**
+	 * Time between expirations (in ticks). Zero for one-shot timers.
+	 */
+	uint64_t period;
+	/**
+	 * Owning lcore. May safely be read from any thread.
+	 */
+	uint32_t owner_lcore_id;
+	/**
+	 * The current state of the timer.
+	 */
+	uint32_t state:4;
+	/**
+	 * Flags set on this timer.
+	 */
+	uint32_t flags:28;
+	/**
+	 * User-specified callback function pointer.
+	 */
+	rte_htimer_cb_t cb;
+	/**
+	 * Argument for user callback.
+	 */
+	void *cb_arg;
+	/**
+	 * Pointers used to add timer to various internal lists.
+	 */
+	LIST_ENTRY(rte_htimer) entry;
+};
+
+#define RTE_HTIMER_FLAG_ABSOLUTE_TIME RTE_BIT32(0)
+#define RTE_HTIMER_FLAG_PERIODICAL RTE_BIT32(1)
+#define RTE_HTIMER_FLAG_TIME_TICK RTE_BIT32(2)
+#define RTE_HTIMER_FLAG_TIME_TSC RTE_BIT32(3)
+
+#define RTE_HTIMER_STATE_PENDING 1
+#define RTE_HTIMER_STATE_EXPIRED 2
+#define RTE_HTIMER_STATE_CANCELED 3
+
+LIST_HEAD(rte_htimer_list, rte_htimer);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_HTIMER_H_ */
diff --git a/lib/htimer/rte_htimer_mgr.c b/lib/htimer/rte_htimer_mgr.c
new file mode 100644
index 0000000000..efdfcf0985
--- /dev/null
+++ b/lib/htimer/rte_htimer_mgr.c
@@ -0,0 +1,547 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include <inttypes.h>
+#include <math.h>
+#include <stdbool.h>
+#include <sys/queue.h>
+#include <unistd.h>
+
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_errno.h>
+#include <rte_htw.h>
+#include <rte_prefetch.h>
+#include <rte_ring_elem.h>
+
+#include "rte_htimer_mgr.h"
+#include "rte_htimer_msg.h"
+#include "rte_htimer_msg_ring.h"
+
+#define MAX_MSG_BATCH_SIZE 16
+
+struct htimer_mgr {
+	struct rte_htimer_msg_ring *msg_ring;
+	struct rte_htw *htw;
+
+	unsigned int async_msgs_idx __rte_cache_aligned;
+	unsigned int num_async_msgs;
+	struct rte_htimer_msg async_msgs[MAX_MSG_BATCH_SIZE];
+} __rte_cache_aligned;
+
+static uint64_t ns_per_tick;
+static double tsc_per_tick;
+
+static struct htimer_mgr mgrs[RTE_MAX_LCORE + 1];
+
+#define MAX_ASYNC_TRANSACTIONS 1024
+#define MSG_RING_SIZE MAX_ASYNC_TRANSACTIONS
+
+static inline uint64_t
+tsc_to_tick(uint64_t tsc)
+{
+	return tsc / tsc_per_tick;
+}
+
+static inline uint64_t
+tsc_to_tick_round_up(uint64_t tsc)
+{
+	uint64_t tick;
+
+	tick = ceil(tsc / tsc_per_tick);
+
+	return tick;
+}
+
+static inline uint64_t
+ns_to_tick(uint64_t ns)
+{
+	return ns / ns_per_tick;
+}
+
+static inline uint64_t
+ns_to_tick_round_up(uint64_t ns)
+{
+	uint64_t tick;
+
+	tick = ceil(ns / (double)ns_per_tick);
+
+	return tick;
+}
+
+static inline uint64_t
+tick_to_ns(uint64_t tick)
+{
+	return tick * ns_per_tick;
+}
+
+static struct htimer_mgr *
+mgr_get(unsigned int lcore_id)
+{
+	return &mgrs[lcore_id];
+}
+
+static int
+mgr_init(unsigned int lcore_id)
+{
+	char ring_name[RTE_RING_NAMESIZE];
+	unsigned int socket_id;
+	struct htimer_mgr *mgr = &mgrs[lcore_id];
+
+	socket_id = rte_lcore_to_socket_id(lcore_id);
+
+	snprintf(ring_name, sizeof(ring_name), "htimer_%d", lcore_id);
+
+	mgr->msg_ring =
+		rte_htimer_msg_ring_create(ring_name, MSG_RING_SIZE, socket_id,
+					   RING_F_SC_DEQ);
+
+	if (mgr->msg_ring == NULL)
+		goto err;
+
+	mgr->htw = rte_htw_create();
+
+	if (mgr->htw == NULL)
+		goto err_free_ring;
+
+	mgr->async_msgs_idx = 0;
+	mgr->num_async_msgs = 0;
+
+	return 0;
+
+err_free_ring:
+	rte_htimer_msg_ring_free(mgr->msg_ring);
+err:
+	return -ENOMEM;
+}
+
+static void
+mgr_deinit(unsigned int lcore_id)
+{
+	struct htimer_mgr *mgr = &mgrs[lcore_id];
+
+	rte_htw_destroy(mgr->htw);
+
+	rte_htimer_msg_ring_free(mgr->msg_ring);
+}
+
+static volatile bool initialized;
+
+static void
+assure_initialized(void)
+{
+	RTE_ASSERT(initialized);
+}
+
+int
+rte_htimer_mgr_init(uint64_t _ns_per_tick)
+{
+	unsigned int lcore_id;
+
+	RTE_VERIFY(!initialized);
+
+	ns_per_tick = _ns_per_tick;
+
+	tsc_per_tick = (ns_per_tick / 1e9) * rte_get_tsc_hz();
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		int rc;
+
+		rc = mgr_init(lcore_id);
+
+		if (rc < 0) {
+			unsigned int deinit_lcore_id;
+
+			for (deinit_lcore_id = 0; deinit_lcore_id < lcore_id;
+			     deinit_lcore_id++)
+				mgr_deinit(deinit_lcore_id);
+
+			return rc;
+		}
+	}
+
+	initialized = true;
+
+	return 0;
+}
+
+void
+rte_htimer_mgr_deinit(void)
+{
+	unsigned int lcore_id;
+
+	assure_initialized();
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		mgr_deinit(lcore_id);
+
+	initialized = false;
+}
+
+static void
+assure_valid_time_conversion_flags(uint32_t flags __rte_unused)
+{
+	RTE_ASSERT(!((flags & RTE_HTIMER_FLAG_TIME_TSC) &&
+		     (flags & RTE_HTIMER_FLAG_TIME_TICK)));
+}
+
+static void
+assure_valid_add_flags(uint32_t flags)
+{
+	assure_valid_time_conversion_flags(flags);
+
+	RTE_ASSERT(!(flags & ~(RTE_HTIMER_FLAG_PERIODICAL |
+			       RTE_HTIMER_FLAG_ABSOLUTE_TIME |
+			       RTE_HTIMER_FLAG_TIME_TSC |
+			       RTE_HTIMER_FLAG_TIME_TICK)));
+}
+
+static uint64_t
+convert_time(uint64_t t, uint32_t flags)
+{
+	if (flags & RTE_HTIMER_FLAG_TIME_TSC)
+		return tsc_to_tick(t);
+	else if (flags & RTE_HTIMER_FLAG_TIME_TICK)
+		return t;
+	else
+		return ns_to_tick(t);
+}
+
+void
+rte_htimer_mgr_add(struct rte_htimer *timer, uint64_t expiration_time,
+		   uint64_t period, rte_htimer_cb_t timer_cb,
+		   void *timer_cb_arg, uint32_t flags)
+{
+	unsigned int lcore_id = rte_lcore_id();
+	struct htimer_mgr *mgr = mgr_get(lcore_id);
+	uint64_t expiration_time_tick;
+	uint64_t period_tick;
+
+	assure_initialized();
+
+	assure_valid_add_flags(flags);
+
+	expiration_time_tick = convert_time(expiration_time, flags);
+
+	period_tick = convert_time(period, flags);
+
+	rte_htw_add(mgr->htw, timer, expiration_time_tick, period_tick,
+		    timer_cb, timer_cb_arg, flags);
+
+	timer->owner_lcore_id = lcore_id;
+}
+
+int
+rte_htimer_mgr_cancel(struct rte_htimer *timer)
+{
+	unsigned int lcore_id = rte_lcore_id();
+	struct htimer_mgr *mgr = mgr_get(lcore_id);
+
+	assure_initialized();
+
+	RTE_ASSERT(timer->owner_lcore_id == lcore_id);
+
+	switch (timer->state) {
+	case RTE_HTIMER_STATE_PENDING:
+		rte_htw_cancel(mgr->htw, timer);
+		return 0;
+	case RTE_HTIMER_STATE_EXPIRED:
+		return -ETIME;
+	default:
+		RTE_ASSERT(timer->state == RTE_HTIMER_STATE_CANCELED);
+		return -ENOENT;
+	}
+}
+
+static int
+send_msg(unsigned int receiver_lcore_id, enum rte_htimer_msg_type msg_type,
+	 struct rte_htimer *timer, rte_htimer_mgr_async_op_cb_t async_cb,
+	 void *async_cb_arg, const struct rte_htimer_msg_request *request,
+	 const struct rte_htimer_msg_response *response)
+{
+	struct htimer_mgr *receiver_mgr;
+	struct rte_htimer_msg_ring *receiver_ring;
+	struct rte_htimer_msg msg = (struct rte_htimer_msg) {
+		.msg_type = msg_type,
+		.timer = timer,
+		.async_cb = async_cb,
+		.async_cb_arg = async_cb_arg
+	};
+	int rc;
+
+	if (request != NULL)
+		msg.request = *request;
+	else
+		msg.response = *response;
+
+	receiver_mgr = mgr_get(receiver_lcore_id);
+
+	receiver_ring = receiver_mgr->msg_ring;
+
+	rc = rte_htimer_msg_ring_enqueue(receiver_ring, &msg);
+
+	return rc;
+}
+
+static int
+send_request(unsigned int receiver_lcore_id, enum rte_htimer_msg_type msg_type,
+	     struct rte_htimer *timer,
+	     rte_htimer_mgr_async_op_cb_t async_cb, void *async_cb_arg)
+{
+	unsigned int lcore_id = rte_lcore_id();
+	struct rte_htimer_msg_request request = {
+		.source_lcore_id = lcore_id
+	};
+
+	return send_msg(receiver_lcore_id, msg_type, timer, async_cb,
+			async_cb_arg, &request, NULL);
+}
+
+static int
+send_response(unsigned int receiver_lcore_id, enum rte_htimer_msg_type msg_type,
+	      struct rte_htimer *timer,
+	      rte_htimer_mgr_async_op_cb_t async_cb, void *async_cb_arg,
+	      int result)
+{
+	struct rte_htimer_msg_response response = {
+		.result = result
+	};
+
+	return send_msg(receiver_lcore_id, msg_type, timer, async_cb,
+			async_cb_arg, NULL, &response);
+}
+
+int
+rte_htimer_mgr_async_add(struct rte_htimer *timer,
+			 unsigned int target_lcore_id,
+			 uint64_t expiration_time, uint64_t period,
+			 rte_htimer_cb_t timer_cb, void *timer_cb_arg,
+			 uint32_t flags,
+			 rte_htimer_mgr_async_op_cb_t async_cb,
+			 void *async_cb_arg)
+{
+	*timer = (struct rte_htimer) {
+		.expiration_time = expiration_time,
+		.period = period,
+		.owner_lcore_id = target_lcore_id,
+		.flags = flags,
+		.cb = timer_cb,
+		.cb_arg = timer_cb_arg
+	};
+
+	assure_initialized();
+
+	if (send_request(target_lcore_id, rte_htimer_msg_type_add_request,
+			 timer, async_cb, async_cb_arg) < 0)
+		return -EBUSY;
+
+	return 0;
+}
+
+int
+rte_htimer_mgr_async_cancel(struct rte_htimer *timer,
+			    rte_htimer_mgr_async_op_cb_t async_cb,
+			    void *async_cb_arg)
+{
+	if (send_request(timer->owner_lcore_id,
+			 rte_htimer_msg_type_cancel_request,
+			 timer, async_cb, async_cb_arg) < 0)
+		return -EBUSY;
+
+	return 0;
+}
+
+static int
+process_add_request(struct rte_htimer_msg *request)
+{
+	struct rte_htimer *timer = request->timer;
+
+	if (request->async_cb != NULL &&
+	    send_response(request->request.source_lcore_id,
+			  rte_htimer_msg_type_add_response, timer,
+			  request->async_cb, request->async_cb_arg,
+			  RTE_HTIMER_MGR_ASYNC_RESULT_ADDED) < 0)
+		return -EBUSY;
+
+	rte_htimer_mgr_add(timer, timer->expiration_time, timer->period,
+			   timer->cb, timer->cb_arg, timer->flags);
+
+	return 0;
+}
+
+static int
+process_cancel_request(struct rte_htimer_msg *request)
+{
+	unsigned int lcore_id = rte_lcore_id();
+	struct htimer_mgr *mgr = mgr_get(lcore_id);
+	struct rte_htimer *timer = request->timer;
+	int result;
+
+	switch (timer->state) {
+	case RTE_HTIMER_STATE_PENDING:
+		result = RTE_HTIMER_MGR_ASYNC_RESULT_CANCELED;
+		break;
+	case RTE_HTIMER_STATE_CANCELED:
+		result = RTE_HTIMER_MGR_ASYNC_RESULT_ALREADY_CANCELED;
+		break;
+	case RTE_HTIMER_STATE_EXPIRED:
+		result = RTE_HTIMER_MGR_ASYNC_RESULT_EXPIRED;
+		break;
+	default:
+		RTE_ASSERT(0);
+		result = -1;
+	}
+
+	if (request->async_cb != NULL &&
+	    send_response(request->request.source_lcore_id,
+			  rte_htimer_msg_type_cancel_response, timer,
+			  request->async_cb, request->async_cb_arg,
+			  result) < 0)
+		return -EBUSY;
+
+	if (timer->state == RTE_HTIMER_STATE_PENDING)
+		rte_htw_cancel(mgr->htw, timer);
+
+	return 0;
+}
+
+static int
+process_response(struct rte_htimer_msg *msg)
+{
+	struct rte_htimer_msg_response *response = &msg->response;
+
+	if (msg->async_cb != NULL)
+		msg->async_cb(msg->timer, response->result, msg->async_cb_arg);
+
+	return 0;
+}
+
+static int
+process_msg(struct rte_htimer_msg *msg)
+{
+	switch (msg->msg_type) {
+	case rte_htimer_msg_type_add_request:
+		return process_add_request(msg);
+	case rte_htimer_msg_type_cancel_request:
+		return process_cancel_request(msg);
+	case rte_htimer_msg_type_add_response:
+	case rte_htimer_msg_type_cancel_response:
+		return process_response(msg);
+	default:
+		RTE_ASSERT(0);
+		return -EBUSY;
+	}
+}
+
+static void
+dequeue_async_msgs(struct htimer_mgr *mgr)
+{
+	unsigned int i;
+
+	if (likely(rte_htimer_msg_ring_empty(mgr->msg_ring)))
+		return;
+
+	if (unlikely(mgr->num_async_msgs > 0))
+		return;
+
+	mgr->async_msgs_idx = 0;
+
+	mgr->num_async_msgs =
+		rte_htimer_msg_ring_dequeue_burst(mgr->msg_ring,
+						  mgr->async_msgs,
+						  MAX_MSG_BATCH_SIZE);
+
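+	/* prefetch the timers referenced by the messages before processing */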
+	for (i = 0; i < mgr->num_async_msgs; i++)
+		rte_prefetch1(mgr->async_msgs[i].timer);
+}
+
+static void
+process_async(struct htimer_mgr *mgr)
+{
+	for (;;) {
+		struct rte_htimer_msg *msg;
+
+		dequeue_async_msgs(mgr);
+
+		if (mgr->num_async_msgs == 0)
+			break;
+
+		msg = &mgr->async_msgs[mgr->async_msgs_idx];
+
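+		/* A failure (e.g., a full response ring) leaves the message
+		 * in the batch, to be retried on the next call.
+		 */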
+		if (process_msg(msg) < 0)
+			break;
+
+		mgr->num_async_msgs--;
+		mgr->async_msgs_idx++;
+	}
+}
+
+static __rte_always_inline void
+htimer_mgr_manage_time(uint64_t current_time, uint32_t flags)
+{
+	unsigned int lcore_id = rte_lcore_id();
+	struct htimer_mgr *mgr = mgr_get(lcore_id);
+	uint64_t current_tick;
+
+	assure_initialized();
+
+	assure_valid_time_conversion_flags(flags);
+
+	process_async(mgr);
+
+	current_tick = convert_time(current_time, flags);
+
+	rte_htw_manage(mgr->htw, current_tick);
+}
+
+void
+rte_htimer_mgr_manage_time(uint64_t current_time, uint32_t flags)
+{
+	htimer_mgr_manage_time(current_time, flags);
+}
+
+void
+rte_htimer_mgr_manage(void)
+{
+	uint64_t current_time;
+
+	current_time = rte_get_tsc_cycles();
+
+	htimer_mgr_manage_time(current_time, RTE_HTIMER_FLAG_TIME_TSC);
+}
+
+void
+rte_htimer_mgr_process(void)
+{
+	unsigned int lcore_id = rte_lcore_id();
+	struct htimer_mgr *mgr = mgr_get(lcore_id);
+
+	assure_initialized();
+
+	process_async(mgr);
+
+	rte_htw_process(mgr->htw);
+}
+
+uint64_t
+rte_htimer_mgr_current_time(void)
+{
+	uint64_t current_tick;
+
+	current_tick = rte_htimer_mgr_current_tick();
+
+	return tick_to_ns(current_tick);
+}
+
+uint64_t
+rte_htimer_mgr_current_tick(void)
+{
+	unsigned int lcore_id = rte_lcore_id();
+	struct htimer_mgr *mgr = mgr_get(lcore_id);
+	uint64_t current_tick;
+
+	current_tick = rte_htw_current_time(mgr->htw);
+
+	return current_tick;
+}
diff --git a/lib/htimer/rte_htimer_mgr.h b/lib/htimer/rte_htimer_mgr.h
new file mode 100644
index 0000000000..173a95f9c0
--- /dev/null
+++ b/lib/htimer/rte_htimer_mgr.h
@@ -0,0 +1,516 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_HTIMER_MGR_H_
+#define _RTE_HTIMER_MGR_H_
+
+/**
+ * @file
+ *
+ * RTE High-performance Timer Manager
+ *
+ * The high-performance timer manager (htimer_mgr) API provides access
+ * to a low-overhead, scalable timer service.
+ *
+ * The functionality offered is similar to that of <rte_timer.h>, but
+ * the internals differ significantly, and there are slight differences
+ * in the programming interface as well.
+ *
+ * Core timer management is implemented by means of a hierarchical
+ * timer wheel (HTW), as per the Varghese and Lauck paper <em>Hashed
+ * and Hierarchical Timing Wheels: Data Structures for the Efficient
+ * Implementation of a Timer Facility</em>.
+ *
+ * Varghese et al's approach is further enhanced by the placement of a
+ * bitset in front of each wheel's slots. Each slot has a
+ * corresponding bit in the bitset. If a bit is clear, there are no
+ * pending timers scheduled for that slot. A set bit means there
+ * potentially are timers scheduled for that slot. This scheme reduces
+ * the overhead of the rte_htimer_mgr_manage() function, where slots
+ * of one or more of the wheels of the thread's HTW are scanned if
+ * time has progressed since the last call. This improves performance
+ * in all cases, except for very densely populated timer wheels.
+ *
+ * One such HTW is instantiated for each lcore (EAL thread), and
+ * instances are also available for registered non-EAL threads.
+ *
+ * The <rte_htimer_mgr.h> API may not be called from unregistered
+ * non-EAL threads.
+ *
+ * The per-lcore-id HTW instance is private to that thread.
+ *
+ * The htimer API supports scheduling timers to a different thread
+ * (and thus, a different HTW) than the caller's. It is also possible
+ * to cancel timers managed by a "remote" timer wheel.
+ *
+ * All interaction with a remote HTW (i.e., adding timers to it, or
+ * removing timers from it) is done by sending a request, in the form
+ * of a message on a DPDK ring, to that instance. Such requests are
+ * processed and, if
+ * required, acknowledged when the remote (target) thread calls
+ * rte_htimer_mgr_manage(), rte_htimer_mgr_manage_time() or
+ * rte_htimer_mgr_process().
+ *
+ * This message-based interaction avoids comparatively heavy-weight
+ * synchronization primitives such as spinlocks. Only release-acquire
+ * type synchronization on the rings is needed.
+ *
+ * Timer memory management is the responsibility of the
+ * application. After library-level initialization has completed, no
+ * more dynamic memory is allocated by the htimer library. When
+ * installing timers on remote lcores, care must be taken by the
+ * application to avoid race conditions, in particular use-after-free
+ * (or use-after-recycle) issues of the rte_htimer structure. A timer
+ * struct may only be deallocated and/or recycled if the application
+ * can guarantee that there are no cancel requests in flight.
+ *
+ * The htimer library is able to give a definitive answer to the
+ * question of whether or not a remote timer had expired at the time
+ * of cancellation.
+ *
+ * The htimer library uses TSC as the default time source. A different
+ * time source may be used, in which case the application must
+ * explicitly provide the time using rte_htimer_mgr_manage_time().
+ * This function may also be used even if TSC is the time source, in
+ * cases where the application, for some other purpose, is already in
+ * possession of the current TSC time, to avoid the overhead of an
+ * extra `rdtsc` instruction (or its equivalent on non-x86 ISAs).
+ *
+ * The htimer library supports both periodic and single-shot timers.
+ *
+ * The timer tick defines a quantum of time in the htimer library. The
+ * length of a tick (quantified in nanoseconds) is left to the
+ * application to specify. The core HTW implementation allows for all
+ * 64 bits to be used.
+ *
+ * Very fine-grained ticks increase the HTW overhead (since more slots
+ * need to be scanned). Long ticks only allow for very coarse-grained
+ * timers, and in timer-heavy applications may cause load spikes when
+ * time advances into a new tick.
+ *
+ * A seemingly reasonable timer tick length is somewhere in the range
+ * between 100 ns and 100 us (or maybe up to as high as 1 ms),
+ * depending on the application.
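+ *
+ * Below is a minimal usage sketch (illustrative only; it assumes a
+ * timer callback type in <rte_htimer.h> with the signature
+ * void (*)(struct rte_htimer *, void *), and that the library has
+ * been initialized with 1 us ticks, i.e., rte_htimer_mgr_init(1000)):
+ *
+ * @code{.c}
+ * static void
+ * timeout_cb(struct rte_htimer *timer __rte_unused, void *cb_arg)
+ * {
+ *         *(bool *)cb_arg = true;
+ * }
+ *
+ * static void
+ * wait_10us(void)
+ * {
+ *         struct rte_htimer timer;
+ *         bool expired = false;
+ *
+ *         // one-shot timer, expiring 10000 ns (i.e., 10 us) from now
+ *         rte_htimer_mgr_add(&timer, 10000, 0, timeout_cb, &expired, 0);
+ *
+ *         while (!expired)
+ *                 rte_htimer_mgr_manage();
+ * }
+ * @endcode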
+ */
+
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_compat.h>
+#include <rte_htimer.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * The timer has been added to the timer manager on the target lcore.
+ */
+#define RTE_HTIMER_MGR_ASYNC_RESULT_ADDED 1
+
+/**
+ * The timer cancellation request has completed, before the timer expired
+ * on the target lcore.
+ */
+#define RTE_HTIMER_MGR_ASYNC_RESULT_CANCELED 2
+
+/**
+ * The timer cancellation request was denied, since the timer was
+ * already marked as canceled.
+ */
+#define RTE_HTIMER_MGR_ASYNC_RESULT_ALREADY_CANCELED 3
+
+/**
+ * At the time the cancellation request was processed on the target
+ * lcore, the timer had already expired.
+ */
+#define RTE_HTIMER_MGR_ASYNC_RESULT_EXPIRED 4
+
+typedef void (*rte_htimer_mgr_async_op_cb_t)(struct rte_htimer *timer,
+					     int result, void *cb_arg);
+
+/**
+ * Initialize the htimer library.
+ *
+ * Instantiates per-lcore (or per-registered non-EAL thread) timer
+ * wheels and other htimer library data structures, for all current
+ * and future threads.
+ *
+ * This function must be called prior to any other <rte_htimer.h> API
+ * call.
+ *
+ * This function may not be called if the htimer library is already
+ * initialized, but may be called multiple times, provided the library
+ * is deinitialized in between rte_htimer_mgr_init() calls.
+ *
+ * For applications not using TSC as the time source, the \c ns_per_tick
+ * parameter will denote the number of such application time-source-units
+ * per tick.
+ *
+ * This function is not multi-thread safe.
+ *
+ * @param ns_per_tick
+ *   The length (in nanoseconds) of a timer wheel tick.
+ *
+ * @return
+ *   - 0: Success
+ *   - -ENOMEM: Unable to allocate memory needed to initialize timer
+ *      subsystem
+ *
+ * @see rte_htimer_mgr_deinit()
+ * @see rte_get_tsc_hz()
+ */
+
+__rte_experimental
+int
+rte_htimer_mgr_init(uint64_t ns_per_tick);
+
+/**
+ * Deinitialize the htimer library.
+ *
+ * This function deallocates all dynamic memory used by the library,
+ * including HTW instances used by other threads than the caller.
+ *
+ * After this call has been made, no <rte_htimer.h> API call may be
+ * made, except rte_htimer_mgr_init().
+ *
+ * This function may not be called if the htimer library has never been
+ * initialized, or has been deinitialized but not yet initialized
+ * again.
+ *
+ * This function is not multi-thread safe. In particular, no thread
+ * may call any <rte_htimer.h> functions (e.g., rte_htimer_mgr_manage())
+ * during (or after) the htimer library is deinitialized, except if it
+ * is initialized again.
+ *
+ * @see rte_htimer_mgr_init()
+ */
+
+__rte_experimental
+void
+rte_htimer_mgr_deinit(void);
+
+/**
+ * Adds a timer to the calling thread's timer wheel.
+ *
+ * This function schedules a timer on the calling thread's HTW.
+ *
+ * The \c timer_cb callback is called at a point when this thread
+ * calls rte_htimer_mgr_process(), rte_htimer_mgr_manage(), or
+ * rte_htimer_mgr_manage_time() and the current time (either as
+ * retrieved by rte_htimer_mgr_manage() or specified by the application
+ * in rte_htimer_mgr_manage_time()) has passed the expiration time.
+ *
+ * The HTW tracks time in units of \c ticks, which are likely more
+ * coarse-grained than nanosecond and TSC resolution.
+ *
+ * By default, the \c expiration_time is interpreted as the number of
+ * nanoseconds into the future at which the timer should expire,
+ * relative to the last known current time, rounded up to the nearest
+ * tick. Thus, a timer with a certain expiration time may not expire
+ * even though this time was supplied in rte_htimer_mgr_manage_time().
+ * The maximum error is the length of one tick (plus any delays caused
+ * by infrequent manage calls).
+ *
+ * If the \c RTE_HTIMER_FLAG_ABSOLUTE_TIME is set in \c flags, the
+ * expiration time is relative to time zero.
+ *
+ * If the \c RTE_HTIMER_FLAG_PERIODICAL flag is set, the timer is
+ * periodical, and will first expire at the time specified by
+ * the \c expiration_time, and then with an interval as specified
+ * by the \c period parameter.
+ *
+ * An added timer may be canceled using rte_htimer_mgr_cancel() or
+ * rte_htimer_mgr_async_cancel().
+ *
+ * rte_htimer_mgr_add() is multi-thread safe, and may only be called
+ * from an EAL thread or a registered non-EAL thread.
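+ *
+ * For example (an illustrative sketch; tick_timer and tick_cb are
+ * application-defined):
+ *
+ * @code{.c}
+ * // first expiry 1 ms from now, then every 500 us
+ * rte_htimer_mgr_add(&tick_timer, 1000000, 500000, tick_cb, NULL,
+ *                    RTE_HTIMER_FLAG_PERIODICAL);
+ * @endcode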
+ *
+ * @param timer
+ *   The chunk of memory used for managing this timer. This memory
+ *   must not be read or written (or free'd) by the application until
+ *   this timer has expired, or any cancellation attempts have
+ *   completed.
+ * @param expiration_time
+ *   The expiration time (in nanoseconds by default). For periodical
+ *   timers, this time represents the first expiration time.
+ * @param period
+ *   The time in between periodic timer expirations (in nanoseconds by
+ *   default).  Must be set to zero unless the
+ *   \c RTE_HTIMER_FLAG_PERIODICAL flag is set, in which case it must
+ *   be a positive integer.
+ * @param timer_cb
+ *   The timer callback to be called upon timer expiration.
+ * @param timer_cb_arg
+ *   A pointer which will be supplied back to the application in the
+ *   timer callback call.
+ * @param flags
+ *   A bitmask which may contain these flags:
+ *     * \c RTE_HTIMER_FLAG_PERIODICAL
+ *     * \c RTE_HTIMER_FLAG_ABSOLUTE_TIME
+ *     * Either \c RTE_HTIMER_FLAG_TIME_TICK or \c RTE_HTIMER_FLAG_TIME_TSC
+ */
+
+__rte_experimental
+void
+rte_htimer_mgr_add(struct rte_htimer *timer, uint64_t expiration_time,
+		   uint64_t period, rte_htimer_cb_t timer_cb,
+		   void *timer_cb_arg, uint32_t flags);
+
+/**
+ * Cancel a timer scheduled in the calling thread's timer wheel.
+ *
+ * This function cancels a timer scheduled on the calling thread's HTW.
+ *
+ * rte_htimer_mgr_cancel() may be called on a timer which has already
+ * (synchronously or asynchronously) been canceled, or may have expired.
+ * However, the \c rte_htimer struct pointed to by \c timer may not
+ * have been freed or recycled since.
+ *
+ * rte_htimer_mgr_cancel() may not be called for a timer that was
+ * never (or, not yet) added.
+ *
+ * A timer added using rte_htimer_mgr_async_add() may not be
+ * canceled using this function until after the add operation has
+ * completed (i.e., the completion callback has been run).
+ *
+ * rte_htimer_mgr_cancel() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
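+ *
+ * For example (illustrative; the timer was added earlier):
+ *
+ * @code{.c}
+ * if (rte_htimer_mgr_cancel(&timer) == -ETIME) {
+ *         // too late; the timer callback has already been run
+ * }
+ * @endcode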
+ *
+ * @param timer
+ *   The timer to be canceled.
+ * @return
+ *   - 0: Success
+ *   - -ETIME: Timer has expired, and thus could not be canceled.
+ *   - -ENOENT: Timer was already canceled.
+ */
+
+__rte_experimental
+int
+rte_htimer_mgr_cancel(struct rte_htimer *timer);
+
+/**
+ * Asynchronously add a timer to the specified lcore's timer wheel.
+ *
+ * This function is the equivalent of rte_htimer_mgr_add(), but allows
+ * the calling ("source") thread to scheduled a timer in a HTW other
+ * than it's own. The operation is asynchronous.
+ *
+ * The timer works the same as a timer added locally. Thus, the \c
+ * timer_cb callback is called by the target thread, and it may be
+ * canceled using rte_htimer_mgr_cancel().
+ *
+ * The source thread may be the same as the target thread.
+ *
+ * Only EAL threads or registered non-EAL threads may be targeted.
+ *
+ * A successful rte_htimer_mgr_async_add() call guarantees that the
+ * timer will be scheduled on the target lcore at some future time,
+ * provided the target thread calls rte_htimer_mgr_process(),
+ * rte_htimer_mgr_manage(), or rte_htimer_mgr_manage_time().
+ *
+ * The \c async_cb callback is called on the source thread as a part
+ * of its rte_htimer_mgr_process(), rte_htimer_mgr_manage(), or
+ * rte_htimer_mgr_manage_time() call, when the asynchronous add
+ * operation has completed (i.e., the timer is scheduled in the target
+ * HTW).
+ *
+ * \c async_cb may be NULL, in which case no notification is given.
+ *
+ * An asynchronously added timer may be asynchronously canceled (i.e.,
+ * using rte_htimer_mgr_async_cancel()) at any point, by any thread,
+ * after the rte_htimer_mgr_async_add() call. An asynchronously added
+ * timer may not be canceled using rte_htimer_mgr_cancel() until
+ * after the completion callback has been executed.
+ *
+ * rte_htimer_mgr_async_add() is multi-thread safe, and may only be called
+ * from an EAL thread or a registered non-EAL thread.
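+ *
+ * An illustrative sketch (names, error handling and the timer callback
+ * timeout_cb are application-specific):
+ *
+ * @code{.c}
+ * static void
+ * add_done_cb(struct rte_htimer *timer __rte_unused, int result,
+ *             void *cb_arg __rte_unused)
+ * {
+ *         RTE_VERIFY(result == RTE_HTIMER_MGR_ASYNC_RESULT_ADDED);
+ * }
+ *
+ * // schedule a one-shot 1 ms timer on lcore 2's HTW
+ * int rc = rte_htimer_mgr_async_add(&timer, 2, 1000000, 0, timeout_cb,
+ *                                   NULL, 0, add_done_cb, NULL);
+ * if (rc == -EBUSY) {
+ *         // too many in-flight async operations; retry later, e.g.,
+ *         // after a call to rte_htimer_mgr_process()
+ * }
+ * @endcode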
+ *
+ * @param timer
+ *   The chunk of memory used for managing this timer. This memory
+ *   must not be read or written (or free'd) by the application until
+ *   this timer has expired, or any cancellation attempts have
+ *   completed.
+ * @param target_lcore_id
+ *   The lcore id of the thread whose HTW will manage this timer.
+ * @param expiration_time
+ *   The expiration time (measured in nanoseconds). For periodical
+ *   timers, this time represent the first expiration time.
+ * @param period
+ *   The time in between periodic timer expirations (measured in
+ *   nanoseconds).  Must be set to zero unless the
+ *   RTE_HTIMER_FLAG_PERIODICAL flag is set, in which case it must be
+ *   a positive integer.
+ * @param timer_cb
+ *   The timer callback to be called upon timer expiration.
+ * @param timer_cb_arg
+ *   A pointer which will be supplied back to the application in the
+ *   timer callback call.
+ * @param async_cb
+ *   The asynchronous operation callback to be called when the
+ *   add operation is completed.
+ * @param async_cb_arg
+ *   A pointer which will be supplied back to the application in the
+ *   \c async_cb callback call.
+ * @param flags
+ *   RTE_HTIMER_FLAG_ABSOLUTE_TIME and/or RTE_HTIMER_FLAG_PERIODICAL.
+ * @return
+ *   - 0: Success
+ *   - -EBUSY: The maximum number of concurrently queued asynchronous
+ *      operations has been reached.
+ */
+
+__rte_experimental
+int
+rte_htimer_mgr_async_add(struct rte_htimer *timer,
+			 unsigned int target_lcore_id,
+			 uint64_t expiration_time, uint64_t period,
+			 rte_htimer_cb_t timer_cb, void *timer_cb_arg,
+			 uint32_t flags,
+			 rte_htimer_mgr_async_op_cb_t async_cb,
+			 void *async_cb_arg);
+
+/**
+ * Asynchronously cancel a timer in any thread's timer wheel.
+ *
+ * This function is the equivalent of rte_htimer_mgr_cancel(), but
+ * allows the calling ("source") thread to also cancel a timer in an
+ * HTW other than its own. The operation is asynchronous.
+ *
+ * A thread may asynchronously cancel a timer scheduled on its own
+ * HTW.
+ *
+ * The \c async_cb callback is called on the source thread as a part
+ * of its rte_htimer_mgr_process(), rte_htimer_mgr_manage(), or
+ * rte_htimer_mgr_manage_time() call, when the asynchronous cancel
+ * operation has completed (i.e., the cancellation request has been
+ * processed by the target thread).
+ *
+ * \c async_cb may be NULL, in which case no notification is given.
+ *
+ * A timer may be asynchronously canceled at any point, by any thread,
+ * after it has been either synchronously or asynchronously added.
+ *
+ * rte_htimer_mgr_async_cancel() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
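+ *
+ * An illustrative sketch of a completion callback, interpreting the
+ * RTE_HTIMER_MGR_ASYNC_RESULT_* codes (all names are
+ * application-defined):
+ *
+ * @code{.c}
+ * static void
+ * cancel_done_cb(struct rte_htimer *timer __rte_unused, int result,
+ *                void *cb_arg)
+ * {
+ *         struct app_stats *stats = cb_arg;
+ *
+ *         if (result == RTE_HTIMER_MGR_ASYNC_RESULT_EXPIRED)
+ *                 stats->num_expired++;   // timer callback has run/will run
+ *         else
+ *                 stats->num_canceled++;  // canceled (or already canceled)
+ * }
+ * @endcode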
+ *
+ * @param timer
+ *   The memory used for managing this timer. This memory must not be
+ *   read or written (or free'd) by the application until this timer
+ *   has expired, or any cancellation attempts have completed.
+ * @param async_cb
+ *   The asynchronous operation callback to be called when the
+ *   cancel operation is completed.
+ * @param async_cb_arg
+ *   A pointer which will be supplied back to the application in the
+ *   \c async_cb callback call.
+ * @return
+ *   - 0: Success
+ *   - -EBUSY: The maximum number of concurrently queued asynchronous
+ *      operations has been reached.
+ */
+
+__rte_experimental
+int
+rte_htimer_mgr_async_cancel(struct rte_htimer *timer,
+			    rte_htimer_mgr_async_op_cb_t async_cb,
+			    void *async_cb_arg);
+
+/**
+ * Update HTW time and perform timer expiry and asynchronous operation
+ * processing.
+ *
+ * This function is the equivalent of retrieving the current TSC time,
+ * and calling rte_htimer_mgr_manage_time().
+ *
+ * rte_htimer_mgr_manage() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
+ */
+
+__rte_experimental
+void
+rte_htimer_mgr_manage(void);
+
+/**
+ * Progress HTW time, and perform timer expiry and asynchronous
+ * operation processing in the process.
+ *
+ * This function progresses the calling thread's HTW up to the point
+ * specified by \c current_time, calling the callbacks of any expired
+ * timers.
+ *
+ * The time source must be a monotonic clock, and thus each new \c
+ * current_time must be equal to or greater than the time supplied in
+ * the previous call.
+ *
+ * The timer precision for timers scheduled on a particular thread's
+ * HTW depends on how frequently that thread calls this function.
+ *
+ * rte_htimer_mgr_manage_time() also performs asynchronous operation
+ * processing. See rte_htimer_mgr_process() for details.
+ *
+ * rte_htimer_mgr_manage_time() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
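+ *
+ * For example (illustrative; app_clock_ns() and do_work() are
+ * placeholders for an application-provided monotonic nanosecond clock
+ * and the application's other work, and passing 0 as flags selects
+ * the default nanosecond time unit):
+ *
+ * @code{.c}
+ * for (;;) {
+ *         do_work();
+ *         rte_htimer_mgr_manage_time(app_clock_ns(), 0);
+ * }
+ * @endcode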
+ *
+ * @param current_time
+ *   The current time (in nanoseconds, by default).
+ * @param flags
+ *   Either \c RTE_HTIMER_FLAG_TIME_TICK or \c RTE_HTIMER_FLAG_TIME_TSC.
+ */
+
+__rte_experimental
+void
+rte_htimer_mgr_manage_time(uint64_t current_time, uint32_t flags);
+
+/**
+ * Perform asynchronous operation processing.
+ *
+ * rte_htimer_mgr_process() serves pending asynchronous add or cancel
+ * requests, and produces the necessary responses. The timer callbacks
+ * of any added timers which have already expired are called.
+ *
+ * This function also processes asynchronous operation response
+ * messages received, and calls the asynchronous callbacks, if such
+ * were provided by the application.
+ *
+ * rte_htimer_mgr_process() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
+ */
+
+__rte_experimental
+void
+rte_htimer_mgr_process(void);
+
+/**
+ * Return the current local HTW time in nanoseconds.
+ *
+ * This function returns the most recent time provided by this thread,
+ * either via rte_htimer_mgr_manage_time(), or as sampled by
+ * rte_htimer_mgr_manage().
+ *
+ * The initial time, prior to any manage-calls, is 0.
+ *
+ * rte_htimer_mgr_current_time() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
+ */
+
+__rte_experimental
+uint64_t
+rte_htimer_mgr_current_time(void);
+
+/**
+ * Return the current local HTW time in ticks.
+ *
+ * This function returns the current time of the calling thread's HTW. The
+ * tick is the current time provided by the application (via
+ * rte_htimer_mgr_manage_time()), or as retrieved (using
+ * rte_get_tsc_cycles() in rte_htimer_mgr_manage()), divided by the
+ * tick length (as provided in rte_htimer_mgr_init()).
+ *
+ * The initial time, prior to any manage-calls, is 0.
+ *
+ * rte_htimer_mgr_current_tick() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
+ */
+
+__rte_experimental
+uint64_t
+rte_htimer_mgr_current_tick(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_HTIMER_MGR_H_ */
diff --git a/lib/htimer/rte_htimer_msg.h b/lib/htimer/rte_htimer_msg.h
new file mode 100644
index 0000000000..ceb106e263
--- /dev/null
+++ b/lib/htimer/rte_htimer_msg.h
@@ -0,0 +1,44 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_HTIMER_MSG_
+#define _RTE_HTIMER_MSG_
+
+#include <rte_htimer.h>
+
+typedef void (*rte_htimer_msg_async_op_cb_t)(struct rte_htimer *timer,
+					     int result, void *cb_arg);
+
+enum rte_htimer_msg_type {
+	rte_htimer_msg_type_add_request,
+	rte_htimer_msg_type_add_response,
+	rte_htimer_msg_type_cancel_request,
+	rte_htimer_msg_type_cancel_response
+};
+
+struct rte_htimer_msg_request {
+	unsigned int source_lcore_id;
+};
+
+struct rte_htimer_msg_response {
+	int result;
+};
+
+struct rte_htimer_msg {
+	enum rte_htimer_msg_type msg_type;
+
+	struct rte_htimer *timer;
+
+	rte_htimer_msg_async_op_cb_t async_cb;
+	void *async_cb_arg;
+
+	union {
+		struct rte_htimer_msg_request request;
+		struct rte_htimer_msg_response response;
+	};
+};
+
+#endif
diff --git a/lib/htimer/rte_htimer_msg_ring.c b/lib/htimer/rte_htimer_msg_ring.c
new file mode 100644
index 0000000000..4019b7819a
--- /dev/null
+++ b/lib/htimer/rte_htimer_msg_ring.c
@@ -0,0 +1,18 @@ 
+#include "rte_htimer_msg_ring.h"
+
+struct rte_htimer_msg_ring *
+rte_htimer_msg_ring_create(const char *name, unsigned int count, int socket_id,
+			   unsigned int flags)
+{
+	struct rte_ring *ring =
+		rte_ring_create_elem(name, sizeof(struct rte_htimer_msg),
+				     count, socket_id, flags);
+
+	return (struct rte_htimer_msg_ring *)ring;
+}
+
+void
+rte_htimer_msg_ring_free(struct rte_htimer_msg_ring *msg_ring)
+{
+	rte_ring_free((struct rte_ring *)msg_ring);
+}
diff --git a/lib/htimer/rte_htimer_msg_ring.h b/lib/htimer/rte_htimer_msg_ring.h
new file mode 100644
index 0000000000..48fcc99189
--- /dev/null
+++ b/lib/htimer/rte_htimer_msg_ring.h
@@ -0,0 +1,55 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_HTIMER_MSG_RING_
+#define _RTE_HTIMER_MSG_RING_
+
+#include <rte_ring.h>
+
+#include "rte_htimer_msg.h"
+
+struct rte_htimer_msg_ring {
+	struct rte_ring ring;
+};
+
+struct rte_htimer_msg_ring *
+rte_htimer_msg_ring_create(const char *name, unsigned int count, int socket_id,
+			   unsigned int flags);
+
+void
+rte_htimer_msg_ring_free(struct rte_htimer_msg_ring *msg_ring);
+
+static inline int
+rte_htimer_msg_ring_empty(struct rte_htimer_msg_ring *msg_ring)
+{
+	return rte_ring_empty(&msg_ring->ring);
+}
+
+static inline unsigned int
+rte_htimer_msg_ring_dequeue_burst(struct rte_htimer_msg_ring *msg_ring,
+				  struct rte_htimer_msg *msgs,
+				  unsigned int n)
+{
+	unsigned int dequeued;
+
+	dequeued = rte_ring_dequeue_burst_elem(&msg_ring->ring, msgs,
+					       sizeof(struct rte_htimer_msg),
+					       n, NULL);
+
+	return dequeued;
+}
+
+static inline unsigned int
+rte_htimer_msg_ring_enqueue(struct rte_htimer_msg_ring *msg_ring,
+			    struct rte_htimer_msg *msg)
+{
+	int rc;
+
+	rc = rte_ring_enqueue_elem(&msg_ring->ring, msg,
+				   sizeof(struct rte_htimer_msg));
+
+	return rc;
+}
+
+#endif
diff --git a/lib/htimer/rte_htw.c b/lib/htimer/rte_htw.c
new file mode 100644
index 0000000000..67fcb8c623
--- /dev/null
+++ b/lib/htimer/rte_htw.c
@@ -0,0 +1,445 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+/*
+ * This is an implementation of a hierarchical timer wheel based on
+ * Hashed and Hierarchical Timing Wheels: Data Structures
+ * for the Efficient Implementation of a Timer Facility by Varghese and
+ * Lauck.
+ *
+ * To improve efficiency when the slots are sparsely populated (i.e.,
+ * many ticks do not have any timers), each slot is represented by a
+ * bit in a separately-managed, per-wheel, bitset. This allows for
+ * very efficient scanning. The cost of managing this bitset is small.
+ */
+
+#include <rte_bitset.h>
+#include <rte_branch_prediction.h>
+#include <rte_debug.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+
+#include "rte_htw.h"
+
+#define TICK_BITS 64
+
+#define WHEEL_BITS 8
+#define WHEEL_SLOTS (1U << WHEEL_BITS)
+#define WHEEL_LEVELS (TICK_BITS / WHEEL_BITS)
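+
+/*
+ * Worked example (illustrative, with WHEEL_BITS = 8): a slot on level
+ * 0 covers 1 tick, a slot on level 1 covers 256 ticks, and a slot on
+ * level 2 covers 65536 ticks. A timer due 10 ticks into the future is
+ * placed on level 0 (get_level(10) == 0), in slot
+ * expiration_time % 256, while a timer due in 1000 ticks is placed on
+ * level 1 (get_level(1000) == 1), in slot (expiration_time >> 8) % 256.
+ * As the wheels advance, timers on higher levels are re-scheduled onto
+ * lower levels, until they expire on level 0.
+ */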
+
+struct wheel {
+	uint64_t wheel_time;
+	RTE_BITSET_DECLARE(used_slots, WHEEL_SLOTS);
+	struct rte_htimer_list slots[WHEEL_SLOTS];
+};
+
+struct rte_htw {
+	uint64_t current_time;
+
+	struct wheel wheels[WHEEL_LEVELS];
+
+	struct rte_htimer_list added;
+	struct rte_htimer_list expiring;
+
+	struct rte_htimer *running_timer;
+};
+
+static uint64_t
+time_to_wheel_time(uint64_t t, uint16_t level)
+{
+	return t >> (level * WHEEL_BITS);
+}
+
+static uint64_t
+wheel_time_to_time(uint64_t wheel_time, uint16_t level)
+{
+	return wheel_time << (level * WHEEL_BITS);
+}
+
+static void
+wheel_init(struct wheel *wheel)
+{
+	uint16_t i;
+
+	wheel->wheel_time = 0;
+
+	rte_bitset_init(wheel->used_slots, WHEEL_SLOTS);
+
+	for (i = 0; i < WHEEL_SLOTS; i++)
+		LIST_INIT(&wheel->slots[i]);
+}
+
+static uint64_t
+list_next_timeout(struct rte_htimer_list *timers)
+{
+	struct rte_htimer *timer;
+	uint64_t candidate = UINT64_MAX;
+
+	LIST_FOREACH(timer, timers, entry)
+		candidate = RTE_MIN(timer->expiration_time, candidate);
+
+	return candidate;
+}
+
+static uint16_t
+wheel_time_to_slot(uint64_t wheel_time)
+{
+	return wheel_time % WHEEL_SLOTS;
+}
+
+static uint64_t
+wheel_current_slot_time(struct wheel *wheel, uint16_t level)
+{
+	return wheel->wheel_time << (level * WHEEL_BITS);
+}
+
+static uint64_t
+wheel_next_timeout(struct wheel *wheel, uint16_t level, uint64_t upper_bound)
+{
+	uint16_t start_slot;
+	ssize_t slot;
+
+	start_slot = wheel_current_slot_time(wheel, level);
+
+	if (wheel_time_to_time(wheel->wheel_time, level) >= upper_bound)
+		return upper_bound;
+
+	RTE_BITSET_FOREACH_SET_WRAP(slot, wheel->used_slots, WHEEL_SLOTS,
+				    (ssize_t)start_slot, WHEEL_SLOTS) {
+		struct rte_htimer_list *timers = &wheel->slots[slot];
+		uint64_t next_timeout;
+
+		next_timeout = list_next_timeout(timers);
+
+		if (next_timeout != UINT64_MAX)
+			return next_timeout;
+	}
+
+	return UINT64_MAX;
+}
+
+static uint16_t
+get_slot(uint64_t t, uint16_t level)
+{
+	uint64_t wheel_time;
+	uint16_t slot;
+
+	wheel_time = time_to_wheel_time(t, level);
+	slot = wheel_time_to_slot(wheel_time);
+
+	return slot;
+}
+
+struct rte_htw *
+rte_htw_create(void)
+{
+	struct rte_htw *htw;
+	uint16_t level;
+
+	RTE_BUILD_BUG_ON((TICK_BITS % WHEEL_BITS) != 0);
+	RTE_BUILD_BUG_ON(sizeof(uint16_t) * CHAR_BIT <= WHEEL_BITS);
+
+	htw = rte_malloc(NULL, sizeof(struct rte_htw), RTE_CACHE_LINE_SIZE);
+
+	if (htw == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	htw->current_time = 0;
+
+	LIST_INIT(&htw->added);
+	LIST_INIT(&htw->expiring);
+
+	for (level = 0; level < WHEEL_LEVELS; level++)
+		wheel_init(&htw->wheels[level]);
+
+	return htw;
+}
+
+void
+rte_htw_destroy(struct rte_htw *htw)
+{
+	rte_free(htw);
+}
+
+static uint16_t
+get_level(uint64_t remaining_time)
+{
+	int last_set = 64 - __builtin_clzll(remaining_time);
+
+	return (last_set - 1) / WHEEL_BITS;
+}
+
+static void
+mark_added(struct rte_htw *htw, struct rte_htimer *timer)
+{
+	timer->state = RTE_HTIMER_STATE_PENDING;
+	LIST_INSERT_HEAD(&htw->added, timer, entry);
+}
+
+static void
+assure_valid_add_params(uint64_t period __rte_unused,
+			uint32_t flags __rte_unused)
+{
+	RTE_ASSERT(!(flags & ~(RTE_HTIMER_FLAG_PERIODICAL |
+			       RTE_HTIMER_FLAG_ABSOLUTE_TIME)));
+	RTE_ASSERT(flags & RTE_HTIMER_FLAG_PERIODICAL ?
+		   period > 0 : period == 0);
+}
+
+void
+rte_htw_add(struct rte_htw *htw, struct rte_htimer *timer,
+	    uint64_t expiration_time, uint64_t period,
+	    rte_htimer_cb_t timer_cb, void *timer_cb_arg, uint32_t flags)
+{
+	assure_valid_add_params(period, flags);
+
+	if (flags & RTE_HTIMER_FLAG_ABSOLUTE_TIME)
+		timer->expiration_time = expiration_time;
+	else
+		timer->expiration_time = htw->current_time + expiration_time;
+
+	timer->period = period;
+	timer->flags = flags;
+	timer->cb = timer_cb;
+	timer->cb_arg = timer_cb_arg;
+
+	mark_added(htw, timer);
+}
+
+void
+rte_htw_cancel(struct rte_htw *htw, struct rte_htimer *timer)
+{
+	/*
+	 * One could consider clearing the relevant used_slots bit in
+	 * case this was the last entry in the wheel's slot
+	 * list. However, from a correctness point of view, a "false
+	 * positive" is not an issue. From a performance perspective,
+	 * checking the list head and clearing the bit is likely more
+	 * expensive than just deferring a minor cost to a future
+	 * rte_htw_manage() call.
+	 */
+
+	RTE_ASSERT(timer->state == RTE_HTIMER_STATE_PENDING ||
+		   timer->state == RTE_HTIMER_STATE_EXPIRED);
+
+	if (likely(timer->state == RTE_HTIMER_STATE_PENDING)) {
+		LIST_REMOVE(timer, entry);
+		timer->state = RTE_HTIMER_STATE_CANCELED;
+	} else if (timer == htw->running_timer) {
+		/* periodical timer being canceled by its own callback */
+		RTE_ASSERT(timer->flags & RTE_HTIMER_FLAG_PERIODICAL);
+
+		timer->state = RTE_HTIMER_STATE_CANCELED;
+
+		/* signals running timer canceled */
+		htw->running_timer = NULL;
+	}
+}
+
+static void
+mark_expiring(struct rte_htw *htw, struct rte_htimer *timer)
+{
+	LIST_INSERT_HEAD(&htw->expiring, timer, entry);
+}
+
+static void
+schedule_timer(struct rte_htw *htw, struct rte_htimer *timer)
+{
+	uint64_t remaining_time;
+	uint16_t level;
+	struct wheel *wheel;
+	uint16_t slot;
+	struct rte_htimer_list *slot_timers;
+
+	remaining_time = timer->expiration_time - htw->current_time;
+
+	level = get_level(remaining_time);
+
+	wheel = &htw->wheels[level];
+
+	slot = get_slot(timer->expiration_time, level);
+
+	slot_timers = &htw->wheels[level].slots[slot];
+
+	LIST_INSERT_HEAD(slot_timers, timer, entry);
+
+	rte_bitset_set(wheel->used_slots, slot);
+}
+
+static void
+process_added(struct rte_htw *htw)
+{
+	struct rte_htimer *timer;
+
+	while ((timer = LIST_FIRST(&htw->added)) != NULL) {
+		LIST_REMOVE(timer, entry);
+
+		if (timer->expiration_time > htw->current_time)
+			schedule_timer(htw, timer);
+		else
+			mark_expiring(htw, timer);
+	}
+}
+
+static void
+process_expiring(struct rte_htw *htw)
+{
+	struct rte_htimer *timer;
+
+	while ((timer = LIST_FIRST(&htw->expiring)) != NULL) {
+		bool is_periodical;
+		bool running_timer_canceled;
+
+		/*
+		 * The timer struct cannot be safely accessed
+		 * after the callback has been called (except for
+		 * non-canceled periodical timers), since the callback
+		 * may have free'd (or reused) the memory.
+		 */
+
+		LIST_REMOVE(timer, entry);
+
+		is_periodical = timer->flags & RTE_HTIMER_FLAG_PERIODICAL;
+
+		timer->state = RTE_HTIMER_STATE_EXPIRED;
+
+		htw->running_timer = timer;
+
+		timer->cb(timer, timer->cb_arg);
+
+		running_timer_canceled = htw->running_timer == NULL;
+
+		htw->running_timer = NULL;
+
+		if (is_periodical && !running_timer_canceled) {
+			timer->expiration_time += timer->period;
+			mark_added(htw, timer);
+		}
+	}
+}
+
+uint64_t
+rte_htw_current_time(struct rte_htw *htw)
+{
+	return htw->current_time;
+}
+
+uint64_t
+rte_htw_next_timeout(struct rte_htw *htw, uint64_t upper_bound)
+{
+	uint16_t level;
+
+	/* scheduling timeouts will sort them in temporal order */
+	process_added(htw);
+
+	if (!LIST_EMPTY(&htw->expiring))
+		return 0;
+
+	for (level = 0; level < WHEEL_LEVELS; level++) {
+		uint64_t wheel_timeout;
+
+		wheel_timeout = wheel_next_timeout(&htw->wheels[level],
+						   level, upper_bound);
+		if (wheel_timeout != UINT64_MAX)
+			return RTE_MIN(wheel_timeout, upper_bound);
+	}
+
+	return upper_bound;
+}
+
+static __rte_always_inline void
+process_slot(struct rte_htw *htw, uint16_t level, struct wheel *wheel,
+	     uint16_t slot)
+{
+	struct rte_htimer_list *slot_timers;
+	struct rte_htimer *timer;
+
+	slot_timers = &wheel->slots[slot];
+
+	rte_bitset_clear(wheel->used_slots, slot);
+
+	while ((timer = LIST_FIRST(slot_timers)) != NULL) {
+		LIST_REMOVE(timer, entry);
+
+		if (level == 0 || timer->expiration_time <= htw->current_time)
+			mark_expiring(htw, timer);
+		else
+			schedule_timer(htw, timer);
+	}
+}
+
+static __rte_always_inline void
+process_slots(struct rte_htw *htw, uint16_t level, struct wheel *wheel,
+	      uint16_t start_slot, uint16_t num_slots)
+{
+	ssize_t slot;
+
+	RTE_BITSET_FOREACH_SET_WRAP(slot, wheel->used_slots, WHEEL_SLOTS,
+				    (ssize_t)start_slot, num_slots)
+		process_slot(htw, level, wheel, slot);
+}
+
+static void
+advance(struct rte_htw *htw)
+{
+	uint16_t level;
+
+	for (level = 0; level < WHEEL_LEVELS; level++) {
+		struct wheel *wheel = &htw->wheels[level];
+		uint64_t new_wheel_time;
+		uint16_t start_slot;
+		uint16_t num_slots;
+
+		new_wheel_time = time_to_wheel_time(htw->current_time, level);
+
+		if (new_wheel_time == wheel->wheel_time)
+			break;
+
+		start_slot = wheel_time_to_slot(wheel->wheel_time + 1);
+		num_slots = RTE_MIN(new_wheel_time - wheel->wheel_time,
+				    WHEEL_SLOTS);
+
+		wheel->wheel_time = new_wheel_time;
+
+		process_slots(htw, level, wheel, start_slot, num_slots);
+	}
+}
+
+void
+rte_htw_manage(struct rte_htw *htw, uint64_t new_time)
+{
+	RTE_VERIFY(new_time >= htw->current_time);
+
+	/*
+	 * Scheduling added timers, core timer wheel processing and
+	 * expiry callback execution are kept as separate stages, to
+	 * avoid having the core wheel traversal code deal with a
+	 * situation where a timeout callback re-adds the timer.
+	 * This split also results in reasonable semantics in regards
+	 * to callbacks adding already-expired timeouts (e.g., with
+	 * time 0) from within a timeout callback. Instead of creating
+	 * an endless loop, with rte_htw_manage() never returning, the
+	 * execution of such a timer is deferred until the next
+	 * rte_htw_manage() call.
+	 */
+
+	process_added(htw);
+
+	if (new_time > htw->current_time) {
+		htw->current_time = new_time;
+		advance(htw);
+	}
+
+	process_expiring(htw);
+}
+
+void
+rte_htw_process(struct rte_htw *htw)
+{
+	process_added(htw);
+	process_expiring(htw);
+}
diff --git a/lib/htimer/rte_htw.h b/lib/htimer/rte_htw.h
new file mode 100644
index 0000000000..c93358bb13
--- /dev/null
+++ b/lib/htimer/rte_htw.h
@@ -0,0 +1,49 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_HTW_H_
+#define _RTE_HTW_H_
+
+#include <stdint.h>
+#include <sys/queue.h>
+
+#include <rte_htimer.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rte_htw;
+
+struct rte_htw *
+rte_htw_create(void);
+
+void
+rte_htw_destroy(struct rte_htw *htw);
+
+void
+rte_htw_add(struct rte_htw *htw, struct rte_htimer *timer,
+	    uint64_t expiration_time, uint64_t period,
+	    rte_htimer_cb_t cb, void *cb_arg, uint32_t flags);
+
+void
+rte_htw_cancel(struct rte_htw *htw, struct rte_htimer *timer);
+
+uint64_t
+rte_htw_current_time(struct rte_htw *htw);
+
+uint64_t
+rte_htw_next_timeout(struct rte_htw *htw, uint64_t upper_bound);
+
+void
+rte_htw_manage(struct rte_htw *htw, uint64_t new_time);
+
+void
+rte_htw_process(struct rte_htw *htw);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_HTW_H_ */
diff --git a/lib/htimer/version.map b/lib/htimer/version.map
new file mode 100644
index 0000000000..0e71dc7d57
--- /dev/null
+++ b/lib/htimer/version.map
@@ -0,0 +1,17 @@ 
+EXPERIMENTAL {
+	global:
+
+	rte_htimer_mgr_init;
+	rte_htimer_mgr_deinit;
+	rte_htimer_mgr_add;
+	rte_htimer_mgr_cancel;
+	rte_htimer_mgr_async_add;
+	rte_htimer_mgr_async_cancel;
+	rte_htimer_mgr_manage;
+	rte_htimer_mgr_manage_time;
+	rte_htimer_mgr_process;
+	rte_htimer_mgr_current_time;
+	rte_htimer_mgr_current_tick;
+
+        local: *;
+};
diff --git a/lib/meson.build b/lib/meson.build
index 2bc0932ad5..c7c0e42ae8 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -37,6 +37,7 @@  libraries = [
         'gpudev',
         'gro',
         'gso',
+        'htimer',
         'ip_frag',
         'jobstats',
         'kni',