From patchwork Thu Aug 19 07:09:04 2021
X-Patchwork-Submitter: Joyce Kong
X-Patchwork-Id: 97103
X-Patchwork-Delegate: thomas@monjalon.net
From: Joyce Kong
To: John McNamara
Cc: dev@dpdk.org, thomas@monjalon.net, david.marchand@redhat.com,
 honnappa.nagarahalli@arm.com, ruifeng.wang@arm.com, nd@arm.com
Date: Thu, 19 Aug 2021 02:09:04 -0500
Message-Id: <20210819070908.25773-5-joyce.kong@arm.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20210819070908.25773-1-joyce.kong@arm.com>
References: <20210819070908.25773-1-joyce.kong@arm.com>
Subject: [dpdk-dev] [PATCH v1 4/8] examples/performance-thread: use compiler atomics for sync

Convert rte_atomic usages to compiler atomic built-ins for thread sync.
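
For readers less familiar with the GCC/Clang __atomic built-ins, the standalone
sketch below (illustrative only, hypothetical variable names, not part of the
diff) shows the mapping this patch applies: the legacy rte_atomic helpers wrap a
dedicated rte_atomicNN_t type and typically imply full-barrier ordering, while
the built-ins operate on plain integers and take an explicit memory order, here
__ATOMIC_RELAXED for these counters and flags.

/* Illustrative sketch of the rte_atomic -> __atomic conversion pattern. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t count;	/* was: rte_atomic64_t count; */

int main(void)
{
	uint64_t expected = 0;

	/* rte_atomic64_init(&count)  -> relaxed store of 0 */
	__atomic_store_n(&count, 0, __ATOMIC_RELAXED);

	/* rte_atomic64_inc(&count)   -> relaxed fetch-add of 1 */
	__atomic_fetch_add(&count, 1, __ATOMIC_RELAXED);

	/* rte_atomic64_cmpset(&count, 0, 5) -> compare-exchange; the expected
	 * value is passed by address and is overwritten with the current value
	 * when the exchange fails.
	 */
	if (!__atomic_compare_exchange_n(&count, &expected, 5, 0,
			__ATOMIC_RELAXED, __ATOMIC_RELAXED))
		printf("cmpxchg failed, current value %" PRIu64 "\n", expected);

	/* rte_atomic64_read(&count)  -> relaxed load */
	printf("count = %" PRIu64 "\n",
			__atomic_load_n(&count, __ATOMIC_RELAXED));
	return 0;
}

In addition, the two rte_pause() polling loops in l3fwd-thread/main.c are
replaced with rte_wait_until_equal_16(), which waits for a 16-bit word to reach
the expected value using the given memory ordering.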
Signed-off-by: Joyce Kong
Reviewed-by: Ruifeng Wang
---
 examples/performance-thread/common/lthread.c   | 10 +++---
 .../performance-thread/common/lthread_diag.h   | 10 +++---
 .../performance-thread/common/lthread_int.h    |  1 -
 .../performance-thread/common/lthread_mutex.c  | 26 +++++++-------
 .../performance-thread/common/lthread_mutex.h  |  2 +-
 .../performance-thread/common/lthread_sched.c  | 34 ++++++++-----------
 .../performance-thread/common/lthread_tls.c    |  5 +--
 .../performance-thread/l3fwd-thread/main.c     | 22 +++++-------
 8 files changed, 53 insertions(+), 57 deletions(-)

diff --git a/examples/performance-thread/common/lthread.c b/examples/performance-thread/common/lthread.c
index 3f1f48db43..98123f34f8 100644
--- a/examples/performance-thread/common/lthread.c
+++ b/examples/performance-thread/common/lthread.c
@@ -357,9 +357,10 @@ void lthread_exit(void *ptr)
 	 * - if exit before join then we suspend and resume on join
 	 * - if join before exit then we resume the joining thread
 	 */
+	uint64_t join_initial = LT_JOIN_INITIAL;
 	if ((lt->join == LT_JOIN_INITIAL)
-	    && rte_atomic64_cmpset(&lt->join, LT_JOIN_INITIAL,
-			LT_JOIN_EXITING)) {
+	    && __atomic_compare_exchange_n(&lt->join, &join_initial,
+		LT_JOIN_EXITING, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 
 		DIAG_EVENT(lt, LT_DIAG_LTHREAD_EXIT, 1, 0);
 		_suspend();
@@ -415,9 +416,10 @@ int lthread_join(struct lthread *lt, void **ptr)
 	 * - if join before exit we suspend and will resume when exit is called
 	 * - if exit before join we resume the exiting thread
 	 */
+	uint64_t join_initial = LT_JOIN_INITIAL;
 	if ((lt->join == LT_JOIN_INITIAL)
-	    && rte_atomic64_cmpset(&lt->join, LT_JOIN_INITIAL,
-			LT_JOIN_THREAD_SET)) {
+	    && __atomic_compare_exchange_n(&lt->join, &join_initial,
+		LT_JOIN_THREAD_SET, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 
 		DIAG_EVENT(current, LT_DIAG_LTHREAD_JOIN, lt, 1);
 		_suspend();
diff --git a/examples/performance-thread/common/lthread_diag.h b/examples/performance-thread/common/lthread_diag.h
index e876dda6da..7ee89eef38 100644
--- a/examples/performance-thread/common/lthread_diag.h
+++ b/examples/performance-thread/common/lthread_diag.h
@@ -78,11 +78,11 @@ extern uint64_t diag_mask;
 	} \
 } while (0)
 
-#define DIAG_COUNT_DEFINE(x) rte_atomic64_t count_##x
-#define DIAG_COUNT_INIT(o, x) rte_atomic64_init(&((o)->count_##x))
-#define DIAG_COUNT_INC(o, x) rte_atomic64_inc(&((o)->count_##x))
-#define DIAG_COUNT_DEC(o, x) rte_atomic64_dec(&((o)->count_##x))
-#define DIAG_COUNT(o, x) rte_atomic64_read(&((o)->count_##x))
+#define DIAG_COUNT_DEFINE(x) uint64_t count_##x
+#define DIAG_COUNT_INIT(o, x) __atomic_store_n(&((o)->count_##x), 0, __ATOMIC_RELAXED)
+#define DIAG_COUNT_INC(o, x) __atomic_fetch_add(&((o)->count_##x), 1, __ATOMIC_RELAXED)
+#define DIAG_COUNT_DEC(o, x) __atomic_fetch_sub(&((o)->count_##x), 1, __ATOMIC_RELAXED)
+#define DIAG_COUNT(o, x) __atomic_load_n(&((o)->count_##x), __ATOMIC_RELAXED)
 
 #define DIAG_USED
 
diff --git a/examples/performance-thread/common/lthread_int.h b/examples/performance-thread/common/lthread_int.h
index a352f13b75..d010126f16 100644
--- a/examples/performance-thread/common/lthread_int.h
+++ b/examples/performance-thread/common/lthread_int.h
@@ -21,7 +21,6 @@ extern "C" {
 #include
 #include
 #include
-#include <rte_atomic.h>
 #include
 #include
 
diff --git a/examples/performance-thread/common/lthread_mutex.c b/examples/performance-thread/common/lthread_mutex.c
index 01da6cad4f..43cc9bbfb9 100644
--- a/examples/performance-thread/common/lthread_mutex.c
+++ b/examples/performance-thread/common/lthread_mutex.c
@@ -60,7 +60,7 @@ lthread_mutex_init(char *name, struct lthread_mutex **mutex,
 	m->root_sched = THIS_SCHED;
 	m->owner = NULL;
 
-	rte_atomic64_init(&m->count);
+	__atomic_store_n(&m->count, 0, __ATOMIC_RELAXED);
 
 	DIAG_CREATE_EVENT(m, LT_DIAG_MUTEX_CREATE);
 	/* success */
@@ -115,10 +115,11 @@ int lthread_mutex_lock(struct lthread_mutex *m)
 	}
 
 	for (;;) {
-		rte_atomic64_inc(&m->count);
+		__atomic_fetch_add(&m->count, 1, __ATOMIC_RELAXED);
 		do {
-			if (rte_atomic64_cmpset
-			    ((uint64_t *) &m->owner, 0, (uint64_t) lt)) {
+			uint64_t lt_init = 0;
+			if (__atomic_compare_exchange_n(&m->owner, &lt_init, lt,
+				0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 				/* happy days, we got the lock */
 				DIAG_EVENT(m, LT_DIAG_MUTEX_LOCK, m, 0);
 				return 0;
@@ -126,7 +127,7 @@ int lthread_mutex_lock(struct lthread_mutex *m)
 			/* spin due to race with unlock when
 			 * nothing was blocked
 			 */
-		} while ((rte_atomic64_read(&m->count) == 1) &&
+		} while ((__atomic_load_n(&m->count, __ATOMIC_RELAXED) == 1) &&
 				(m->owner == NULL));
 
 		/* queue the current thread in the blocked queue
@@ -160,16 +161,17 @@ int lthread_mutex_trylock(struct lthread_mutex *m)
 		return POSIX_ERRNO(EDEADLK);
 	}
 
-	rte_atomic64_inc(&m->count);
-	if (rte_atomic64_cmpset
-	    ((uint64_t *) &m->owner, (uint64_t) NULL, (uint64_t) lt)) {
+	__atomic_fetch_add(&m->count, 1, __ATOMIC_RELAXED);
+	uint64_t lt_init = 0;
+	if (__atomic_compare_exchange_n(&m->owner, &lt_init, lt,
+		0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 		/* got the lock */
 		DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, 0);
 		return 0;
 	}
 
 	/* failed so return busy */
-	rte_atomic64_dec(&m->count);
+	__atomic_fetch_sub(&m->count, 1, __ATOMIC_RELAXED);
 	DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, POSIX_ERRNO(EBUSY));
 	return POSIX_ERRNO(EBUSY);
 }
@@ -193,13 +195,13 @@ int lthread_mutex_unlock(struct lthread_mutex *m)
 		return POSIX_ERRNO(EPERM);
 	}
 
-	rte_atomic64_dec(&m->count);
+	__atomic_fetch_sub(&m->count, 1, __ATOMIC_RELAXED);
 	/* if there are blocked threads then make one ready */
-	while (rte_atomic64_read(&m->count) > 0) {
+	while (__atomic_load_n(&m->count, __ATOMIC_RELAXED) > 0) {
 		unblocked = _lthread_queue_remove(m->blocked);
 
 		if (unblocked != NULL) {
-			rte_atomic64_dec(&m->count);
+			__atomic_fetch_sub(&m->count, 1, __ATOMIC_RELAXED);
 			DIAG_EVENT(m, LT_DIAG_MUTEX_UNLOCKED, m, unblocked);
 			RTE_ASSERT(unblocked->sched != NULL);
 			_ready_queue_insert((struct lthread_sched *)
diff --git a/examples/performance-thread/common/lthread_mutex.h b/examples/performance-thread/common/lthread_mutex.h
index cd866f87b8..730092bdf8 100644
--- a/examples/performance-thread/common/lthread_mutex.h
+++ b/examples/performance-thread/common/lthread_mutex.h
@@ -17,7 +17,7 @@ extern "C" {
 
 struct lthread_mutex {
 	struct lthread *owner;
-	rte_atomic64_t count;
+	uint64_t count;
 	struct lthread_queue *blocked __rte_cache_aligned;
 	struct lthread_sched *root_sched;
 	char name[MAX_MUTEX_NAME_SIZE];
diff --git a/examples/performance-thread/common/lthread_sched.c b/examples/performance-thread/common/lthread_sched.c
index 38ca0c45cb..3784b010c2 100644
--- a/examples/performance-thread/common/lthread_sched.c
+++ b/examples/performance-thread/common/lthread_sched.c
@@ -22,8 +22,6 @@
 #include
 #include
 
-#include
-#include
 #include
 #include
 #include
@@ -47,8 +45,8 @@
  * When a scheduler shuts down it is assumed that the application is terminating
  */
 
-static rte_atomic16_t num_schedulers;
-static rte_atomic16_t active_schedulers;
+static uint16_t num_schedulers;
+static uint16_t active_schedulers;
 
 /* one scheduler per lcore */
 RTE_DEFINE_PER_LCORE(struct lthread_sched *, this_sched) = NULL;
@@ -64,10 +62,8 @@ uint64_t diag_mask;
 RTE_INIT(lthread_sched_ctor)
 {
 	memset(schedcore, 0, sizeof(schedcore));
-	rte_atomic16_init(&num_schedulers);
-	rte_atomic16_set(&num_schedulers, 1);
-	rte_atomic16_init(&active_schedulers);
-	rte_atomic16_set(&active_schedulers, 0);
+	__atomic_store_n(&num_schedulers, 1, __ATOMIC_RELAXED);
+	__atomic_store_n(&active_schedulers, 0, __ATOMIC_RELAXED);
 	diag_cb = NULL;
 }
 
@@ -260,8 +256,8 @@ struct lthread_sched *_lthread_sched_create(size_t stack_size)
  */
 int lthread_num_schedulers_set(int num)
 {
-	rte_atomic16_set(&num_schedulers, num);
-	return (int)rte_atomic16_read(&num_schedulers);
+	__atomic_store_n(&num_schedulers, num, __ATOMIC_RELAXED);
+	return (int)__atomic_load_n(&num_schedulers, __ATOMIC_RELAXED);
 }
 
 /*
@@ -269,7 +265,7 @@ int lthread_num_schedulers_set(int num)
  */
 int lthread_active_schedulers(void)
 {
-	return (int)rte_atomic16_read(&active_schedulers);
+	return (int)__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED);
 }
 
 
@@ -299,8 +295,8 @@ void lthread_scheduler_shutdown_all(void)
 	 * for the possibility of a pthread wrapper on lthread_yield(),
 	 * something that is not possible unless the scheduler is running.
 	 */
-	while (rte_atomic16_read(&active_schedulers) <
-	       rte_atomic16_read(&num_schedulers))
+	while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
+	       __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
 		sched_yield();
 
 	for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
@@ -415,15 +411,15 @@ static inline int _lthread_sched_isdone(struct lthread_sched *sched)
  */
 static inline void _lthread_schedulers_sync_start(void)
 {
-	rte_atomic16_inc(&active_schedulers);
+	__atomic_fetch_add(&active_schedulers, 1, __ATOMIC_RELAXED);
 
 	/* wait for lthread schedulers
 	 * Note we use sched_yield() rather than pthread_yield() to allow
 	 * for the possibility of a pthread wrapper on lthread_yield(),
 	 * something that is not possible unless the scheduler is running.
 	 */
-	while (rte_atomic16_read(&active_schedulers) <
-	       rte_atomic16_read(&num_schedulers))
+	while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
+	       __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
 		sched_yield();
 }
 
@@ -433,15 +429,15 @@ static inline void _lthread_schedulers_sync_start(void)
  */
 static inline void _lthread_schedulers_sync_stop(void)
 {
-	rte_atomic16_dec(&active_schedulers);
-	rte_atomic16_dec(&num_schedulers);
+	__atomic_fetch_sub(&active_schedulers, 1, __ATOMIC_RELAXED);
+	__atomic_fetch_sub(&num_schedulers, 1, __ATOMIC_RELAXED);
 
 	/* wait for schedulers
 	 * Note we use sched_yield() rather than pthread_yield() to allow
 	 * for the possibility of a pthread wrapper on lthread_yield(),
 	 * something that is not possible unless the scheduler is running.
 	 */
-	while (rte_atomic16_read(&active_schedulers) > 0)
+	while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) > 0)
 		sched_yield();
 }
 
diff --git a/examples/performance-thread/common/lthread_tls.c b/examples/performance-thread/common/lthread_tls.c
index 07de6cafab..4ab2e3558b 100644
--- a/examples/performance-thread/common/lthread_tls.c
+++ b/examples/performance-thread/common/lthread_tls.c
@@ -18,7 +18,6 @@
 #include
 #include
 #include
-#include <rte_atomic.h>
 
 #include "lthread_tls.h"
 #include "lthread_queue.h"
@@ -52,8 +51,10 @@ void _lthread_key_pool_init(void)
 
 	bzero(key_table, sizeof(key_table));
 
+	uint64_t pool_init = 0;
 	/* only one lcore should do this */
-	if (rte_atomic64_cmpset(&key_pool_init, 0, 1)) {
+	if (__atomic_compare_exchange_n(&key_pool_init, &pool_init, 1, 0,
+			__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 
 		snprintf(name, MAX_LTHREAD_NAME_SIZE,
diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c
index 2f593abf26..ba9ad034e6 100644
--- a/examples/performance-thread/l3fwd-thread/main.c
+++ b/examples/performance-thread/l3fwd-thread/main.c
@@ -26,7 +26,6 @@
 #include
 #include
 #include
-#include <rte_atomic.h>
 #include
 #include
 #include
@@ -570,8 +569,8 @@ RTE_DEFINE_PER_LCORE(struct lcore_conf *, lcore_conf);
  */
 static int lthreads_on = 1; /**< Use lthreads for processing*/
 
-rte_atomic16_t rx_counter;  /**< Number of spawned rx threads */
-rte_atomic16_t tx_counter;  /**< Number of spawned tx threads */
+uint16_t rx_counter;  /**< Number of spawned rx threads */
+uint16_t tx_counter;  /**< Number of spawned tx threads */
 
 struct thread_conf {
 	uint16_t lcore_id;      /**< Initial lcore for rx thread */
@@ -1910,11 +1909,8 @@ cpu_load_collector(__rte_unused void *arg) {
 	printf("Waiting for %d rx threads and %d tx threads\n", n_rx_thread,
 			n_tx_thread);
 
-	while (rte_atomic16_read(&rx_counter) < n_rx_thread)
-		rte_pause();
-
-	while (rte_atomic16_read(&tx_counter) < n_tx_thread)
-		rte_pause();
+	rte_wait_until_equal_16(&rx_counter, n_rx_thread, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tx_counter, n_tx_thread, __ATOMIC_RELAXED);
 
 	for (i = 0; i < n_rx_thread; i++) {
 
@@ -2036,7 +2032,7 @@ lthread_tx_per_ring(void *dummy)
 	RTE_LOG(INFO, L3FWD, "entering main tx loop on lcore %u\n", rte_lcore_id());
 
 	nb_rx = 0;
-	rte_atomic16_inc(&tx_counter);
+	__atomic_fetch_add(&tx_counter, 1, __ATOMIC_RELAXED);
 	while (1) {
 
 		/*
@@ -2161,7 +2157,7 @@ lthread_rx(void *dummy)
 
 	worker_id = 0;
 	rx_conf->conf.cpu_id = sched_getcpu();
-	rte_atomic16_inc(&rx_counter);
+	__atomic_fetch_add(&rx_counter, 1, __ATOMIC_RELAXED);
 	while (1) {
 
 		/*
@@ -2243,7 +2239,7 @@ lthread_spawner(__rte_unused void *arg)
 	 * scheduler as this lthread, yielding is required to let them to run and
 	 * prevent deadlock here.
 	 */
-	while (rte_atomic16_read(&rx_counter) < n_rx_thread)
+	while (__atomic_load_n(&rx_counter, __ATOMIC_RELAXED) < n_rx_thread)
 		lthread_sleep(100000);
 
 	/*
@@ -2323,7 +2319,7 @@ pthread_tx(void *dummy)
 	RTE_LOG(INFO, L3FWD, "Entering main Tx loop on lcore %u\n", rte_lcore_id());
 
 	tx_conf->conf.cpu_id = sched_getcpu();
-	rte_atomic16_inc(&tx_counter);
+	__atomic_fetch_add(&tx_counter, 1, __ATOMIC_RELAXED);
 	while (1) {
 
 		cur_tsc = rte_rdtsc();
@@ -2406,7 +2402,7 @@ pthread_rx(void *dummy)
 	worker_id = 0;
 	rx_conf->conf.cpu_id = sched_getcpu();
-	rte_atomic16_inc(&rx_counter);
+	__atomic_fetch_add(&rx_counter, 1, __ATOMIC_RELAXED);
 	while (1) {
 		/*