From patchwork Fri Apr 19 23:06:21 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tyler Retzlaff X-Patchwork-Id: 139582 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 6AF6243EB4; Sat, 20 Apr 2024 01:10:15 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 4B92942686; Sat, 20 Apr 2024 01:07:29 +0200 (CEST) Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by mails.dpdk.org (Postfix) with ESMTP id 251DB40A70 for ; Sat, 20 Apr 2024 01:06:54 +0200 (CEST) Received: by linux.microsoft.com (Postfix, from userid 1086) id BECDE20FE865; Fri, 19 Apr 2024 16:06:48 -0700 (PDT) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com BECDE20FE865 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1713568009; bh=pcK/itH1S3yVJp7OnuITB1MwqY//rn9dHPNOayUftj8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=hflnPPck88ZD9j9NmU0EpKNw51exKfSjV2S5gQS/pf2uOr0bR/30IqrTiiB61/Pcc //4czMOSP55tC/rqH3+5aG2S43nuRnTb6vl7U+/UMf7dyLkH1OBxQZnHiQa4XXFhhO q1zQR8xsTHh1M8yoQNw3vL3JOT4NwL0LNCEtMYGY= From: Tyler Retzlaff To: dev@dpdk.org Cc: =?utf-8?q?Mattias_R=C3=B6nnblom?= , =?utf-8?q?Morten_Br=C3=B8rup?= , Abdullah Sevincer , Ajit Khaparde , Alok Prasad , Anatoly Burakov , Andrew Rybchenko , Anoob Joseph , Bruce Richardson , Byron Marohn , Chenbo Xia , Chengwen Feng , Ciara Loftus , Ciara Power , Dariusz Sosnowski , David Hunt , Devendra Singh Rawat , Erik Gabriel Carrillo , Guoyang Zhou , Harman Kalra , Harry van Haaren , Honnappa Nagarahalli , Jakub Grajciar , Jerin Jacob , Jeroen de Borst , Jian Wang , Jiawen Wu , Jie Hai , Jingjing Wu , Joshua Washington , Joyce Kong , Junfeng Guo , Kevin Laatz , Konstantin Ananyev , Liang Ma , Long Li , Maciej Czekaj , Matan Azrad , Maxime Coquelin , Nicolas Chautru , Ori Kam , Pavan Nikhilesh , Peter Mccarthy , Rahul Lakkireddy , Reshma Pattan , Rosen Xu , Ruifeng Wang , Rushil Gupta , Sameh Gobriel , Sivaprasad Tummala , Somnath Kotur , Stephen Hemminger , Suanming Mou , Sunil Kumar Kori , Sunil Uttarwar , Tetsuya Mukawa , Vamsi Attunuru , Viacheslav Ovsiienko , Vladimir Medvedkin , Xiaoyun Wang , Yipeng Wang , Yisen Zhuang , Yuying Zhang , Yuying Zhang , Ziyang Xuan , Tyler Retzlaff Subject: [PATCH v4 23/45] event/opdl: use rte stdatomic API Date: Fri, 19 Apr 2024 16:06:21 -0700 Message-Id: <1713568003-30453-24-git-send-email-roretzla@linux.microsoft.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1713568003-30453-1-git-send-email-roretzla@linux.microsoft.com> References: <1710967892-7046-1-git-send-email-roretzla@linux.microsoft.com> <1713568003-30453-1-git-send-email-roretzla@linux.microsoft.com> X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Replace the use of gcc builtin __atomic_xxx intrinsics with corresponding rte_atomic_xxx optional rte stdatomic API. Signed-off-by: Tyler Retzlaff Acked-by: Stephen Hemminger --- drivers/event/opdl/opdl_ring.c | 80 +++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/drivers/event/opdl/opdl_ring.c b/drivers/event/opdl/opdl_ring.c index e87ffd5..3476f6b 100644 --- a/drivers/event/opdl/opdl_ring.c +++ b/drivers/event/opdl/opdl_ring.c @@ -47,12 +47,12 @@ struct __rte_cache_aligned shared_state { /* Last known minimum sequence number of dependencies, used for multi * thread operation */ - uint32_t available_seq; + RTE_ATOMIC(uint32_t) available_seq; char _pad1[RTE_CACHE_LINE_SIZE * 3]; - uint32_t head; /* Head sequence number (for multi thread operation) */ + RTE_ATOMIC(uint32_t) head; /* Head sequence number (for multi thread operation) */ char _pad2[RTE_CACHE_LINE_SIZE * 3]; struct opdl_stage *stage; /* back pointer */ - uint32_t tail; /* Tail sequence number */ + RTE_ATOMIC(uint32_t) tail; /* Tail sequence number */ char _pad3[RTE_CACHE_LINE_SIZE * 2]; }; @@ -149,10 +149,10 @@ struct opdl_ring { available(const struct opdl_stage *s) { if (s->threadsafe == true) { - uint32_t n = __atomic_load_n(&s->shared.available_seq, - __ATOMIC_ACQUIRE) - - __atomic_load_n(&s->shared.head, - __ATOMIC_ACQUIRE); + uint32_t n = rte_atomic_load_explicit(&s->shared.available_seq, + rte_memory_order_acquire) - + rte_atomic_load_explicit(&s->shared.head, + rte_memory_order_acquire); /* Return 0 if available_seq needs to be updated */ return (n <= s->num_slots) ? n : 0; @@ -168,7 +168,7 @@ struct opdl_ring { { uint32_t i; uint32_t this_tail = s->shared.tail; - uint32_t min_seq = __atomic_load_n(&s->deps[0]->tail, __ATOMIC_ACQUIRE); + uint32_t min_seq = rte_atomic_load_explicit(&s->deps[0]->tail, rte_memory_order_acquire); /* Input stage sequence numbers are greater than the sequence numbers of * its dependencies so an offset of t->num_slots is needed when * calculating available slots and also the condition which is used to @@ -179,16 +179,16 @@ struct opdl_ring { if (is_input_stage(s)) { wrap = s->num_slots; for (i = 1; i < s->num_deps; i++) { - uint32_t seq = __atomic_load_n(&s->deps[i]->tail, - __ATOMIC_ACQUIRE); + uint32_t seq = rte_atomic_load_explicit(&s->deps[i]->tail, + rte_memory_order_acquire); if ((this_tail - seq) > (this_tail - min_seq)) min_seq = seq; } } else { wrap = 0; for (i = 1; i < s->num_deps; i++) { - uint32_t seq = __atomic_load_n(&s->deps[i]->tail, - __ATOMIC_ACQUIRE); + uint32_t seq = rte_atomic_load_explicit(&s->deps[i]->tail, + rte_memory_order_acquire); if ((seq - this_tail) < (min_seq - this_tail)) min_seq = seq; } @@ -197,8 +197,8 @@ struct opdl_ring { if (s->threadsafe == false) s->available_seq = min_seq + wrap; else - __atomic_store_n(&s->shared.available_seq, min_seq + wrap, - __ATOMIC_RELEASE); + rte_atomic_store_explicit(&s->shared.available_seq, min_seq + wrap, + rte_memory_order_release); } /* Wait until the number of available slots reaches number requested */ @@ -298,7 +298,7 @@ struct opdl_ring { copy_entries_in(t, head, entries, num_entries); s->head += num_entries; - __atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE); + rte_atomic_store_explicit(&s->shared.tail, s->head, rte_memory_order_release); return num_entries; } @@ -381,18 +381,18 @@ struct opdl_ring { /* There should be no race condition here. If shared.tail * matches, no other core can update it until this one does. */ - if (__atomic_load_n(&s->shared.tail, __ATOMIC_ACQUIRE) == + if (rte_atomic_load_explicit(&s->shared.tail, rte_memory_order_acquire) == tail) { if (num_entries >= (head - tail)) { claim_mgr_remove(disclaims); - __atomic_store_n(&s->shared.tail, head, - __ATOMIC_RELEASE); + rte_atomic_store_explicit(&s->shared.tail, head, + rte_memory_order_release); num_entries -= (head - tail); } else { claim_mgr_move_tail(disclaims, num_entries); - __atomic_store_n(&s->shared.tail, + rte_atomic_store_explicit(&s->shared.tail, num_entries + tail, - __ATOMIC_RELEASE); + rte_memory_order_release); num_entries = 0; } } else if (block == false) @@ -420,7 +420,7 @@ struct opdl_ring { opdl_stage_disclaim_multithread_n(s, disclaims->num_to_disclaim, false); - *old_head = __atomic_load_n(&s->shared.head, __ATOMIC_ACQUIRE); + *old_head = rte_atomic_load_explicit(&s->shared.head, rte_memory_order_acquire); while (true) { bool success; /* If called by opdl_ring_input(), claim does not need to be @@ -440,11 +440,10 @@ struct opdl_ring { if (*num_entries == 0) return; - success = __atomic_compare_exchange_n(&s->shared.head, old_head, + success = rte_atomic_compare_exchange_weak_explicit(&s->shared.head, old_head, *old_head + *num_entries, - true, /* may fail spuriously */ - __ATOMIC_RELEASE, /* memory order on success */ - __ATOMIC_ACQUIRE); /* memory order on fail */ + rte_memory_order_release, /* memory order on success */ + rte_memory_order_acquire); /* memory order on fail */ if (likely(success)) break; rte_pause(); @@ -472,10 +471,11 @@ struct opdl_ring { /* If another thread started inputting before this one, but hasn't * finished, we need to wait for it to complete to update the tail. */ - rte_wait_until_equal_32(&s->shared.tail, old_head, __ATOMIC_ACQUIRE); + rte_wait_until_equal_32((uint32_t *)(uintptr_t)&s->shared.tail, old_head, + rte_memory_order_acquire); - __atomic_store_n(&s->shared.tail, old_head + num_entries, - __ATOMIC_RELEASE); + rte_atomic_store_explicit(&s->shared.tail, old_head + num_entries, + rte_memory_order_release); return num_entries; } @@ -525,8 +525,8 @@ struct opdl_ring { for (j = 0; j < num_entries; j++) { ev = (struct rte_event *)get_slot(t, s->head+j); - event = __atomic_load_n(&(ev->event), - __ATOMIC_ACQUIRE); + event = rte_atomic_load_explicit((uint64_t __rte_atomic *)&ev->event, + rte_memory_order_acquire); opa_id = OPDL_OPA_MASK & (event >> OPDL_OPA_OFFSET); flow_id = OPDL_FLOWID_MASK & event; @@ -627,8 +627,8 @@ struct opdl_ring { num_entries, s->head - old_tail); num_entries = s->head - old_tail; } - __atomic_store_n(&s->shared.tail, num_entries + old_tail, - __ATOMIC_RELEASE); + rte_atomic_store_explicit(&s->shared.tail, num_entries + old_tail, + rte_memory_order_release); } uint32_t @@ -657,7 +657,7 @@ struct opdl_ring { copy_entries_in(t, head, entries, num_entries); s->head += num_entries; - __atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE); + rte_atomic_store_explicit(&s->shared.tail, s->head, rte_memory_order_release); return num_entries; @@ -676,7 +676,7 @@ struct opdl_ring { copy_entries_out(t, head, entries, num_entries); s->head += num_entries; - __atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE); + rte_atomic_store_explicit(&s->shared.tail, s->head, rte_memory_order_release); return num_entries; } @@ -755,7 +755,7 @@ struct opdl_ring { return 0; } if (s->threadsafe == false) { - __atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE); + rte_atomic_store_explicit(&s->shared.tail, s->head, rte_memory_order_release); s->seq += s->num_claimed; s->shadow_head = s->head; s->num_claimed = 0; @@ -1008,8 +1008,8 @@ struct opdl_ring * ev_orig = (struct rte_event *) get_slot(t, s->shadow_head+i); - event = __atomic_load_n(&(ev_orig->event), - __ATOMIC_ACQUIRE); + event = rte_atomic_load_explicit((uint64_t __rte_atomic *)&ev_orig->event, + rte_memory_order_acquire); opa_id = OPDL_OPA_MASK & (event >> OPDL_OPA_OFFSET); flow_id = OPDL_FLOWID_MASK & event; @@ -1026,9 +1026,9 @@ struct opdl_ring * if ((event & OPDL_EVENT_MASK) != ev_temp) { - __atomic_store_n(&(ev_orig->event), - ev_update, - __ATOMIC_RELEASE); + rte_atomic_store_explicit( + (uint64_t __rte_atomic *)&ev_orig->event, + ev_update, rte_memory_order_release); ev_updated = true; } if (ev_orig->u64 != ev->u64) {