[1/2] use abstracted bit count functions

Message ID 20230825101812.2396339-1-david.marchand@redhat.com (mailing list archive)
State Accepted, archived
Delegated to: David Marchand
Series [1/2] use abstracted bit count functions

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

David Marchand Aug. 25, 2023, 10:18 a.m. UTC
  Now that DPDK provides such bit count functions, make use of them.

This patch was prepared with a "brutal" commandline:

$ old=__builtin_clzll; new=rte_clz64;
  git grep -lw $old :^lib/eal/include/rte_bitops.h |
  xargs sed -i -e "s#\<$old\>#$new#g"
$ old=__builtin_clz; new=rte_clz32;
  git grep -lw $old :^lib/eal/include/rte_bitops.h |
  xargs sed -i -e "s#\<$old\>#$new#g"

$ old=__builtin_ctzll; new=rte_ctz64;
  git grep -lw $old :^lib/eal/include/rte_bitops.h |
  xargs sed -i -e "s#\<$old\>#$new#g"
$ old=__builtin_ctz; new=rte_ctz32;
  git grep -lw $old :^lib/eal/include/rte_bitops.h |
  xargs sed -i -e "s#\<$old\>#$new#g"

$ old=__builtin_popcountll; new=rte_popcount64;
  git grep -lw $old :^lib/eal/include/rte_bitops.h |
  xargs sed -i -e "s#\<$old\>#$new#g"
$ old=__builtin_popcount; new=rte_popcount32;
  git grep -lw $old :^lib/eal/include/rte_bitops.h |
  xargs sed -i -e "s#\<$old\>#$new#g"

Then inclusion of rte_bitops.h was added where necessary.
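
For reference only (this sketch is not part of the patch), the rte_bitops.h
helpers are drop-in counterparts of the GCC builtins they replace. The example
below assumes the prototypes declared in lib/eal/include/rte_bitops.h and,
like the builtins, a non-zero argument for the clz/ctz variants; the cast to
unsigned int is only there so the printf format stays valid whatever the exact
return type is:

  #include <stdint.h>
  #include <stdio.h>

  #include <rte_bitops.h>

  int
  main(void)
  {
  	uint64_t offloads = UINT64_C(0x50); /* two example bits set */
  	uint32_t mask = 0x00ff0000u;        /* example field mask */

  	/* __builtin_ctzll(x)      -> rte_ctz64(x) */
  	printf("first set bit: %u\n", (unsigned int)rte_ctz64(offloads));
  	/* __builtin_clz(x)        -> rte_clz32(x) */
  	printf("leading zeros: %u\n", (unsigned int)rte_clz32(mask));
  	/* __builtin_popcountll(x) -> rte_popcount64(x) */
  	printf("bits set:      %u\n", (unsigned int)rte_popcount64(offloads));

  	return 0;
  }

The helpers are expected to be static inline wrappers, so building such a
snippet should only need the usual DPDK include path (e.g. via pkg-config
libdpdk), with no additional link dependency.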

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 app/proc-info/main.c                          |  2 +-
 app/test-fib/main.c                           |  8 +++---
 app/test-pipeline/pipeline_acl.c              |  4 +--
 app/test-pipeline/pipeline_lpm.c              |  4 +--
 app/test-pipeline/pipeline_lpm_ipv6.c         |  4 +--
 app/test-pmd/cmdline.c                        |  8 +++---
 app/test-pmd/config.c                         |  4 +--
 app/test/test_bitmap.c                        |  4 +--
 drivers/common/cnxk/roc_dev.c                 |  2 +-
 drivers/common/cnxk/roc_nix_tm.c              |  2 +-
 drivers/common/cnxk/roc_nix_tm_utils.c        |  2 +-
 drivers/common/cnxk/roc_npa.c                 |  2 +-
 drivers/common/cnxk/roc_npc.c                 |  2 +-
 drivers/common/cnxk/roc_npc_mcam.c            |  8 +++---
 drivers/common/idpf/idpf_common_rxtx_avx512.c |  6 ++---
 drivers/common/qat/qat_qp.c                   |  2 +-
 drivers/crypto/bcmfs/hw/bcmfs4_rm.c           |  2 +-
 drivers/crypto/bcmfs/hw/bcmfs5_rm.c           |  2 +-
 drivers/event/dlb2/dlb2.c                     | 10 +++----
 drivers/event/sw/sw_evdev_scheduler.c         |  2 +-
 drivers/ml/cnxk/cn10k_ml_ocm.c                |  2 +-
 drivers/net/bnxt/bnxt_rxtx_vec_avx2.c         |  2 +-
 drivers/net/bnxt/bnxt_rxtx_vec_sse.c          |  2 +-
 drivers/net/bnxt/tf_core/cfa_tcam_mgr_sbmp.h  | 14 +++++-----
 drivers/net/cnxk/cn10k_rx.h                   |  2 +-
 drivers/net/cnxk/cn9k_rx.h                    |  2 +-
 drivers/net/cnxk/cnxk_ethdev_sec.c            |  2 +-
 drivers/net/cxgbe/cxgbe_compat.h              |  2 +-
 drivers/net/fm10k/fm10k_rxtx_vec.c            |  2 +-
 drivers/net/hns3/hns3_ethdev.c                |  4 +--
 drivers/net/i40e/i40e_ethdev.h                |  2 +-
 drivers/net/i40e/i40e_rxtx_vec_altivec.c      |  2 +-
 drivers/net/i40e/i40e_rxtx_vec_avx2.c         |  4 +--
 drivers/net/i40e/i40e_rxtx_vec_avx512.c       |  4 +--
 drivers/net/i40e/i40e_rxtx_vec_sse.c          |  2 +-
 drivers/net/iavf/iavf_hash.c                  |  2 +-
 drivers/net/iavf/iavf_rxtx_vec_avx2.c         |  8 +++---
 drivers/net/iavf/iavf_rxtx_vec_avx512.c       |  8 +++---
 drivers/net/iavf/iavf_rxtx_vec_sse.c          |  4 +--
 drivers/net/ice/ice_ethdev.h                  |  2 +-
 drivers/net/ice/ice_hash.c                    |  2 +-
 drivers/net/ice/ice_rxtx_vec_avx2.c           |  4 +--
 drivers/net/ice/ice_rxtx_vec_avx512.c         |  4 +--
 drivers/net/ice/ice_rxtx_vec_sse.c            |  2 +-
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c       |  2 +-
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c        |  2 +-
 drivers/net/mlx5/hws/mlx5dr_buddy.c           |  2 +-
 drivers/net/mlx5/hws/mlx5dr_pool.c            |  2 +-
 drivers/net/mlx5/linux/mlx5_os.c              |  6 ++---
 drivers/net/mlx5/mlx5_flow.c                  |  2 +-
 drivers/net/mlx5/mlx5_flow_dv.c               | 12 ++++-----
 drivers/net/mlx5/mlx5_flow_hw.c               |  6 ++---
 drivers/net/mlx5/mlx5_flow_meter.c            |  2 +-
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h      |  4 +--
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h          |  4 +--
 drivers/net/mlx5/mlx5_utils.c                 |  4 +--
 drivers/net/mvpp2/mrvl_ethdev.c               |  2 +-
 drivers/net/netvsc/hn_rxtx.c                  |  2 +-
 drivers/net/virtio/virtio_rxtx.c              |  2 +-
 examples/bbdev_app/main.c                     |  4 +--
 examples/eventdev_pipeline/main.c             |  6 ++---
 examples/fips_validation/main.c               |  2 +-
 lib/acl/acl_bld.c                             |  2 +-
 lib/acl/acl_run_avx512.c                      |  4 +--
 lib/acl/acl_run_avx512_common.h               |  2 +-
 lib/bpf/bpf_validate.c                        |  2 +-
 lib/dmadev/rte_dmadev.c                       |  2 +-
 lib/eal/common/eal_common_fbarray.c           | 16 ++++++------
 lib/eal/common/rte_random.c                   |  5 ++--
 lib/eal/common/rte_reciprocal.c               |  4 +--
 lib/eal/common/rte_service.c                  |  2 +-
 lib/eal/linux/eal_vfio.c                      |  4 +--
 lib/ethdev/rte_ethdev.c                       |  4 +--
 lib/fib/dir24_8.c                             |  2 +-
 lib/hash/rte_thash.c                          |  4 +--
 lib/hash/rte_thash_x86_gfni.h                 |  2 +-
 lib/mldev/mldev_utils_scalar.c                |  2 +-
 lib/pipeline/rte_pipeline.c                   | 16 ++++++------
 lib/pipeline/rte_table_action.c               |  8 +++---
 lib/port/rte_port_ethdev.c                    |  8 +++---
 lib/port/rte_port_eventdev.c                  |  8 +++---
 lib/port/rte_port_fd.c                        |  8 +++---
 lib/port/rte_port_ras.c                       |  4 +--
 lib/port/rte_port_ring.c                      |  8 +++---
 lib/port/rte_port_sched.c                     |  4 +--
 lib/port/rte_port_source_sink.c               |  6 ++---
 lib/port/rte_port_sym_crypto.c                |  8 +++---
 lib/rib/rte_rib.c                             |  2 +-
 lib/rib/rte_rib6.c                            |  2 +-
 lib/sched/rte_sched.c                         |  2 +-
 lib/table/rte_swx_table_learner.c             |  6 ++---
 lib/table/rte_table_acl.c                     |  8 +++---
 lib/table/rte_table_array.c                   |  6 ++---
 lib/table/rte_table_hash_cuckoo.c             |  6 ++---
 lib/table/rte_table_hash_ext.c                | 20 +++++++-------
 lib/table/rte_table_hash_key16.c              | 26 +++++++++----------
 lib/table/rte_table_hash_key32.c              | 26 +++++++++----------
 lib/table/rte_table_hash_key8.c               | 26 +++++++++----------
 lib/table/rte_table_hash_lru.c                | 20 +++++++-------
 lib/table/rte_table_lpm.c                     |  6 ++---
 lib/table/rte_table_lpm_ipv6.c                |  6 ++---
 lib/table/rte_table_stub.c                    |  2 +-
 lib/vhost/iotlb.c                             |  2 +-
 lib/vhost/virtio_net.c                        |  2 +-
 104 files changed, 267 insertions(+), 266 deletions(-)
  

Comments

Tyler Retzlaff Aug. 25, 2023, 4:35 p.m. UTC | #1
On Fri, Aug 25, 2023 at 12:18:10PM +0200, David Marchand wrote:
> Now that DPDK provides such bit count functions, make use of them.
> 
> This patch was prepared with a "brutal" commandline:
> 
> $ old=__builtin_clzll; new=rte_clz64;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> $ old=__builtin_clz; new=rte_clz32;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> 
> $ old=__builtin_ctzll; new=rte_ctz64;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> $ old=__builtin_ctz; new=rte_ctz32;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> 
> $ old=__builtin_popcountll; new=rte_popcount64;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> $ old=__builtin_popcount; new=rte_popcount32;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> 
> Then inclusion of rte_bitops.h was added where necessary.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---

These kinds of patches always make me dizzy. I did a pass and cannot see
any replacement problems, so lgtm.

Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
  
Long Li Aug. 26, 2023, 12:06 a.m. UTC | #2
> Subject: [PATCH 1/2] use abstracted bit count functions
> 
> Now that DPDK provides such bit count functions, make use of them.
> 
> This patch was prepared with a "brutal" commandline:
> 
> $ old=__builtin_clzll; new=rte_clz64;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> $ old=__builtin_clz; new=rte_clz32;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> 
> $ old=__builtin_ctzll; new=rte_ctz64;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> $ old=__builtin_ctz; new=rte_ctz32;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> 
> $ old=__builtin_popcountll; new=rte_popcount64;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> $ old=__builtin_popcount; new=rte_popcount32;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> 
> Then inclusion of rte_bitops.h was added where necessary.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>

Patch looks good for netvsc.

Reviewed-by: Long Li <longli@microsoft.com>
  
David Marchand Sept. 5, 2023, 3:33 p.m. UTC | #3
On Fri, Aug 25, 2023 at 12:18 PM David Marchand
<david.marchand@redhat.com> wrote:
>
> Now that DPDK provides such bit count functions, make use of them.
>
> This patch was prepared with a "brutal" commandline:
>
> $ old=__builtin_clzll; new=rte_clz64;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> $ old=__builtin_clz; new=rte_clz32;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
>
> $ old=__builtin_ctzll; new=rte_ctz64;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> $ old=__builtin_ctz; new=rte_ctz32;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
>
> $ old=__builtin_popcountll; new=rte_popcount64;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
> $ old=__builtin_popcount; new=rte_popcount32;
>   git grep -lw $old :^lib/eal/include/rte_bitops.h |
>   xargs sed -i -e "s#\<$old\>#$new#g"
>
> Then inclusion of rte_bitops.h was added where necessary.
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>

Series applied.
  

Patch

diff --git a/app/proc-info/main.c b/app/proc-info/main.c
index 88cee0ca48..af4c1d8bcb 100644
--- a/app/proc-info/main.c
+++ b/app/proc-info/main.c
@@ -990,7 +990,7 @@  show_offloads(uint64_t offloads,
 {
 	printf(" offloads :");
 	while (offloads != 0) {
-		uint64_t offload_flag = 1ULL << __builtin_ctzll(offloads);
+		uint64_t offload_flag = 1ULL << rte_ctz64(offloads);
 		printf(" %s", show_offload(offload_flag));
 		offloads &= ~offload_flag;
 	}
diff --git a/app/test-fib/main.c b/app/test-fib/main.c
index eafd4e2be0..75a56135f2 100644
--- a/app/test-fib/main.c
+++ b/app/test-fib/main.c
@@ -864,7 +864,7 @@  run_v4(void)
 	conf.max_routes = config.nb_routes * 2;
 	conf.rib_ext_sz = 0;
 	if (conf.type == RTE_FIB_DIR24_8) {
-		conf.dir24_8.nh_sz = __builtin_ctz(config.ent_sz);
+		conf.dir24_8.nh_sz = rte_ctz32(config.ent_sz);
 		conf.dir24_8.num_tbl8 = RTE_MIN(config.tbl8,
 			get_max_nh(conf.dir24_8.nh_sz));
 	}
@@ -1065,7 +1065,7 @@  run_v6(void)
 	conf.max_routes = config.nb_routes * 2;
 	conf.rib_ext_sz = 0;
 	if (conf.type == RTE_FIB6_TRIE) {
-		conf.trie.nh_sz = __builtin_ctz(config.ent_sz);
+		conf.trie.nh_sz = rte_ctz32(config.ent_sz);
 		conf.trie.num_tbl8 = RTE_MIN(config.tbl8,
 			get_max_nh(conf.trie.nh_sz));
 	}
@@ -1293,12 +1293,12 @@  main(int argc, char **argv)
 				"Bad routes distribution configuration\n");
 		if (af == AF_INET) {
 			gen_random_rt_4(config.rt,
-				__builtin_ctz(config.ent_sz));
+				rte_ctz32(config.ent_sz));
 			if (config.flags & SHUFFLE_FLAG)
 				shuffle_rt_4(config.rt, config.nb_routes);
 		} else {
 			gen_random_rt_6(config.rt,
-				__builtin_ctz(config.ent_sz));
+				rte_ctz32(config.ent_sz));
 			if (config.flags & SHUFFLE_FLAG)
 				shuffle_rt_6(config.rt, config.nb_routes);
 		}
diff --git a/app/test-pipeline/pipeline_acl.c b/app/test-pipeline/pipeline_acl.c
index 5857bc285f..2f04868e3e 100644
--- a/app/test-pipeline/pipeline_acl.c
+++ b/app/test-pipeline/pipeline_acl.c
@@ -188,9 +188,9 @@  app_main_loop_worker_pipeline_acl(void) {
 		rule_params.field_value[SRC_FIELD_IPV4].value.u32 = 0;
 		rule_params.field_value[SRC_FIELD_IPV4].mask_range.u32 = 0;
 		rule_params.field_value[DST_FIELD_IPV4].value.u32 =
-			i << (24 - __builtin_popcount(app.n_ports - 1));
+			i << (24 - rte_popcount32(app.n_ports - 1));
 		rule_params.field_value[DST_FIELD_IPV4].mask_range.u32 =
-			8 + __builtin_popcount(app.n_ports - 1);
+			8 + rte_popcount32(app.n_ports - 1);
 		rule_params.field_value[SRCP_FIELD_IPV4].value.u16 = 0;
 		rule_params.field_value[SRCP_FIELD_IPV4].mask_range.u16 =
 			UINT16_MAX;
diff --git a/app/test-pipeline/pipeline_lpm.c b/app/test-pipeline/pipeline_lpm.c
index 8add5e71b7..854319174b 100644
--- a/app/test-pipeline/pipeline_lpm.c
+++ b/app/test-pipeline/pipeline_lpm.c
@@ -123,8 +123,8 @@  app_main_loop_worker_pipeline_lpm(void) {
 		};
 
 		struct rte_table_lpm_key key = {
-			.ip = i << (24 - __builtin_popcount(app.n_ports - 1)),
-			.depth = 8 + __builtin_popcount(app.n_ports - 1),
+			.ip = i << (24 - rte_popcount32(app.n_ports - 1)),
+			.depth = 8 + rte_popcount32(app.n_ports - 1),
 		};
 
 		struct rte_pipeline_table_entry *entry_ptr;
diff --git a/app/test-pipeline/pipeline_lpm_ipv6.c b/app/test-pipeline/pipeline_lpm_ipv6.c
index 26b325180d..18d4f018f1 100644
--- a/app/test-pipeline/pipeline_lpm_ipv6.c
+++ b/app/test-pipeline/pipeline_lpm_ipv6.c
@@ -123,10 +123,10 @@  app_main_loop_worker_pipeline_lpm_ipv6(void) {
 		uint32_t ip;
 		int key_found, status;
 
-		key.depth = 8 + __builtin_popcount(app.n_ports - 1);
+		key.depth = 8 + rte_popcount32(app.n_ports - 1);
 
 		ip = rte_bswap32(i << (24 -
-			__builtin_popcount(app.n_ports - 1)));
+			rte_popcount32(app.n_ports - 1)));
 		memcpy(key.ip, &ip, sizeof(uint32_t));
 
 		printf("Adding rule to IPv6 LPM table (IPv6 destination = "
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 0d0723f659..a0e97719b3 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -10893,8 +10893,8 @@  print_rx_offloads(uint64_t offloads)
 	if (offloads == 0)
 		return;
 
-	begin = __builtin_ctzll(offloads);
-	end = sizeof(offloads) * CHAR_BIT - __builtin_clzll(offloads);
+	begin = rte_ctz64(offloads);
+	end = sizeof(offloads) * CHAR_BIT - rte_clz64(offloads);
 
 	single_offload = 1ULL << begin;
 	for (bit = begin; bit < end; bit++) {
@@ -11312,8 +11312,8 @@  print_tx_offloads(uint64_t offloads)
 	if (offloads == 0)
 		return;
 
-	begin = __builtin_ctzll(offloads);
-	end = sizeof(offloads) * CHAR_BIT - __builtin_clzll(offloads);
+	begin = rte_ctz64(offloads);
+	end = sizeof(offloads) * CHAR_BIT - rte_clz64(offloads);
 
 	single_offload = 1ULL << begin;
 	for (bit = begin; bit < end; bit++) {
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 11f3a22048..3d1da99307 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -699,8 +699,8 @@  print_dev_capabilities(uint64_t capabilities)
 	if (capabilities == 0)
 		return;
 
-	begin = __builtin_ctzll(capabilities);
-	end = sizeof(capabilities) * CHAR_BIT - __builtin_clzll(capabilities);
+	begin = rte_ctz64(capabilities);
+	end = sizeof(capabilities) * CHAR_BIT - rte_clz64(capabilities);
 
 	single_capa = 1ULL << begin;
 	for (bit = begin; bit < end; bit++) {
diff --git a/app/test/test_bitmap.c b/app/test/test_bitmap.c
index 9a0536c805..bab11812c7 100644
--- a/app/test/test_bitmap.c
+++ b/app/test/test_bitmap.c
@@ -91,7 +91,7 @@  test_bitmap_scan_operations(struct rte_bitmap *bmp)
 	start_pos = pos;
 	nb_set = 0;
 	do {
-		nb_set += __builtin_popcountll(out_slab);
+		nb_set += rte_popcount64(out_slab);
 		if (!rte_bitmap_scan(bmp, &pos, &out_slab))
 			break;
 	} while (pos != start_pos);
@@ -245,7 +245,7 @@  test_bitmap_all_set(void)
 			printf("Failed with init bitmap.\n");
 			return TEST_FAILED;
 		}
-		pos += (slab ? __builtin_ctzll(slab) : 0);
+		pos += (slab ? rte_ctz64(slab) : 0);
 		rte_bitmap_clear(bmp, pos);
 	}
 
diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c
index 4b0ba218ed..13d3eece47 100644
--- a/drivers/common/cnxk/roc_dev.c
+++ b/drivers/common/cnxk/roc_dev.c
@@ -1155,7 +1155,7 @@  dev_active_vfs(struct dev *dev)
 	int i, count = 0;
 
 	for (i = 0; i < MAX_VFPF_DWORD_BITS; i++)
-		count += __builtin_popcount(dev->active_vfs[i]);
+		count += rte_popcount32(dev->active_vfs[i]);
 
 	return count;
 }
diff --git a/drivers/common/cnxk/roc_nix_tm.c b/drivers/common/cnxk/roc_nix_tm.c
index c104611355..fa73dfbacf 100644
--- a/drivers/common/cnxk/roc_nix_tm.c
+++ b/drivers/common/cnxk/roc_nix_tm.c
@@ -11,7 +11,7 @@  bitmap_ctzll(uint64_t slab)
 	if (slab == 0)
 		return 0;
 
-	return __builtin_ctzll(slab);
+	return rte_ctz64(slab);
 }
 
 void
diff --git a/drivers/common/cnxk/roc_nix_tm_utils.c b/drivers/common/cnxk/roc_nix_tm_utils.c
index 3840d6d457..275afffca1 100644
--- a/drivers/common/cnxk/roc_nix_tm_utils.c
+++ b/drivers/common/cnxk/roc_nix_tm_utils.c
@@ -927,7 +927,7 @@  nix_tm_resource_avail(struct nix *nix, uint8_t hw_lvl, bool contig)
 	/* Count bit set */
 	start_pos = pos;
 	do {
-		count += __builtin_popcountll(slab);
+		count += rte_popcount64(slab);
 		if (!plt_bitmap_scan(bmp, &pos, &slab))
 			break;
 	} while (pos != start_pos);
diff --git a/drivers/common/cnxk/roc_npa.c b/drivers/common/cnxk/roc_npa.c
index 3b9a70028b..a7d096f1fd 100644
--- a/drivers/common/cnxk/roc_npa.c
+++ b/drivers/common/cnxk/roc_npa.c
@@ -398,7 +398,7 @@  bitmap_ctzll(uint64_t slab)
 	if (slab == 0)
 		return 0;
 
-	return __builtin_ctzll(slab);
+	return rte_ctz64(slab);
 }
 
 static int
diff --git a/drivers/common/cnxk/roc_npc.c b/drivers/common/cnxk/roc_npc.c
index 848086c8de..86f4d85350 100644
--- a/drivers/common/cnxk/roc_npc.c
+++ b/drivers/common/cnxk/roc_npc.c
@@ -1398,7 +1398,7 @@  roc_npc_sdp_channel_get(struct roc_npc *roc_npc, uint16_t *chan_base, uint16_t *
 	num_chan = nix->rx_chan_cnt - 1;
 	if (num_chan) {
 		range = *chan_base ^ (*chan_base + num_chan);
-		num_bits = (sizeof(uint32_t) * 8) - __builtin_clz(range) - 1;
+		num_bits = (sizeof(uint32_t) * 8) - rte_clz32(range) - 1;
 		/* Set mask for (15 - numbits) MSB bits */
 		*chan_mask = (uint16_t)~GENMASK(num_bits, 0);
 	} else {
diff --git a/drivers/common/cnxk/roc_npc_mcam.c b/drivers/common/cnxk/roc_npc_mcam.c
index 62e0ce21b2..8ec4bef472 100644
--- a/drivers/common/cnxk/roc_npc_mcam.c
+++ b/drivers/common/cnxk/roc_npc_mcam.c
@@ -745,7 +745,7 @@  npc_mcam_alloc_and_write(struct npc *npc, struct roc_npc_flow *flow, struct npc_
 		 * For all other rules, set LA LTYPE to match both 1st pass and 2nd pass ltypes.
 		 */
 		if (pst->is_second_pass_rule || (!pst->is_second_pass_rule && pst->has_eth_type)) {
-			la_offset = __builtin_popcount(npc->keyx_supp_nmask[flow->nix_intf] &
+			la_offset = rte_popcount32(npc->keyx_supp_nmask[flow->nix_intf] &
 						       ((1ULL << 9 /* LA offset */) - 1));
 			la_offset *= 4;
 
@@ -790,7 +790,7 @@  npc_set_vlan_ltype(struct npc_parse_state *pst)
 	uint8_t lb_offset;
 
 	lb_offset =
-		__builtin_popcount(pst->npc->keyx_supp_nmask[pst->nix_intf] &
+		rte_popcount32(pst->npc->keyx_supp_nmask[pst->nix_intf] &
 				   ((1ULL << NPC_LTYPE_LB_OFFSET) - 1));
 	lb_offset *= 4;
 
@@ -812,7 +812,7 @@  npc_set_ipv6ext_ltype_mask(struct npc_parse_state *pst)
 	uint64_t val, mask;
 
 	lc_offset =
-		__builtin_popcount(pst->npc->keyx_supp_nmask[pst->nix_intf] &
+		rte_popcount32(pst->npc->keyx_supp_nmask[pst->nix_intf] &
 				   ((1ULL << NPC_LTYPE_LC_OFFSET) - 1));
 	lc_offset *= 4;
 
@@ -835,7 +835,7 @@  npc_set_ipv6ext_ltype_mask(struct npc_parse_state *pst)
 	 * zero in LFLAG.
 	 */
 	if (pst->npc->keyx_supp_nmask[pst->nix_intf] & (1ULL << NPC_LFLAG_LC_OFFSET)) {
-		lcflag_offset = __builtin_popcount(pst->npc->keyx_supp_nmask[pst->nix_intf] &
+		lcflag_offset = rte_popcount32(pst->npc->keyx_supp_nmask[pst->nix_intf] &
 						   ((1ULL << NPC_LFLAG_LC_OFFSET) - 1));
 		lcflag_offset *= 4;
 
diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c b/drivers/common/idpf/idpf_common_rxtx_avx512.c
index 81312617cc..2ac46fb1d2 100644
--- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
@@ -505,11 +505,11 @@  _idpf_singleq_recv_raw_pkts_avx512(struct idpf_rx_queue *rxq,
 		status0_7 = _mm256_packs_epi32(status0_7,
 					       _mm256_setzero_si256());
 
-		uint64_t burst = __builtin_popcountll
+		uint64_t burst = rte_popcount64
 					(_mm_cvtsi128_si64
 						(_mm256_extracti128_si256
 							(status0_7, 1)));
-		burst += __builtin_popcountll
+		burst += rte_popcount64
 				(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
@@ -966,7 +966,7 @@  _idpf_splitq_recv_raw_pkts_avx512(struct idpf_rx_queue *rxq,
 			_mm512_and_epi64(raw_gen0_7, gen_check),
 			_mm512_set1_epi64((uint64_t)rxq->expected_gen_id << 46));
 		const __mmask8 recv_mask = _kand_mask8(dd_mask, gen_mask);
-		uint16_t burst = __builtin_popcount(_cvtmask8_u32(recv_mask));
+		uint16_t burst = rte_popcount32(_cvtmask8_u32(recv_mask));
 
 		received += burst;
 		if (burst != IDPF_DESCS_PER_LOOP_AVX)
diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c
index f284718441..f95dd33375 100644
--- a/drivers/common/qat/qat_qp.c
+++ b/drivers/common/qat/qat_qp.c
@@ -286,7 +286,7 @@  qat_queue_create(struct qat_pci_device *qat_dev, struct qat_queue *queue,
 	queue->msg_size = desc_size;
 
 	/* For fast calculation of cookie index, relies on msg_size being 2^n */
-	queue->trailz = __builtin_ctz(desc_size);
+	queue->trailz = rte_ctz32(desc_size);
 
 	/*
 	 * Write an unused pattern to the queue memory.
diff --git a/drivers/crypto/bcmfs/hw/bcmfs4_rm.c b/drivers/crypto/bcmfs/hw/bcmfs4_rm.c
index 0ccb111898..9a30c654da 100644
--- a/drivers/crypto/bcmfs/hw/bcmfs4_rm.c
+++ b/drivers/crypto/bcmfs/hw/bcmfs4_rm.c
@@ -473,7 +473,7 @@  bcmfs4_enqueue_single_request_qp(struct bcmfs_qp *qp, void *op)
 		return -ERANGE;
 	}
 
-	reqid = pos + __builtin_ctzll(slab);
+	reqid = pos + rte_ctz64(slab);
 	rte_bitmap_clear(qp->ctx_bmp, reqid);
 	qp->ctx_pool[reqid] = (unsigned long)msg;
 
diff --git a/drivers/crypto/bcmfs/hw/bcmfs5_rm.c b/drivers/crypto/bcmfs/hw/bcmfs5_rm.c
index c677c0cd9b..cbfe42cb47 100644
--- a/drivers/crypto/bcmfs/hw/bcmfs5_rm.c
+++ b/drivers/crypto/bcmfs/hw/bcmfs5_rm.c
@@ -404,7 +404,7 @@  bcmfs5_enqueue_single_request_qp(struct bcmfs_qp *qp, void *op)
 		return -ERANGE;
 	}
 
-	reqid = pos + __builtin_ctzll(slab);
+	reqid = pos + rte_ctz64(slab);
 	rte_bitmap_clear(qp->ctx_bmp, reqid);
 	qp->ctx_pool[reqid] = (unsigned long)msg;
 
diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c
index 60c5cd4804..6986938d4e 100644
--- a/drivers/event/dlb2/dlb2.c
+++ b/drivers/event/dlb2/dlb2.c
@@ -1679,7 +1679,7 @@  dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
 	else
 		qm_port->cq_depth_mask = qm_port->cq_depth - 1;
 
-	qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
+	qm_port->gen_bit_shift = rte_popcount32(qm_port->cq_depth_mask);
 	/* starting value of gen bit - it toggles at wrap time */
 	qm_port->gen_bit = 1;
 
@@ -1893,7 +1893,7 @@  dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
 	else
 		qm_port->cq_depth_mask = cfg.cq_depth - 1;
 
-	qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
+	qm_port->gen_bit_shift = rte_popcount32(qm_port->cq_depth_mask);
 	/* starting value of gen bit - it toggles at wrap time */
 	qm_port->gen_bit = 1;
 	dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
@@ -3695,7 +3695,7 @@  dlb2_recv_qe_sparse(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe)
 	/* Mask off gen bits we don't care about */
 	gen_bits &= and_mask;
 
-	return __builtin_popcount(gen_bits);
+	return rte_popcount32(gen_bits);
 }
 
 static inline void
@@ -3946,7 +3946,7 @@  dlb2_recv_qe_sparse_vec(struct dlb2_port *qm_port, void *events,
 	 */
 	uint64_t rolling = qm_port->cq_rolling_mask & 0xF;
 	uint64_t qe_xor_bits = (qe_gen_bits ^ rolling);
-	uint32_t count_new = __builtin_popcount(qe_xor_bits);
+	uint32_t count_new = rte_popcount32(qe_xor_bits);
 	count_new = RTE_MIN(count_new, max_events);
 	if (!count_new)
 		return 0;
@@ -4122,7 +4122,7 @@  dlb2_recv_qe(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe,
 	/* Mask off gen bits we don't care about */
 	gen_bits &= and_mask[*offset];
 
-	return __builtin_popcount(gen_bits);
+	return rte_popcount32(gen_bits);
 }
 
 static inline int16_t
diff --git a/drivers/event/sw/sw_evdev_scheduler.c b/drivers/event/sw/sw_evdev_scheduler.c
index 8bc21944f5..de6ed21643 100644
--- a/drivers/event/sw/sw_evdev_scheduler.c
+++ b/drivers/event/sw/sw_evdev_scheduler.c
@@ -15,7 +15,7 @@ 
  * CLZ twice is faster than caching the value due to data dependencies
  */
 #define PKT_MASK_TO_IQ(pkts) \
-	(__builtin_ctz(pkts | (1 << SW_IQS_MAX)))
+	(rte_ctz32(pkts | (1 << SW_IQS_MAX)))
 
 #if SW_IQS_MAX != 4
 #error Misconfigured PRIO_TO_IQ caused by SW_IQS_MAX value change
diff --git a/drivers/ml/cnxk/cn10k_ml_ocm.c b/drivers/ml/cnxk/cn10k_ml_ocm.c
index 93505c9c09..6fb0bb620e 100644
--- a/drivers/ml/cnxk/cn10k_ml_ocm.c
+++ b/drivers/ml/cnxk/cn10k_ml_ocm.c
@@ -494,7 +494,7 @@  cn10k_ml_ocm_print(struct rte_ml_dev *dev, FILE *fp)
 		wb_pages = 0 - ocm->tile_ocm_info[tile_id].scratch_pages;
 		for (word_id = 0; word_id < mldev->ocm.mask_words; word_id++)
 			wb_pages +=
-				__builtin_popcount(ocm->tile_ocm_info[tile_id].ocm_mask[word_id]);
+				rte_popcount32(ocm->tile_ocm_info[tile_id].ocm_mask[word_id]);
 
 		fprintf(fp,
 			"tile = %2u, scratch_pages = %4u,"
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c b/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c
index d4e8e8eb87..ea8dbaffba 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c
@@ -261,7 +261,7 @@  recv_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		valid = _mm_cvtsi128_si64(_mm256_extracti128_si256(info3_v, 1));
 		valid = (valid << CHAR_BIT) |
 			_mm_cvtsi128_si64(_mm256_castsi256_si128(info3_v));
-		num_valid = __builtin_popcountll(valid & desc_valid_mask);
+		num_valid = rte_popcount64(valid & desc_valid_mask);
 
 		if (num_valid == 0)
 			break;
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index 2ad8591b90..e99a547f58 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -259,7 +259,7 @@  recv_burst_vec_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		 * the number of valid descriptors.
 		 */
 		valid = _mm_cvtsi128_si64(_mm_packs_epi32(info3_v, info3_v));
-		num_valid = __builtin_popcountll(valid & desc_valid_mask);
+		num_valid = rte_popcount64(valid & desc_valid_mask);
 
 		if (num_valid == 0)
 			break;
diff --git a/drivers/net/bnxt/tf_core/cfa_tcam_mgr_sbmp.h b/drivers/net/bnxt/tf_core/cfa_tcam_mgr_sbmp.h
index 6ad158abe8..78c6c2ab67 100644
--- a/drivers/net/bnxt/tf_core/cfa_tcam_mgr_sbmp.h
+++ b/drivers/net/bnxt/tf_core/cfa_tcam_mgr_sbmp.h
@@ -39,7 +39,7 @@  struct sbmp {
 #define SBMP_CLEAR(bm)                  (SBMP_WORD_GET(bm, 0) = 0)
 #define SBMP_IS_NULL(bm)		(SBMP_WORD_GET(bm, 0) == 0)
 #define	SBMP_COUNT(bm, count)	\
-	(count = __builtin_popcount(SBMP_WORD_GET(bm, 0)))
+	(count = rte_popcount32(SBMP_WORD_GET(bm, 0)))
 #elif SBMP_WORD_MAX == 2
 #define	SBMP_WENT(session)		((session) / SBMP_WORD_WIDTH)
 #define	SBMP_WBIT(session)		(1U << ((session) % SBMP_WORD_WIDTH))
@@ -53,8 +53,8 @@  struct sbmp {
 #define	SBMP_COUNT(bm, count)						\
 	do {								\
 		typeof(bm) *_bm = &(bm);				\
-		count = __builtin_popcount(SBMP_WORD_GET(*_bm, 0)) +	\
-			__builtin_popcount(SBMP_WORD_GET(*_bm, 1)));	\
+		count = rte_popcount32(SBMP_WORD_GET(*_bm, 0)) +	\
+			rte_popcount32(SBMP_WORD_GET(*_bm, 1)));	\
 	} while (0)
 #elif SBMP_WORD_MAX == 3
 #define	SBMP_WENT(session)		((session) / SBMP_WORD_WIDTH)
@@ -71,9 +71,9 @@  struct sbmp {
 #define	SBMP_COUNT(bm, count)						\
 	do {								\
 		typeof(bm) *_bm = &(bm);				\
-		count = __builtin_popcount(SBMP_WORD_GET(*_bm, 0)) +	\
-			__builtin_popcount(SBMP_WORD_GET(*_bm, 1)) +	\
-			__builtin_popcount(SBMP_WORD_GET(*_bm, 2));	\
+		count = rte_popcount32(SBMP_WORD_GET(*_bm, 0)) +	\
+			rte_popcount32(SBMP_WORD_GET(*_bm, 1)) +	\
+			rte_popcount32(SBMP_WORD_GET(*_bm, 2));	\
 	} while (0)
 #else  /* SBMP_WORD_MAX > 3 */
 #define	SBMP_WENT(session)		((session) / SBMP_WORD_WIDTH)
@@ -93,7 +93,7 @@  struct sbmp {
 		int	_count, _w;					\
 		_count = 0;						\
 		for (_w = 0; _w < SBMP_WORD_MAX; _w++) {		\
-			_count += __builtin_popcount(SBMP_WORD_GET(*_bm, _w)); \
+			_count += rte_popcount32(SBMP_WORD_GET(*_bm, _w)); \
 		}							\
 		count = _count;						\
 	} while (0)
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 8148866e44..3bf89b8c6c 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -1715,7 +1715,7 @@  cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts,
 				 * timestamp.
 				 */
 				tstamp->rx_ready = 1;
-				tstamp->rx_tstamp = ts[31 - __builtin_clz(res)];
+				tstamp->rx_tstamp = ts[31 - rte_clz32(res)];
 			}
 		}
 
diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
index 4d476d0a02..d8bb65c643 100644
--- a/drivers/net/cnxk/cn9k_rx.h
+++ b/drivers/net/cnxk/cn9k_rx.h
@@ -788,7 +788,7 @@  cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
 				 */
 				rxq->tstamp->rx_ready = 1;
 				rxq->tstamp->rx_tstamp =
-					ts[31 - __builtin_clz(res)];
+					ts[31 - rte_clz32(res)];
 			}
 		}
 
diff --git a/drivers/net/cnxk/cnxk_ethdev_sec.c b/drivers/net/cnxk/cnxk_ethdev_sec.c
index dc17c128de..b02dac4952 100644
--- a/drivers/net/cnxk/cnxk_ethdev_sec.c
+++ b/drivers/net/cnxk/cnxk_ethdev_sec.c
@@ -36,7 +36,7 @@  bitmap_ctzll(uint64_t slab)
 	if (slab == 0)
 		return 0;
 
-	return __builtin_ctzll(slab);
+	return rte_ctz64(slab);
 }
 
 int
diff --git a/drivers/net/cxgbe/cxgbe_compat.h b/drivers/net/cxgbe/cxgbe_compat.h
index 8d3737fc61..0b02eb62a9 100644
--- a/drivers/net/cxgbe/cxgbe_compat.h
+++ b/drivers/net/cxgbe/cxgbe_compat.h
@@ -198,7 +198,7 @@  static inline uint8_t hweight32(uint32_t word32)
  */
 static inline int cxgbe_fls(int x)
 {
-	return x ? sizeof(x) * 8 - __builtin_clz(x) : 0;
+	return x ? sizeof(x) * 8 - rte_clz32(x) : 0;
 }
 
 static inline unsigned long ilog2(unsigned long n)
diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c b/drivers/net/fm10k/fm10k_rxtx_vec.c
index dfc4abe3e3..2b6914b1da 100644
--- a/drivers/net/fm10k/fm10k_rxtx_vec.c
+++ b/drivers/net/fm10k/fm10k_rxtx_vec.c
@@ -565,7 +565,7 @@  fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		fm10k_desc_to_pktype_v(descs0, &rx_pkts[pos]);
 
 		/* C.4 calc available number of desc */
-		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
+		var = rte_popcount64(_mm_cvtsi128_si64(staterr));
 		nb_pkts_recd += var;
 		if (likely(var != RTE_FM10K_DESCS_PER_LOOP))
 			break;
diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c
index 95c72e86aa..964f47f164 100644
--- a/drivers/net/hns3/hns3_ethdev.c
+++ b/drivers/net/hns3/hns3_ethdev.c
@@ -5936,7 +5936,7 @@  hns3_fec_get_capability(struct rte_eth_dev *dev,
 
 	speed_capa = hns3_get_speed_capa(hw);
 	/* speed_num counts number of speed capabilities */
-	speed_num = __builtin_popcount(speed_capa & HNS3_SPEEDS_SUPP_FEC);
+	speed_num = rte_popcount32(speed_capa & HNS3_SPEEDS_SUPP_FEC);
 	if (speed_num == 0)
 		return -ENOTSUP;
 
@@ -6143,7 +6143,7 @@  hns3_fec_mode_valid(struct rte_eth_dev *dev, uint32_t mode)
 	struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(hns);
 	uint32_t cur_capa;
 
-	if (__builtin_popcount(mode) != 1) {
+	if (rte_popcount32(mode) != 1) {
 		hns3_err(hw, "FEC mode(0x%x) should be only one bit set", mode);
 		return -EINVAL;
 	}
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 6f65d5e0ac..8d7e50287f 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1492,7 +1492,7 @@  i40e_align_floor(int n)
 {
 	if (n == 0)
 		return 0;
-	return 1 << (sizeof(n) * CHAR_BIT - 1 - __builtin_clz(n));
+	return 1 << (sizeof(n) * CHAR_BIT - 1 - rte_clz32(n));
 }
 
 static inline uint16_t
diff --git a/drivers/net/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
index 4cd78f4e58..b6b0d38ec1 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
@@ -432,7 +432,7 @@  _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		desc_to_olflags_v(descs, &rx_pkts[pos]);
 
 		/* C.4 calc available number of desc */
-		var = __builtin_popcountll((vec_ld(0,
+		var = rte_popcount64((vec_ld(0,
 			(__vector unsigned long *)&staterr)[0]));
 		nb_pkts_recd += var;
 		if (likely(var != RTE_I40E_DESCS_PER_LOOP))
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
index 761edb9d20..f468c1fd90 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
@@ -605,9 +605,9 @@  _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		status0_7 = _mm256_packs_epi32(status0_7,
 				_mm256_setzero_si256());
 
-		uint64_t burst = __builtin_popcountll(_mm_cvtsi128_si64(
+		uint64_t burst = rte_popcount64(_mm_cvtsi128_si64(
 				_mm256_extracti128_si256(status0_7, 1)));
-		burst += __builtin_popcountll(_mm_cvtsi128_si64(
+		burst += rte_popcount64(_mm_cvtsi128_si64(
 				_mm256_castsi256_si128(status0_7)));
 		received += burst;
 		if (burst != RTE_I40E_DESCS_PER_LOOP_AVX)
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index ad0893324d..f3050cd06c 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -654,11 +654,11 @@  _recv_raw_pkts_vec_avx512(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		status0_7 = _mm256_packs_epi32
 			(status0_7, _mm256_setzero_si256());
 
-		uint64_t burst = __builtin_popcountll
+		uint64_t burst = rte_popcount64
 				(_mm_cvtsi128_si64
 					(_mm256_extracti128_si256
 						(status0_7, 1)));
-		burst += __builtin_popcountll(_mm_cvtsi128_si64
+		burst += rte_popcount64(_mm_cvtsi128_si64
 				(_mm256_castsi256_si128(status0_7)));
 		received += burst;
 		if (burst != RTE_I40E_DESCS_PER_LOOP_AVX)
diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c b/drivers/net/i40e/i40e_rxtx_vec_sse.c
index b94c37cbb8..9200a23ff6 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
@@ -579,7 +579,7 @@  _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 				 pkt_mb1);
 		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 		/* C.4 calc available number of desc */
-		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
+		var = rte_popcount64(_mm_cvtsi128_si64(staterr));
 		nb_pkts_recd += var;
 		if (likely(var != RTE_I40E_DESCS_PER_LOOP))
 			break;
diff --git a/drivers/net/iavf/iavf_hash.c b/drivers/net/iavf/iavf_hash.c
index cf4d677101..217f0500ab 100644
--- a/drivers/net/iavf/iavf_hash.c
+++ b/drivers/net/iavf/iavf_hash.c
@@ -1398,7 +1398,7 @@  iavf_any_invalid_rss_type(enum rte_eth_hash_function rss_func,
 
 	/* check invalid combination */
 	for (i = 0; i < RTE_DIM(invalid_rss_comb); i++) {
-		if (__builtin_popcountll(rss_type & invalid_rss_comb[i]) > 1)
+		if (rte_popcount64(rss_type & invalid_rss_comb[i]) > 1)
 			return true;
 	}
 
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index c10f24036e..510b4d8f1c 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -480,11 +480,11 @@  _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 		status0_7 = _mm256_packs_epi32(status0_7,
 					       _mm256_setzero_si256());
 
-		uint64_t burst = __builtin_popcountll
+		uint64_t burst = rte_popcount64
 					(_mm_cvtsi128_si64
 						(_mm256_extracti128_si256
 							(status0_7, 1)));
-		burst += __builtin_popcountll
+		burst += rte_popcount64
 				(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
@@ -1388,11 +1388,11 @@  _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 		status0_7 = _mm256_packs_epi32(status0_7,
 					       _mm256_setzero_si256());
 
-		uint64_t burst = __builtin_popcountll
+		uint64_t burst = rte_popcount64
 					(_mm_cvtsi128_si64
 						(_mm256_extracti128_si256
 							(status0_7, 1)));
-		burst += __builtin_popcountll
+		burst += rte_popcount64
 				(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
index 3e66df5341..aa3bf31d60 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
@@ -539,11 +539,11 @@  _iavf_recv_raw_pkts_vec_avx512(struct iavf_rx_queue *rxq,
 		status0_7 = _mm256_packs_epi32(status0_7,
 					       _mm256_setzero_si256());
 
-		uint64_t burst = __builtin_popcountll
+		uint64_t burst = rte_popcount64
 					(_mm_cvtsi128_si64
 						(_mm256_extracti128_si256
 							(status0_7, 1)));
-		burst += __builtin_popcountll
+		burst += rte_popcount64
 				(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
@@ -1544,11 +1544,11 @@  _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
 		status0_7 = _mm256_packs_epi32(status0_7,
 					       _mm256_setzero_si256());
 
-		uint64_t burst = __builtin_popcountll
+		uint64_t burst = rte_popcount64
 					(_mm_cvtsi128_si64
 						(_mm256_extracti128_si256
 							(status0_7, 1)));
-		burst += __builtin_popcountll
+		burst += rte_popcount64
 				(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 892bfa4cf3..96f187f511 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -695,7 +695,7 @@  _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 				 pkt_mb1);
 		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 		/* C.4 calc available number of desc */
-		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
+		var = rte_popcount64(_mm_cvtsi128_si64(staterr));
 		nb_pkts_recd += var;
 		if (likely(var != IAVF_VPMD_DESCS_PER_LOOP))
 			break;
@@ -1122,7 +1122,7 @@  _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 				 pkt_mb0);
 		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 		/* C.4 calc available number of desc */
-		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
+		var = rte_popcount64(_mm_cvtsi128_si64(staterr));
 		nb_pkts_recd += var;
 
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index f925231f34..9789cb8525 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -692,7 +692,7 @@  ice_align_floor(int n)
 {
 	if (n == 0)
 		return 0;
-	return 1 << (sizeof(n) * CHAR_BIT - 1 - __builtin_clz(n));
+	return 1 << (sizeof(n) * CHAR_BIT - 1 - rte_clz32(n));
 }
 
 #define ICE_PHY_TYPE_SUPPORT_50G(phy_type) \
diff --git a/drivers/net/ice/ice_hash.c b/drivers/net/ice/ice_hash.c
index 52646e9408..e36e7da2b5 100644
--- a/drivers/net/ice/ice_hash.c
+++ b/drivers/net/ice/ice_hash.c
@@ -1033,7 +1033,7 @@  ice_any_invalid_rss_type(enum rte_eth_hash_function rss_func,
 
 	/* check invalid combination */
 	for (i = 0; i < RTE_DIM(invalid_rss_comb); i++) {
-		if (__builtin_popcountll(rss_type & invalid_rss_comb[i]) > 1)
+		if (rte_popcount64(rss_type & invalid_rss_comb[i]) > 1)
 			return true;
 	}
 
diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index fd13ff18f1..6f6d790967 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -678,11 +678,11 @@  _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		status0_7 = _mm256_packs_epi32(status0_7,
 					       _mm256_setzero_si256());
 
-		uint64_t burst = __builtin_popcountll
+		uint64_t burst = rte_popcount64
 					(_mm_cvtsi128_si64
 						(_mm256_extracti128_si256
 							(status0_7, 1)));
-		burst += __builtin_popcountll
+		burst += rte_popcount64
 				(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c
index c3b087c52e..04148e8ea2 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
@@ -680,11 +680,11 @@  _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 		status0_7 = _mm256_packs_epi32(status0_7,
 					       _mm256_setzero_si256());
 
-		uint64_t burst = __builtin_popcountll
+		uint64_t burst = rte_popcount64
 					(_mm_cvtsi128_si64
 						(_mm256_extracti128_si256
 							(status0_7, 1)));
-		burst += __builtin_popcountll
+		burst += rte_popcount64
 				(_mm_cvtsi128_si64
 					(_mm256_castsi256_si128(status0_7)));
 		received += burst;
diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c
index 71fdd6ffb5..9a1b7e3e51 100644
--- a/drivers/net/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/ice/ice_rxtx_vec_sse.c
@@ -575,7 +575,7 @@  _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 				 pkt_mb0);
 		ice_rx_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 		/* C.4 calc available number of desc */
-		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
+		var = rte_popcount64(_mm_cvtsi128_si64(staterr));
 		nb_pkts_recd += var;
 		if (likely(var != ICE_DESCS_PER_LOOP))
 			break;
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
index 90b254ea26..952b032eb6 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -452,7 +452,7 @@  _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		if (unlikely(stat == 0)) {
 			nb_pkts_recd += RTE_IXGBE_DESCS_PER_LOOP;
 		} else {
-			nb_pkts_recd += __builtin_ctz(stat) / IXGBE_UINT8_BIT;
+			nb_pkts_recd += rte_ctz32(stat) / IXGBE_UINT8_BIT;
 			break;
 		}
 	}
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
index bb34b27168..f60808d576 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -574,7 +574,7 @@  _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		desc_to_ptype_v(descs, rxq->pkt_type_mask, &rx_pkts[pos]);
 
 		/* C.4 calc available number of desc */
-		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
+		var = rte_popcount64(_mm_cvtsi128_si64(staterr));
 		nb_pkts_recd += var;
 		if (likely(var != RTE_IXGBE_DESCS_PER_LOOP))
 			break;
diff --git a/drivers/net/mlx5/hws/mlx5dr_buddy.c b/drivers/net/mlx5/hws/mlx5dr_buddy.c
index cde4f54f66..394ca71217 100644
--- a/drivers/net/mlx5/hws/mlx5dr_buddy.c
+++ b/drivers/net/mlx5/hws/mlx5dr_buddy.c
@@ -61,7 +61,7 @@  static unsigned long bitmap_ffs(struct rte_bitmap *bmap,
 		DR_LOG(ERR, "Failed to get slab from bitmap.");
 		return m;
 	}
-	pos = pos + __builtin_ctzll(out_slab);
+	pos = pos + rte_ctz64(out_slab);
 
 	if (pos < n) {
 		DR_LOG(ERR, "Unexpected bit (%d < %"PRIx64") from bitmap", pos, n);
diff --git a/drivers/net/mlx5/hws/mlx5dr_pool.c b/drivers/net/mlx5/hws/mlx5dr_pool.c
index af6a5c743b..b7b532c7cf 100644
--- a/drivers/net/mlx5/hws/mlx5dr_pool.c
+++ b/drivers/net/mlx5/hws/mlx5dr_pool.c
@@ -116,7 +116,7 @@  static int mlx5dr_pool_bitmap_get_free_slot(struct rte_bitmap *bitmap, uint32_t
 	if (!rte_bitmap_scan(bitmap, iidx, &slab))
 		return ENOMEM;
 
-	*iidx += __builtin_ctzll(slab);
+	*iidx += rte_ctz64(slab);
 
 	rte_bitmap_clear(bitmap, *iidx);
 
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index d8f1adfe3d..bb1deff545 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1311,7 +1311,7 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			 * REG_C_0 and REG_C_1 is reserved for metadata feature.
 			 */
 			reg_c_mask &= 0xfc;
-			if (__builtin_popcount(reg_c_mask) < 1) {
+			if (rte_popcount32(reg_c_mask) < 1) {
 				priv->mtr_en = 0;
 				DRV_LOG(WARNING, "No available register for"
 					" meter.");
@@ -1592,8 +1592,8 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 				err = ENOTSUP;
 				goto error;
 			}
-			usable_bits = __builtin_popcount(priv->sh->dv_regc0_mask);
-			required_bits = __builtin_popcount(priv->vport_meta_mask);
+			usable_bits = rte_popcount32(priv->sh->dv_regc0_mask);
+			required_bits = rte_popcount32(priv->vport_meta_mask);
 			if (usable_bits < required_bits) {
 				DRV_LOG(ERR, "Not enough bits available in reg_c[0] to provide "
 					     "representor matching.");
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index e91eb636d0..f7f8f54eb4 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -5853,7 +5853,7 @@  flow_meter_split_prep(struct rte_eth_dev *dev,
 					"Failed to allocate meter flow id.");
 		flow_id = tag_id - 1;
 		flow_id_bits = (!flow_id) ? 1 :
-				(MLX5_REG_BITS - __builtin_clz(flow_id));
+				(MLX5_REG_BITS - rte_clz32(flow_id));
 		if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
 		    mtr_reg_bits) {
 			mlx5_ipool_free(fm->flow_ipool, tag_id);
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index a8dd9920e6..3f4325c5c8 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -421,7 +421,7 @@  flow_dv_convert_modify_action(struct rte_flow_item *item,
 			/* Deduce actual data width in bits from mask value. */
 			off_b = rte_bsf32(mask) + carry_b;
 			size_b = sizeof(uint32_t) * CHAR_BIT -
-				 off_b - __builtin_clz(mask);
+				 off_b - rte_clz32(mask);
 		}
 		MLX5_ASSERT(size_b);
 		actions[i] = (struct mlx5_modification_cmd) {
@@ -1392,10 +1392,10 @@  mlx5_flow_item_field_width(struct rte_eth_dev *dev,
 	case RTE_FLOW_FIELD_TAG:
 		return 32;
 	case RTE_FLOW_FIELD_MARK:
-		return __builtin_popcount(priv->sh->dv_mark_mask);
+		return rte_popcount32(priv->sh->dv_mark_mask);
 	case RTE_FLOW_FIELD_META:
 		return (flow_dv_get_metadata_reg(dev, attr, error) == REG_C_0) ?
-			__builtin_popcount(priv->sh->dv_meta_mask) : 32;
+			rte_popcount32(priv->sh->dv_meta_mask) : 32;
 	case RTE_FLOW_FIELD_POINTER:
 	case RTE_FLOW_FIELD_VALUE:
 		return inherit < 0 ? 0 : inherit;
@@ -1940,7 +1940,7 @@  mlx5_flow_field_id_to_modify_info
 	case RTE_FLOW_FIELD_MARK:
 		{
 			uint32_t mark_mask = priv->sh->dv_mark_mask;
-			uint32_t mark_count = __builtin_popcount(mark_mask);
+			uint32_t mark_count = rte_popcount32(mark_mask);
 			RTE_SET_USED(mark_count);
 			MLX5_ASSERT(data->offset + width <= mark_count);
 			int reg = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK,
@@ -1961,7 +1961,7 @@  mlx5_flow_field_id_to_modify_info
 	case RTE_FLOW_FIELD_META:
 		{
 			uint32_t meta_mask = priv->sh->dv_meta_mask;
-			uint32_t meta_count = __builtin_popcount(meta_mask);
+			uint32_t meta_count = rte_popcount32(meta_mask);
 			RTE_SET_USED(meta_count);
 			MLX5_ASSERT(data->offset + width <= meta_count);
 			int reg = flow_dv_get_metadata_reg(dev, attr, error);
@@ -2002,7 +2002,7 @@  mlx5_flow_field_id_to_modify_info
 	case MLX5_RTE_FLOW_FIELD_META_REG:
 		{
 			uint32_t meta_mask = priv->sh->dv_meta_mask;
-			uint32_t meta_count = __builtin_popcount(meta_mask);
+			uint32_t meta_count = rte_popcount32(meta_mask);
 			uint8_t reg = flow_tag_index_get(data);
 
 			RTE_SET_USED(meta_count);
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 5395969eb0..cbd741605b 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -6014,7 +6014,7 @@  flow_hw_tx_tag_regc_mask(struct rte_eth_dev *dev)
 	 * Availability of sufficient number of bits in REG_C_0 is verified on initialization.
 	 * Sanity checking here.
 	 */
-	MLX5_ASSERT(__builtin_popcount(mask) >= __builtin_popcount(priv->vport_meta_mask));
+	MLX5_ASSERT(rte_popcount32(mask) >= rte_popcount32(priv->vport_meta_mask));
 	return mask;
 }
 
@@ -6082,7 +6082,7 @@  flow_hw_create_tx_repr_tag_jump_acts_tmpl(struct rte_eth_dev *dev,
 		.src = {
 			.field = RTE_FLOW_FIELD_VALUE,
 		},
-		.width = __builtin_popcount(tag_mask),
+		.width = rte_popcount32(tag_mask),
 	};
 	struct rte_flow_action_modify_field set_tag_m = {
 		.operation = RTE_FLOW_MODIFY_SET,
@@ -6458,7 +6458,7 @@  flow_hw_create_ctrl_regc_jump_actions_template(struct rte_eth_dev *dev,
 		.src = {
 			.field = RTE_FLOW_FIELD_VALUE,
 		},
-		.width = __builtin_popcount(marker_mask),
+		.width = rte_popcount32(marker_mask),
 	};
 	struct rte_flow_action_modify_field set_reg_m = {
 		.operation = RTE_FLOW_MODIFY_SET,
diff --git a/drivers/net/mlx5/mlx5_flow_meter.c b/drivers/net/mlx5/mlx5_flow_meter.c
index ac8c3deaf0..14a435d157 100644
--- a/drivers/net/mlx5/mlx5_flow_meter.c
+++ b/drivers/net/mlx5/mlx5_flow_meter.c
@@ -1820,7 +1820,7 @@  mlx5_flow_meter_create(struct rte_eth_dev *dev, uint32_t meter_id,
 		legacy_fm->idx = mtr_idx;
 		fm = &legacy_fm->fm;
 	}
-	mtr_id_bits = MLX5_REG_BITS - __builtin_clz(mtr_idx);
+	mtr_id_bits = MLX5_REG_BITS - rte_clz32(mtr_idx);
 	if ((mtr_id_bits + priv->sh->mtrmng->max_mtr_flow_bits) >
 	    mtr_reg_bits) {
 		DRV_LOG(ERR, "Meter number exceeds max limit.");
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
index 4d0d05c376..cccfa7f2d3 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
@@ -1183,7 +1183,7 @@  rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		comp_idx = ((__vector unsigned long)comp_mask)[0];
 
 		/* F.3 get the first compressed CQE. */
-		comp_idx = comp_idx ? __builtin_ctzll(comp_idx) /
+		comp_idx = comp_idx ? rte_ctz64(comp_idx) /
 			(sizeof(uint16_t) * 8) : MLX5_VPMD_DESCS_PER_LOOP;
 
 		/* E.6 mask out entries after the compressed CQE. */
@@ -1202,7 +1202,7 @@  rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 
 		/* E.7 count non-compressed valid CQEs. */
 		n = ((__vector unsigned long)invalid_mask)[0];
-		n = n ? __builtin_ctzll(n) / (sizeof(uint16_t) * 8) :
+		n = n ? rte_ctz64(n) / (sizeof(uint16_t) * 8) :
 			MLX5_VPMD_DESCS_PER_LOOP;
 		nocmp_n += n;
 
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 0766952255..2bdd1f676d 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -753,7 +753,7 @@  rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		comp_idx = _mm_cvtsi128_si64(comp_mask);
 		/* F.3 get the first compressed CQE. */
 		comp_idx = comp_idx ?
-				__builtin_ctzll(comp_idx) /
+				rte_ctz64(comp_idx) /
 					(sizeof(uint16_t) * 8) :
 				MLX5_VPMD_DESCS_PER_LOOP;
 		/* E.6 mask out entries after the compressed CQE. */
@@ -762,7 +762,7 @@  rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		invalid_mask = _mm_or_si128(invalid_mask, mask);
 		/* E.7 count non-compressed valid CQEs. */
 		n = _mm_cvtsi128_si64(invalid_mask);
-		n = n ? __builtin_ctzll(n) / (sizeof(uint16_t) * 8) :
+		n = n ? rte_ctz64(n) / (sizeof(uint16_t) * 8) :
 			MLX5_VPMD_DESCS_PER_LOOP;
 		nocmp_n += n;
 		/* D.2 get the final invalid mask. */
diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c
index b295702fd4..4db738785f 100644
--- a/drivers/net/mlx5/mlx5_utils.c
+++ b/drivers/net/mlx5/mlx5_utils.c
@@ -535,7 +535,7 @@  mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
 		return NULL;
 	}
 	MLX5_ASSERT(slab);
-	iidx += __builtin_ctzll(slab);
+	iidx += rte_ctz64(slab);
 	MLX5_ASSERT(iidx != UINT32_MAX);
 	MLX5_ASSERT(iidx < mlx5_trunk_size_get(pool, trunk->idx));
 	rte_bitmap_clear(trunk->bmp, iidx);
@@ -783,7 +783,7 @@  mlx5_ipool_get_next_cache(struct mlx5_indexed_pool *pool, uint32_t *pos)
 		}
 		return NULL;
 	}
-	iidx += __builtin_ctzll(slab);
+	iidx += rte_ctz64(slab);
 	rte_bitmap_clear(ibmp, iidx);
 	iidx++;
 	*pos = iidx;
diff --git a/drivers/net/mvpp2/mrvl_ethdev.c b/drivers/net/mvpp2/mrvl_ethdev.c
index 89c83f1c1f..359a5d1df7 100644
--- a/drivers/net/mvpp2/mrvl_ethdev.c
+++ b/drivers/net/mvpp2/mrvl_ethdev.c
@@ -193,7 +193,7 @@  static struct {
 static inline int
 mrvl_reserve_bit(int *bitmap, int max)
 {
-	int n = sizeof(*bitmap) * 8 - __builtin_clz(*bitmap);
+	int n = sizeof(*bitmap) * 8 - rte_clz32(*bitmap);
 
 	if (n >= max)
 		return -1;
diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
index bc6f60c64a..e4f5015aa3 100644
--- a/drivers/net/netvsc/hn_rxtx.c
+++ b/drivers/net/netvsc/hn_rxtx.c
@@ -116,7 +116,7 @@  hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
 		uint32_t bin;
 
 		/* count zeros, and offset into correct bin */
-		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
+		bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
 		stats->size_bins[bin]++;
 	} else {
 		if (s < 64)
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index e48ff3cca7..99b95194cd 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -94,7 +94,7 @@  virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
 		uint32_t bin;
 
 		/* count zeros, and offset into correct bin */
-		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
+		bin = (sizeof(s) * 8) - rte_clz32(s) - 5;
 		stats->size_bins[bin]++;
 	} else {
 		if (s < 64)
diff --git a/examples/bbdev_app/main.c b/examples/bbdev_app/main.c
index d3f66bb03d..ef9cd2918f 100644
--- a/examples/bbdev_app/main.c
+++ b/examples/bbdev_app/main.c
@@ -241,7 +241,7 @@  bbdev_parse_args(int argc, char **argv,
 				return -1;
 			}
 			app_params->num_enc_cores =
-				__builtin_popcount(app_params->enc_core_mask);
+				rte_popcount32(app_params->enc_core_mask);
 			break;
 
 		case 'd':
@@ -252,7 +252,7 @@  bbdev_parse_args(int argc, char **argv,
 				return -1;
 			}
 			app_params->num_dec_cores =
-				__builtin_popcount(app_params->dec_core_mask);
+				rte_popcount32(app_params->dec_core_mask);
 			break;
 
 		case 'p':
diff --git a/examples/eventdev_pipeline/main.c b/examples/eventdev_pipeline/main.c
index 8d6c90f15d..0c995d1a70 100644
--- a/examples/eventdev_pipeline/main.c
+++ b/examples/eventdev_pipeline/main.c
@@ -230,17 +230,17 @@  parse_app_args(int argc, char **argv)
 			break;
 		case 'r':
 			rx_lcore_mask = parse_coremask(optarg);
-			popcnt = __builtin_popcountll(rx_lcore_mask);
+			popcnt = rte_popcount64(rx_lcore_mask);
 			fdata->rx_single = (popcnt == 1);
 			break;
 		case 't':
 			tx_lcore_mask = parse_coremask(optarg);
-			popcnt = __builtin_popcountll(tx_lcore_mask);
+			popcnt = rte_popcount64(tx_lcore_mask);
 			fdata->tx_single = (popcnt == 1);
 			break;
 		case 'e':
 			sched_lcore_mask = parse_coremask(optarg);
-			popcnt = __builtin_popcountll(sched_lcore_mask);
+			popcnt = rte_popcount64(sched_lcore_mask);
 			fdata->sched_single = (popcnt == 1);
 			break;
 		case 'm':
diff --git a/examples/fips_validation/main.c b/examples/fips_validation/main.c
index 6518c959c4..fed5596f36 100644
--- a/examples/fips_validation/main.c
+++ b/examples/fips_validation/main.c
@@ -2034,7 +2034,7 @@  fips_mct_tdes_test(void)
 		}
 
 		for (k = 0; k < 24; k++)
-			val_key.val[k] = (__builtin_popcount(val_key.val[k]) &
+			val_key.val[k] = (rte_popcount32(val_key.val[k]) &
 					0x1) ?
 					val_key.val[k] : (val_key.val[k] ^ 0x1);
 
diff --git a/lib/acl/acl_bld.c b/lib/acl/acl_bld.c
index 2816632803..418751e9f4 100644
--- a/lib/acl/acl_bld.c
+++ b/lib/acl/acl_bld.c
@@ -1091,7 +1091,7 @@  acl_calc_wildness(struct rte_acl_build_rule *head,
 
 			switch (rule->config->defs[n].type) {
 			case RTE_ACL_FIELD_TYPE_BITMASK:
-				wild = (size - __builtin_popcountll(
+				wild = (size - rte_popcount64(
 					fld->mask_range.u64 & msk_val)) /
 					size;
 				break;
diff --git a/lib/acl/acl_run_avx512.c b/lib/acl/acl_run_avx512.c
index 3b8795561b..30b8214ab5 100644
--- a/lib/acl/acl_run_avx512.c
+++ b/lib/acl/acl_run_avx512.c
@@ -45,13 +45,13 @@  update_flow_mask(const struct acl_flow_avx512 *flow, uint32_t *fmsk,
 	fmsk[0] ^= rmsk[0];
 	m = rmsk[0];
 
-	k = __builtin_popcount(m);
+	k = rte_popcount32(m);
 	n = flow->total_packets - flow->num_packets;
 
 	if (n < k) {
 		/* reduce mask */
 		for (i = k - n; i != 0; i--) {
-			j = sizeof(m) * CHAR_BIT - 1 - __builtin_clz(m);
+			j = sizeof(m) * CHAR_BIT - 1 - rte_clz32(m);
 			m ^= 1 << j;
 		}
 	} else
diff --git a/lib/acl/acl_run_avx512_common.h b/lib/acl/acl_run_avx512_common.h
index 578eaa1d0c..67eb2af774 100644
--- a/lib/acl/acl_run_avx512_common.h
+++ b/lib/acl/acl_run_avx512_common.h
@@ -192,7 +192,7 @@  _F_(start_flow)(struct acl_flow_avx512 *flow, uint32_t num, uint32_t msk,
 	m[1] = msk >> _SIMD_PTR_NUM_;
 
 	/* calculate masks for new flows */
-	n = __builtin_popcount(m[0]);
+	n = rte_popcount32(m[0]);
 	nm[0] = (1 << n) - 1;
 	nm[1] = (1 << (num - n)) - 1;
 
diff --git a/lib/bpf/bpf_validate.c b/lib/bpf/bpf_validate.c
index 119dc4d3aa..95b9ef99ef 100644
--- a/lib/bpf/bpf_validate.c
+++ b/lib/bpf/bpf_validate.c
@@ -370,7 +370,7 @@  eval_umax_bits(uint64_t v, size_t opsz)
 	if (v == 0)
 		return 0;
 
-	v = __builtin_clzll(v);
+	v = rte_clz64(v);
 	return RTE_LEN2MASK(opsz - v, uint64_t);
 }
 
diff --git a/lib/dmadev/rte_dmadev.c b/lib/dmadev/rte_dmadev.c
index 8c095e1f35..bf7d5ec519 100644
--- a/lib/dmadev/rte_dmadev.c
+++ b/lib/dmadev/rte_dmadev.c
@@ -758,7 +758,7 @@  dma_dump_capability(FILE *f, uint64_t dev_capa)
 
 	(void)fprintf(f, "  dev_capa: 0x%" PRIx64 " -", dev_capa);
 	while (dev_capa > 0) {
-		capa = 1ull << __builtin_ctzll(dev_capa);
+		capa = 1ull << rte_ctz64(dev_capa);
 		(void)fprintf(f, " %s", dma_capability_name(capa));
 		dev_capa &= ~capa;
 	}
diff --git a/lib/eal/common/eal_common_fbarray.c b/lib/eal/common/eal_common_fbarray.c
index 169e66e04b..2055bfa57d 100644
--- a/lib/eal/common/eal_common_fbarray.c
+++ b/lib/eal/common/eal_common_fbarray.c
@@ -189,7 +189,7 @@  find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
 				tmp_msk &= tmp_msk >> 1ULL;
 			/* we found what we were looking for */
 			if (tmp_msk != 0) {
-				run_start = __builtin_ctzll(tmp_msk);
+				run_start = rte_ctz64(tmp_msk);
 				return MASK_GET_IDX(msk_idx, run_start);
 			}
 		}
@@ -203,7 +203,7 @@  find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
 		if (~cur_msk == 0)
 			clz = sizeof(cur_msk) * 8;
 		else
-			clz = __builtin_clzll(~cur_msk);
+			clz = rte_clz64(~cur_msk);
 
 		/* if there aren't any runs at the end either, just continue */
 		if (clz == 0)
@@ -308,7 +308,7 @@  find_next(const struct rte_fbarray *arr, unsigned int start, bool used)
 		 * find first set bit - that will correspond to whatever it is
 		 * that we're looking for.
 		 */
-		found = __builtin_ctzll(cur);
+		found = rte_ctz64(cur);
 		return MASK_GET_IDX(idx, found);
 	}
 	/* we didn't find anything */
@@ -366,7 +366,7 @@  find_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
 		/*
 		 * see if current run ends before mask end.
 		 */
-		run_len = __builtin_ctzll(cur);
+		run_len = rte_ctz64(cur);
 
 		/* add however many zeroes we've had in the last run and quit */
 		if (run_len < need_len) {
@@ -454,7 +454,7 @@  find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
 				 * would have been.
 				 */
 				run_start = MASK_ALIGN -
-						__builtin_clzll(tmp_msk) - n;
+						rte_clz64(tmp_msk) - n;
 				return MASK_GET_IDX(msk_idx, run_start);
 			}
 		}
@@ -468,7 +468,7 @@  find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
 		if (~cur_msk == 0)
 			ctz = sizeof(cur_msk) * 8;
 		else
-			ctz = __builtin_ctzll(~cur_msk);
+			ctz = rte_ctz64(~cur_msk);
 
 		/* if there aren't any runs at the start either, just
 		 * continue
@@ -584,7 +584,7 @@  find_prev(const struct rte_fbarray *arr, unsigned int start, bool used)
 		 * the value we get is counted from end of mask, so calculate
 		 * position from start of mask.
 		 */
-		found = MASK_ALIGN - __builtin_clzll(cur) - 1;
+		found = MASK_ALIGN - rte_clz64(cur) - 1;
 
 		return MASK_GET_IDX(idx, found);
 	} while (idx-- != 0); /* decrement after check  to include zero*/
@@ -635,7 +635,7 @@  find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
 		/*
 		 * see where run ends, starting from the end.
 		 */
-		run_len = __builtin_clzll(cur);
+		run_len = rte_clz64(cur);
 
 		/* add however many zeroes we've had in the last run and quit */
 		if (run_len < need_len) {
diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
index 565f2401ce..53636331a2 100644
--- a/lib/eal/common/rte_random.c
+++ b/lib/eal/common/rte_random.c
@@ -7,6 +7,7 @@ 
 #endif
 #include <unistd.h>
 
+#include <rte_bitops.h>
 #include <rte_branch_prediction.h>
 #include <rte_cycles.h>
 #include <rte_lcore.h>
@@ -153,7 +154,7 @@  rte_rand_max(uint64_t upper_bound)
 
 	state = __rte_rand_get_state();
 
-	ones = __builtin_popcountll(upper_bound);
+	ones = rte_popcount64(upper_bound);
 
 	/* Handle power-of-2 upper_bound as a special case, since it
 	 * has no bias issues.
@@ -168,7 +169,7 @@  rte_rand_max(uint64_t upper_bound)
 	 * the value and generate a new one.
 	 */
 
-	leading_zeros = __builtin_clzll(upper_bound);
+	leading_zeros = rte_clz64(upper_bound);
 	mask >>= leading_zeros;
 
 	do {
diff --git a/lib/eal/common/rte_reciprocal.c b/lib/eal/common/rte_reciprocal.c
index d47dc47fc5..87c67e6d49 100644
--- a/lib/eal/common/rte_reciprocal.c
+++ b/lib/eal/common/rte_reciprocal.c
@@ -55,7 +55,7 @@  divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r)
 	}
 
 	/* Count leading zeros. */
-	s = __builtin_clzll(v);
+	s = rte_clz64(v);
 	if (s > 0) {
 		v = v << s;
 		un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
@@ -107,7 +107,7 @@  rte_reciprocal_value_u64(uint64_t d)
 	uint64_t r;
 	int l;
 
-	l = 63 - __builtin_clzll(d);
+	l = 63 - rte_clz64(d);
 
 	m = divide_128_div_64_to_64((1ULL << l), 0, d, &r) << 1;
 	if (r << 1 < r || r << 1 >= d)
diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index 94e872a08a..9e2aa4ae18 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -586,7 +586,7 @@  rte_service_lcore_count_services(uint32_t lcore)
 	if (!cs->is_service_core)
 		return -ENOTSUP;
 
-	return __builtin_popcountll(cs->service_mask);
+	return rte_popcount64(cs->service_mask);
 }
 
 int32_t
diff --git a/lib/eal/linux/eal_vfio.c b/lib/eal/linux/eal_vfio.c
index 56edccb0db..ad3c1654b2 100644
--- a/lib/eal/linux/eal_vfio.c
+++ b/lib/eal/linux/eal_vfio.c
@@ -1682,7 +1682,7 @@  spapr_dma_win_size(void)
 	RTE_LOG(DEBUG, EAL, "Setting DMA window size to 0x%" PRIx64 "\n",
 		spapr_dma_win_len);
 	spapr_dma_win_page_sz = param.page_sz;
-	rte_mem_set_dma_mask(__builtin_ctzll(spapr_dma_win_len));
+	rte_mem_set_dma_mask(rte_ctz64(spapr_dma_win_len));
 	return 0;
 }
 
@@ -1720,7 +1720,7 @@  vfio_spapr_create_dma_window(int vfio_container_fd)
 
 	/* create a new DMA window (start address is not selectable) */
 	create.window_size = spapr_dma_win_len;
-	create.page_shift  = __builtin_ctzll(spapr_dma_win_page_sz);
+	create.page_shift  = rte_ctz64(spapr_dma_win_page_sz);
 	create.levels = 1;
 	ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
 #ifdef VFIO_IOMMU_SPAPR_INFO_DDW
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 0840d2b594..46eaed6467 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1067,7 +1067,7 @@  eth_dev_offload_names(uint64_t bitmask, char *buf, size_t size,
 	}
 
 	while (bitmask != 0) {
-		uint64_t offload = RTE_BIT64(__builtin_ctzll(bitmask));
+		uint64_t offload = RTE_BIT64(rte_ctz64(bitmask));
 		const char *name = offload_name(offload);
 
 		ret = snprintf(&buf[pos], size - pos, "%s,", name);
@@ -1165,7 +1165,7 @@  eth_dev_validate_offloads(uint16_t port_id, uint64_t req_offloads,
 
 	while (offloads_diff != 0) {
 		/* Check if any offload is requested but not enabled. */
-		offload = RTE_BIT64(__builtin_ctzll(offloads_diff));
+		offload = RTE_BIT64(rte_ctz64(offloads_diff));
 		if (offload & req_offloads) {
 			RTE_ETHDEV_LOG(ERR,
 				"Port %u failed to enable %s offload %s\n",
diff --git a/lib/fib/dir24_8.c b/lib/fib/dir24_8.c
index 3efdcb533c..a61897667b 100644
--- a/lib/fib/dir24_8.c
+++ b/lib/fib/dir24_8.c
@@ -155,7 +155,7 @@  tbl8_get_idx(struct dir24_8_tbl *dp)
 			(dp->tbl8_idxes[i] == UINT64_MAX); i++)
 		;
 	if (i < (dp->number_tbl8s >> BITMAP_SLAB_BIT_SIZE_LOG2)) {
-		bit_idx = __builtin_ctzll(~dp->tbl8_idxes[i]);
+		bit_idx = rte_ctz64(~dp->tbl8_idxes[i]);
 		dp->tbl8_idxes[i] |= (1ULL << bit_idx);
 		return (i << BITMAP_SLAB_BIT_SIZE_LOG2) + bit_idx;
 	}
diff --git a/lib/hash/rte_thash.c b/lib/hash/rte_thash.c
index 2228af576b..4ff567ee5a 100644
--- a/lib/hash/rte_thash.c
+++ b/lib/hash/rte_thash.c
@@ -130,7 +130,7 @@  get_bit_lfsr(struct thash_lfsr *lfsr)
 	 * masking the TAP bits defined by the polynomial and
 	 * calculating parity
 	 */
-	bit = __builtin_popcount(lfsr->state & lfsr->poly) & 0x1;
+	bit = rte_popcount32(lfsr->state & lfsr->poly) & 0x1;
 	ret = lfsr->state & 0x1;
 	lfsr->state = ((lfsr->state >> 1) | (bit << (lfsr->deg - 1))) &
 		((1 << lfsr->deg) - 1);
@@ -144,7 +144,7 @@  get_rev_bit_lfsr(struct thash_lfsr *lfsr)
 {
 	uint32_t bit, ret;
 
-	bit = __builtin_popcount(lfsr->rev_state & lfsr->rev_poly) & 0x1;
+	bit = rte_popcount32(lfsr->rev_state & lfsr->rev_poly) & 0x1;
 	ret = lfsr->rev_state & (1 << (lfsr->deg - 1));
 	lfsr->rev_state = ((lfsr->rev_state << 1) | bit) &
 		((1 << lfsr->deg) - 1);
diff --git a/lib/hash/rte_thash_x86_gfni.h b/lib/hash/rte_thash_x86_gfni.h
index 7bb76ac1bb..fbec16dde0 100644
--- a/lib/hash/rte_thash_x86_gfni.h
+++ b/lib/hash/rte_thash_x86_gfni.h
@@ -110,7 +110,7 @@  __rte_thash_gfni(const uint64_t *mtrx, const uint8_t *tuple,
 				secondary_tuple);
 		}
 
-		chunk_len = __builtin_popcountll(load_mask);
+		chunk_len = rte_popcount64(load_mask);
 		for (i = 0; i < ((chunk_len + prepend) / 8); i++, mtrx += 8) {
 			perm_bytes = _mm512_mask_permutexvar_epi8(perm_bytes,
 				permute_mask, permute_idx, tuple_bytes);
diff --git a/lib/mldev/mldev_utils_scalar.c b/lib/mldev/mldev_utils_scalar.c
index 92be5daee8..4d6cb88024 100644
--- a/lib/mldev/mldev_utils_scalar.c
+++ b/lib/mldev/mldev_utils_scalar.c
@@ -413,7 +413,7 @@  __float16_to_float32_scalar_rtx(uint16_t f16)
 		if (f16_m == 0) { /* zero signed */
 			f32_e = 0;
 		} else { /* subnormal numbers */
-			clz = __builtin_clz((uint32_t)f16_m) - sizeof(uint32_t) * 8 + FP16_LSB_E;
+			clz = rte_clz32((uint32_t)f16_m) - sizeof(uint32_t) * 8 + FP16_LSB_E;
 			e_16 = (int)f16_e - clz;
 			f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E;
 
diff --git a/lib/pipeline/rte_pipeline.c b/lib/pipeline/rte_pipeline.c
index 1fa9f9c47e..436cf54953 100644
--- a/lib/pipeline/rte_pipeline.c
+++ b/lib/pipeline/rte_pipeline.c
@@ -17,7 +17,7 @@ 
 #ifdef RTE_PIPELINE_STATS_COLLECT
 
 #define RTE_PIPELINE_STATS_AH_DROP_WRITE(p, mask)			\
-	({ (p)->n_pkts_ah_drop = __builtin_popcountll(mask); })
+	({ (p)->n_pkts_ah_drop = rte_popcount64(mask); })
 
 #define RTE_PIPELINE_STATS_AH_DROP_READ(p, counter)			\
 	({ (counter) += (p)->n_pkts_ah_drop; (p)->n_pkts_ah_drop = 0; })
@@ -29,7 +29,7 @@ 
 ({									\
 	uint64_t mask = (p)->action_mask0[RTE_PIPELINE_ACTION_DROP];	\
 	mask ^= (p)->pkts_drop_mask;					\
-	(counter) += __builtin_popcountll(mask);			\
+	(counter) += rte_popcount64(mask);			\
 })
 
 #else
@@ -133,7 +133,7 @@  rte_mask_get_next(uint64_t mask, uint32_t pos)
 {
 	uint64_t mask_rot = (mask << ((63 - pos) & 0x3F)) |
 			(mask >> ((pos + 1) & 0x3F));
-	return (__builtin_ctzll(mask_rot) - (63 - pos)) & 0x3F;
+	return (rte_ctz64(mask_rot) - (63 - pos)) & 0x3F;
 }
 
 static inline uint32_t
@@ -141,7 +141,7 @@  rte_mask_get_prev(uint64_t mask, uint32_t pos)
 {
 	uint64_t mask_rot = (mask >> (pos & 0x3F)) |
 			(mask << ((64 - pos) & 0x3F));
-	return ((63 - __builtin_clzll(mask_rot)) + pos) & 0x3F;
+	return ((63 - rte_clz64(mask_rot)) + pos) & 0x3F;
 }
 
 static void
@@ -1082,7 +1082,7 @@  rte_pipeline_compute_masks(struct rte_pipeline *p, uint64_t pkts_mask)
 	p->action_mask1[RTE_PIPELINE_ACTION_TABLE] = 0;
 
 	if ((pkts_mask & (pkts_mask + 1)) == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i;
 
 		for (i = 0; i < n_pkts; i++) {
@@ -1136,7 +1136,7 @@  rte_pipeline_action_handler_port(struct rte_pipeline *p, uint64_t pkts_mask)
 	p->pkts_mask = pkts_mask;
 
 	if ((pkts_mask & (pkts_mask + 1)) == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i;
 
 		for (i = 0; i < n_pkts; i++) {
@@ -1209,7 +1209,7 @@  rte_pipeline_action_handler_port_meta(struct rte_pipeline *p,
 	p->pkts_mask = pkts_mask;
 
 	if ((pkts_mask & (pkts_mask + 1)) == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i;
 
 		for (i = 0; i < n_pkts; i++) {
@@ -1282,7 +1282,7 @@  static inline void
 rte_pipeline_action_handler_drop(struct rte_pipeline *p, uint64_t pkts_mask)
 {
 	if ((pkts_mask & (pkts_mask + 1)) == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i;
 
 		for (i = 0; i < n_pkts; i++)
diff --git a/lib/pipeline/rte_table_action.c b/lib/pipeline/rte_table_action.c
index 59d71ee50d..dfdbc66b08 100644
--- a/lib/pipeline/rte_table_action.c
+++ b/lib/pipeline/rte_table_action.c
@@ -386,7 +386,7 @@  tm_apply(struct tm_data *data,
 
 	/* Apply */
 	data->queue_id = p->subport_id <<
-				(__builtin_ctz(cfg->n_pipes_per_subport) + 4) |
+				(rte_ctz32(cfg->n_pipes_per_subport) + 4) |
 				p->pipe_id << 4;
 
 	return 0;
@@ -429,7 +429,7 @@  static int
 encap_cfg_check(struct rte_table_action_encap_config *encap)
 {
 	if ((encap->encap_mask == 0) ||
-		(__builtin_popcountll(encap->encap_mask) != 1))
+		(rte_popcount64(encap->encap_mask) != 1))
 		return -ENOTSUP;
 
 	return 0;
@@ -3364,7 +3364,7 @@  ah(struct rte_pipeline *p,
 		time = rte_rdtsc();
 
 	if ((pkts_mask & (pkts_mask + 1)) == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i;
 
 		for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) {
@@ -3392,7 +3392,7 @@  ah(struct rte_pipeline *p,
 		}
 	} else
 		for ( ; pkts_mask; ) {
-			uint32_t pos = __builtin_ctzll(pkts_mask);
+			uint32_t pos = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pos;
 			uint64_t drop_mask;
 
diff --git a/lib/port/rte_port_ethdev.c b/lib/port/rte_port_ethdev.c
index 0da7890261..e6bb7ee480 100644
--- a/lib/port/rte_port_ethdev.c
+++ b/lib/port/rte_port_ethdev.c
@@ -206,7 +206,7 @@  rte_port_ethdev_writer_tx_bulk(void *port,
 			((pkts_mask & bsz_mask) ^ bsz_mask);
 
 	if (expr == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t n_pkts_ok;
 
 		if (tx_buf_count)
@@ -224,7 +224,7 @@  rte_port_ethdev_writer_tx_bulk(void *port,
 		}
 	} else {
 		for ( ; pkts_mask; ) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 			struct rte_mbuf *pkt = pkts[pkt_index];
 
@@ -413,7 +413,7 @@  rte_port_ethdev_writer_nodrop_tx_bulk(void *port,
 			((pkts_mask & bsz_mask) ^ bsz_mask);
 
 	if (expr == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t n_pkts_ok;
 
 		if (tx_buf_count)
@@ -437,7 +437,7 @@  rte_port_ethdev_writer_nodrop_tx_bulk(void *port,
 		send_burst_nodrop(p);
 	} else {
 		for ( ; pkts_mask; ) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 			struct rte_mbuf *pkt = pkts[pkt_index];
 
diff --git a/lib/port/rte_port_eventdev.c b/lib/port/rte_port_eventdev.c
index fd7dac9a56..13350fd608 100644
--- a/lib/port/rte_port_eventdev.c
+++ b/lib/port/rte_port_eventdev.c
@@ -231,7 +231,7 @@  rte_port_eventdev_writer_tx_bulk(void *port,
 					((pkts_mask & bsz_mask) ^ bsz_mask);
 
 	if (expr == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i, n_enq_ok;
 
 		if (enq_buf_count)
@@ -257,7 +257,7 @@  rte_port_eventdev_writer_tx_bulk(void *port,
 
 	} else {
 		for (; pkts_mask;) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 
 			p->ev[enq_buf_count++].mbuf = pkts[pkt_index];
@@ -463,7 +463,7 @@  rte_port_eventdev_writer_nodrop_tx_bulk(void *port,
 					((pkts_mask & bsz_mask) ^ bsz_mask);
 
 	if (expr == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i, n_enq_ok;
 
 		if (enq_buf_count)
@@ -497,7 +497,7 @@  rte_port_eventdev_writer_nodrop_tx_bulk(void *port,
 		send_burst_nodrop(p);
 	} else {
 		for (; pkts_mask;) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 
 			p->ev[enq_buf_count++].mbuf = pkts[pkt_index];
diff --git a/lib/port/rte_port_fd.c b/lib/port/rte_port_fd.c
index 932ecd324e..7e140793b2 100644
--- a/lib/port/rte_port_fd.c
+++ b/lib/port/rte_port_fd.c
@@ -239,7 +239,7 @@  rte_port_fd_writer_tx_bulk(void *port,
 	uint32_t tx_buf_count = p->tx_buf_count;
 
 	if ((pkts_mask & (pkts_mask + 1)) == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i;
 
 		for (i = 0; i < n_pkts; i++)
@@ -247,7 +247,7 @@  rte_port_fd_writer_tx_bulk(void *port,
 		RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, n_pkts);
 	} else
 		for ( ; pkts_mask; ) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 			struct rte_mbuf *pkt = pkts[pkt_index];
 
@@ -424,7 +424,7 @@  rte_port_fd_writer_nodrop_tx_bulk(void *port,
 	uint32_t tx_buf_count = p->tx_buf_count;
 
 	if ((pkts_mask & (pkts_mask + 1)) == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i;
 
 		for (i = 0; i < n_pkts; i++)
@@ -432,7 +432,7 @@  rte_port_fd_writer_nodrop_tx_bulk(void *port,
 		RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts);
 	} else
 		for ( ; pkts_mask; ) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 			struct rte_mbuf *pkt = pkts[pkt_index];
 
diff --git a/lib/port/rte_port_ras.c b/lib/port/rte_port_ras.c
index e5de57da42..15109661d1 100644
--- a/lib/port/rte_port_ras.c
+++ b/lib/port/rte_port_ras.c
@@ -234,7 +234,7 @@  rte_port_ring_writer_ras_tx_bulk(void *port,
 			port;
 
 	if ((pkts_mask & (pkts_mask + 1)) == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i;
 
 		for (i = 0; i < n_pkts; i++) {
@@ -247,7 +247,7 @@  rte_port_ring_writer_ras_tx_bulk(void *port,
 		}
 	} else {
 		for ( ; pkts_mask; ) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 			struct rte_mbuf *pkt = pkts[pkt_index];
 
diff --git a/lib/port/rte_port_ring.c b/lib/port/rte_port_ring.c
index 52b2d8e557..002efb7c3e 100644
--- a/lib/port/rte_port_ring.c
+++ b/lib/port/rte_port_ring.c
@@ -279,7 +279,7 @@  rte_port_ring_writer_tx_bulk_internal(void *port,
 			((pkts_mask & bsz_mask) ^ bsz_mask);
 
 	if (expr == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t n_pkts_ok;
 
 		if (tx_buf_count) {
@@ -305,7 +305,7 @@  rte_port_ring_writer_tx_bulk_internal(void *port,
 		}
 	} else {
 		for ( ; pkts_mask; ) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 			struct rte_mbuf *pkt = pkts[pkt_index];
 
@@ -595,7 +595,7 @@  rte_port_ring_writer_nodrop_tx_bulk_internal(void *port,
 			((pkts_mask & bsz_mask) ^ bsz_mask);
 
 	if (expr == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t n_pkts_ok;
 
 		if (tx_buf_count) {
@@ -633,7 +633,7 @@  rte_port_ring_writer_nodrop_tx_bulk_internal(void *port,
 			send_burst_nodrop(p);
 	} else {
 		for ( ; pkts_mask; ) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 			struct rte_mbuf *pkt = pkts[pkt_index];
 
diff --git a/lib/port/rte_port_sched.c b/lib/port/rte_port_sched.c
index 8a7d815ef3..f6255c4346 100644
--- a/lib/port/rte_port_sched.c
+++ b/lib/port/rte_port_sched.c
@@ -191,7 +191,7 @@  rte_port_sched_writer_tx_bulk(void *port,
 
 	if (expr == 0) {
 		__rte_unused uint32_t nb_tx;
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 
 		if (tx_buf_count) {
 			nb_tx = rte_sched_port_enqueue(p->sched, p->tx_buf,
@@ -204,7 +204,7 @@  rte_port_sched_writer_tx_bulk(void *port,
 		RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, n_pkts - nb_tx);
 	} else {
 		for ( ; pkts_mask; ) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 			struct rte_mbuf *pkt = pkts[pkt_index];
 
diff --git a/lib/port/rte_port_source_sink.c b/lib/port/rte_port_source_sink.c
index 7d73adc1e7..ff9677cdfe 100644
--- a/lib/port/rte_port_source_sink.c
+++ b/lib/port/rte_port_source_sink.c
@@ -500,7 +500,7 @@  rte_port_sink_tx_bulk(void *port, struct rte_mbuf **pkts,
 	struct rte_port_sink *p = port;
 
 	if ((pkts_mask & (pkts_mask + 1)) == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i;
 
 		RTE_PORT_SINK_STATS_PKTS_IN_ADD(p, n_pkts);
@@ -523,7 +523,7 @@  rte_port_sink_tx_bulk(void *port, struct rte_mbuf **pkts,
 			uint32_t pkt_index;
 
 			for ( ; dump_pkts_mask; ) {
-				pkt_index = __builtin_ctzll(
+				pkt_index = rte_ctz64(
 					dump_pkts_mask);
 				PCAP_SINK_WRITE_PKT(p, pkts[pkt_index]);
 				dump_pkts_mask &= ~(1LLU << pkt_index);
@@ -531,7 +531,7 @@  rte_port_sink_tx_bulk(void *port, struct rte_mbuf **pkts,
 		}
 
 		for ( ; pkts_mask; ) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 			struct rte_mbuf *pkt = pkts[pkt_index];
 
diff --git a/lib/port/rte_port_sym_crypto.c b/lib/port/rte_port_sym_crypto.c
index 295984d025..27b7e07cea 100644
--- a/lib/port/rte_port_sym_crypto.c
+++ b/lib/port/rte_port_sym_crypto.c
@@ -235,7 +235,7 @@  rte_port_sym_crypto_writer_tx_bulk(void *port,
 					((pkts_mask & bsz_mask) ^ bsz_mask);
 
 	if (expr == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i;
 
 		RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_IN_ADD(p, n_pkts);
@@ -249,7 +249,7 @@  rte_port_sym_crypto_writer_tx_bulk(void *port,
 			send_burst(p);
 	} else {
 		for (; pkts_mask;) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 			struct rte_mbuf *pkt = pkts[pkt_index];
 
@@ -447,7 +447,7 @@  rte_port_sym_crypto_writer_nodrop_tx_bulk(void *port,
 					((pkts_mask & bsz_mask) ^ bsz_mask);
 
 	if (expr == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i;
 
 		RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts);
@@ -461,7 +461,7 @@  rte_port_sym_crypto_writer_nodrop_tx_bulk(void *port,
 			send_burst_nodrop(p);
 	} else {
 		for ( ; pkts_mask; ) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 			struct rte_mbuf *pkt = pkts[pkt_index];
 
diff --git a/lib/rib/rte_rib.c b/lib/rib/rte_rib.c
index 812a2597d1..486e8216df 100644
--- a/lib/rib/rte_rib.c
+++ b/lib/rib/rte_rib.c
@@ -302,7 +302,7 @@  rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth)
 	/* closest node found, new_node should be inserted in the middle */
 	common_depth = RTE_MIN(depth, (*tmp)->depth);
 	common_prefix = ip ^ (*tmp)->ip;
-	d = (common_prefix == 0) ? 32 : __builtin_clz(common_prefix);
+	d = (common_prefix == 0) ? 32 : rte_clz32(common_prefix);
 
 	common_depth = RTE_MIN(d, common_depth);
 	common_prefix = ip & rte_rib_depth_to_mask(common_depth);
diff --git a/lib/rib/rte_rib6.c b/lib/rib/rte_rib6.c
index ae44281ae1..94ff434978 100644
--- a/lib/rib/rte_rib6.c
+++ b/lib/rib/rte_rib6.c
@@ -362,7 +362,7 @@  rte_rib6_insert(struct rte_rib6 *rib,
 		if (ip_xor == 0)
 			d += 8;
 		else {
-			d += __builtin_clz(ip_xor << 24);
+			d += rte_clz32(ip_xor << 24);
 			break;
 		}
 	}
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index 751f6cf841..1a6beb14f4 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -973,7 +973,7 @@  rte_sched_port_config(struct rte_sched_port_params *params)
 	port->n_max_subport_profiles = params->n_max_subport_profiles;
 	port->n_pipes_per_subport = params->n_pipes_per_subport;
 	port->n_pipes_per_subport_log2 =
-			__builtin_ctz(params->n_pipes_per_subport);
+			rte_ctz32(params->n_pipes_per_subport);
 	port->socket = params->socket;
 
 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
diff --git a/lib/table/rte_swx_table_learner.c b/lib/table/rte_swx_table_learner.c
index 996fd3de5b..2b5e6bdce1 100644
--- a/lib/table/rte_swx_table_learner.c
+++ b/lib/table/rte_swx_table_learner.c
@@ -202,7 +202,7 @@  table_params_get(struct table_params *p, struct rte_swx_table_learner_params *pa
 
 	p->key_size_pow2 = rte_align64pow2(p->key_size);
 
-	p->key_size_log2 = __builtin_ctzll(p->key_size_pow2);
+	p->key_size_log2 = rte_ctz64(p->key_size_pow2);
 
 	p->key_offset = params->key_offset;
 
@@ -211,7 +211,7 @@  table_params_get(struct table_params *p, struct rte_swx_table_learner_params *pa
 
 	p->data_size_pow2 = rte_align64pow2(sizeof(uint64_t) + p->action_data_size);
 
-	p->data_size_log2 = __builtin_ctzll(p->data_size_pow2);
+	p->data_size_log2 = rte_ctz64(p->data_size_pow2);
 
 	/* Buckets. */
 	p->n_buckets = rte_align32pow2(params->n_keys_max);
@@ -224,7 +224,7 @@  table_params_get(struct table_params *p, struct rte_swx_table_learner_params *pa
 					 p->bucket_key_all_size +
 					 TABLE_KEYS_PER_BUCKET * p->data_size_pow2);
 
-	p->bucket_size_log2 = __builtin_ctzll(p->bucket_size);
+	p->bucket_size_log2 = rte_ctz64(p->bucket_size);
 
 	p->hash_func = params->hash_func ? params->hash_func : rte_hash_crc;
 
diff --git a/lib/table/rte_table_acl.c b/lib/table/rte_table_acl.c
index 53fd5c66ad..902cb78eac 100644
--- a/lib/table/rte_table_acl.c
+++ b/lib/table/rte_table_acl.c
@@ -719,12 +719,12 @@  rte_table_acl_lookup(
 	uint64_t pkts_out_mask;
 	uint32_t n_pkts, i, j;
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 	RTE_TABLE_ACL_STATS_PKTS_IN_ADD(acl, n_pkts_in);
 
 	/* Input conversion */
 	for (i = 0, j = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX -
-		__builtin_clzll(pkts_mask)); i++) {
+		rte_clz64(pkts_mask)); i++) {
 		uint64_t pkt_mask = 1LLU << i;
 
 		if (pkt_mask & pkts_mask) {
@@ -744,7 +744,7 @@  rte_table_acl_lookup(
 	pkts_out_mask = 0;
 	for (i = 0; i < n_pkts; i++) {
 		uint32_t action_table_pos = results[i];
-		uint32_t pkt_pos = __builtin_ctzll(pkts_mask);
+		uint32_t pkt_pos = rte_ctz64(pkts_mask);
 		uint64_t pkt_mask = 1LLU << pkt_pos;
 
 		pkts_mask &= ~pkt_mask;
@@ -759,7 +759,7 @@  rte_table_acl_lookup(
 	}
 
 	*lookup_hit_mask = pkts_out_mask;
-	RTE_TABLE_ACL_STATS_PKTS_LOOKUP_MISS(acl, n_pkts_in - __builtin_popcountll(pkts_out_mask));
+	RTE_TABLE_ACL_STATS_PKTS_LOOKUP_MISS(acl, n_pkts_in - rte_popcount64(pkts_out_mask));
 
 	return 0;
 }
diff --git a/lib/table/rte_table_array.c b/lib/table/rte_table_array.c
index 54a0c42f7d..a45b29ed6a 100644
--- a/lib/table/rte_table_array.c
+++ b/lib/table/rte_table_array.c
@@ -146,12 +146,12 @@  rte_table_array_lookup(
 	void **entries)
 {
 	struct rte_table_array *t = (struct rte_table_array *) table;
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 	RTE_TABLE_ARRAY_STATS_PKTS_IN_ADD(t, n_pkts_in);
 	*lookup_hit_mask = pkts_mask;
 
 	if ((pkts_mask & (pkts_mask + 1)) == 0) {
-		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint64_t n_pkts = rte_popcount64(pkts_mask);
 		uint32_t i;
 
 		for (i = 0; i < n_pkts; i++) {
@@ -164,7 +164,7 @@  rte_table_array_lookup(
 		}
 	} else {
 		for ( ; pkts_mask; ) {
-			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint32_t pkt_index = rte_ctz64(pkts_mask);
 			uint64_t pkt_mask = 1LLU << pkt_index;
 			struct rte_mbuf *pkt = pkts[pkt_index];
 			uint32_t entry_pos = RTE_MBUF_METADATA_UINT32(pkt,
diff --git a/lib/table/rte_table_hash_cuckoo.c b/lib/table/rte_table_hash_cuckoo.c
index c77eccf527..86c960c103 100644
--- a/lib/table/rte_table_hash_cuckoo.c
+++ b/lib/table/rte_table_hash_cuckoo.c
@@ -237,7 +237,7 @@  rte_table_hash_cuckoo_lookup(void *table,
 	uint64_t pkts_mask_out = 0;
 	uint32_t i;
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 
 	RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(t, n_pkts_in);
 
@@ -268,7 +268,7 @@  rte_table_hash_cuckoo_lookup(void *table,
 		}
 	} else
 		for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX
-					- __builtin_clzll(pkts_mask)); i++) {
+					- rte_clz64(pkts_mask)); i++) {
 			uint64_t pkt_mask = 1LLU << i;
 
 			if (pkt_mask & pkts_mask) {
@@ -288,7 +288,7 @@  rte_table_hash_cuckoo_lookup(void *table,
 
 	*lookup_hit_mask = pkts_mask_out;
 	RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(t,
-			n_pkts_in - __builtin_popcountll(pkts_mask_out));
+			n_pkts_in - rte_popcount64(pkts_mask_out));
 
 	return 0;
 
diff --git a/lib/table/rte_table_hash_ext.c b/lib/table/rte_table_hash_ext.c
index 4753ccb15c..51a20acbd7 100644
--- a/lib/table/rte_table_hash_ext.c
+++ b/lib/table/rte_table_hash_ext.c
@@ -469,7 +469,7 @@  static int rte_table_hash_ext_lookup_unoptimized(
 	struct rte_table_hash *t = (struct rte_table_hash *) table;
 	uint64_t pkts_mask_out = 0;
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 
 	for ( ; pkts_mask; ) {
 		struct bucket *bkt0, *bkt;
@@ -478,7 +478,7 @@  static int rte_table_hash_ext_lookup_unoptimized(
 		uint64_t pkt_mask, sig;
 		uint32_t pkt_index, bkt_index, i;
 
-		pkt_index = __builtin_ctzll(pkts_mask);
+		pkt_index = rte_ctz64(pkts_mask);
 		pkt_mask = 1LLU << pkt_index;
 		pkts_mask &= ~pkt_mask;
 
@@ -669,12 +669,12 @@  static int rte_table_hash_ext_lookup_unoptimized(
 	struct rte_mbuf *mbuf00, *mbuf01;				\
 	uint32_t key_offset = t->key_offset;			\
 									\
-	pkt00_index = __builtin_ctzll(pkts_mask);			\
+	pkt00_index = rte_ctz64(pkts_mask);			\
 	pkt00_mask = 1LLU << pkt00_index;				\
 	pkts_mask &= ~pkt00_mask;					\
 	mbuf00 = pkts[pkt00_index];					\
 									\
-	pkt01_index = __builtin_ctzll(pkts_mask);			\
+	pkt01_index = rte_ctz64(pkts_mask);			\
 	pkt01_mask = 1LLU << pkt01_index;				\
 	pkts_mask &= ~pkt01_mask;					\
 	mbuf01 = pkts[pkt01_index];					\
@@ -690,12 +690,12 @@  static int rte_table_hash_ext_lookup_unoptimized(
 	struct rte_mbuf *mbuf00, *mbuf01;				\
 	uint32_t key_offset = t->key_offset;			\
 									\
-	pkt00_index = __builtin_ctzll(pkts_mask);			\
+	pkt00_index = rte_ctz64(pkts_mask);			\
 	pkt00_mask = 1LLU << pkt00_index;				\
 	pkts_mask &= ~pkt00_mask;					\
 	mbuf00 = pkts[pkt00_index];					\
 									\
-	pkt01_index = __builtin_ctzll(pkts_mask);			\
+	pkt01_index = rte_ctz64(pkts_mask);			\
 	if (pkts_mask == 0)						\
 		pkt01_index = pkt00_index;				\
 	pkt01_mask = 1LLU << pkt01_index;				\
@@ -857,15 +857,15 @@  static int rte_table_hash_ext_lookup(
 	uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
 	int status = 0;
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 	RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(t, n_pkts_in);
 
 	/* Cannot run the pipeline with less than 7 packets */
-	if (__builtin_popcountll(pkts_mask) < 7) {
+	if (rte_popcount64(pkts_mask) < 7) {
 		status = rte_table_hash_ext_lookup_unoptimized(table, pkts,
 			pkts_mask, lookup_hit_mask, entries);
 		RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in -
-				__builtin_popcountll(*lookup_hit_mask));
+				rte_popcount64(*lookup_hit_mask));
 		return status;
 	}
 
@@ -976,7 +976,7 @@  static int rte_table_hash_ext_lookup(
 	}
 
 	*lookup_hit_mask = pkts_mask_out;
-	RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - rte_popcount64(pkts_mask_out));
 	return status;
 }
 
diff --git a/lib/table/rte_table_hash_key16.c b/lib/table/rte_table_hash_key16.c
index 04d7fd64bd..584c3f2c98 100644
--- a/lib/table/rte_table_hash_key16.c
+++ b/lib/table/rte_table_hash_key16.c
@@ -636,7 +636,7 @@  rte_table_hash_entry_delete_key16_ext(
 	uint64_t pkt_mask;					\
 	uint32_t key_offset = f->key_offset;\
 								\
-	pkt0_index = __builtin_ctzll(pkts_mask);		\
+	pkt0_index = rte_ctz64(pkts_mask);		\
 	pkt_mask = 1LLU << pkt0_index;				\
 	pkts_mask &= ~pkt_mask;					\
 								\
@@ -741,14 +741,14 @@  rte_table_hash_entry_delete_key16_ext(
 	uint64_t pkt00_mask, pkt01_mask;			\
 	uint32_t key_offset = f->key_offset;		\
 								\
-	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_index = rte_ctz64(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 								\
 	mbuf00 = pkts[pkt00_index];				\
 	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
 								\
-	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	pkt01_index = rte_ctz64(pkts_mask);		\
 	pkt01_mask = 1LLU << pkt01_index;			\
 	pkts_mask &= ~pkt01_mask;				\
 								\
@@ -762,14 +762,14 @@  rte_table_hash_entry_delete_key16_ext(
 	uint64_t pkt00_mask, pkt01_mask;			\
 	uint32_t key_offset = f->key_offset;		\
 								\
-	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_index = rte_ctz64(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 								\
 	mbuf00 = pkts[pkt00_index];				\
 	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));	\
 								\
-	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	pkt01_index = rte_ctz64(pkts_mask);		\
 	if (pkts_mask == 0)					\
 		pkt01_index = pkt00_index;			\
 	pkt01_mask = 1LLU << pkt01_index;			\
@@ -882,12 +882,12 @@  rte_table_hash_lookup_key16_lru(
 	uint32_t pkt11_index, pkt20_index, pkt21_index;
 	uint64_t pkts_mask_out = 0;
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 
 	RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
 
 	/* Cannot run the pipeline with less than 5 packets */
-	if (__builtin_popcountll(pkts_mask) < 5) {
+	if (rte_popcount64(pkts_mask) < 5) {
 		for ( ; pkts_mask; ) {
 			struct rte_bucket_4_16 *bucket;
 			struct rte_mbuf *mbuf;
@@ -901,7 +901,7 @@  rte_table_hash_lookup_key16_lru(
 
 		*lookup_hit_mask = pkts_mask_out;
 		RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
-			__builtin_popcountll(pkts_mask_out));
+			rte_popcount64(pkts_mask_out));
 		return 0;
 	}
 
@@ -992,7 +992,7 @@  rte_table_hash_lookup_key16_lru(
 
 	*lookup_hit_mask = pkts_mask_out;
 	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
-		__builtin_popcountll(pkts_mask_out));
+		rte_popcount64(pkts_mask_out));
 	return 0;
 } /* lookup LRU */
 
@@ -1013,12 +1013,12 @@  rte_table_hash_lookup_key16_ext(
 	struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
 	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 
 	RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
 
 	/* Cannot run the pipeline with less than 5 packets */
-	if (__builtin_popcountll(pkts_mask) < 5) {
+	if (rte_popcount64(pkts_mask) < 5) {
 		for ( ; pkts_mask; ) {
 			struct rte_bucket_4_16 *bucket;
 			struct rte_mbuf *mbuf;
@@ -1131,7 +1131,7 @@  rte_table_hash_lookup_key16_ext(
 			uint64_t pkt_mask;
 			uint32_t pkt_index;
 
-			pkt_index = __builtin_ctzll(buckets_mask);
+			pkt_index = rte_ctz64(buckets_mask);
 			pkt_mask = 1LLU << pkt_index;
 			buckets_mask &= ~pkt_mask;
 
@@ -1144,7 +1144,7 @@  rte_table_hash_lookup_key16_ext(
 
 	*lookup_hit_mask = pkts_mask_out;
 	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
-		__builtin_popcountll(pkts_mask_out));
+		rte_popcount64(pkts_mask_out));
 	return 0;
 } /* lookup EXT */
 
diff --git a/lib/table/rte_table_hash_key32.c b/lib/table/rte_table_hash_key32.c
index 88d8f69c72..22b5ca9166 100644
--- a/lib/table/rte_table_hash_key32.c
+++ b/lib/table/rte_table_hash_key32.c
@@ -664,7 +664,7 @@  rte_table_hash_entry_delete_key32_ext(
 	uint64_t pkt_mask;					\
 	uint32_t key_offset = f->key_offset;	\
 								\
-	pkt0_index = __builtin_ctzll(pkts_mask);		\
+	pkt0_index = rte_ctz64(pkts_mask);		\
 	pkt_mask = 1LLU << pkt0_index;				\
 	pkts_mask &= ~pkt_mask;					\
 								\
@@ -773,14 +773,14 @@  rte_table_hash_entry_delete_key32_ext(
 	uint64_t pkt00_mask, pkt01_mask;			\
 	uint32_t key_offset = f->key_offset;		\
 								\
-	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_index = rte_ctz64(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 								\
 	mbuf00 = pkts[pkt00_index];				\
 	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
 								\
-	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	pkt01_index = rte_ctz64(pkts_mask);		\
 	pkt01_mask = 1LLU << pkt01_index;			\
 	pkts_mask &= ~pkt01_mask;				\
 								\
@@ -794,14 +794,14 @@  rte_table_hash_entry_delete_key32_ext(
 	uint64_t pkt00_mask, pkt01_mask;			\
 	uint32_t key_offset = f->key_offset;		\
 								\
-	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_index = rte_ctz64(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 								\
 	mbuf00 = pkts[pkt00_index];				\
 	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));	\
 								\
-	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	pkt01_index = rte_ctz64(pkts_mask);		\
 	if (pkts_mask == 0)					\
 		pkt01_index = pkt00_index;			\
 								\
@@ -919,11 +919,11 @@  rte_table_hash_lookup_key32_lru(
 	uint32_t pkt11_index, pkt20_index, pkt21_index;
 	uint64_t pkts_mask_out = 0;
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 	RTE_TABLE_HASH_KEY32_STATS_PKTS_IN_ADD(f, n_pkts_in);
 
 	/* Cannot run the pipeline with less than 5 packets */
-	if (__builtin_popcountll(pkts_mask) < 5) {
+	if (rte_popcount64(pkts_mask) < 5) {
 		for ( ; pkts_mask; ) {
 			struct rte_bucket_4_32 *bucket;
 			struct rte_mbuf *mbuf;
@@ -936,7 +936,7 @@  rte_table_hash_lookup_key32_lru(
 		}
 
 		*lookup_hit_mask = pkts_mask_out;
-		RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+		RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - rte_popcount64(pkts_mask_out));
 		return 0;
 	}
 
@@ -1027,7 +1027,7 @@  rte_table_hash_lookup_key32_lru(
 		mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out, entries, f);
 
 	*lookup_hit_mask = pkts_mask_out;
-	RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - rte_popcount64(pkts_mask_out));
 	return 0;
 } /* rte_table_hash_lookup_key32_lru() */
 
@@ -1048,11 +1048,11 @@  rte_table_hash_lookup_key32_ext(
 	struct rte_bucket_4_32 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
 	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 	RTE_TABLE_HASH_KEY32_STATS_PKTS_IN_ADD(f, n_pkts_in);
 
 	/* Cannot run the pipeline with less than 5 packets */
-	if (__builtin_popcountll(pkts_mask) < 5) {
+	if (rte_popcount64(pkts_mask) < 5) {
 		for ( ; pkts_mask; ) {
 			struct rte_bucket_4_32 *bucket;
 			struct rte_mbuf *mbuf;
@@ -1165,7 +1165,7 @@  rte_table_hash_lookup_key32_ext(
 			uint64_t pkt_mask;
 			uint32_t pkt_index;
 
-			pkt_index = __builtin_ctzll(buckets_mask);
+			pkt_index = rte_ctz64(buckets_mask);
 			pkt_mask = 1LLU << pkt_index;
 			buckets_mask &= ~pkt_mask;
 
@@ -1177,7 +1177,7 @@  rte_table_hash_lookup_key32_ext(
 	}
 
 	*lookup_hit_mask = pkts_mask_out;
-	RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - rte_popcount64(pkts_mask_out));
 	return 0;
 } /* rte_table_hash_lookup_key32_ext() */
 
diff --git a/lib/table/rte_table_hash_key8.c b/lib/table/rte_table_hash_key8.c
index 035d242769..bd0ec4aac0 100644
--- a/lib/table/rte_table_hash_key8.c
+++ b/lib/table/rte_table_hash_key8.c
@@ -608,7 +608,7 @@  rte_table_hash_entry_delete_key8_ext(
 	uint64_t pkt_mask;					\
 	uint32_t key_offset = f->key_offset;\
 								\
-	pkt0_index = __builtin_ctzll(pkts_mask);		\
+	pkt0_index = rte_ctz64(pkts_mask);		\
 	pkt_mask = 1LLU << pkt0_index;				\
 	pkts_mask &= ~pkt_mask;					\
 								\
@@ -710,14 +710,14 @@  rte_table_hash_entry_delete_key8_ext(
 	uint64_t pkt00_mask, pkt01_mask;			\
 	uint32_t key_offset = f->key_offset;		\
 								\
-	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_index = rte_ctz64(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 								\
 	mbuf00 = pkts[pkt00_index];				\
 	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
 								\
-	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	pkt01_index = rte_ctz64(pkts_mask);		\
 	pkt01_mask = 1LLU << pkt01_index;			\
 	pkts_mask &= ~pkt01_mask;				\
 								\
@@ -731,14 +731,14 @@  rte_table_hash_entry_delete_key8_ext(
 	uint64_t pkt00_mask, pkt01_mask;			\
 	uint32_t key_offset = f->key_offset;		\
 								\
-	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_index = rte_ctz64(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 								\
 	mbuf00 = pkts[pkt00_index];				\
 	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
 								\
-	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	pkt01_index = rte_ctz64(pkts_mask);		\
 	if (pkts_mask == 0)					\
 		pkt01_index = pkt00_index;			\
 								\
@@ -854,11 +854,11 @@  rte_table_hash_lookup_key8_lru(
 	uint32_t pkt11_index, pkt20_index, pkt21_index;
 	uint64_t pkts_mask_out = 0;
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);
 
 	/* Cannot run the pipeline with less than 5 packets */
-	if (__builtin_popcountll(pkts_mask) < 5) {
+	if (rte_popcount64(pkts_mask) < 5) {
 		for ( ; pkts_mask; ) {
 			struct rte_bucket_4_8 *bucket;
 			struct rte_mbuf *mbuf;
@@ -871,7 +871,7 @@  rte_table_hash_lookup_key8_lru(
 		}
 
 		*lookup_hit_mask = pkts_mask_out;
-		RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+		RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - rte_popcount64(pkts_mask_out));
 		return 0;
 	}
 
@@ -961,7 +961,7 @@  rte_table_hash_lookup_key8_lru(
 		bucket20, bucket21, pkts_mask_out, entries, f);
 
 	*lookup_hit_mask = pkts_mask_out;
-	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - rte_popcount64(pkts_mask_out));
 	return 0;
 } /* lookup LRU */
 
@@ -982,11 +982,11 @@  rte_table_hash_lookup_key8_ext(
 	struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
 	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);
 
 	/* Cannot run the pipeline with less than 5 packets */
-	if (__builtin_popcountll(pkts_mask) < 5) {
+	if (rte_popcount64(pkts_mask) < 5) {
 		for ( ; pkts_mask; ) {
 			struct rte_bucket_4_8 *bucket;
 			struct rte_mbuf *mbuf;
@@ -1099,7 +1099,7 @@  rte_table_hash_lookup_key8_ext(
 			uint64_t pkt_mask;
 			uint32_t pkt_index;
 
-			pkt_index = __builtin_ctzll(buckets_mask);
+			pkt_index = rte_ctz64(buckets_mask);
 			pkt_mask = 1LLU << pkt_index;
 			buckets_mask &= ~pkt_mask;
 
@@ -1111,7 +1111,7 @@  rte_table_hash_lookup_key8_ext(
 	}
 
 	*lookup_hit_mask = pkts_mask_out;
-	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - rte_popcount64(pkts_mask_out));
 	return 0;
 } /* lookup EXT */
 
diff --git a/lib/table/rte_table_hash_lru.c b/lib/table/rte_table_hash_lru.c
index f312d898c2..a4e1a0599c 100644
--- a/lib/table/rte_table_hash_lru.c
+++ b/lib/table/rte_table_hash_lru.c
@@ -404,7 +404,7 @@  static int rte_table_hash_lru_lookup_unoptimized(
 	struct rte_table_hash *t = (struct rte_table_hash *) table;
 	uint64_t pkts_mask_out = 0;
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 	RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(t, n_pkts_in);
 
 	for ( ; pkts_mask; ) {
@@ -414,7 +414,7 @@  static int rte_table_hash_lru_lookup_unoptimized(
 		uint64_t pkt_mask, sig;
 		uint32_t pkt_index, bkt_index, i;
 
-		pkt_index = __builtin_ctzll(pkts_mask);
+		pkt_index = rte_ctz64(pkts_mask);
 		pkt_mask = 1LLU << pkt_index;
 		pkts_mask &= ~pkt_mask;
 
@@ -447,7 +447,7 @@  static int rte_table_hash_lru_lookup_unoptimized(
 	}
 
 	*lookup_hit_mask = pkts_mask_out;
-	RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - rte_popcount64(pkts_mask_out));
 	return 0;
 }
 
@@ -606,12 +606,12 @@  static int rte_table_hash_lru_lookup_unoptimized(
 	struct rte_mbuf *mbuf00, *mbuf01;			\
 	uint32_t key_offset = t->key_offset;		\
 								\
-	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_index = rte_ctz64(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 	mbuf00 = pkts[pkt00_index];				\
 								\
-	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	pkt01_index = rte_ctz64(pkts_mask);		\
 	pkt01_mask = 1LLU << pkt01_index;			\
 	pkts_mask &= ~pkt01_mask;				\
 	mbuf01 = pkts[pkt01_index];				\
@@ -627,12 +627,12 @@  static int rte_table_hash_lru_lookup_unoptimized(
 	struct rte_mbuf *mbuf00, *mbuf01;			\
 	uint32_t key_offset = t->key_offset;		\
 								\
-	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_index = rte_ctz64(pkts_mask);		\
 	pkt00_mask = 1LLU << pkt00_index;			\
 	pkts_mask &= ~pkt00_mask;				\
 	mbuf00 = pkts[pkt00_index];				\
 								\
-	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	pkt01_index = rte_ctz64(pkts_mask);		\
 	if (pkts_mask == 0)					\
 		pkt01_index = pkt00_index;			\
 								\
@@ -809,11 +809,11 @@  static int rte_table_hash_lru_lookup(
 	uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
 	int status = 0;
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 	RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(t, n_pkts_in);
 
 	/* Cannot run the pipeline with less than 7 packets */
-	if (__builtin_popcountll(pkts_mask) < 7)
+	if (rte_popcount64(pkts_mask) < 7)
 		return rte_table_hash_lru_lookup_unoptimized(table, pkts,
 			pkts_mask, lookup_hit_mask, entries);
 
@@ -924,7 +924,7 @@  static int rte_table_hash_lru_lookup(
 	}
 
 	*lookup_hit_mask = pkts_mask_out;
-	RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - rte_popcount64(pkts_mask_out));
 	return status;
 }
 
diff --git a/lib/table/rte_table_lpm.c b/lib/table/rte_table_lpm.c
index 9de9e8a20d..c2ef0d9ba0 100644
--- a/lib/table/rte_table_lpm.c
+++ b/lib/table/rte_table_lpm.c
@@ -309,12 +309,12 @@  rte_table_lpm_lookup(
 	uint64_t pkts_out_mask = 0;
 	uint32_t i;
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 	RTE_TABLE_LPM_STATS_PKTS_IN_ADD(lpm, n_pkts_in);
 
 	pkts_out_mask = 0;
 	for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX -
-		__builtin_clzll(pkts_mask)); i++) {
+		rte_clz64(pkts_mask)); i++) {
 		uint64_t pkt_mask = 1LLU << i;
 
 		if (pkt_mask & pkts_mask) {
@@ -334,7 +334,7 @@  rte_table_lpm_lookup(
 	}
 
 	*lookup_hit_mask = pkts_out_mask;
-	RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(lpm, n_pkts_in - __builtin_popcountll(pkts_out_mask));
+	RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(lpm, n_pkts_in - rte_popcount64(pkts_out_mask));
 	return 0;
 }
 
diff --git a/lib/table/rte_table_lpm_ipv6.c b/lib/table/rte_table_lpm_ipv6.c
index 8fde2c012f..6f3e11a14f 100644
--- a/lib/table/rte_table_lpm_ipv6.c
+++ b/lib/table/rte_table_lpm_ipv6.c
@@ -310,12 +310,12 @@  rte_table_lpm_ipv6_lookup(
 	uint64_t pkts_out_mask = 0;
 	uint32_t i;
 
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 	RTE_TABLE_LPM_IPV6_STATS_PKTS_IN_ADD(lpm, n_pkts_in);
 
 	pkts_out_mask = 0;
 	for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX -
-		__builtin_clzll(pkts_mask)); i++) {
+		rte_clz64(pkts_mask)); i++) {
 		uint64_t pkt_mask = 1LLU << i;
 
 		if (pkt_mask & pkts_mask) {
@@ -335,7 +335,7 @@  rte_table_lpm_ipv6_lookup(
 	}
 
 	*lookup_hit_mask = pkts_out_mask;
-	RTE_TABLE_LPM_IPV6_STATS_PKTS_LOOKUP_MISS(lpm, n_pkts_in - __builtin_popcountll(pkts_out_mask));
+	RTE_TABLE_LPM_IPV6_STATS_PKTS_LOOKUP_MISS(lpm, n_pkts_in - rte_popcount64(pkts_out_mask));
 	return 0;
 }
 
diff --git a/lib/table/rte_table_stub.c b/lib/table/rte_table_stub.c
index 23d0de5c79..cc21516995 100644
--- a/lib/table/rte_table_stub.c
+++ b/lib/table/rte_table_stub.c
@@ -56,7 +56,7 @@  rte_table_stub_lookup(
 	__rte_unused void **entries)
 {
 	__rte_unused struct rte_table_stub *stub = (struct rte_table_stub *) table;
-	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	__rte_unused uint32_t n_pkts_in = rte_popcount64(pkts_mask);
 
 	RTE_TABLE_LPM_STATS_PKTS_IN_ADD(stub, n_pkts_in);
 	*lookup_hit_mask = 0;
diff --git a/lib/vhost/iotlb.c b/lib/vhost/iotlb.c
index 424121cc00..87ac0e5126 100644
--- a/lib/vhost/iotlb.c
+++ b/lib/vhost/iotlb.c
@@ -271,7 +271,7 @@  vhost_user_iotlb_cache_insert(struct virtio_net *dev, uint64_t iova, uint64_t ua
 	new_node->uaddr = uaddr;
 	new_node->uoffset = uoffset;
 	new_node->size = size;
-	new_node->page_shift = __builtin_ctzll(page_size);
+	new_node->page_shift = rte_ctz64(page_size);
 	new_node->perm = perm;
 
 	vhost_user_iotlb_wr_lock_all(dev);
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index d7624d18c8..759a78e3e3 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -77,7 +77,7 @@  vhost_queue_stats_update(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			uint32_t bin;
 
 			/* count zeros, and offset into correct bin */
-			bin = (sizeof(pkt_len) * 8) - __builtin_clz(pkt_len) - 5;
+			bin = (sizeof(pkt_len) * 8) - rte_clz32(pkt_len) - 5;
 			stats->size_bins[bin]++;
 		} else {
 			if (pkt_len < 64)