@@ -240,6 +240,24 @@ if find_libnuma
endif
endif
+has_libhwloc = false
+find_libhwloc = true
+
+if meson.is_cross_build() and not meson.get_cross_property('hwloc', true)
+ # don't look for libhwloc if explicitly disabled in cross build
+ find_libhwloc = false
+endif
+
+if find_libhwloc
+ hwloc_dep = cc.find_library('hwloc', required: false)
+ if hwloc_dep.found() and cc.has_header('hwloc.h')
+ dpdk_conf.set10('RTE_HAS_LIBHWLOC', true)
+ has_libhwloc = true
+ add_project_link_arguments('-lhwloc', language: 'c')
+ dpdk_extra_ldflags += '-lhwloc'
+ endif
+endif
+
has_libfdt = false
fdt_dep = cc.find_library('fdt', required: false)
if fdt_dep.found() and cc.has_header('fdt.h')
@@ -14,6 +14,7 @@
#ifndef RTE_EXEC_ENV_WINDOWS
#include <rte_telemetry.h>
#endif
+#include <rte_malloc.h>
#include "eal_private.h"
#include "eal_thread.h"
@@ -112,6 +113,371 @@ unsigned int rte_get_next_lcore(unsigned int i, int skip_main, int wrap)
return i;
}
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
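+/* Map a domain selector and index to its core_domain_mapping entry (NULL if no domain bit matches). */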
+static struct core_domain_mapping *
+get_domain_lcore_mapping(unsigned int domain_sel, unsigned int domain_indx)
+{
+ struct core_domain_mapping *ptr =
+ (domain_sel & RTE_LCORE_DOMAIN_IO) ? topo_cnfg.io[domain_indx] :
+ (domain_sel & RTE_LCORE_DOMAIN_L4) ? topo_cnfg.l4[domain_indx] :
+ (domain_sel & RTE_LCORE_DOMAIN_L3) ? topo_cnfg.l3[domain_indx] :
+ (domain_sel & RTE_LCORE_DOMAIN_L2) ? topo_cnfg.l2[domain_indx] :
+ (domain_sel & RTE_LCORE_DOMAIN_L1) ? topo_cnfg.l1[domain_indx] : NULL;
+
+ return ptr;
+}
+
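+/* Total DPDK-enabled lcores across all domains of the selected type. */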
+static unsigned int
+get_domain_lcore_count(unsigned int domain_sel)
+{
+ return ((domain_sel & RTE_LCORE_DOMAIN_IO) ? topo_cnfg.io_core_count :
+ (domain_sel & RTE_LCORE_DOMAIN_L4) ? topo_cnfg.l4_core_count :
+ (domain_sel & RTE_LCORE_DOMAIN_L3) ? topo_cnfg.l3_core_count :
+ (domain_sel & RTE_LCORE_DOMAIN_L2) ? topo_cnfg.l2_core_count :
+ (domain_sel & RTE_LCORE_DOMAIN_L1) ? topo_cnfg.l1_core_count : 0);
+}
+#endif
+
+unsigned int
+rte_get_domain_count(unsigned int domain_sel __rte_unused)
+{
+ unsigned int domain_cnt = 0;
+
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
+ if (domain_sel & RTE_LCORE_DOMAIN_ALL) {
+ domain_cnt =
+ (domain_sel & RTE_LCORE_DOMAIN_IO) ? topo_cnfg.io_count :
+ (domain_sel & RTE_LCORE_DOMAIN_L4) ? topo_cnfg.l4_count :
+ (domain_sel & RTE_LCORE_DOMAIN_L3) ? topo_cnfg.l3_count :
+ (domain_sel & RTE_LCORE_DOMAIN_L2) ? topo_cnfg.l2_count :
+ (domain_sel & RTE_LCORE_DOMAIN_L1) ? topo_cnfg.l1_count : 0;
+ }
+#endif
+
+ return domain_cnt;
+}
+
+unsigned int
+rte_lcore_count_from_domain(unsigned int domain_sel __rte_unused,
+unsigned int domain_indx __rte_unused)
+{
+ unsigned int core_cnt = 0;
+
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
+ unsigned int domain_cnt = 0;
+
+ if ((domain_sel & RTE_LCORE_DOMAIN_ALL) == 0)
+ return core_cnt;
+
+ domain_cnt = rte_get_domain_count(domain_sel);
+
+ if (domain_cnt == 0)
+ return core_cnt;
+
+ if ((domain_indx != RTE_LCORE_DOMAIN_LCORES_ALL) && (domain_indx >= domain_cnt))
+ return core_cnt;
+
+	core_cnt = get_domain_lcore_count(domain_sel);
+
+	if ((domain_indx != RTE_LCORE_DOMAIN_LCORES_ALL) && (core_cnt)) {
+		struct core_domain_mapping *ptr = get_domain_lcore_mapping(domain_sel, domain_indx);
+
+		core_cnt = (ptr != NULL) ? ptr->core_count : 0;
+ }
+#endif
+
+ return core_cnt;
+}
+
+unsigned int
+rte_get_lcore_in_domain(unsigned int domain_sel __rte_unused,
+unsigned int domain_indx __rte_unused, unsigned int lcore_pos __rte_unused)
+{
+ uint16_t sel_core = RTE_MAX_LCORE;
+
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
+ unsigned int domain_cnt = 0;
+ unsigned int core_cnt = 0;
+
+ if (domain_sel & RTE_LCORE_DOMAIN_ALL) {
+ domain_cnt = rte_get_domain_count(domain_sel);
+ if (domain_cnt == 0)
+ return sel_core;
+
+ core_cnt = rte_lcore_count_from_domain(domain_sel, RTE_LCORE_DOMAIN_LCORES_ALL);
+ if (core_cnt == 0)
+ return sel_core;
+
+ struct core_domain_mapping *ptr = get_domain_lcore_mapping(domain_sel, domain_indx);
+ if ((ptr) && (ptr->core_count)) {
+ if (lcore_pos < ptr->core_count)
+ sel_core = ptr->cores[lcore_pos];
+ }
+ }
+#endif
+
+ return sel_core;
+}
+
+rte_cpuset_t
+rte_lcore_cpuset_in_domain(unsigned int domain_sel __rte_unused,
+unsigned int domain_indx __rte_unused)
+{
+ rte_cpuset_t ret_cpu_set;
+ CPU_ZERO(&ret_cpu_set);
+
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
+ struct core_domain_mapping *ptr = NULL;
+ unsigned int domain_count = rte_get_domain_count(domain_sel);
+
+	if ((domain_count == 0) || (domain_indx >= domain_count))
+		return ret_cpu_set;
+
+	ptr = get_domain_lcore_mapping(domain_sel, domain_indx);
+	if ((ptr == NULL) || (ptr->core_count == 0))
+ return ret_cpu_set;
+
+ CPU_OR(&ret_cpu_set, &ret_cpu_set, &ptr->core_set);
+#endif
+
+ return ret_cpu_set;
+}
+
+bool
+rte_lcore_is_main_in_domain(unsigned int domain_sel __rte_unused,
+unsigned int domain_indx __rte_unused)
+{
+ bool is_main_in_domain = false;
+
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
+ struct core_domain_mapping *ptr = NULL;
+ unsigned int main_lcore = rte_get_main_lcore();
+ unsigned int domain_count = rte_get_domain_count(domain_sel);
+
+	if ((domain_count == 0) || (domain_indx >= domain_count))
+		return is_main_in_domain;
+
+	ptr = get_domain_lcore_mapping(domain_sel, domain_indx);
+	if ((ptr == NULL) || (ptr->core_count == 0))
+ return is_main_in_domain;
+
+ is_main_in_domain = CPU_ISSET(main_lcore, &ptr->core_set);
+#endif
+
+ return is_main_in_domain;
+}
+
+unsigned int
+rte_get_next_lcore_from_domain(unsigned int indx __rte_unused,
+int skip_main __rte_unused, int wrap __rte_unused, uint32_t flag __rte_unused)
+{
+ if (indx >= RTE_MAX_LCORE) {
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
+ if (get_domain_lcore_count(flag) == 0)
+ return RTE_MAX_LCORE;
+#endif
+ indx = rte_get_next_lcore(-1, skip_main, wrap);
+ return indx;
+ }
+ uint16_t usr_lcore = indx % RTE_MAX_LCORE;
+ uint16_t sel_domain_core = RTE_MAX_LCORE;
+
+ EAL_LOG(DEBUG, "lcore (%u), skip main lcore (%d), wrap (%d), flag (%u)",
+ usr_lcore, skip_main, wrap, flag);
+
+ /* check the input lcore indx */
+ if (!rte_lcore_is_enabled(indx)) {
+		EAL_LOG(ERR, "User input lcore (%u) is not enabled", indx);
+		return sel_domain_core;
+	}
+
+	if (rte_lcore_count() == 1) {
+		EAL_LOG(DEBUG, "only 1 lcore in the DPDK process");
+ sel_domain_core = wrap ? indx : sel_domain_core;
+ return sel_domain_core;
+ }
+
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
+ uint16_t main_lcore = rte_get_main_lcore();
+ uint16_t sel_domain = 0xffff;
+ uint16_t sel_domain_core_index = 0xffff;
+ uint16_t sel_domain_core_count = 0;
+
+ struct core_domain_mapping *ptr = NULL;
+ uint16_t domain_count = 0;
+ uint16_t domain_core_count = 0;
+ uint16_t *domain_core_list = NULL;
+
+ domain_count = rte_get_domain_count(flag);
+ if (domain_count == 0) {
+		EAL_LOG(DEBUG, "No domain found for cores with flag (%u)", flag);
+ return sel_domain_core;
+ }
+
+ /* identify the lcore to get the domain to start from */
+ for (int i = 0; (i < domain_count) && (sel_domain_core_index == 0xffff); i++) {
+ ptr = get_domain_lcore_mapping(flag, i);
+
+ domain_core_count = ptr->core_count;
+ domain_core_list = ptr->cores;
+
+ for (int j = 0; j < domain_core_count; j++) {
+ if (usr_lcore == domain_core_list[j]) {
+ sel_domain_core_index = j;
+ sel_domain_core_count = domain_core_count;
+ sel_domain = i;
+ break;
+ }
+ }
+ }
+
+ if (sel_domain_core_count == 1) {
+		EAL_LOG(DEBUG, "no other lcore in the domain");
+ return sel_domain_core;
+ }
+
+	EAL_LOG(DEBUG, "selected: domain (%u), core count (%u), core index (%u), current core (%u)",
+ sel_domain, sel_domain_core_count, sel_domain_core_index,
+ domain_core_list[sel_domain_core_index]);
+
+ /* get next lcore from the selected domain */
+ /* next lcore is always `sel_domain_core_index + 1`, but needs boundary check */
+ bool lcore_found = false;
+ uint16_t next_domain_lcore_index = sel_domain_core_index + 1;
+	while (!lcore_found) {
+
+ if (next_domain_lcore_index >= sel_domain_core_count) {
+ if (wrap) {
+ next_domain_lcore_index = 0;
+ continue;
+ }
+ break;
+ }
+
+ /* check if main lcore skip */
+ if ((domain_core_list[next_domain_lcore_index] == main_lcore) && (skip_main)) {
+ next_domain_lcore_index += 1;
+ continue;
+ }
+
+ lcore_found = true;
+ }
+	if (lcore_found)
+ sel_domain_core = domain_core_list[next_domain_lcore_index];
+#endif
+
+ EAL_LOG(DEBUG, "Selected core (%u)", sel_domain_core);
+ return sel_domain_core;
+}
+
+unsigned int
+rte_get_next_lcore_from_next_domain(unsigned int indx __rte_unused,
+int skip_main __rte_unused, int wrap __rte_unused,
+uint32_t flag __rte_unused, int cores_to_skip __rte_unused)
+{
+ if (indx >= RTE_MAX_LCORE) {
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
+ if (get_domain_lcore_count(flag) == 0)
+ return RTE_MAX_LCORE;
+#endif
+ indx = rte_get_next_lcore(-1, skip_main, wrap);
+ return indx;
+ }
+
+ uint16_t sel_domain_core = RTE_MAX_LCORE;
+ uint16_t usr_lcore = indx % RTE_MAX_LCORE;
+
+ EAL_LOG(DEBUG, "lcore (%u), skip main lcore (%d), wrap (%d), flag (%u)",
+ usr_lcore, skip_main, wrap, flag);
+
+ /* check the input lcore indx */
+ if (!rte_lcore_is_enabled(indx)) {
+		EAL_LOG(DEBUG, "User input lcore (%u) is not enabled", indx);
+ return sel_domain_core;
+ }
+
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
+ uint16_t main_lcore = rte_get_main_lcore();
+
+ uint16_t sel_domain = 0xffff;
+ uint16_t sel_domain_core_index = 0xffff;
+
+ uint16_t domain_count = 0;
+ uint16_t domain_core_count = 0;
+ uint16_t *domain_core_list = NULL;
+
+ domain_count = rte_get_domain_count(flag);
+ if (domain_count == 0) {
+		EAL_LOG(DEBUG, "No domains found for the flag (%u)", flag);
+ return sel_domain_core;
+ }
+
+ /* identify the lcore to get the domain to start from */
+ struct core_domain_mapping *ptr = NULL;
+ for (int i = 0; (i < domain_count) && (sel_domain_core_index == 0xffff); i++) {
+ ptr = get_domain_lcore_mapping(flag, i);
+ domain_core_count = ptr->core_count;
+ domain_core_list = ptr->cores;
+
+ for (int j = 0; j < domain_core_count; j++) {
+ if (usr_lcore == domain_core_list[j]) {
+ sel_domain_core_index = j;
+ sel_domain = i;
+ break;
+ }
+ }
+ }
+
+ if (sel_domain_core_index == 0xffff) {
+		EAL_LOG(DEBUG, "Invalid lcore %u for the flag (%u)", indx, flag);
+ return sel_domain_core;
+ }
+
+ EAL_LOG(DEBUG, "Selected - core_index (%u); domain (%u), core_count (%u), cores (%p)",
+ sel_domain_core_index, sel_domain, domain_core_count, domain_core_list);
+
+ uint16_t skip_cores = (cores_to_skip >= 0) ? cores_to_skip : (0 - cores_to_skip);
+
+ /* get the next domain & valid lcore */
+ sel_domain = (((1 + sel_domain) == domain_count) && (wrap)) ? 0 : (1 + sel_domain);
+ sel_domain_core_index = 0xffff;
+
+ bool iter_loop = false;
+ for (int i = sel_domain; (i < domain_count) && (sel_domain_core == RTE_MAX_LCORE); i++) {
+ ptr = get_domain_lcore_mapping(flag, i);
+
+ domain_core_count = ptr->core_count;
+ domain_core_list = ptr->cores;
+
+ /* check if we have cores to iterate from this domain */
+ if (skip_cores >= domain_core_count)
+ continue;
+
+ if (((1 + sel_domain) == domain_count) && (wrap)) {
+ if (iter_loop == true)
+ break;
+
+ iter_loop = true;
+ }
+
+ sel_domain_core_index = (cores_to_skip >= 0) ? skip_cores :
+ (domain_core_count - skip_cores);
+ sel_domain_core = domain_core_list[sel_domain_core_index];
+
+ if ((skip_main) && (sel_domain_core == main_lcore)) {
+ sel_domain_core_index = 0xffff;
+ sel_domain_core = RTE_MAX_LCORE;
+ continue;
+ }
+ }
+#endif
+
+ EAL_LOG(DEBUG, "Selected core (%u)", sel_domain_core);
+ return sel_domain_core;
+}
+
unsigned int
rte_lcore_to_socket_id(unsigned int lcore_id)
{
@@ -131,6 +497,354 @@ socket_id_cmp(const void *a, const void *b)
return 0;
}
+
+/*
+ * Use the hwloc library to discover the L1|L2|L3|L4 cache and NUMA/IO
+ * topology of the running machine, and store the per-domain lcore
+ * mappings in memory.
+ */
+int
+rte_eal_topology_init(void)
+{
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
+ memset(&topo_cnfg, 0, sizeof(struct topology_config));
+
+	if (hwloc_topology_init(&topo_cnfg.topology) != 0) {
+		topo_cnfg.topology = NULL;
+		return -1;
+	}
+
+	if (hwloc_topology_load(topo_cnfg.topology) != 0) {
+		rte_eal_topology_release();
+		return -1;
+	}
+
+ int l1_depth = hwloc_get_type_depth(topo_cnfg.topology, HWLOC_OBJ_L1CACHE);
+ int l2_depth = hwloc_get_type_depth(topo_cnfg.topology, HWLOC_OBJ_L2CACHE);
+ int l3_depth = hwloc_get_type_depth(topo_cnfg.topology, HWLOC_OBJ_L3CACHE);
+ int l4_depth = hwloc_get_type_depth(topo_cnfg.topology, HWLOC_OBJ_L4CACHE);
+ int io_depth = hwloc_get_type_depth(topo_cnfg.topology, HWLOC_OBJ_NUMANODE);
+
+ EAL_LOG(DEBUG, "TOPOLOGY - depth: l1 %d, l2 %d, l3 %d, l4 %d, io %d",
+ l1_depth, l2_depth, l3_depth, l4_depth, io_depth);
+
+ topo_cnfg.l1_count = hwloc_get_nbobjs_by_depth(topo_cnfg.topology, l1_depth);
+ topo_cnfg.l2_count = hwloc_get_nbobjs_by_depth(topo_cnfg.topology, l2_depth);
+ topo_cnfg.l3_count = hwloc_get_nbobjs_by_depth(topo_cnfg.topology, l3_depth);
+ topo_cnfg.l4_count = hwloc_get_nbobjs_by_depth(topo_cnfg.topology, l4_depth);
+ topo_cnfg.io_count = hwloc_get_nbobjs_by_depth(topo_cnfg.topology, io_depth);
+
+ EAL_LOG(DEBUG, "TOPOLOGY - obj count: l1 %d, l2 %d, l3 %d, l4 %d, io %d",
+ topo_cnfg.l1_count, topo_cnfg.l2_count,
+ topo_cnfg.l3_count, topo_cnfg.l4_count,
+ topo_cnfg.io_count);
+
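+	/* Group the DPDK-enabled lcores sharing each L1 cache (SMT siblings). */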
+ if ((l1_depth) && (topo_cnfg.l1_count)) {
+		topo_cnfg.l1 = rte_zmalloc(NULL,
+ sizeof(struct core_domain_mapping *) * topo_cnfg.l1_count, 0);
+ if (topo_cnfg.l1 == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ for (int j = 0; j < topo_cnfg.l1_count; j++) {
+ hwloc_obj_t obj = hwloc_get_obj_by_depth(topo_cnfg.topology, l1_depth, j);
+ unsigned int first_cpu = hwloc_bitmap_first(obj->cpuset);
+ unsigned int cpu_count = hwloc_bitmap_weight(obj->cpuset);
+
+			topo_cnfg.l1[j] = rte_zmalloc(NULL, sizeof(struct core_domain_mapping), 0);
+ if (topo_cnfg.l1[j] == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ topo_cnfg.l1[j]->core_count = 0;
+ topo_cnfg.l1[j]->cores = rte_malloc(NULL, sizeof(uint16_t) * cpu_count, 0);
+ if (topo_cnfg.l1[j]->cores == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ signed int cpu_id = first_cpu;
+ unsigned int cpu_index = 0;
+ do {
+ if (rte_lcore_is_enabled(cpu_id)) {
+ EAL_LOG(DEBUG, " L1|SMT domain (%u) lcore %u", j, cpu_id);
+ topo_cnfg.l1[j]->cores[cpu_index] = cpu_id;
+ cpu_index++;
+
+ CPU_SET(cpu_id, &topo_cnfg.l1[j]->core_set);
+ topo_cnfg.l1[j]->core_count += 1;
+ topo_cnfg.l1_core_count += 1;
+ }
+ cpu_id = hwloc_bitmap_next(obj->cpuset, cpu_id);
+ cpu_count -= 1;
+ } while ((cpu_id != -1) && (cpu_count));
+ }
+ }
+
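+	/* Group the DPDK-enabled lcores sharing each L2 cache. */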
+ if ((l2_depth) && (topo_cnfg.l2_count)) {
+		topo_cnfg.l2 = rte_zmalloc(NULL,
+ sizeof(struct core_domain_mapping *) * topo_cnfg.l2_count, 0);
+ if (topo_cnfg.l2 == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ for (int j = 0; j < topo_cnfg.l2_count; j++) {
+ hwloc_obj_t obj = hwloc_get_obj_by_depth(topo_cnfg.topology, l2_depth, j);
+ unsigned int first_cpu = hwloc_bitmap_first(obj->cpuset);
+ unsigned int cpu_count = hwloc_bitmap_weight(obj->cpuset);
+
+			topo_cnfg.l2[j] = rte_zmalloc(NULL, sizeof(struct core_domain_mapping), 0);
+ if (topo_cnfg.l2[j] == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ topo_cnfg.l2[j]->core_count = 0;
+ topo_cnfg.l2[j]->cores = rte_malloc(NULL, sizeof(uint16_t) * cpu_count, 0);
+ if (topo_cnfg.l2[j]->cores == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ signed int cpu_id = first_cpu;
+ unsigned int cpu_index = 0;
+ do {
+ if (rte_lcore_is_enabled(cpu_id)) {
+ EAL_LOG(DEBUG, " L2 domain (%u) lcore %u", j, cpu_id);
+ topo_cnfg.l2[j]->cores[cpu_index] = cpu_id;
+ cpu_index++;
+
+ CPU_SET(cpu_id, &topo_cnfg.l2[j]->core_set);
+ topo_cnfg.l2[j]->core_count += 1;
+ topo_cnfg.l2_core_count += 1;
+ }
+ cpu_id = hwloc_bitmap_next(obj->cpuset, cpu_id);
+ cpu_count -= 1;
+ } while ((cpu_id != -1) && (cpu_count));
+ }
+ }
+
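+	/* Group the DPDK-enabled lcores sharing each L3 cache. */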
+ if ((l3_depth) && (topo_cnfg.l3_count)) {
+		topo_cnfg.l3 = rte_zmalloc(NULL,
+ sizeof(struct core_domain_mapping *) * topo_cnfg.l3_count, 0);
+ if (topo_cnfg.l3 == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ for (int j = 0; j < topo_cnfg.l3_count; j++) {
+ hwloc_obj_t obj = hwloc_get_obj_by_depth(topo_cnfg.topology, l3_depth, j);
+ unsigned int first_cpu = hwloc_bitmap_first(obj->cpuset);
+ unsigned int cpu_count = hwloc_bitmap_weight(obj->cpuset);
+
+			topo_cnfg.l3[j] = rte_zmalloc(NULL, sizeof(struct core_domain_mapping), 0);
+ if (topo_cnfg.l3[j] == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ topo_cnfg.l3[j]->core_count = 0;
+ topo_cnfg.l3[j]->cores = rte_malloc(NULL, sizeof(uint16_t) * cpu_count, 0);
+ if (topo_cnfg.l3[j]->cores == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ signed int cpu_id = first_cpu;
+ unsigned int cpu_index = 0;
+ do {
+ if (rte_lcore_is_enabled(cpu_id)) {
+ EAL_LOG(DEBUG, " L3 domain (%u) lcore %u", j, cpu_id);
+ topo_cnfg.l3[j]->cores[cpu_index] = cpu_id;
+ cpu_index++;
+
+ CPU_SET(cpu_id, &topo_cnfg.l3[j]->core_set);
+ topo_cnfg.l3[j]->core_count += 1;
+ topo_cnfg.l3_core_count += 1;
+ }
+ cpu_id = hwloc_bitmap_next(obj->cpuset, cpu_id);
+ cpu_count -= 1;
+ } while ((cpu_id != -1) && (cpu_count));
+ }
+ }
+
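+	/* Group the DPDK-enabled lcores sharing each L4 cache, when present. */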
+ if ((l4_depth) && (topo_cnfg.l4_count)) {
+		topo_cnfg.l4 = rte_zmalloc(NULL,
+ sizeof(struct core_domain_mapping *) * topo_cnfg.l4_count, 0);
+ if (topo_cnfg.l4 == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ for (int j = 0; j < topo_cnfg.l4_count; j++) {
+ hwloc_obj_t obj = hwloc_get_obj_by_depth(topo_cnfg.topology, l4_depth, j);
+ unsigned int first_cpu = hwloc_bitmap_first(obj->cpuset);
+ unsigned int cpu_count = hwloc_bitmap_weight(obj->cpuset);
+
+			topo_cnfg.l4[j] = rte_zmalloc(NULL, sizeof(struct core_domain_mapping), 0);
+ if (topo_cnfg.l4[j] == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ topo_cnfg.l4[j]->core_count = 0;
+ topo_cnfg.l4[j]->cores = rte_malloc(NULL, sizeof(uint16_t) * cpu_count, 0);
+ if (topo_cnfg.l4[j]->cores == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ signed int cpu_id = first_cpu;
+ unsigned int cpu_index = 0;
+ do {
+ if (rte_lcore_is_enabled(cpu_id)) {
+ EAL_LOG(DEBUG, " L4 domain (%u) lcore %u", j, cpu_id);
+ topo_cnfg.l4[j]->cores[cpu_index] = cpu_id;
+ cpu_index++;
+
+					CPU_SET(cpu_id, &topo_cnfg.l4[j]->core_set);
+ topo_cnfg.l4[j]->core_count += 1;
+ topo_cnfg.l4_core_count += 1;
+ }
+ cpu_id = hwloc_bitmap_next(obj->cpuset, cpu_id);
+ cpu_count -= 1;
+ } while ((cpu_id != -1) && (cpu_count));
+ }
+ }
+
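+	/* Group the DPDK-enabled lcores under each NUMA (IO) node. */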
+ if ((io_depth) && (topo_cnfg.io_count)) {
+		topo_cnfg.io = rte_zmalloc(NULL,
+ sizeof(struct core_domain_mapping *) * topo_cnfg.io_count, 0);
+ if (topo_cnfg.io == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ for (int j = 0; j < topo_cnfg.io_count; j++) {
+ hwloc_obj_t obj = hwloc_get_obj_by_depth(topo_cnfg.topology, io_depth, j);
+ unsigned int first_cpu = hwloc_bitmap_first(obj->cpuset);
+ unsigned int cpu_count = hwloc_bitmap_weight(obj->cpuset);
+
+			topo_cnfg.io[j] = rte_zmalloc(NULL, sizeof(struct core_domain_mapping), 0);
+ if (topo_cnfg.io[j] == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ topo_cnfg.io[j]->core_count = 0;
+ topo_cnfg.io[j]->cores = rte_malloc(NULL, sizeof(uint16_t) * cpu_count, 0);
+ if (topo_cnfg.io[j]->cores == NULL) {
+ rte_eal_topology_release();
+ return -1;
+ }
+
+ signed int cpu_id = first_cpu;
+ unsigned int cpu_index = 0;
+ do {
+ if (rte_lcore_is_enabled(cpu_id)) {
+ EAL_LOG(DEBUG, " IO domain (%u) lcore %u", j, cpu_id);
+ topo_cnfg.io[j]->cores[cpu_index] = cpu_id;
+ cpu_index++;
+
+ CPU_SET(cpu_id, &topo_cnfg.io[j]->core_set);
+ topo_cnfg.io[j]->core_count += 1;
+ topo_cnfg.io_core_count += 1;
+ }
+ cpu_id = hwloc_bitmap_next(obj->cpuset, cpu_id);
+ cpu_count -= 1;
+ } while ((cpu_id != -1) && (cpu_count));
+ }
+ }
+
+ hwloc_topology_destroy(topo_cnfg.topology);
+ topo_cnfg.topology = NULL;
+
+ EAL_LOG(INFO, "TOPOLOGY - core count: l1 %u, l2 %u, l3 %u, l4 %u, io %u",
+ topo_cnfg.l1_core_count, topo_cnfg.l2_core_count,
+ topo_cnfg.l3_core_count, topo_cnfg.l4_core_count,
+ topo_cnfg.io_core_count);
+#endif
+
+ return 0;
+}
+
+/*
+ * Release the memory held by the topology structure.
+ */
+int
+rte_eal_topology_release(void)
+{
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
+	if (topo_cnfg.topology != NULL) {
+		hwloc_topology_destroy(topo_cnfg.topology);
+		topo_cnfg.topology = NULL;
+	}
+
+	EAL_LOG(DEBUG, "release l1 domain memory");
+	for (int i = 0; (topo_cnfg.l1 != NULL) && (i < topo_cnfg.l1_count); i++) {
+		if (topo_cnfg.l1[i] != NULL) {
+			rte_free(topo_cnfg.l1[i]->cores);
+			rte_free(topo_cnfg.l1[i]);
+			topo_cnfg.l1[i] = NULL;
+		}
+	}
+
+	rte_free(topo_cnfg.l1);
+	topo_cnfg.l1 = NULL;
+	topo_cnfg.l1_count = 0;
+	topo_cnfg.l1_core_count = 0;
+
+	EAL_LOG(DEBUG, "release l2 domain memory");
+	for (int i = 0; (topo_cnfg.l2 != NULL) && (i < topo_cnfg.l2_count); i++) {
+		if (topo_cnfg.l2[i] != NULL) {
+			rte_free(topo_cnfg.l2[i]->cores);
+			rte_free(topo_cnfg.l2[i]);
+			topo_cnfg.l2[i] = NULL;
+		}
+	}
+
+	rte_free(topo_cnfg.l2);
+	topo_cnfg.l2 = NULL;
+	topo_cnfg.l2_count = 0;
+	topo_cnfg.l2_core_count = 0;
+
+	EAL_LOG(DEBUG, "release l3 domain memory");
+	for (int i = 0; (topo_cnfg.l3 != NULL) && (i < topo_cnfg.l3_count); i++) {
+		if (topo_cnfg.l3[i] != NULL) {
+			rte_free(topo_cnfg.l3[i]->cores);
+			rte_free(topo_cnfg.l3[i]);
+			topo_cnfg.l3[i] = NULL;
+		}
+	}
+
+	rte_free(topo_cnfg.l3);
+	topo_cnfg.l3 = NULL;
+	topo_cnfg.l3_count = 0;
+	topo_cnfg.l3_core_count = 0;
+
+	EAL_LOG(DEBUG, "release l4 domain memory");
+	for (int i = 0; (topo_cnfg.l4 != NULL) && (i < topo_cnfg.l4_count); i++) {
+		if (topo_cnfg.l4[i] != NULL) {
+			rte_free(topo_cnfg.l4[i]->cores);
+			rte_free(topo_cnfg.l4[i]);
+			topo_cnfg.l4[i] = NULL;
+		}
+	}
+
+	rte_free(topo_cnfg.l4);
+	topo_cnfg.l4 = NULL;
+	topo_cnfg.l4_count = 0;
+	topo_cnfg.l4_core_count = 0;
+
+	EAL_LOG(DEBUG, "release IO domain memory");
+	for (int i = 0; (topo_cnfg.io != NULL) && (i < topo_cnfg.io_count); i++) {
+		if (topo_cnfg.io[i] != NULL) {
+			rte_free(topo_cnfg.io[i]->cores);
+			rte_free(topo_cnfg.io[i]);
+			topo_cnfg.io[i] = NULL;
+		}
+	}
+
+	rte_free(topo_cnfg.io);
+	topo_cnfg.io = NULL;
+	topo_cnfg.io_count = 0;
+	topo_cnfg.io_core_count = 0;
+#endif
+
+ return 0;
+}
+
/*
* Parse /sys/devices/system/cpu to get the number of physical and logical
* processors on the machine. The function will fill the cpu_info
@@ -14,9 +14,14 @@
#include <rte_lcore.h>
#include <rte_log.h>
#include <rte_memory.h>
+#include <rte_os.h>
#include "eal_internal_cfg.h"
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
+#include <hwloc.h>
+#endif
+
/**
* Structure storing internal configuration (per-lcore)
*/
@@ -40,6 +45,45 @@ struct lcore_config {
extern struct lcore_config lcore_config[RTE_MAX_LCORE];
+struct core_domain_mapping {
+ rte_cpuset_t core_set; /**< cpu_set representing lcores within domain */
+ uint16_t core_count; /**< dpdk enabled lcores within domain */
+ uint16_t *cores; /**< list of cores */
+
+ /* uint16_t *l1_cache_id; */
+ /* uint16_t *l2_cache_id; */
+ /* uint16_t *l3_cache_id; */
+ /* uint16_t *l4_cache_id; */
+};
+
+struct topology_config {
+#ifdef RTE_EAL_HWLOC_TOPOLOGY_PROBE
+ hwloc_topology_t topology;
+#endif
+
+ /* domain count */
+ uint16_t l1_count;
+ uint16_t l2_count;
+ uint16_t l3_count;
+ uint16_t l4_count;
+ uint16_t io_count;
+
+	/* total DPDK-enabled lcores across all domains of each type */
+ uint16_t l1_core_count;
+ uint16_t l2_core_count;
+ uint16_t l3_core_count;
+ uint16_t l4_core_count;
+ uint16_t io_core_count;
+
+	/* array of per-domain lcore mappings, one entry per domain instance */
+ struct core_domain_mapping **l1;
+ struct core_domain_mapping **l2;
+ struct core_domain_mapping **l3;
+ struct core_domain_mapping **l4;
+ struct core_domain_mapping **io;
+};
+extern struct topology_config topo_cnfg;
+
/**
* The global RTE configuration structure.
*/
@@ -81,6 +125,20 @@ struct rte_config *rte_eal_get_configuration(void);
*/
int rte_eal_memzone_init(void);
+
+/**
+ * Initialize the topology structure using the hwloc library.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+__rte_internal
+int rte_eal_topology_init(void);
+
+/**
+ * Release the memory held by the topology structure.
+ *
+ * @return
+ *   0 on success.
+ */
+__rte_internal
+int rte_eal_topology_release(void);
+
/**
* Fill configuration with number of physical and logical processors
*
@@ -73,6 +73,8 @@ struct lcore_config lcore_config[RTE_MAX_LCORE];
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
+/* holds topology information */
+struct topology_config topo_cnfg;
int
eal_clean_runtime_dir(void)
@@ -912,6 +914,12 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ if (rte_eal_topology_init()) {
+		rte_eal_init_alert("Cannot initialize topology");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
eal_mcfg_complete();
return fctret;
@@ -932,6 +940,8 @@ rte_eal_cleanup(void)
struct internal_config *internal_conf =
eal_get_internal_configuration();
+
+ rte_eal_topology_release();
rte_service_finalize();
rte_mp_channel_cleanup();
eal_bus_cleanup();
@@ -18,6 +18,7 @@
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_thread.h>
+#include <rte_bitset.h>
#ifdef __cplusplus
extern "C" {
@@ -37,6 +38,44 @@ enum rte_lcore_role_t {
ROLE_NON_EAL,
};
+/**
+ * The lcore grouping within the L1 Domain.
+ */
+#define RTE_LCORE_DOMAIN_L1 RTE_BIT32(0)
+/**
+ * The lcore grouping within the L2 Domain.
+ */
+#define RTE_LCORE_DOMAIN_L2 RTE_BIT32(1)
+/**
+ * The lcore grouping within the L3 Domain.
+ */
+#define RTE_LCORE_DOMAIN_L3 RTE_BIT32(2)
+/**
+ * The lcore grouping within the L4 Domain.
+ */
+#define RTE_LCORE_DOMAIN_L4 RTE_BIT32(3)
+/**
+ * The lcore grouping within the IO Domain.
+ */
+#define RTE_LCORE_DOMAIN_IO RTE_BIT32(4)
+/**
+ * The lcore grouping within the SMT Domain (same as the L1 Domain).
+ */
+#define RTE_LCORE_DOMAIN_SMT RTE_LCORE_DOMAIN_L1
+/**
+ * The lcore grouping covering all Domains (L1|L2|L3|L4|IO).
+ */
+#define RTE_LCORE_DOMAIN_ALL (RTE_LCORE_DOMAIN_L1 | \
+				RTE_LCORE_DOMAIN_L2 | \
+				RTE_LCORE_DOMAIN_L3 | \
+				RTE_LCORE_DOMAIN_L4 | \
+				RTE_LCORE_DOMAIN_IO)
+/**
+ * Domain index value selecting all lcores across every domain of the chosen type.
+ */
+#define RTE_LCORE_DOMAIN_LCORES_ALL RTE_GENMASK32(31, 0)
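+/*
+ * Note: when more than one RTE_LCORE_DOMAIN_* bit is set in a selection, the
+ * lookup helpers resolve the highest-level grouping first: IO, then L4, L3,
+ * L2 and finally L1.
+ */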
+
+
/**
* Get a lcore's role.
*
@@ -211,6 +250,144 @@ int rte_lcore_is_enabled(unsigned int lcore_id);
*/
unsigned int rte_get_next_lcore(unsigned int i, int skip_main, int wrap);
+/**
+ * Get the number of domains of the selected type.
+ *
+ * @param domain_sel
+ *  Domain selection, RTE_LCORE_DOMAIN_[L1|L2|L3|L4|IO].
+ * @return
+ *  Total number of domains of the selected type, or 0 if topology
+ *  information is not available.
+ *
+ * @note Accounts only for lcores enabled via the EAL lcore/coremask arguments.
+ *
+ */
+__rte_experimental
+unsigned int rte_get_domain_count(unsigned int domain_sel);
+
+/**
+ * Get the number of lcores in a domain.
+ *
+ * @param domain_sel
+ *  Domain selection, RTE_LCORE_DOMAIN_[L1|L2|L3|L4|IO].
+ * @param domain_indx
+ *  Domain index, valid range from 0 to (rte_get_domain_count() - 1),
+ *  or RTE_LCORE_DOMAIN_LCORES_ALL for all domains of the selected type.
+ * @return
+ *  Total number of lcores in the selected domain index.
+ *
+ * @note Accounts only for lcores enabled via the EAL lcore/coremask arguments.
+ *
+ */
+__rte_experimental
+unsigned int
+rte_lcore_count_from_domain(unsigned int domain_sel, unsigned int domain_indx);
+
+/**
+ * Get the n'th lcore from a selected domain.
+ *
+ * @param domain_sel
+ *  Domain selection, RTE_LCORE_DOMAIN_[L1|L2|L3|L4|IO].
+ * @param domain_indx
+ *  Domain index, valid range from 0 to (rte_get_domain_count() - 1).
+ * @param lcore_pos
+ *  Lcore position, valid range from 0 to (lcore count of the domain - 1).
+ * @return
+ *  The lcore at the given position within the selected domain,
+ *  or RTE_MAX_LCORE if not found.
+ *
+ * @note Accounts only for lcores enabled via the EAL lcore/coremask arguments.
+ *
+ */
+__rte_experimental
+unsigned int
+rte_get_lcore_in_domain(unsigned int domain_sel,
+unsigned int domain_indx, unsigned int lcore_pos);
+
+#ifdef RTE_HAS_CPUSET
+/**
+ * Return the cpuset of all lcores in the selected domain.
+ *
+ * @param domain_sel
+ *  Domain selection, RTE_LCORE_DOMAIN_[L1|L2|L3|L4|IO].
+ * @param domain_indx
+ *  Domain index, valid range from 0 to (rte_get_domain_count() - 1).
+ * @return
+ *  The cpuset of all lcores in the selected domain; an empty cpuset if the
+ *  domain is invalid or topology information is not available.
+ *
+ * @note Accounts only for lcores enabled via the EAL lcore/coremask arguments.
+ *
+ */
+__rte_experimental
+rte_cpuset_t
+rte_lcore_cpuset_in_domain(unsigned int domain_sel, unsigned int domain_indx);
+#endif
+
+/**
+ * Return whether the main lcore is present in the selected domain.
+ *
+ * @param domain_sel
+ *  Domain selection, RTE_LCORE_DOMAIN_[L1|L2|L3|L4|IO].
+ * @param domain_indx
+ *  Domain index, valid range from 0 to (rte_get_domain_count() - 1).
+ * @return
+ *  True if the main lcore is in the selected domain, false otherwise.
+ *
+ * @note Accounts only for lcores enabled via the EAL lcore/coremask arguments.
+ *
+ */
+__rte_experimental
+bool
+rte_lcore_is_main_in_domain(unsigned int domain_sel, unsigned int domain_indx);
+
+/**
+ * Get the next enabled lcore within the domain, selected by flag, that
+ * contains the given lcore.
+ *
+ * @param i
+ *   The current lcore (reference).
+ * @param skip_main
+ *   If true, do not return the ID of the main lcore.
+ * @param wrap
+ *   If true, wrap around to the first lcore of the domain when the last
+ *   lcore is reached.
+ *   If false, return RTE_MAX_LCORE when no more cores are available.
+ * @param flag
+ *   Domain selection, RTE_LCORE_DOMAIN_[L1|L2|L3|L4|IO].
+ *
+ * @return
+ *   The next lcore_id or RTE_MAX_LCORE if not found.
+ *
+ * @note Accounts only for lcores enabled via the EAL lcore/coremask arguments.
+ *
+ */
+__rte_experimental
+unsigned int
+rte_get_next_lcore_from_domain(unsigned int i, int skip_main, int wrap,
+uint32_t flag);
+
+/**
+ * Get the n'th lcore, counted from the start or the end, of the next
+ * domain selected by flag.
+ *
+ * @param i
+ *   The current lcore (reference).
+ * @param skip_main
+ *   If true, do not return the ID of the main lcore.
+ * @param wrap
+ *   If true, wrap around to the first domain when the last domain is reached.
+ *   If false, return RTE_MAX_LCORE when no more cores are available.
+ * @param flag
+ *   Domain selection, RTE_LCORE_DOMAIN_[L1|L2|L3|L4|IO].
+ * @param cores_to_skip
+ *   If positive, skip that many lcores from the start of the domain.
+ *   If negative, skip that many lcores from the end of the domain.
+ *
+ * @return
+ *   The next lcore_id or RTE_MAX_LCORE if not found.
+ *
+ * @note Accounts only for lcores enabled via the EAL lcore/coremask arguments.
+ *
+ */
+__rte_experimental
+unsigned int
+rte_get_next_lcore_from_next_domain(unsigned int i,
+int skip_main, int wrap, uint32_t flag, int cores_to_skip);
+
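+/*
+ * Minimal usage sketch (illustrative only, variable names are examples):
+ * enumerate the DPDK lcores of every L3 domain.
+ *
+ *	unsigned int dom, pos, lcore;
+ *	unsigned int domains = rte_get_domain_count(RTE_LCORE_DOMAIN_L3);
+ *
+ *	for (dom = 0; dom < domains; dom++) {
+ *		unsigned int n = rte_lcore_count_from_domain(RTE_LCORE_DOMAIN_L3, dom);
+ *		for (pos = 0; pos < n; pos++) {
+ *			lcore = rte_get_lcore_in_domain(RTE_LCORE_DOMAIN_L3, dom, pos);
+ *			// use lcore, e.g. rte_eal_remote_launch(fn, arg, lcore)
+ *		}
+ *	}
+ */
+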
/**
* Macro to browse all running lcores.
*/
@@ -227,6 +404,38 @@ unsigned int rte_get_next_lcore(unsigned int i, int skip_main, int wrap);
i < RTE_MAX_LCORE; \
i = rte_get_next_lcore(i, 1, 0))
+/**
+ * Macro to browse all running lcores in a domain.
+ */
+#define RTE_LCORE_FOREACH_DOMAIN(i, flag) \
+ for (i = rte_get_next_lcore_from_domain(-1, 0, 0, flag); \
+ i < RTE_MAX_LCORE; \
+ i = rte_get_next_lcore_from_domain(i, 0, 0, flag))
+
+/**
+ * Macro to browse all running lcores except the main lcore in domain.
+ */
+#define RTE_LCORE_FOREACH_WORKER_DOMAIN(i, flag) \
+ for (i = rte_get_next_lcore_from_domain(-1, 1, 0, flag); \
+ i < RTE_MAX_LCORE; \
+ i = rte_get_next_lcore_from_domain(i, 1, 0, flag))
+
+/**
+ * Macro to browse the n'th lcore of each domain.
+ */
+#define RTE_LCORE_FORN_NEXT_DOMAIN(i, flag, n) \
+ for (i = rte_get_next_lcore_from_next_domain(-1, 0, 0, flag, n);\
+ i < RTE_MAX_LCORE; \
+ i = rte_get_next_lcore_from_next_domain(i, 0, 0, flag, n))
+
+/**
+ * Macro to browse the n'th lcore of each domain, skipping the main lcore.
+ */
+#define RTE_LCORE_FORN_WORKER_NEXT_DOMAIN(i, flag, n) \
+ for (i = rte_get_next_lcore_from_next_domain(-1, 1, 0, flag, n);\
+ i < RTE_MAX_LCORE; \
+ i = rte_get_next_lcore_from_next_domain(i, 1, 0, flag, n))
+
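+/*
+ * Illustrative sketch (not part of the API): launch a worker on every other
+ * lcore sharing an L3 domain with the first worker lcore; "lcore_worker" is
+ * a placeholder function.
+ *
+ *	unsigned int lcore;
+ *	RTE_LCORE_FOREACH_WORKER_DOMAIN(lcore, RTE_LCORE_DOMAIN_L3)
+ *		rte_eal_remote_launch(lcore_worker, NULL, lcore);
+ */
+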
/**
* Callback prototype for initializing lcores.
*
@@ -65,6 +65,9 @@
* duration of the program, as we hold a write lock on it in the primary proc */
static int mem_cfg_fd = -1;
+/* holds topology information */
+struct topology_config topo_cnfg;
+
static struct flock wr_lock = {
.l_type = F_WRLCK,
.l_whence = SEEK_SET,
@@ -1311,6 +1314,12 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ if (rte_eal_topology_init()) {
+		rte_eal_init_alert("Cannot initialize topology");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
eal_mcfg_complete();
return fctret;
@@ -1352,6 +1361,8 @@ rte_eal_cleanup(void)
struct internal_config *internal_conf =
eal_get_internal_configuration();
+ rte_eal_topology_release();
+
if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
internal_conf->hugepage_file.unlink_existing)
rte_memseg_walk(mark_freeable, NULL);
@@ -31,3 +31,7 @@ endif
if is_freebsd
annotate_locks = false
endif
+
+if has_libhwloc
+ dpdk_conf.set10('RTE_EAL_HWLOC_TOPOLOGY_PROBE', true)
+endif
@@ -397,6 +397,15 @@ EXPERIMENTAL {
# added in 24.11
rte_bitset_to_str;
+
+ # added in 25.03
+ rte_get_domain_count;
+ rte_get_lcore_in_domain;
+ rte_get_next_lcore_from_domain;
+ rte_get_next_lcore_from_next_domain;
+ rte_lcore_count_from_domain;
+ rte_lcore_cpuset_in_domain;
+ rte_lcore_is_main_in_domain;
};
INTERNAL {
@@ -406,6 +415,8 @@ INTERNAL {
rte_bus_unregister;
rte_eal_get_baseaddr;
rte_eal_parse_coremask;
+ rte_eal_topology_init;
+ rte_eal_topology_release;
rte_firmware_read;
rte_intr_allow_others;
rte_intr_cap_multiple;
@@ -40,6 +40,10 @@ static int mem_cfg_fd = -1;
/* internal configuration (per-core) */
struct lcore_config lcore_config[RTE_MAX_LCORE];
+/* holds topology information */
+struct topology_config topo_cnfg;
+
/* Detect if we are a primary or a secondary process */
enum rte_proc_type_t
eal_proc_type_detect(void)
@@ -262,6 +266,8 @@ rte_eal_cleanup(void)
struct internal_config *internal_conf =
eal_get_internal_configuration();
+ rte_eal_topology_release();
+
eal_intr_thread_cancel();
eal_mem_virt2iova_cleanup();
eal_bus_cleanup();
@@ -505,6 +511,12 @@ rte_eal_init(int argc, char **argv)
rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MAIN);
rte_eal_mp_wait_lcore();
+ if (rte_eal_topology_init()) {
+		rte_eal_init_alert("Cannot initialize topology");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
eal_mcfg_complete();
return fctret;