[RFC] eal: make lcore_init aware of cgroup

Message ID 20240429195342.42711-1-stephen@networkplumber.org (mailing list archive)
State Rejected
Delegated to: Thomas Monjalon
Headers
Series [RFC] eal: make lcore_init aware of cgroup |

Checks

Context Check Description
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing fail Unit Testing FAIL
ci/Intel-compilation success Compilation OK
ci/iol-intel-Performance success Performance Testing PASS
ci/github-robot: build fail github build: failed
ci/intel-Functional success Functional PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-unit-amd64-testing fail Testing issues
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Functional fail Functional Testing issues
ci/iol-unit-arm64-testing fail Testing issues
ci/iol-broadcom-Functional success Functional Testing PASS
ci/intel-Testing fail Testing issues
ci/iol-compile-arm64-testing success Testing PASS

Commit Message

Stephen Hemminger April 29, 2024, 7:52 p.m. UTC
  Some lcores may be restricted from use by DPDK by cgroups.
This should be detected at startup, and only those CPUs that the
process is allowed to use should be marked as enabled.

This is a lightly tested patch, and parsing the cpuset info here
probably needs more checking. It is a response to the problem
reported with error handling.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/eal/linux/eal_lcore.c | 95 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 88 insertions(+), 7 deletions(-)
  

Comments

Stephen Hemminger April 30, 2024, 12:49 a.m. UTC | #1
On Mon, 29 Apr 2024 12:52:25 -0700
Stephen Hemminger <stephen@networkplumber.org> wrote:

> Some lcores may be restricted from use by DPDK by cgroups.
> This should be detected at startup, and only those CPUs that the
> process is allowed to use should be marked as enabled.
> 
> This is a lightly tested patch, and parsing the cpuset info here
> probably needs more checking. It is a response to the problem
> reported with error handling.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

This won't work right because Cpus_allowed reflects the current
affinity mask, which may already be reduced when the process was
spawned by another process. It shows up as a test failure when the EAL
main thread (with affinity to CPU 0) spawns another process as a test:
the child inherits the affinity to CPU 0, so its Cpus_allowed is 1
(instead of ff, i.e. all CPUs).

A better way is needed to read the cgroup's list of allowed CPUs
for the case where the process is running with restricted CPUs.
  

Patch

diff --git a/lib/eal/linux/eal_lcore.c b/lib/eal/linux/eal_lcore.c
index 29b36dd610..098892fafa 100644
--- a/lib/eal/linux/eal_lcore.c
+++ b/lib/eal/linux/eal_lcore.c
@@ -4,6 +4,7 @@ 
 
 #include <unistd.h>
 #include <limits.h>
+#include <inttypes.h>
 
 #include <rte_log.h>
 
@@ -11,23 +12,103 @@ 
 #include "eal_filesystem.h"
 #include "eal_thread.h"
 
+#define PROC_STATUS "/proc/%u/status"
 #define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u"
 #define CORE_ID_FILE "topology/core_id"
 #define NUMA_NODE_PATH "/sys/devices/system/node"
 
-/* Check if a cpu is present by the presence of the cpu information for it */
+static cpu_set_t *cpus_allowed;
+
+/*
+ * Initialize allowed cpus from /proc/<pid>/status
+ * The allowed cpus are a subset of the available lcores on the system,
+ * which may be restricted by cgroups
+ */
+static void
+get_allowed_cpus(cpu_set_t *set)
+{
+	const char cpus_allowed[] = "Cpus_allowed:";
+	const size_t setsize = CPU_ALLOC_SIZE(RTE_MAX_LCORE);
+	char path[PATH_MAX];
+	char line[LINE_MAX];
+	FILE *f;
+
+	CPU_ZERO_S(setsize, set);
+
+	snprintf(path, sizeof(path), PROC_STATUS, getpid());
+	f = fopen(path, "r");
+	if (f == NULL) {
+		EAL_LOG(ERR, "%s(): cannot open %s: %s",
+			__func__, path, strerror(errno));
+		return;
+	}
+
+	while (fgets(line, sizeof(line), f)) {
+		char *cp;
+		unsigned int cpu;
+
+		if (strncmp(line, cpus_allowed, sizeof(cpus_allowed) - 1))
+			continue;
+
+		cp = line + sizeof(cpus_allowed);
+
+		while(*cp && isspace(*cp))
+			++cp;
+
+		for (cpu = 0; cpu < RTE_MAX_LCORE; cpu += 32) {
+			uint32_t cpu_mask;
+			unsigned int i;
+
+			if (*cp == '\0')
+				break;
+
+			if (sscanf(cp, "%" SCNx32, &cpu_mask) != 1) {
+				EAL_LOG(NOTICE, "%s(): can not parse: %s",
+					__func__, line);
+				goto error;
+			}
+			for (i = 0; i < 32; i++) {
+				if (cpu_mask & (1u << i))
+					CPU_SET_S(cpu + i, setsize, set);
+			}
+
+			cp = strchr(cp, ',');
+			if (cp == NULL)
+				break;
+			cp += 1; /* skip the comma */
+		}
+	}
+
+error:
+	fclose(f);
+}
+
+/* Check if a cpu can be used by looking at /proc/<pid>/status */
 int
-eal_cpu_detected(unsigned lcore_id)
+eal_cpu_detected(unsigned int lcore_id)
 {
 	char path[PATH_MAX];
-	int len = snprintf(path, sizeof(path), SYS_CPU_DIR
-		"/"CORE_ID_FILE, lcore_id);
-	if (len <= 0 || (unsigned)len >= sizeof(path))
+	int len;
+
+	if (cpus_allowed == NULL) {
+		cpus_allowed = CPU_ALLOC(RTE_MAX_LCORE);
+
+		if (cpus_allowed == NULL) {
+			EAL_LOG(ERR, "%s(): cannot allocate cpuset", __func__);
+			return 0;
+		}
+		get_allowed_cpus(cpus_allowed);
+	}
+
+	/* skip cpus blocked by cgroup */
+	if (!CPU_ISSET(lcore_id, cpus_allowed))
 		return 0;
-	if (access(path, F_OK) != 0)
+
+	len = snprintf(path, sizeof(path), SYS_CPU_DIR "/"CORE_ID_FILE, lcore_id);
+	if (len <= 0 || (unsigned)len >= sizeof(path))
 		return 0;
 
-	return 1;
+	return access(path, F_OK) == 0;
 }
 
 /*