eal: speed up dpdk init time

Message ID 20240528061259.29528-1-changfengnan@bytedance.com (mailing list archive)
State Changes Requested
Delegated to: Thomas Monjalon
Headers
Series eal: speed up dpdk init time |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/intel-Functional success Functional PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS

Commit Message

Fengnan Chang May 28, 2024, 6:12 a.m. UTC
If the system has a lot of huge pages, memory init takes a long
time in legacy-mem mode. For example, with 120G of memory in 2MB
hugepages, the env init takes 43s. Almost half of that time is
spent in find_numasocket. Since the addresses in
/proc/self/numa_maps are ordered, we can sort hugepg_tbl by orig_va
first and then simply read numa_maps line by line to find each
page's socket. In my test, the time spent was reduced to 19s.

Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
---
 lib/eal/linux/eal_memory.c | 115 +++++++++++++++++++++++--------------
 1 file changed, 72 insertions(+), 43 deletions(-)
  

Comments

Stephen Hemminger May 29, 2024, 9:51 p.m. UTC | #1
On Tue, 28 May 2024 14:12:59 +0800
Fengnan Chang <changfengnan@bytedance.com> wrote:

> If we have a lot of huge pages in system, the memory init will
> cost long time in legacy-mem mode. For example, we have 120G memory
> in unit of 2MB hugepage, the env init will cost 43s. Almost half
> of time spent on find_numasocket, since the address in
> /proc/self/numa_maps is orderd, we can sort hugepg_tbl by orig_va
> first and then just read numa_maps line by line is enough to find
> socket. In my test, spent time reduced to 19s.
> 
> Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
> ---

Good speed up, but you could do much better if the code only read
/proc/self/numa_maps once and constructed an internal table.
Could use a hash or tree to store the relatively small table.
  

Patch

diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
index 45879ca743..28cc136ac0 100644
--- a/lib/eal/linux/eal_memory.c
+++ b/lib/eal/linux/eal_memory.c
@@ -414,7 +414,7 @@  map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
 static int
 find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 {
-	int socket_id;
+	int socket_id = -1;
 	char *end, *nodestr;
 	unsigned i, hp_count = 0;
 	uint64_t virt_addr;
@@ -432,54 +432,61 @@  find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 	snprintf(hugedir_str, sizeof(hugedir_str),
 			"%s/%s", hpi->hugedir, eal_get_hugefile_prefix());
 
-	/* parse numa map */
-	while (fgets(buf, sizeof(buf), f) != NULL) {
-
-		/* ignore non huge page */
-		if (strstr(buf, " huge ") == NULL &&
+	/* if we find this page in our mappings, set socket_id */
+	for (i = 0; i < hpi->num_pages[0]; i++) {
+		void *va = NULL;
+		/* parse numa map */
+		while (fgets(buf, sizeof(buf), f) != NULL) {
+			if (strstr(buf, " huge ") == NULL &&
 				strstr(buf, hugedir_str) == NULL)
-			continue;
-
-		/* get zone addr */
-		virt_addr = strtoull(buf, &end, 16);
-		if (virt_addr == 0 || end == buf) {
-			EAL_LOG(ERR, "%s(): error in numa_maps parsing", __func__);
-			goto error;
-		}
+				continue;
+			/* get zone addr */
+			virt_addr = strtoull(buf, &end, 16);
+			if (virt_addr == 0 || end == buf) {
+				EAL_LOG(ERR, "error in numa_maps parsing");
+				goto error;
+			}
 
-		/* get node id (socket id) */
-		nodestr = strstr(buf, " N");
-		if (nodestr == NULL) {
-			EAL_LOG(ERR, "%s(): error in numa_maps parsing", __func__);
-			goto error;
-		}
-		nodestr += 2;
-		end = strstr(nodestr, "=");
-		if (end == NULL) {
-			EAL_LOG(ERR, "%s(): error in numa_maps parsing", __func__);
-			goto error;
-		}
-		end[0] = '\0';
-		end = NULL;
+			/* get node id (socket id) */
+			nodestr = strstr(buf, " N");
+			if (nodestr == NULL) {
+				EAL_LOG(ERR, "error in numa_maps parsing");
+				goto error;
+			}
+			nodestr += 2;
+			end = strstr(nodestr, "=");
+			if (end == NULL) {
+				EAL_LOG(ERR, "error in numa_maps parsing");
+				goto error;
+			}
+			end[0] = '\0';
+			end = NULL;
 
-		socket_id = strtoul(nodestr, &end, 0);
-		if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
-			EAL_LOG(ERR, "%s(): error in numa_maps parsing", __func__);
-			goto error;
+			socket_id = strtoul(nodestr, &end, 0);
+			if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
+				EAL_LOG(ERR, "error in numa_maps parsing");
+				goto error;
+			}
+			va = (void *)(unsigned long)virt_addr;
+			if (hugepg_tbl[i].orig_va != va) {
+				EAL_LOG(DEBUG, "search %p not seq, let's start from begin",
+					hugepg_tbl[i].orig_va);
+				fseek(f, 0, SEEK_SET);
+			} else {
+				break;
+			}
 		}
-
-		/* if we find this page in our mappings, set socket_id */
-		for (i = 0; i < hpi->num_pages[0]; i++) {
-			void *va = (void *)(unsigned long)virt_addr;
-			if (hugepg_tbl[i].orig_va == va) {
-				hugepg_tbl[i].socket_id = socket_id;
-				hp_count++;
+		if (hugepg_tbl[i].orig_va == va) {
+			hugepg_tbl[i].socket_id = socket_id;
+			hp_count++;
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
-				EAL_LOG(DEBUG,
-					"Hugepage %s is on socket %d",
-					hugepg_tbl[i].filepath, socket_id);
+			EAL_LOG(DEBUG,
+				"Hugepage %s is on socket %d",
+				hugepg_tbl[i].filepath, socket_id);
 #endif
-			}
+		} else {
+			EAL_LOG(ERR,
+				"shoudn't happen %p", hugepg_tbl[i].orig_va);
 		}
 	}
 
@@ -494,6 +501,25 @@  find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 	return -1;
 }
 
+static int
+cmp_orig_va(const void *a, const void *b)
+{
+#ifndef RTE_ARCH_PPC_64
+	const struct hugepage_file *p1 = a;
+	const struct hugepage_file *p2 = b;
+#else
+	/* PowerPC needs memory sorted in reverse order from x86 */
+	const struct hugepage_file *p1 = b;
+	const struct hugepage_file *p2 = a;
+#endif
+	if (p1->orig_va < p2->orig_va)
+		return -1;
+	else if (p1->orig_va > p2->orig_va)
+		return 1;
+	else
+		return 0;
+}
+
 static int
 cmp_physaddr(const void *a, const void *b)
 {
@@ -1324,6 +1350,9 @@  eal_legacy_hugepage_init(void)
 			}
 		}
 
+		qsort(&tmp_hp[hp_offset], hpi->num_pages[0],
+		      sizeof(struct hugepage_file), cmp_orig_va);
+
 		if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
 			EAL_LOG(DEBUG, "Failed to find NUMA socket for %u MB pages",
 					(unsigned)(hpi->hugepage_sz / 0x100000));