[v4] mem: allow using ASan in multi-process mode

Message ID 20241017100317.1356-1-artur.paszkiewicz@intel.com (mailing list archive)
State New
Delegated to: Thomas Monjalon
Headers
Series [v4] mem: allow using ASan in multi-process mode |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/intel-Functional success Functional PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-marvell-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-unit-arm64-testing fail Testing issues RETEST #1
ci/iol-sample-apps-testing success Testing PASS RETEST #1
ci/iol-unit-amd64-testing fail Testing issues RETEST #1
ci/iol-compile-amd64-testing success Testing PASS RETEST #1

Commit Message

Artur Paszkiewicz Oct. 17, 2024, 10:03 a.m. UTC
Multi-process applications operate on shared hugepage memory, but each
process has its own ASan shadow region, which is not synchronized with
those of the other processes. This causes issues when different
processes try to use the same memory, because each process has its own
view of which addresses are valid.

Fix it by mapping the shadow regions for allocated segments as shared
memory. The primary process is responsible for creating and removing the
shared memory objects.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
v4:
- Map ASan shadow shm after mapping the segment.
  Due to a change in ASan behavior[1], shadow shared memory regions
  mapped early would be remapped later, when the segments themselves are
  mapped. So instead of mapping the whole shadow region when reserving
  the memseg list memory, map only the fragments corresponding to the
  segments, after those segments are mapped. Because of this, it is also
  no longer necessary to disable ASan instrumentation to trigger the
  page fault in alloc_seg().
- Adjusted function naming.
- Enabled unit tests.
v3:
- Removed conditional compilation from eal_common_memory.c.
- Improved comments.
v2:
- Added checks for config options disabling multi-process support.
- Fixed missing unmap in legacy mode.

[1] https://github.com/llvm/llvm-project/commit/a34e702aa16fde4cc76e9360d985a64e008e0b23

 app/test/test_mp_secondary.c       |  2 +-
 app/test/test_pdump.c              |  2 +-
 lib/eal/common/eal_common_memory.c |  7 +++
 lib/eal/common/eal_private.h       | 54 ++++++++++++++++
 lib/eal/linux/eal_memalloc.c       | 30 +++++++++
 lib/eal/linux/eal_memory.c         | 98 ++++++++++++++++++++++++++++++
 lib/eal/linux/meson.build          |  4 ++
 7 files changed, 195 insertions(+), 2 deletions(-)
  

Comments

Artur Paszkiewicz Oct. 18, 2024, 9:04 a.m. UTC | #1
Recheck-request: iol-unit-amd64-testing
  
Artur Paszkiewicz Oct. 24, 2024, 7:30 a.m. UTC | #2
Recheck-request: 
rebase=main,iol-compile-amd64-testing,iol-compile-arm64-testing,iol-unit-amd64-testing,iol-unit-arm64-testing
  

Patch

diff --git a/app/test/test_mp_secondary.c b/app/test/test_mp_secondary.c
index f3694530a8..7da2878f64 100644
--- a/app/test/test_mp_secondary.c
+++ b/app/test/test_mp_secondary.c
@@ -223,4 +223,4 @@  test_mp_secondary(void)
 
 #endif /* !RTE_EXEC_ENV_WINDOWS */
 
-REGISTER_FAST_TEST(multiprocess_autotest, false, false, test_mp_secondary);
+REGISTER_FAST_TEST(multiprocess_autotest, false, true, test_mp_secondary);
diff --git a/app/test/test_pdump.c b/app/test/test_pdump.c
index 9f7769707e..a0919e89ba 100644
--- a/app/test/test_pdump.c
+++ b/app/test/test_pdump.c
@@ -219,4 +219,4 @@  test_pdump(void)
 	return TEST_SUCCESS;
 }
 
-REGISTER_FAST_TEST(pdump_autotest, true, false, test_pdump);
+REGISTER_FAST_TEST(pdump_autotest, true, true, test_pdump);
diff --git a/lib/eal/common/eal_common_memory.c b/lib/eal/common/eal_common_memory.c
index a185e0b580..8fbd0c5af9 100644
--- a/lib/eal/common/eal_common_memory.c
+++ b/lib/eal/common/eal_common_memory.c
@@ -263,6 +263,11 @@  eal_memseg_list_alloc(struct rte_memseg_list *msl, int reserve_flags)
 	EAL_LOG(DEBUG, "VA reserved for memseg list at %p, size %zx",
 			addr, mem_sz);
 
+	if (eal_memseg_list_init_asan_shadow(msl) != 0) {
+		EAL_LOG(ERR, "Failed to init ASan shadow region for memseg list");
+		return -1;
+	}
+
 	return 0;
 }
 
@@ -1052,6 +1057,8 @@  rte_eal_memory_detach(void)
 				EAL_LOG(ERR, "Could not unmap memory: %s",
 						rte_strerror(rte_errno));
 
+		eal_memseg_list_cleanup_asan_shadow(msl);
+
 		/*
 		 * we are detaching the fbarray rather than destroying because
 		 * other processes might still reference this fbarray, and we
diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
index bb315dab04..96e05647ff 100644
--- a/lib/eal/common/eal_private.h
+++ b/lib/eal/common/eal_private.h
@@ -309,6 +309,60 @@  eal_memseg_list_alloc(struct rte_memseg_list *msl, int reserve_flags);
 void
 eal_memseg_list_populate(struct rte_memseg_list *msl, void *addr, int n_segs);
 
+/**
+ * Initialize the MSL ASan shadow region shared memory.
+ *
+ * @param msl
+ *  Memory segment list.
+ * @return
+ *  0 on success, (-1) on failure.
+ */
+#ifdef RTE_MALLOC_ASAN
+int
+eal_memseg_list_init_asan_shadow(struct rte_memseg_list *msl);
+#else
+static inline int
+eal_memseg_list_init_asan_shadow(__rte_unused struct rte_memseg_list *msl)
+{
+	return 0;
+}
+#endif
+
+/**
+ * Cleanup the MSL ASan shadow region shared memory.
+ *
+ * @param msl
+ *  Memory segment list.
+ */
+#ifdef RTE_MALLOC_ASAN
+void
+eal_memseg_list_cleanup_asan_shadow(struct rte_memseg_list *msl);
+#else
+static inline void
+eal_memseg_list_cleanup_asan_shadow(__rte_unused struct rte_memseg_list *msl)
+{
+}
+#endif
+
+/**
+ * Get the MSL ASan shadow shared memory object file descriptor.
+ *
+ * @param msl
+ *  Index of the MSL.
+ * @return
+ *  A file descriptor.
+ */
+#ifdef RTE_MALLOC_ASAN
+int
+eal_memseg_list_get_asan_shadow_fd(int msl_idx);
+#else
+static inline int
+eal_memseg_list_get_asan_shadow_fd(__rte_unused int msl_idx)
+{
+	return -1;
+}
+#endif
+
 /**
  * Distribute available memory between MSLs.
  *
diff --git a/lib/eal/linux/eal_memalloc.c b/lib/eal/linux/eal_memalloc.c
index e354efc95d..5ea6dc25b0 100644
--- a/lib/eal/linux/eal_memalloc.c
+++ b/lib/eal/linux/eal_memalloc.c
@@ -37,6 +37,7 @@ 
 #include "eal_memalloc.h"
 #include "eal_memcfg.h"
 #include "eal_private.h"
+#include "malloc_elem.h"
 
 const int anonymous_hugepages_supported =
 #ifdef MAP_HUGE_SHIFT
@@ -677,6 +678,35 @@  alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
 				__func__);
 #endif
 
+#ifdef RTE_MALLOC_ASAN
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int shadow_shm_fd = eal_memseg_list_get_asan_shadow_fd(list_idx);
+
+	if (shadow_shm_fd != -1) {
+		void *shadow_base_addr, *shadow_addr;
+		off_t shadow_map_offset;
+		size_t shadow_sz;
+
+		shadow_base_addr = ASAN_MEM_TO_SHADOW(mcfg->memsegs[list_idx].base_va);
+		shadow_addr = ASAN_MEM_TO_SHADOW(addr);
+		shadow_map_offset = (char *)shadow_addr - (char *)shadow_base_addr;
+		shadow_sz = alloc_sz >> ASAN_SHADOW_SCALE;
+
+		va = mmap(shadow_addr, shadow_sz, PROT_READ | PROT_WRITE,
+			  MAP_SHARED | MAP_FIXED, shadow_shm_fd, shadow_map_offset);
+		if (va == MAP_FAILED) {
+			EAL_LOG(DEBUG, "shadow mmap() failed: %s",
+				strerror(errno));
+			goto mapped;
+		}
+
+		if (va != shadow_addr) {
+			EAL_LOG(DEBUG, "wrong shadow mmap() address");
+			munmap(addr, shadow_sz);
+			goto mapped;
+		}
+	}
+#endif
 	huge_recover_sigbus();
 
 	ms->addr = addr;
diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
index 45879ca743..2795abdbf4 100644
--- a/lib/eal/linux/eal_memory.c
+++ b/lib/eal/linux/eal_memory.c
@@ -41,6 +41,7 @@ 
 #include "eal_filesystem.h"
 #include "eal_hugepages.h"
 #include "eal_options.h"
+#include "malloc_elem.h"
 
 #define PFN_MASK_SIZE	8
 
@@ -1469,6 +1470,7 @@  eal_legacy_hugepage_init(void)
 		if (msl->memseg_arr.count > 0)
 			continue;
 		/* this is an unused list, deallocate it */
+		eal_memseg_list_cleanup_asan_shadow(msl);
 		mem_sz = msl->len;
 		munmap(msl->base_va, mem_sz);
 		msl->base_va = NULL;
@@ -1915,6 +1917,10 @@  memseg_secondary_init(void)
 	return 0;
 }
 
+#ifdef RTE_MALLOC_ASAN
+static int msl_asan_shadow_fd[RTE_MAX_MEMSEG_LISTS];
+#endif
+
 int
 rte_eal_memseg_init(void)
 {
@@ -1947,6 +1953,12 @@  rte_eal_memseg_init(void)
 		EAL_LOG(WARNING, "Please use --"OPT_LEGACY_MEM" option, or recompile with NUMA support.");
 	}
 #endif
+#ifdef RTE_MALLOC_ASAN
+	int msl_idx;
+
+	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++)
+		msl_asan_shadow_fd[msl_idx] = -1;
+#endif
 
 	return rte_eal_process_type() == RTE_PROC_PRIMARY ?
 #ifndef RTE_ARCH_64
@@ -1956,3 +1968,89 @@  rte_eal_memseg_init(void)
 #endif
 			memseg_secondary_init();
 }
+
+#ifdef RTE_MALLOC_ASAN
+int
+eal_memseg_list_init_asan_shadow(struct rte_memseg_list *msl)
+{
+	const struct internal_config *internal_conf =
+			eal_get_internal_configuration();
+	int msl_idx = msl - rte_eal_get_configuration()->mem_config->memsegs;
+	int shm_oflag;
+	char shm_path[PATH_MAX];
+	int shm_fd;
+
+	if (!msl->heap)
+		return 0;
+
+	/* these options imply no secondary process support */
+	if (internal_conf->hugepage_file.unlink_before_mapping ||
+	    internal_conf->no_shconf || internal_conf->no_hugetlbfs) {
+		RTE_ASSERT(rte_eal_process_type() != RTE_PROC_SECONDARY);
+		return 0;
+	}
+
+	snprintf(shm_path, sizeof(shm_path), "/%s_%s_shadow",
+		eal_get_hugefile_prefix(), msl->memseg_arr.name);
+
+	shm_oflag = O_RDWR;
+	if (internal_conf->process_type == RTE_PROC_PRIMARY)
+		shm_oflag |= O_CREAT | O_TRUNC;
+
+	shm_fd = shm_open(shm_path, shm_oflag, 0600);
+	if (shm_fd == -1) {
+		EAL_LOG(DEBUG, "shadow shm_open() failed: %s",
+			strerror(errno));
+		return -1;
+	}
+
+	if (internal_conf->process_type == RTE_PROC_PRIMARY) {
+		if (ftruncate(shm_fd, msl->len >> ASAN_SHADOW_SCALE) == -1) {
+			EAL_LOG(DEBUG, "shadow ftruncate() failed: %s",
+				strerror(errno));
+			close(shm_fd);
+			if (internal_conf->process_type == RTE_PROC_PRIMARY)
+				shm_unlink(shm_path);
+			return -1;
+		}
+	}
+
+	msl_asan_shadow_fd[msl_idx] = shm_fd;
+
+	return 0;
+}
+
+void
+eal_memseg_list_cleanup_asan_shadow(struct rte_memseg_list *msl)
+{
+	const struct internal_config *internal_conf =
+			eal_get_internal_configuration();
+	int msl_idx = msl - rte_eal_get_configuration()->mem_config->memsegs;
+	int *shm_fd = &msl_asan_shadow_fd[msl_idx];
+
+	if (*shm_fd == -1)
+		return;
+
+	close(*shm_fd);
+	*shm_fd = -1;
+
+	if (munmap(ASAN_MEM_TO_SHADOW(msl->base_va),
+		   msl->len >> ASAN_SHADOW_SCALE) != 0)
+		EAL_LOG(ERR, "Could not unmap asan shadow memory: %s",
+			strerror(errno));
+	if (internal_conf->process_type == RTE_PROC_PRIMARY) {
+		char shm_path[PATH_MAX];
+
+		snprintf(shm_path, sizeof(shm_path), "/%s_%s_shadow",
+			 eal_get_hugefile_prefix(),
+			 msl->memseg_arr.name);
+		shm_unlink(shm_path);
+	}
+}
+
+int
+eal_memseg_list_get_asan_shadow_fd(int msl_idx)
+{
+	return msl_asan_shadow_fd[msl_idx];
+}
+#endif
diff --git a/lib/eal/linux/meson.build b/lib/eal/linux/meson.build
index e99ebed256..1e8a48c8d3 100644
--- a/lib/eal/linux/meson.build
+++ b/lib/eal/linux/meson.build
@@ -23,3 +23,7 @@  deps += ['kvargs', 'telemetry']
 if has_libnuma
     dpdk_conf.set10('RTE_EAL_NUMA_AWARE_HUGEPAGES', true)
 endif
+
+if dpdk_conf.has('RTE_MALLOC_ASAN')
+    ext_deps += cc.find_library('rt')
+endif