[1/4] bus/pci: Use force-noreplace flag when mapping PCI resources

Message ID 20250506174046.1136711-2-jfree@FreeBSD.org (mailing list archive)
State New
Delegated to: Thomas Monjalon
Headers
Series BSD PCI Fixes |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Jake Freeland May 6, 2025, 5:40 p.m. UTC
When mapping PCI resources in secondary processes, use the
RTE_MAP_FORCE_ADDRESS_NOREPLACE flag to indicate that the
mapping must be made at the provided address.

Without this flag, the kernel might return a different address for
the mapping, even if the requested region was available.

Signed-off-by: Jake Freeland <jfree@FreeBSD.org>
---
 drivers/bus/pci/pci_common_uio.c | 4 +++-
 lib/eal/common/eal_private.h     | 7 ++++++-
 lib/eal/include/rte_eal_paging.h | 7 ++++++-
 lib/eal/unix/eal_unix_memory.c   | 8 +++++++-
 4 files changed, 22 insertions(+), 4 deletions(-)
  

Comments

Burakov, Anatoly May 8, 2025, 11:32 a.m. UTC | #1
On 5/6/2025 7:40 PM, Jake Freeland wrote:
> When mapping PCI resources in secondary processes, use the
> RTE_MAP_FORCE_ADDRESS_NOREPLACE flag to indicate that the
> mapping must be made at the provided address.
> 
> Without this flag, the kernel might return a different address for
> the mapping, even if the requested region was available.
> 
> Signed-off-by: Jake Freeland <jfree@FreeBSD.org>
> ---
>   drivers/bus/pci/pci_common_uio.c | 4 +++-
>   lib/eal/common/eal_private.h     | 7 ++++++-
>   lib/eal/include/rte_eal_paging.h | 7 ++++++-
>   lib/eal/unix/eal_unix_memory.c   | 8 +++++++-
>   4 files changed, 22 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/bus/pci/pci_common_uio.c b/drivers/bus/pci/pci_common_uio.c
> index 30503bd23a..71974e9f56 100644
> --- a/drivers/bus/pci/pci_common_uio.c
> +++ b/drivers/bus/pci/pci_common_uio.c
> @@ -10,6 +10,7 @@
>   #include <sys/mman.h>
>   
>   #include <rte_eal.h>
> +#include <rte_eal_paging.h>
>   #include <rte_pci.h>
>   #include <rte_bus_pci.h>
>   #include <rte_tailq.h>
> @@ -58,7 +59,8 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>   
>   			void *mapaddr = pci_map_resource(uio_res->maps[i].addr,
>   					fd, (off_t)uio_res->maps[i].offset,
> -					(size_t)uio_res->maps[i].size, 0);
> +					(size_t)uio_res->maps[i].size,
> +					RTE_MAP_FORCE_ADDRESS_NOREPLACE);
>   
>   			/* fd is not needed in secondary process, close it */
>   			close(fd);
> diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
> index 04ba8ddb86..aaeb140eaf 100644
> --- a/lib/eal/common/eal_private.h
> +++ b/lib/eal/common/eal_private.h
> @@ -211,7 +211,12 @@ enum eal_mem_reserve_flags {
>   	 * @see RTE_MAP_FORCE_ADDRESS for description of possible consequences
>   	 *      (although implementations are not required to use it).
>   	 */
> -	EAL_RESERVE_FORCE_ADDRESS = 1 << 1
> +	EAL_RESERVE_FORCE_ADDRESS = 1 << 1,
> +	/**
> +	 * Force reserving memory at the requested address, but fail if a
> +	 * preexisting mapping collides with the request.
> +	 */
> +	EAL_RESERVE_FORCE_ADDRESS_NOREPLACE = 1 << 2,
>   };
>   
>   /**
> diff --git a/lib/eal/include/rte_eal_paging.h b/lib/eal/include/rte_eal_paging.h
> index c60317d0f5..7b1983b615 100644
> --- a/lib/eal/include/rte_eal_paging.h
> +++ b/lib/eal/include/rte_eal_paging.h
> @@ -34,7 +34,12 @@ enum rte_map_flags {
>   	 * may remove all other mappings in the requested region. However,
>   	 * it is not required to do so, thus mapping with this flag may fail.
>   	 */
> -	RTE_MAP_FORCE_ADDRESS = 1 << 3
> +	RTE_MAP_FORCE_ADDRESS = 1 << 3,
> +	/**
> +	 * Force mapping to the requested address, but fail if a preexisting
> +	 * mapping collides with the request.
> +	 */
> +	RTE_MAP_FORCE_ADDRESS_NOREPLACE = 1 << 4,
>   };
>   
>   /**
> diff --git a/lib/eal/unix/eal_unix_memory.c b/lib/eal/unix/eal_unix_memory.c
> index c540f1e838..61e914b8db 100644
> --- a/lib/eal/unix/eal_unix_memory.c
> +++ b/lib/eal/unix/eal_unix_memory.c
> @@ -17,11 +17,13 @@
>   #ifdef RTE_EXEC_ENV_LINUX
>   #define EAL_DONTDUMP MADV_DONTDUMP
>   #define EAL_DODUMP   MADV_DODUMP
> +#define EAL_FIXED_NOREPLACE MAP_FIXED_NOREPLACE

Is this available in all supported Linux kernel versions?
  
Jake Freeland May 12, 2025, 2:26 p.m. UTC | #2
On Thu May 8, 2025 at 6:32 AM CDT, Anatoly Burakov wrote:
> On 5/6/2025 7:40 PM, Jake Freeland wrote:
> > When mapping PCI resources in secondary processes, use the
> > RTE_MAP_FORCE_ADDRESS_NOREPLACE flag to indicate that the
> > mapping must be made at the provided address.
> > 
> > Without this flag, the kernel might return a different address for
> > the mapping, even if the requested region was available.
> > 
> > Signed-off-by: Jake Freeland <jfree@FreeBSD.org>
> > ---
> >   drivers/bus/pci/pci_common_uio.c | 4 +++-
> >   lib/eal/common/eal_private.h     | 7 ++++++-
> >   lib/eal/include/rte_eal_paging.h | 7 ++++++-
> >   lib/eal/unix/eal_unix_memory.c   | 8 +++++++-
> >   4 files changed, 22 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/bus/pci/pci_common_uio.c b/drivers/bus/pci/pci_common_uio.c
> > index 30503bd23a..71974e9f56 100644
> > --- a/drivers/bus/pci/pci_common_uio.c
> > +++ b/drivers/bus/pci/pci_common_uio.c
> > @@ -10,6 +10,7 @@
> >   #include <sys/mman.h>
> >   
> >   #include <rte_eal.h>
> > +#include <rte_eal_paging.h>
> >   #include <rte_pci.h>
> >   #include <rte_bus_pci.h>
> >   #include <rte_tailq.h>
> > @@ -58,7 +59,8 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
> >   
> >   			void *mapaddr = pci_map_resource(uio_res->maps[i].addr,
> >   					fd, (off_t)uio_res->maps[i].offset,
> > -					(size_t)uio_res->maps[i].size, 0);
> > +					(size_t)uio_res->maps[i].size,
> > +					RTE_MAP_FORCE_ADDRESS_NOREPLACE);
> >   
> >   			/* fd is not needed in secondary process, close it */
> >   			close(fd);
> > diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
> > index 04ba8ddb86..aaeb140eaf 100644
> > --- a/lib/eal/common/eal_private.h
> > +++ b/lib/eal/common/eal_private.h
> > @@ -211,7 +211,12 @@ enum eal_mem_reserve_flags {
> >   	 * @see RTE_MAP_FORCE_ADDRESS for description of possible consequences
> >   	 *      (although implementations are not required to use it).
> >   	 */
> > -	EAL_RESERVE_FORCE_ADDRESS = 1 << 1
> > +	EAL_RESERVE_FORCE_ADDRESS = 1 << 1,
> > +	/**
> > +	 * Force reserving memory at the requested address, but fail if a
> > +	 * preexisting mapping collides with the request.
> > +	 */
> > +	EAL_RESERVE_FORCE_ADDRESS_NOREPLACE = 1 << 2,
> >   };
> >   
> >   /**
> > diff --git a/lib/eal/include/rte_eal_paging.h b/lib/eal/include/rte_eal_paging.h
> > index c60317d0f5..7b1983b615 100644
> > --- a/lib/eal/include/rte_eal_paging.h
> > +++ b/lib/eal/include/rte_eal_paging.h
> > @@ -34,7 +34,12 @@ enum rte_map_flags {
> >   	 * may remove all other mappings in the requested region. However,
> >   	 * it is not required to do so, thus mapping with this flag may fail.
> >   	 */
> > -	RTE_MAP_FORCE_ADDRESS = 1 << 3
> > +	RTE_MAP_FORCE_ADDRESS = 1 << 3,
> > +	/**
> > +	 * Force mapping to the requested address, but fail if a preexisting
> > +	 * mapping collides with the request.
> > +	 */
> > +	RTE_MAP_FORCE_ADDRESS_NOREPLACE = 1 << 4,
> >   };
> >   
> >   /**
> > diff --git a/lib/eal/unix/eal_unix_memory.c b/lib/eal/unix/eal_unix_memory.c
> > index c540f1e838..61e914b8db 100644
> > --- a/lib/eal/unix/eal_unix_memory.c
> > +++ b/lib/eal/unix/eal_unix_memory.c
> > @@ -17,11 +17,13 @@
> >   #ifdef RTE_EXEC_ENV_LINUX
> >   #define EAL_DONTDUMP MADV_DONTDUMP
> >   #define EAL_DODUMP   MADV_DODUMP
> > +#define EAL_FIXED_NOREPLACE MAP_FIXED_NOREPLACE
>
> Is this available in all supported Linux kernel versions?

Support for the MAP_FIXED_NOREPLACE flag was adding in Linux 4.17.

It looks like we're still supporting DPDK 22.11 until Dec 2025. When
22.11 was released, the most recent LTS kernel version was 5.15.

Additionally, the man page specifies:

"older kernels which  do not recognize the MAP_FIXED_NOREPLACE flag will
typically (upon detecting a collision with a preexisting mapping) fall
back to a “non-MAP_FIXED” type of behavior"

Thanks,
Jake Freeland
  
Bruce Richardson May 12, 2025, 2:42 p.m. UTC | #3
On Thu, May 08, 2025 at 01:32:50PM +0200, Burakov, Anatoly wrote:
> On 5/6/2025 7:40 PM, Jake Freeland wrote:
> > When mapping PCI resources in secondary processes, use the
> > RTE_MAP_FORCE_ADDRESS_NOREPLACE flag to indicate that the
> > mapping must be made at the provided address.
> > 
> > Without this flag, the kernel might return a different address for
> > the mapping, even if the requested region was available.
> > 
> > Signed-off-by: Jake Freeland <jfree@FreeBSD.org>
> > ---
> >   drivers/bus/pci/pci_common_uio.c | 4 +++-
> >   lib/eal/common/eal_private.h     | 7 ++++++-
> >   lib/eal/include/rte_eal_paging.h | 7 ++++++-
> >   lib/eal/unix/eal_unix_memory.c   | 8 +++++++-
> >   4 files changed, 22 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/bus/pci/pci_common_uio.c b/drivers/bus/pci/pci_common_uio.c
> > index 30503bd23a..71974e9f56 100644
> > --- a/drivers/bus/pci/pci_common_uio.c
> > +++ b/drivers/bus/pci/pci_common_uio.c
> > @@ -10,6 +10,7 @@
> >   #include <sys/mman.h>
> >   #include <rte_eal.h>
> > +#include <rte_eal_paging.h>
> >   #include <rte_pci.h>
> >   #include <rte_bus_pci.h>
> >   #include <rte_tailq.h>
> > @@ -58,7 +59,8 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
> >   			void *mapaddr = pci_map_resource(uio_res->maps[i].addr,
> >   					fd, (off_t)uio_res->maps[i].offset,
> > -					(size_t)uio_res->maps[i].size, 0);
> > +					(size_t)uio_res->maps[i].size,
> > +					RTE_MAP_FORCE_ADDRESS_NOREPLACE);
> >   			/* fd is not needed in secondary process, close it */
> >   			close(fd);
> > diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
> > index 04ba8ddb86..aaeb140eaf 100644
> > --- a/lib/eal/common/eal_private.h
> > +++ b/lib/eal/common/eal_private.h
> > @@ -211,7 +211,12 @@ enum eal_mem_reserve_flags {
> >   	 * @see RTE_MAP_FORCE_ADDRESS for description of possible consequences
> >   	 *      (although implementations are not required to use it).
> >   	 */
> > -	EAL_RESERVE_FORCE_ADDRESS = 1 << 1
> > +	EAL_RESERVE_FORCE_ADDRESS = 1 << 1,
> > +	/**
> > +	 * Force reserving memory at the requested address, but fail if a
> > +	 * preexisting mapping collides with the request.
> > +	 */
> > +	EAL_RESERVE_FORCE_ADDRESS_NOREPLACE = 1 << 2,
> >   };
> >   /**
> > diff --git a/lib/eal/include/rte_eal_paging.h b/lib/eal/include/rte_eal_paging.h
> > index c60317d0f5..7b1983b615 100644
> > --- a/lib/eal/include/rte_eal_paging.h
> > +++ b/lib/eal/include/rte_eal_paging.h
> > @@ -34,7 +34,12 @@ enum rte_map_flags {
> >   	 * may remove all other mappings in the requested region. However,
> >   	 * it is not required to do so, thus mapping with this flag may fail.
> >   	 */
> > -	RTE_MAP_FORCE_ADDRESS = 1 << 3
> > +	RTE_MAP_FORCE_ADDRESS = 1 << 3,
> > +	/**
> > +	 * Force mapping to the requested address, but fail if a preexisting
> > +	 * mapping collides with the request.
> > +	 */
> > +	RTE_MAP_FORCE_ADDRESS_NOREPLACE = 1 << 4,
> >   };
> >   /**
> > diff --git a/lib/eal/unix/eal_unix_memory.c b/lib/eal/unix/eal_unix_memory.c
> > index c540f1e838..61e914b8db 100644
> > --- a/lib/eal/unix/eal_unix_memory.c
> > +++ b/lib/eal/unix/eal_unix_memory.c
> > @@ -17,11 +17,13 @@
> >   #ifdef RTE_EXEC_ENV_LINUX
> >   #define EAL_DONTDUMP MADV_DONTDUMP
> >   #define EAL_DODUMP   MADV_DODUMP
> > +#define EAL_FIXED_NOREPLACE MAP_FIXED_NOREPLACE
> 
> Is this available in all supported Linux kernel versions?
> 
Checking the mmap man page [1] indicates that this is available from 4.17.
That means that it's both in the oldest supported DPDK kernel (4.19), and
also the kernel version in the oldest supported linux distro, which should
now be RHEL 8, with 4.18 kernel. Therefore the addition of this define
LGTM.

/Bruce

[1] https://man7.org/linux/man-pages/man2/mmap.2.html
  
Burakov, Anatoly May 13, 2025, 12:54 p.m. UTC | #4
On 5/6/2025 7:40 PM, Jake Freeland wrote:
> When mapping PCI resources in secondary processes, use the
> RTE_MAP_FORCE_ADDRESS_NOREPLACE flag to indicate that the
> mapping must be made at the provided address.
> 
> Without this flag, the kernel might return a different address for
> the mapping, even if the requested region was available.
> 
> Signed-off-by: Jake Freeland <jfree@FreeBSD.org>
> ---
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
  

Patch

diff --git a/drivers/bus/pci/pci_common_uio.c b/drivers/bus/pci/pci_common_uio.c
index 30503bd23a..71974e9f56 100644
--- a/drivers/bus/pci/pci_common_uio.c
+++ b/drivers/bus/pci/pci_common_uio.c
@@ -10,6 +10,7 @@ 
 #include <sys/mman.h>
 
 #include <rte_eal.h>
+#include <rte_eal_paging.h>
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_tailq.h>
@@ -58,7 +59,8 @@  pci_uio_map_secondary(struct rte_pci_device *dev)
 
 			void *mapaddr = pci_map_resource(uio_res->maps[i].addr,
 					fd, (off_t)uio_res->maps[i].offset,
-					(size_t)uio_res->maps[i].size, 0);
+					(size_t)uio_res->maps[i].size,
+					RTE_MAP_FORCE_ADDRESS_NOREPLACE);
 
 			/* fd is not needed in secondary process, close it */
 			close(fd);
diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
index 04ba8ddb86..aaeb140eaf 100644
--- a/lib/eal/common/eal_private.h
+++ b/lib/eal/common/eal_private.h
@@ -211,7 +211,12 @@  enum eal_mem_reserve_flags {
 	 * @see RTE_MAP_FORCE_ADDRESS for description of possible consequences
 	 *      (although implementations are not required to use it).
 	 */
-	EAL_RESERVE_FORCE_ADDRESS = 1 << 1
+	EAL_RESERVE_FORCE_ADDRESS = 1 << 1,
+	/**
+	 * Force reserving memory at the requested address, but fail if a
+	 * preexisting mapping collides with the request.
+	 */
+	EAL_RESERVE_FORCE_ADDRESS_NOREPLACE = 1 << 2,
 };
 
 /**
diff --git a/lib/eal/include/rte_eal_paging.h b/lib/eal/include/rte_eal_paging.h
index c60317d0f5..7b1983b615 100644
--- a/lib/eal/include/rte_eal_paging.h
+++ b/lib/eal/include/rte_eal_paging.h
@@ -34,7 +34,12 @@  enum rte_map_flags {
 	 * may remove all other mappings in the requested region. However,
 	 * it is not required to do so, thus mapping with this flag may fail.
 	 */
-	RTE_MAP_FORCE_ADDRESS = 1 << 3
+	RTE_MAP_FORCE_ADDRESS = 1 << 3,
+	/**
+	 * Force mapping to the requested address, but fail if a preexisting
+	 * mapping collides with the request.
+	 */
+	RTE_MAP_FORCE_ADDRESS_NOREPLACE = 1 << 4,
 };
 
 /**
diff --git a/lib/eal/unix/eal_unix_memory.c b/lib/eal/unix/eal_unix_memory.c
index c540f1e838..61e914b8db 100644
--- a/lib/eal/unix/eal_unix_memory.c
+++ b/lib/eal/unix/eal_unix_memory.c
@@ -17,11 +17,13 @@ 
 #ifdef RTE_EXEC_ENV_LINUX
 #define EAL_DONTDUMP MADV_DONTDUMP
 #define EAL_DODUMP   MADV_DODUMP
+#define EAL_FIXED_NOREPLACE MAP_FIXED_NOREPLACE
 #elif defined RTE_EXEC_ENV_FREEBSD
 #define EAL_DONTDUMP MADV_NOCORE
 #define EAL_DODUMP   MADV_CORE
+#define EAL_FIXED_NOREPLACE (MAP_FIXED | MAP_EXCL)
 #else
-#error "madvise doesn't support this OS"
+#error "EAL doesn't support this OS"
 #endif
 
 static void *
@@ -68,6 +70,8 @@  eal_mem_reserve(void *requested_addr, size_t size, int flags)
 
 	if (flags & EAL_RESERVE_FORCE_ADDRESS)
 		sys_flags |= MAP_FIXED;
+	if (flags & EAL_RESERVE_FORCE_ADDRESS_NOREPLACE)
+		sys_flags |= EAL_FIXED_NOREPLACE;
 
 	return mem_map(requested_addr, size, PROT_NONE, sys_flags, -1, 0);
 }
@@ -124,6 +128,8 @@  rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
 		sys_flags |= MAP_PRIVATE;
 	if (flags & RTE_MAP_FORCE_ADDRESS)
 		sys_flags |= MAP_FIXED;
+	if (flags & RTE_MAP_FORCE_ADDRESS_NOREPLACE)
+		sys_flags |= EAL_FIXED_NOREPLACE;
 
 	return mem_map(requested_addr, size, sys_prot, sys_flags, fd, offset);
 }