eal/linux: verify mmu type for DPDK support (ppc64le)
Checks
Commit Message
IBM POWER systems support more than one type of memory management unit
(MMU). The Power ISA 3.0 specification, which applies to P9 and later
CPUs, defined a new Radix MMU which, among other things, allows an
anonymous memory page mapping to be converted into a hugepage mapping
at a specific address. This is a required feature in DPDK so we need
to test the MMU type when POWER systems are used and provide a more
useful error message for the user when running on an unsupported
system.
Bugzilla ID: 1221
Suggested-by: Thomas Monjalon <thomas@monjalon.net>
Signed-off-by: David Christensen <drc@linux.vnet.ibm.com>
---
lib/eal/linux/eal.c | 63 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 63 insertions(+)
Comments
11/10/2023 00:51, David Christensen:
> IBM POWER systems support more than one type of memory management unit
> (MMU). The Power ISA 3.0 specification, which applies to P9 and later
> CPUs, defined a new Radix MMU which, among other things, allows an
> anonymous memory page mapping to be converted into a hugepage mapping
> at a specific address. This is a required feature in DPDK so we need
> to test the MMU type when POWER systems are used and provide a more
> useful error message for the user when running on an unsupported
> system.
>
> Bugzilla ID: 1221
> Suggested-by: Thomas Monjalon <thomas@monjalon.net>
> Signed-off-by: David Christensen <drc@linux.vnet.ibm.com>
> ---
> --- a/lib/eal/linux/eal.c
> +++ b/lib/eal/linux/eal.c
> +/*
> + * IBM POWER systems support more than one type of memory management unit (MMU).
> + * The Power ISA 3.0 specification, which applies to P9 and later CPUs, defined
> + * a new Radix MMU which, among other things, allows an anonymous memory page
> + * mapping to be converted into a hugepage mapping at a specific address. This
> + * is a required feature in DPDK so we need to test the MMU type when POWER
> + * systems are used.
> + */
> +static bool
> +is_mmu_supported(void)
> +{
> +#ifdef RTE_ARCH_PPC_64
> + static const char proc_cpuinfo[] = "/proc/cpuinfo";
> + static const char str_mmu[] = "MMU";
> + static const char str_radix[] = "Radix";
> + char buf[512];
> + char *ret = NULL;
> + FILE *f = fopen(proc_cpuinfo, "r");
> +
> + if (f == NULL) {
> + RTE_LOG(ERR, EAL, "Cannot open %s\n", proc_cpuinfo);
> + return false;
> + }
> +
> + /*
> + * Example "MMU" in /proc/cpuinfo:
> + * ...
> + * model : 8335-GTW
> + * machine : PowerNV 8335-GTW
> + * firmware : OPAL
> + * MMU : Radix
> + * ... or ...
> + * model : IBM,9009-22A
> + * machine : CHRP IBM,9009-22A
> + * MMU : Hash
> + */
> + while (fgets(buf, sizeof(buf), f) != NULL) {
> + ret = strstr(buf, str_mmu);
> + if (ret == NULL)
> + continue;
> + ret += sizeof(str_mmu) - 1;
> + ret = strchr(ret, ':');
> + if (ret == NULL)
> + continue;
> + ret = strstr(ret, str_radix);
> + break;
> + }
> + fclose(f);
> + if (ret == NULL)
> + rte_eal_init_alert("DPDK on PPC64 requires radix-mmu.");
> + return (ret != NULL);
> +#else
> + return true;
> +#endif
> +}
I feel this function should not be implemented in the common EAL.
What about adding a new function in lib/eal/ppc/ ?
And add the "return true" for other architectures?
On 10/17/23 5:39 AM, Thomas Monjalon wrote:
> I feel this function should not be implemented in the common EAL.
> What about adding a new function in lib/eal/ppc/ ?
> And add the "return true" for other architectures?
Would it be more appropriate in the lib/eal/common level or
lib/eal/linux only? I would expect the MMU requirement should apply to
FreeBSD on ppc64le as well but IBM doesn't support or test FreeBSD
internally.
Dave
23/10/2023 23:59, David Christensen:
>
> On 10/17/23 5:39 AM, Thomas Monjalon wrote:
> > I feel this function should not be implemented in the common EAL.
> > What about adding a new function in lib/eal/ppc/ ?
> > And add the "return true" for other architectures?
>
> Would it be more appropriate in the lib/eal/common level or
> lib/eal/linux only? I would expect the MMU requirement should apply to
> FreeBSD on ppc64le as well but IBM doesn't support or test FreeBSD
> internally.
Even if you are not testing it, I don't think you should restrict
the code change to Linux.
@@ -910,6 +910,62 @@ is_iommu_enabled(void)
return n > 2;
}
+/*
+ * Report whether the platform MMU can be used by DPDK. IBM POWER systems have
+ * several MMU types; the Radix MMU (Power ISA 3.0, P9 and later CPUs) lets an
+ * anonymous page mapping be converted into a hugepage mapping at a specific
+ * address, which DPDK requires. With RTE_ARCH_PPC_64 defined, returns true only
+ * if the first "MMU ...:" line of /proc/cpuinfo contains "Radix" (false if the
+ * file cannot be opened or has no such line). Otherwise always returns true.
+ */
+static bool
+is_mmu_supported(void)
+{
+#ifdef RTE_ARCH_PPC_64
+ static const char proc_cpuinfo[] = "/proc/cpuinfo";
+ static const char str_mmu[] = "MMU";
+ static const char str_radix[] = "Radix";
+ char buf[512];
+ char *ret = NULL; /* scan cursor; non-NULL after the loop means Radix was found */
+ FILE *f = fopen(proc_cpuinfo, "r");
+
+ if (f == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot open %s\n", proc_cpuinfo);
+ return false; /* NOTE(review): caller then reports this as an unsupported MMU */
+ }
+
+ /*
+ * Example "MMU" in /proc/cpuinfo:
+ * ...
+ * model : 8335-GTW
+ * machine : PowerNV 8335-GTW
+ * firmware : OPAL
+ * MMU : Radix
+ * ... or ...
+ * model : IBM,9009-22A
+ * machine : CHRP IBM,9009-22A
+ * MMU : Hash
+ */
+ while (fgets(buf, sizeof(buf), f) != NULL) {
+ ret = strstr(buf, str_mmu); /* substring match anywhere in the line */
+ if (ret == NULL)
+ continue;
+ ret += sizeof(str_mmu) - 1; /* step past the MMU key itself */
+ ret = strchr(ret, ':'); /* the value follows the colon */
+ if (ret == NULL)
+ continue; /* MMU text without a colon is not the key line */
+ ret = strstr(ret, str_radix); /* NULL unless the value names Radix */
+ break; /* the first matching MMU line decides the result */
+ }
+ fclose(f);
+ if (ret == NULL)
+ rte_eal_init_alert("DPDK on PPC64 requires radix-mmu."); /* NOTE(review): rte_eal_init() alerts again on false */
+ return (ret != NULL);
+#else
+ return true; /* no MMU restriction is checked on other architectures */
+#endif
+}
+
static __rte_noreturn void *
eal_worker_thread_loop(void *arg)
{
@@ -983,6 +1039,13 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ /* verify if mmu is supported */
+ if (!is_mmu_supported()) {
+ rte_eal_init_alert("unsupported mmu type.");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
if (!__atomic_compare_exchange_n(&run_once, &has_run, 1, 0,
__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
rte_eal_init_alert("already called initialization.");