@@ -122,6 +122,11 @@ Memory-related options
to system pthread stack size unless the optional size (in kbytes) is
specified.
+* ``--const-translate``
+
+  Prepare hugepage memory so that every hugepage virtual address is separated
+  from its physical address by a single constant offset, allowing fast
+  translation in either direction.
+
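+  For example, combined with legacy memory mode (the application and core
+  list shown are illustrative only)::
+
+      ./dpdk-testpmd -l 0-3 -n 4 --legacy-mem --const-translate -- -i
+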
Debugging options
~~~~~~~~~~~~~~~~~
@@ -350,6 +350,16 @@ if the optional size parameter is not specified.
hugepage worker thread stacks given the same thread stack size and
loading conditions.
+Constant Address Translation
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When the ``--const-translate`` EAL option is specified, hugepage
+memory is initialized to provide a constant offset between hugepage
+virtual and physical addresses.
+
+This allows device drivers to translate quickly in both directions,
+virtual to physical and physical to virtual, for any hugepage address.
+
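+A minimal usage sketch of the fast translation API (``obj`` here stands
+for any pointer into hugepage-backed memory, for example a mempool
+object, and is illustrative only):
+
+.. code-block:: c
+
+   #include <rte_memory.h>
+
+   /* fast VA -> IO address lookup, no page-table walk */
+   rte_iova_t iova = rte_mem_fast_virt2iova(obj);
+   if (iova == RTE_BAD_IOVA) {
+       /* obj is not part of DPDK hugepage memory */
+   }
+
+   /* and the reverse lookup, IO address -> VA */
+   void *virt = rte_mem_fast_iova2virt(iova);
+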
Support for Externally Allocated Memory
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -104,6 +104,7 @@ eal_long_options[] = {
{OPT_NO_TELEMETRY, 0, NULL, OPT_NO_TELEMETRY_NUM },
{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
{OPT_HUGE_WORKER_STACK, 2, NULL, OPT_HUGE_WORKER_STACK_NUM },
+ {OPT_CONST_TRANSLATE, 0, NULL, OPT_CONST_TRANSLATE_NUM },
{0, 0, NULL, 0 }
};
@@ -2086,6 +2087,11 @@ eal_check_common_options(struct internal_config *internal_cfg)
"be specified together with --"OPT_NO_HUGE"\n");
return -1;
}
+ if (internal_cfg->no_hugetlbfs && internal_cfg->const_translate) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_CONST_TRANSLATE" cannot "
+ "be specified together with --"OPT_NO_HUGE"\n");
+ return -1;
+ }
if (internal_cfg->force_socket_limits && internal_cfg->legacy_mem) {
RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_LIMIT
" is only supported in non-legacy memory mode\n");
@@ -103,6 +103,8 @@ struct internal_config {
struct simd_bitwidth max_simd_bitwidth;
/**< max simd bitwidth path to use */
size_t huge_worker_stack_size; /**< worker thread stack size */
+ volatile unsigned const_translate;
+ /**< true to enable constant VA->PA, PA->VA address translation */
};
void eal_reset_internal_config(struct internal_config *internal_cfg);
@@ -89,6 +89,8 @@ enum {
OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
#define OPT_HUGE_WORKER_STACK "huge-worker-stack"
OPT_HUGE_WORKER_STACK_NUM,
+#define OPT_CONST_TRANSLATE "const-translate"
+ OPT_CONST_TRANSLATE_NUM,
OPT_LONG_MAX_NUM
};
@@ -134,6 +134,34 @@ rte_iova_t rte_mem_virt2iova(const void *virt);
void *
rte_mem_iova2virt(rte_iova_t iova);
+/**
+ * Get IO virtual address of any mapped virtual address in the current process.
+ *
+ * @note This function provides fast virtual-to-physical address translation
+ *   that does not walk any page tables. It is suitable for use in
+ *   data plane threads.
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * The IO address or RTE_BAD_IOVA on error.
+ */
+rte_iova_t rte_mem_fast_virt2iova(const void *virt);
+
+/**
+ * Get virtual memory address corresponding to iova address.
+ *
+ * @note This function provides fast physical-to-virtual address translation.
+ *   It is suitable for use in data plane threads.
+ *
+ * @param iova
+ * The iova address.
+ * @return
+ * Virtual address corresponding to iova address (or NULL if address does not
+ * exist within DPDK memory map).
+ */
+void *rte_mem_fast_iova2virt(rte_iova_t iova);
+
/**
* Get memseg to which a particular virtual address belongs.
*
@@ -455,6 +455,8 @@ eal_usage(const char *prgname)
" Allocate worker thread stacks from hugepage memory.\n"
" Size is in units of kbytes and defaults to system\n"
" thread stack size if not specified.\n"
+ " --"OPT_CONST_TRANSLATE" Constant delta between hugepage "
+ "physical and virtual addresses\n"
"\n");
/* Allow the application to print its usage message too if hook is set */
if (hook) {
@@ -767,6 +769,10 @@ eal_parse_args(int argc, char **argv)
}
break;
+ case OPT_CONST_TRANSLATE_NUM:
+ internal_conf->const_translate = 1;
+ break;
+
default:
if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
RTE_LOG(ERR, EAL, "Option %c is not supported "
@@ -148,6 +148,47 @@ rte_mem_virt2iova(const void *virtaddr)
return rte_mem_virt2phy(virtaddr);
}
+/* constant delta between hugepage VA and PA: VA = PA + const_va_pa_delta */
+static void *const_va_pa_delta;
+
+#ifdef RTE_MEM_SANITY_CHECK
+#define __rte_mem_validate(v) rte_mem_validate(v)
+
+static int
+rte_mem_validate(const void *virtaddr)
+{
+ if (!rte_mem_virt2memseg(virtaddr, NULL)) {
+ RTE_LOG(ERR, EAL, "Invalid virtual address %p\n", virtaddr);
+ return -1;
+ }
+ return 0;
+}
+#else
+#define __rte_mem_validate(v) 0
+#endif
+
+rte_iova_t
+rte_mem_fast_virt2iova(const void *virtaddr)
+{
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ return (uintptr_t)virtaddr;
+
+ if (__rte_mem_validate(virtaddr) != 0)
+ return RTE_BAD_IOVA;
+
+ return (rte_iova_t)((uintptr_t)virtaddr - (uintptr_t)const_va_pa_delta);
+}
+
+void *
+rte_mem_fast_iova2virt(rte_iova_t iova)
+{
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ return (void *)(uintptr_t)iova;
+
+ void *virtaddr = (void *)((uintptr_t)const_va_pa_delta + iova);
+
+ if (__rte_mem_validate(virtaddr) != 0)
+ return NULL;
+
+ return virtaddr;
+}
+
/*
* For each hugepage in hugepg_tbl, fill the physaddr value. We find
* it by browsing the /proc/self/pagemap special file.
@@ -664,10 +705,9 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
uint64_t page_sz;
size_t memseg_len;
int socket_id;
-#ifndef RTE_ARCH_64
const struct internal_config *internal_conf =
eal_get_internal_configuration();
-#endif
+
page_sz = hugepages[seg_start].size;
socket_id = hugepages[seg_start].socket_id;
seg_len = seg_end - seg_start;
@@ -691,6 +731,12 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
ms_idx = rte_fbarray_find_next_n_free(arr, 0,
seg_len + (empty ? 0 : 1));
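+ /* with constant translation in legacy PA mode, only place segments
+  * at the very start of a memseg list so that the list can later be
+  * rebased onto the constant-offset VA window
+  */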
+ if (internal_conf->const_translate &&
+ internal_conf->legacy_mem &&
+ rte_eal_iova_mode() == RTE_IOVA_PA &&
+ ms_idx != 0)
+ continue;
+
/* memseg list is full? */
if (ms_idx < 0)
continue;
@@ -735,7 +781,12 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
return -1;
}
memseg_len = (size_t)page_sz;
- addr = RTE_PTR_ADD(msl->base_va, ms_idx * memseg_len);
+ if (internal_conf->const_translate &&
+ internal_conf->legacy_mem &&
+ rte_eal_iova_mode() == RTE_IOVA_PA)
+ addr = RTE_PTR_ADD(const_va_pa_delta, hfile->physaddr);
+ else
+ addr = RTE_PTR_ADD(msl->base_va, ms_idx * memseg_len);
/* we know this address is already mmapped by memseg list, so
* using MAP_FIXED here is safe
@@ -1085,6 +1136,98 @@ huge_recover_sigbus(void)
}
}
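+
+/*
+ * Remap hugepages for constant address translation: in IOVA as PA mode,
+ * reserve one VA window covering the whole hugepage physical address
+ * range, remap each hugepage at a constant offset from its physical
+ * address, then rebase every populated memseg list onto that window.
+ */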
+static int
+remap_hugepages_const_xlate(struct hugepage_file *hugepage, int n_pages,
+ int nr_hugepages)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, remap_failed = 0;
+ void *addr;
+
+ /* Adjust VA bases in memory segment lists to enable constant
+ * va->pa and pa->va address translation
+ */
+ if (rte_eal_iova_mode() == RTE_IOVA_PA) {
+ RTE_LOG(INFO, EAL,
+ "Enabling constant address translation support...\n");
+
+ /* Allocate virtual address space to cover the full
+ * range of huge page physical addresses
+ */
+ size_t va_mem_sz =
+ hugepage[nr_hugepages - 1].physaddr +
+ hugepage[nr_hugepages - 1].size -
+ hugepage[0].physaddr;
+ size_t page_sz = 0;
+
+ for (i = 0; i < nr_hugepages; i++)
+ if (hugepage[i].size > page_sz)
+ page_sz = hugepage[i].size;
+
+ void *va_base =
+ eal_get_virtual_area(NULL, &va_mem_sz, page_sz, 0, 0);
+
+ if (va_base == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
+ return -ENOMEM;
+ }
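+ /* va_base maps hugepage[0].physaddr, so VA = PA + const_va_pa_delta
+  * for every hugepage in the window
+  */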
+ const_va_pa_delta = RTE_PTR_SUB(va_base, hugepage[0].physaddr);
+
+ /* Unmap gaps in virtual address space when there are gaps
+ * between huge page physical addresses
+ */
+ for (i = 1; i < nr_hugepages; i++) {
+ size_t gap_sz = hugepage[i].physaddr -
+ (hugepage[i-1].physaddr + hugepage[i-1].size);
+
+ if (gap_sz) {
+ addr = RTE_PTR_ADD(const_va_pa_delta,
+ hugepage[i-1].physaddr +
+ hugepage[i-1].size);
+
+ if (munmap(addr, gap_sz) != 0)
+ RTE_LOG(ERR, EAL, "Gap unmap failed\n");
+ }
+ }
+ }
+
+ /* remap all pages we do need into memseg list VA space, so that those
+ * pages become first-class citizens in DPDK memory subsystem
+ */
+ if (remap_needed_hugepages(hugepage, n_pages)) {
+ RTE_LOG(ERR, EAL,
+ "Couldn't remap hugepage files into memseg lists\n");
+ remap_failed = 1;
+ }
+
+ /* Unmap the existing virtual address space in each MSL with
+ * allocated pages. Modify MSL base_va to be the VA of the
+ * first page of the segment list. Adjust the msl->len to the
+ * length of the address space consumed by the msl.
+ */
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+ struct rte_memseg *ms;
+
+ /* skip inactive lists */
+ if (msl->base_va == NULL)
+ continue;
+
+ /* skip lists where there are no pages allocated */
+ if (!msl->memseg_arr.count)
+ continue;
+
+ /* release current VA space */
+ munmap(msl->base_va, msl->len);
+
+ /* assign new VA base and len */
+ ms = rte_fbarray_get(&msl->memseg_arr, 0);
+ msl->base_va = ms->addr;
+ msl->len = (msl->page_sz * msl->memseg_arr.count);
+ }
+ return remap_failed;
+}
+
/*
* Prepare physical memory mapping: fill configuration structure with
* these infos, return 0 on success.
@@ -1413,9 +1556,19 @@ eal_legacy_hugepage_init(void)
/* remap all pages we do need into memseg list VA space, so that those
* pages become first-class citizens in DPDK memory subsystem
*/
- if (remap_needed_hugepages(hugepage, nr_hugefiles)) {
- RTE_LOG(ERR, EAL, "Couldn't remap hugepage files into memseg lists\n");
- goto fail;
+ if (internal_conf->const_translate) {
+ if (remap_hugepages_const_xlate(hugepage, nr_hugefiles,
+ nr_hugepages)) {
+ RTE_LOG(ERR, EAL,
+ "Couldn't remap hugepage files into memseg lists\n");
+ goto fail;
+ }
+ } else {
+ if (remap_needed_hugepages(hugepage, nr_hugefiles)) {
+ RTE_LOG(ERR, EAL,
+ "Couldn't remap hugepage files into memseg lists\n");
+ goto fail;
+ }
}
/* free the hugepage backing files */