[dpdk-dev,2/4] mem: add API to obstain memory-backed file info

Message ID 1452426182-86851-3-git-send-email-jianfeng.tan@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Jianfeng Tan Jan. 10, 2016, 11:43 a.m. UTC
  A new API named rte_eal_get_backfile_info() and a new data
struct back_file is added to obstain information of memory-
backed file info.

Signed-off-by: Huawei Xie <huawei.xie@intel.com>
Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
---
 lib/librte_eal/common/include/rte_memory.h | 16 +++++++++++++
 lib/librte_eal/linuxapp/eal/eal_memory.c   | 37 ++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
  

Comments

Pavel Fedin Jan. 11, 2016, 11:43 a.m. UTC | #1
Hello!

> -----Original Message-----
> From: Jianfeng Tan [mailto:jianfeng.tan@intel.com]
> Sent: Sunday, January 10, 2016 2:43 PM
> To: dev@dpdk.org
> Cc: rich.lane@bigswitch.com; yuanhan.liu@linux.intel.com; mst@redhat.com;
> nakajima.yoshihiro@lab.ntt.co.jp; huawei.xie@intel.com; mukawa@igel.co.jp;
> p.fedin@samsung.com; michael.qiu@intel.com; ann.zhuangyanying@huawei.com; Jianfeng Tan
> Subject: [PATCH 2/4] mem: add API to obstain memory-backed file info

 "obtain" - typo in subject

> 
> A new API named rte_eal_get_backfile_info() and a new data
> struct back_file is added to obstain information of memory-
> backed file info.
> 
> Signed-off-by: Huawei Xie <huawei.xie@intel.com>
> Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
> ---
>  lib/librte_eal/common/include/rte_memory.h | 16 +++++++++++++
>  lib/librte_eal/linuxapp/eal/eal_memory.c   | 37 ++++++++++++++++++++++++++++++
>  2 files changed, 53 insertions(+)
> 
> diff --git a/lib/librte_eal/common/include/rte_memory.h
> b/lib/librte_eal/common/include/rte_memory.h
> index 9c9e40f..75ef8db 100644
> --- a/lib/librte_eal/common/include/rte_memory.h
> +++ b/lib/librte_eal/common/include/rte_memory.h
> @@ -109,6 +109,22 @@ struct rte_memseg {
>  } __rte_packed;
> 
>  /**
> + * This struct is used to store information about memory-backed file that
> + * we mapped in memory initialization.
> + */
> +struct back_file {
> +	void *addr;         /**< virtual addr */
> +	size_t size;        /**< the page size */
> +	char filepath[PATH_MAX]; /**< path to backing file on filesystem */
> +};
> +
> +/**
> +  * Get the hugepage file information. Caller to free.
> +  * Return number of hugepage files used.
> +  */
> +int rte_eal_get_backfile_info(struct back_file **);
> +
> +/**
>   * Lock page in physical memory and prevent from swapping.
>   *
>   * @param virt
> diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c
> b/lib/librte_eal/linuxapp/eal/eal_memory.c
> index 2bb1163..6ca1404 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_memory.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
> @@ -758,6 +758,9 @@ sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct hugepage_info
> *hpi)
>  	return 0;
>  }
> 
> +static struct hugepage_file *hugepage_files;
> +static int num_hugepage_files;
> +
>  /*
>   * Uses mmap to create a shared memory area for storage of data
>   * Used in this file to store the hugepage file map on disk
> @@ -776,9 +779,29 @@ create_shared_memory(const char *filename, const size_t mem_size)
>  	retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
>  	close(fd);
> 
> +	hugepage_files = retval;
> +	num_hugepage_files = mem_size / (sizeof(struct hugepage_file));
> +
>  	return retval;
>  }
> 
> +int
> +rte_eal_get_backfile_info(struct back_file **p)
> +{
> +	struct back_file *backfiles;
> +	int i, num_backfiles = num_hugepage_files;
> +
> +	backfiles = malloc(sizeof(struct back_file) * num_backfiles);
> +	for (i = 0; i < num_backfiles; ++i) {
> +		backfiles[i].addr = hugepage_files[i].final_va;
> +		backfiles[i].size = hugepage_files[i].size;
> +		strcpy(backfiles[i].filepath, hugepage_files[i].filepath);
> +	}
> +
> +	*p = backfiles;
> +	return num_backfiles;
> +}
> +
>  /*
>   * this copies *active* hugepages from one hugepage table to another.
>   * destination is typically the shared memory.
> @@ -1157,6 +1180,20 @@ rte_eal_hugepage_init(void)
>  		mcfg->memseg[0].len = internal_config.memory;
>  		mcfg->memseg[0].socket_id = socket_id;
> 
> +		hugepage = create_shared_memory(eal_hugepage_info_path(),
> +				sizeof(struct hugepage_file));
> +		hugepage->orig_va = addr;
> +		hugepage->final_va = addr;
> +		hugepage->physaddr = rte_mem_virt2phy(addr);
> +		hugepage->size = pagesize;
> +		hugepage->socket_id = socket_id;
> +		hugepage->file_id = 0;
> +		hugepage->memseg_id = 0;
> +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
> +		hugepage->repeated = internal_config.memory / pagesize;
> +#endif
> +		strncpy(hugepage->filepath, filepath, MAX_HUGEPAGE_PATH);
> +
>  		close(fd);
> 
>  		return 0;
> --
> 2.1.4

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia
  
Rich Lane Jan. 11, 2016, 8:26 p.m. UTC | #2
On Sun, Jan 10, 2016 at 3:43 AM, Jianfeng Tan <jianfeng.tan@intel.com>
wrote:

> @@ -1157,6 +1180,20 @@ rte_eal_hugepage_init(void)
>                 mcfg->memseg[0].len = internal_config.memory;
>                 mcfg->memseg[0].socket_id = socket_id;
>
> +               hugepage = create_shared_memory(eal_hugepage_info_path(),
> +                               sizeof(struct hugepage_file));
> +               hugepage->orig_va = addr;
> +               hugepage->final_va = addr;
> +               hugepage->physaddr = rte_mem_virt2phy(addr);
> +               hugepage->size = pagesize;
>

Should this be "hugepage->size = internal_config.memory"? Otherwise the
vhost-user
memtable entry has a size of only 2MB.
  
Jianfeng Tan Jan. 12, 2016, 9:12 a.m. UTC | #3
Hi!

On 1/12/2016 4:26 AM, Rich Lane wrote:
> On Sun, Jan 10, 2016 at 3:43 AM, Jianfeng Tan <jianfeng.tan@intel.com 
> <mailto:jianfeng.tan@intel.com>> wrote:
>
>     @@ -1157,6 +1180,20 @@ rte_eal_hugepage_init(void)
>                     mcfg->memseg[0].len = internal_config.memory;
>                     mcfg->memseg[0].socket_id = socket_id;
>
>     +               hugepage =
>     create_shared_memory(eal_hugepage_info_path(),
>     +                               sizeof(struct hugepage_file));
>     +               hugepage->orig_va = addr;
>     +               hugepage->final_va = addr;
>     +               hugepage->physaddr = rte_mem_virt2phy(addr);
>     +               hugepage->size = pagesize;
>
>
> Should this be "hugepage->size = internal_config.memory"? Otherwise 
> the vhost-user
> memtable entry has a size of only 2MB.

I don't think so. See the definition:

  47 struct hugepage_file {
  48         void *orig_va;      /**< virtual addr of first mmap() */
  49         void *final_va;     /**< virtual addr of 2nd mmap() */
  50         uint64_t physaddr;  /**< physical addr */
  51         size_t size;        /**< the page size */
  52         int socket_id;      /**< NUMA socket ID */
  53         int file_id;        /**< the '%d' in HUGEFILE_FMT */
  54         int memseg_id;      /**< the memory segment to which page 
belongs */
  55 #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
  56         int repeated;           /**< number of times the page size 
is repeated */
  57 #endif
  58         char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file 
on filesystem */
  59 };

size stands for the page size instead of total size.

Thanks,
Jianfeng
  
Pavel Fedin Jan. 12, 2016, 10:04 a.m. UTC | #4
Hello!

>> Should this be "hugepage->size = internal_config.memory"? Otherwise the vhost-user
>> memtable entry has a size of only 2MB.

> I don't think so. See the definition:

> 47 struct hugepage_file {
> 48         void *orig_va;      /**< virtual addr of first mmap() */
> 49         void *final_va;     /**< virtual addr of 2nd mmap() */
> 50         uint64_t physaddr;  /**< physical addr */
> 51         size_t size;        /**< the page size */
> 52         int socket_id;      /**< NUMA socket ID */
> 53         int file_id;        /**< the '%d' in HUGEFILE_FMT */
> 54         int memseg_id;      /**< the memory segment to which page belongs */                                                                                            
> 55 #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS    
> 56         int repeated;           /**< number of times the page size is repeated */                                                                                       
> 57 #endif                    
> 58         char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */                                                                                    
> 59 };

> size stands for the page size instead of total size.

 But in this case host gets this page size for total region size, therefore qva_to_vva() fails.
 I haven't worked with hugepages, but i guess that with real hugepages we get one file per page, therefore page size == mapping size. With newly introduced --single-file we now have something that pretends to be a single "uber-huge-page", so we need to specify total size of the mapping here.

 BTW, i'm still unhappy about ABI breakage here. I think we could easily add --shared-mem option, which would simply change mapping mode to SHARED. So, we could use it with both hugepages (default) and plain mmap (with --no-hugepages).

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia
  
Jianfeng Tan Jan. 12, 2016, 10:48 a.m. UTC | #5
Hello!

>   But in this case host gets this page size for total region size, therefore qva_to_vva() fails.
>   I haven't worked with hugepages, but i guess that with real hugepages we get one file per page, therefore page size == mapping size. With newly introduced --single-file we now have something that pretends to be a single "uber-huge-page", so we need to specify total size of the mapping here.

Oh I get it and recognize the problem here. The actual problem lies in 
the API rte_eal_get_backfile_info().
backfiles[i].size = hugepage_files[i].size;
Should use statfs or hugepage_files[i].size * hugepage_files[i].repeated 
to calculate the total size.

>
>   BTW, i'm still unhappy about ABI breakage here. I think we could easily add --shared-mem option, which would simply change mapping mode to SHARED. So, we could use it with both hugepages (default) and plain mmap (with --no-hugepages).

You mean, use "--no-hugepages --shared-mem" together, right?
That makes sense to me.

Thanks,
Jianfeng

>
> Kind regards,
> Pavel Fedin
> Expert Engineer
> Samsung Electronics Research center Russia
>
>
  
Pavel Fedin Jan. 12, 2016, 11 a.m. UTC | #6
Hello!

> >   BTW, i'm still unhappy about ABI breakage here. I think we could easily add --shared-mem
> option, which would simply change mapping mode to SHARED. So, we could use it with both
> hugepages (default) and plain mmap (with --no-hugepages).
> 
> You mean, use "--no-hugepages --shared-mem" together, right?

 Yes. This would be perfectly backwards-compatible because.

Kind regards,
Pavel Fedin
Senior Engineer
Samsung Electronics Research center Russia
  
Sergio Gonzalez Monroy Jan. 12, 2016, 11:07 a.m. UTC | #7
Hi Pavel,

On 12/01/2016 11:00, Pavel Fedin wrote:
>   Hello!
>
>>>    BTW, i'm still unhappy about ABI breakage here. I think we could easily add --shared-mem
>> option, which would simply change mapping mode to SHARED. So, we could use it with both
>> hugepages (default) and plain mmap (with --no-hugepages).
>>
>> You mean, use "--no-hugepages --shared-mem" together, right?
>   Yes. This would be perfectly backwards-compatible because.

So are you suggesting to not introduce --single-file option but instead 
--shared-mem?
AFAIK --single-file was trying to workaround the limitation of just 
being able to map 8 fds.

Sergio
> Kind regards,
> Pavel Fedin
> Senior Engineer
> Samsung Electronics Research center Russia
>
>
  
Pavel Fedin Jan. 12, 2016, 11:22 a.m. UTC | #8
Hello!

> Oh I get it and recognize the problem here. The actual problem lies in
> the API rte_eal_get_backfile_info().
> backfiles[i].size = hugepage_files[i].size;
> Should use statfs or hugepage_files[i].size * hugepage_files[i].repeated
> to calculate the total size.

 .repeated depends on CONFIG_RTE_EAL_SIGLE_FILE_SEGMENTS. By the way, looks like it does the same thing as you are trying to do with --single-file, but with hugepages, doesn't it? I see it's currently used by ivshmem (which is AFAIK very immature and half-abandoned).
 Or should we just move .repeated out of the #ifdef ?

Kind regards,
Pavel Fedin
Senior Engineer
Samsung Electronics Research center Russia
  
Sergio Gonzalez Monroy Jan. 12, 2016, 11:33 a.m. UTC | #9
On 12/01/2016 11:22, Pavel Fedin wrote:
>   Hello!
>
>> Oh I get it and recognize the problem here. The actual problem lies in
>> the API rte_eal_get_backfile_info().
>> backfiles[i].size = hugepage_files[i].size;
>> Should use statfs or hugepage_files[i].size * hugepage_files[i].repeated
>> to calculate the total size.
>   .repeated depends on CONFIG_RTE_EAL_SIGLE_FILE_SEGMENTS. By the way, looks like it does the same thing as you are trying to do with --single-file, but with hugepages, doesn't it? I see it's currently used by ivshmem (which is AFAIK very immature and half-abandoned).

Similar but not the same.
--single-file: a single file for all mapped hugepages.
SINGLE_FILE_SEGMENTS: a file per set of physically contiguous mapped 
hugepages (what DPDK calls memseg , memory segment). So there could be 
more than one file.

Sergio
>   Or should we just move .repeated out of the #ifdef ?
>
> Kind regards,
> Pavel Fedin
> Senior Engineer
> Samsung Electronics Research center Russia
>
>
  
Pavel Fedin Jan. 12, 2016, 11:37 a.m. UTC | #10
Hello!

> So are you suggesting to not introduce --single-file option but instead
> --shared-mem?
> AFAIK --single-file was trying to workaround the limitation of just
> being able to map 8 fds.

 Heh, yes, you're right... Indeed, sorry, i was not patient enough, i see it uses hpi->hugedir instead of using /dev/shm... I was confused by the code path... It seemed that --single-file is an alias to --no-hugepages.
 And the patch still changes mmap() mode to SHARED unconditionally, which is not good in terms of backwards compability (and this is explicitly noticed in the cover letter).

 So, let's try to sort out...
 a) By default we should still have MAP_PRIVATE
 b) Let's say that we need --shared-mem in order to make it MAP_SHARED. This can be combined with --no-hugepages if necessary (this is what i tried to implement based on the old RFC).
 c) Let's say that --single-file uses hugetlbfs but maps everything via single file. This still can be combined with --shared-mem.

 wouldn't this be more clear, more straightforward and implication-free?

 And if we agree on that, we could now try to decrease number of options:
 a) We could imply MAP_SHARED if cvio is used, because shared memory is mandatory in this case.
 b) (c) above again raises a question: doesn't it make CONFIG_RTE_EAL_SIGLE_FILE_SEGMENTS obsolete? Or may be we could use that one instead of --single-file (however i'm not a fan of compile-time configuration like this)?

Kind regards,
Pavel Fedin
Senior Engineer
Samsung Electronics Research center Russia
  
Sergio Gonzalez Monroy Jan. 12, 2016, 11:44 a.m. UTC | #11
On 12/01/2016 11:07, Sergio Gonzalez Monroy wrote:
> Hi Pavel,
>
> On 12/01/2016 11:00, Pavel Fedin wrote:
>>   Hello!
>>
>>>>    BTW, i'm still unhappy about ABI breakage here. I think we could 
>>>> easily add --shared-mem

Could you elaborate a bit more on your concerns regarding ABI breakage ?

>>> option, which would simply change mapping mode to SHARED. So, we 
>>> could use it with both
>>> hugepages (default) and plain mmap (with --no-hugepages).
>>>
>>> You mean, use "--no-hugepages --shared-mem" together, right?
>>   Yes. This would be perfectly backwards-compatible because.
>
> So are you suggesting to not introduce --single-file option but 
> instead --shared-mem?
> AFAIK --single-file was trying to workaround the limitation of just 
> being able to map 8 fds.
>

My bad, I misread the posts.
Jianfeng pointed out that you are suggesting to have --shared-mem to 
have same functionality
with or without hugepages.

Sergio

> Sergio
>> Kind regards,
>> Pavel Fedin
>> Senior Engineer
>> Samsung Electronics Research center Russia
>>
>>
>
  
Pavel Fedin Jan. 12, 2016, 12:01 p.m. UTC | #12
Hello!

> >   .repeated depends on CONFIG_RTE_EAL_SIGLE_FILE_SEGMENTS. By the way, looks like it does
> the same thing as you are trying to do with --single-file, but with hugepages, doesn't it? I
> see it's currently used by ivshmem (which is AFAIK very immature and half-abandoned).
> 
> Similar but not the same.
> --single-file: a single file for all mapped hugepages.
> SINGLE_FILE_SEGMENTS: a file per set of physically contiguous mapped
> hugepages (what DPDK calls memseg , memory segment). So there could be
> more than one file.

 Thank you for the explanation.

 By this time, i've done more testing. Current patchset breaks --no-huge. I did not study why:
--- cut ---
Program received signal SIGBUS, Bus error.
malloc_elem_init (elem=elem@entry=0x7fffe51e6000, heap=0x7ffff7fe5a1c, ms=ms@entry=0x7ffff7fb301c, size=size@entry=268435392) at /home/p.fedin/dpdk/lib/librte_eal/common/malloc_elem.c:62
62	/home/p.fedin/dpdk/lib/librte_eal/common/malloc_elem.c: No such file or directory.
Missing separate debuginfos, use: dnf debuginfo-install keyutils-libs-1.5.9-7.fc23.x86_64 krb5-libs-1.13.2-11.fc23.x86_64 libcap-ng-0.7.7-2.fc23.x86_64 libcom_err-1.42.13-3.fc23.x86_64 libselinux-2.4-4.fc23.x86_64 openssl-libs-1.0.2d-2.fc23.x86_64 pcre-8.37-4.fc23.x86_64 zlib-1.2.8-9.fc23.x86_64
(gdb) where
#0  malloc_elem_init (elem=elem@entry=0x7fffe51e6000, heap=0x7ffff7fe5a1c, ms=ms@entry=0x7ffff7fb301c, size=size@entry=268435392)
    at /home/p.fedin/dpdk/lib/librte_eal/common/malloc_elem.c:62
#1  0x00000000004a50b5 in malloc_heap_add_memseg (ms=0x7ffff7fb301c, heap=<optimized out>) at /home/p.fedin/dpdk/lib/librte_eal/common/malloc_heap.c:109
#2  rte_eal_malloc_heap_init () at /home/p.fedin/dpdk/lib/librte_eal/common/malloc_heap.c:232
#3  0x00000000004be896 in rte_eal_memzone_init () at /home/p.fedin/dpdk/lib/librte_eal/common/eal_common_memzone.c:427
#4  0x000000000042ab02 in rte_eal_init (argc=argc@entry=11, argv=argv@entry=0x7fffffffeb80) at /home/p.fedin/dpdk/lib/librte_eal/linuxapp/eal/eal.c:799
#5  0x000000000066dfb9 in dpdk_init (argc=11, argv=0x7fffffffeb80) at lib/netdev-dpdk.c:2192
#6  0x000000000040ddd9 in main (argc=12, argv=0x7fffffffeb78) at vswitchd/ovs-vswitchd.c:74
--- cut ---

 And now i tend to think that we do not need --single-file at all. Because:
a) It's just a temporary workaround for "more than 8 regions" problem.
b) It's not compatible with physical hardware anyway.

 So i think that we could easily use "--no-huge --shared-mem" combination. We could address hugepages compatibility problem later.

Kind regards,
Pavel Fedin
Senior Engineer
Samsung Electronics Research center Russia
  
Sergio Gonzalez Monroy Jan. 12, 2016, 12:12 p.m. UTC | #13
On 12/01/2016 11:37, Pavel Fedin wrote:
>   Hello!
>
>> So are you suggesting to not introduce --single-file option but instead
>> --shared-mem?
>> AFAIK --single-file was trying to workaround the limitation of just
>> being able to map 8 fds.
>   Heh, yes, you're right... Indeed, sorry, i was not patient enough, i see it uses hpi->hugedir instead of using /dev/shm... I was confused by the code path... It seemed that --single-file is an alias to --no-hugepages.
>   And the patch still changes mmap() mode to SHARED unconditionally, which is not good in terms of backwards compability (and this is explicitly noticed in the cover letter).

I might be missing something obvious here but, aside from having memory 
SHARED which most DPDK apps using hugepages will have anyway, what is 
the backward compatibility issues that you see here?

>
>   So, let's try to sort out...
>   a) By default we should still have MAP_PRIVATE
>   b) Let's say that we need --shared-mem in order to make it MAP_SHARED. This can be combined with --no-hugepages if necessary (this is what i tried to implement based on the old RFC).

--share-mem would only have meaning with --no-huge, right?

>   c) Let's say that --single-file uses hugetlbfs but maps everything via single file. This still can be combined with --shared-mem.

By default, when using hugepages all mappings are SHARED for 
multiprocess model.
IMHO If you really want to have the ability to have private memory 
instead because you are not considering that model, then it might be 
more appropriate to have --private-mem or --no-shared-mem option instead.

Sergio
>   wouldn't this be more clear, more straightforward and implication-free?
>
>   And if we agree on that, we could now try to decrease number of options:
>   a) We could imply MAP_SHARED if cvio is used, because shared memory is mandatory in this case.
>   b) (c) above again raises a question: doesn't it make CONFIG_RTE_EAL_SIGLE_FILE_SEGMENTS obsolete? Or may be we could use that one instead of --single-file (however i'm not a fan of compile-time configuration like this)?
>
> Kind regards,
> Pavel Fedin
> Senior Engineer
> Samsung Electronics Research center Russia
>
>
  
Sergio Gonzalez Monroy Jan. 12, 2016, 1:39 p.m. UTC | #14
On 12/01/2016 12:01, Pavel Fedin wrote:
>   Hello!
>
>>>    .repeated depends on CONFIG_RTE_EAL_SIGLE_FILE_SEGMENTS. By the way, looks like it does
>> the same thing as you are trying to do with --single-file, but with hugepages, doesn't it? I
>> see it's currently used by ivshmem (which is AFAIK very immature and half-abandoned).
>>
>> Similar but not the same.
>> --single-file: a single file for all mapped hugepages.
>> SINGLE_FILE_SEGMENTS: a file per set of physically contiguous mapped
>> hugepages (what DPDK calls memseg , memory segment). So there could be
>> more than one file.
>   Thank you for the explanation.
>
>   By this time, i've done more testing. Current patchset breaks --no-huge. I did not study why:
> --- cut ---
> Program received signal SIGBUS, Bus error.
> malloc_elem_init (elem=elem@entry=0x7fffe51e6000, heap=0x7ffff7fe5a1c, ms=ms@entry=0x7ffff7fb301c, size=size@entry=268435392) at /home/p.fedin/dpdk/lib/librte_eal/common/malloc_elem.c:62
> 62	/home/p.fedin/dpdk/lib/librte_eal/common/malloc_elem.c: No such file or directory.
> Missing separate debuginfos, use: dnf debuginfo-install keyutils-libs-1.5.9-7.fc23.x86_64 krb5-libs-1.13.2-11.fc23.x86_64 libcap-ng-0.7.7-2.fc23.x86_64 libcom_err-1.42.13-3.fc23.x86_64 libselinux-2.4-4.fc23.x86_64 openssl-libs-1.0.2d-2.fc23.x86_64 pcre-8.37-4.fc23.x86_64 zlib-1.2.8-9.fc23.x86_64
> (gdb) where
> #0  malloc_elem_init (elem=elem@entry=0x7fffe51e6000, heap=0x7ffff7fe5a1c, ms=ms@entry=0x7ffff7fb301c, size=size@entry=268435392)
>      at /home/p.fedin/dpdk/lib/librte_eal/common/malloc_elem.c:62
> #1  0x00000000004a50b5 in malloc_heap_add_memseg (ms=0x7ffff7fb301c, heap=<optimized out>) at /home/p.fedin/dpdk/lib/librte_eal/common/malloc_heap.c:109
> #2  rte_eal_malloc_heap_init () at /home/p.fedin/dpdk/lib/librte_eal/common/malloc_heap.c:232
> #3  0x00000000004be896 in rte_eal_memzone_init () at /home/p.fedin/dpdk/lib/librte_eal/common/eal_common_memzone.c:427
> #4  0x000000000042ab02 in rte_eal_init (argc=argc@entry=11, argv=argv@entry=0x7fffffffeb80) at /home/p.fedin/dpdk/lib/librte_eal/linuxapp/eal/eal.c:799
> #5  0x000000000066dfb9 in dpdk_init (argc=11, argv=0x7fffffffeb80) at lib/netdev-dpdk.c:2192
> #6  0x000000000040ddd9 in main (argc=12, argv=0x7fffffffeb78) at vswitchd/ovs-vswitchd.c:74
> --- cut ---
>
>   And now i tend to think that we do not need --single-file at all. Because:
> a) It's just a temporary workaround for "more than 8 regions" problem.
> b) It's not compatible with physical hardware anyway.

That's a good summary.
I think --single-file was mostly solving the limit of vhost only mapping 
8 fds. We end up with a single memseg as we do with --no-huge except 
that they are hugepages (well, also in this patch mapped with shared 
instead of private).
Also, It would be compatible with physical hardware if using iommu and vfio.

Sergio

>   So i think that we could easily use "--no-huge --shared-mem" combination. We could address hugepages compatibility problem later.
>
> Kind regards,
> Pavel Fedin
> Senior Engineer
> Samsung Electronics Research center Russia
>
>
  
Pavel Fedin Jan. 12, 2016, 1:57 p.m. UTC | #15
Hello!

> I might be missing something obvious here but, aside from having memory
> SHARED which most DPDK apps using hugepages will have anyway, what is
> the backward compatibility issues that you see here?

 Heh, sorry once again for confusing. Indeed, with hugepages we always get MAP_SHARED. I missed that. So, we indeed need
--shared-mem only in addition to --no-huge.

 Backwards compatibility issue is stated in the description of PATCH 1/4:
--- cut ---
b. possible ABI break, originally, --no-huge uses anonymous memory
instead of file-backed way to create memory.
--- cut ---
 The patch unconditionally changes that to SHARED. That's all.

Kind regards,
Pavel Fedin
Senior Engineer
Samsung Electronics Research center Russia
  
Sergio Gonzalez Monroy Jan. 12, 2016, 2:13 p.m. UTC | #16
On 12/01/2016 13:57, Pavel Fedin wrote:
>   Hello!
>
>> I might be missing something obvious here but, aside from having memory
>> SHARED which most DPDK apps using hugepages will have anyway, what is
>> the backward compatibility issues that you see here?
>   Heh, sorry once again for confusing. Indeed, with hugepages we always get MAP_SHARED. I missed that. So, we indeed need
> --shared-mem only in addition to --no-huge.
>
>   Backwards compatibility issue is stated in the description of PATCH 1/4:
> --- cut ---
> b. possible ABI break, originally, --no-huge uses anonymous memory
> instead of file-backed way to create memory.
> --- cut ---
>   The patch unconditionally changes that to SHARED. That's all.

I should read more carefully!
Sorry about that, I thought you were the one with the ABI concerns.

Regarding ABI, I don't think there is any ABI issue with the change, we 
just have our memory file-backed and SHARED but we do that when using 
hugepages so I don't think it would be a huge issue.
But if folks have concerns about it, we could always keep old behavior 
by default and, as you suggest, introduce another option for changing 
the flag.

Sergio
> Kind regards,
> Pavel Fedin
> Senior Engineer
> Samsung Electronics Research center Russia
>
>
  

Patch

diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 9c9e40f..75ef8db 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -109,6 +109,22 @@  struct rte_memseg {
 } __rte_packed;
 
 /**
+ * This struct is used to store information about memory-backed file that
+ * we mapped in memory initialization.
+ */
+struct back_file {
+	void *addr;         /**< virtual addr */
+	size_t size;        /**< the page size */
+	char filepath[PATH_MAX]; /**< path to backing file on filesystem */
+};
+
+/**
+  * Get the hugepage file information. Caller to free.
+  * Return number of hugepage files used.
+  */
+int rte_eal_get_backfile_info(struct back_file **);
+
+/**
  * Lock page in physical memory and prevent from swapping.
  *
  * @param virt
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 2bb1163..6ca1404 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -758,6 +758,9 @@  sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 	return 0;
 }
 
+static struct hugepage_file *hugepage_files;
+static int num_hugepage_files;
+
 /*
  * Uses mmap to create a shared memory area for storage of data
  * Used in this file to store the hugepage file map on disk
@@ -776,9 +779,29 @@  create_shared_memory(const char *filename, const size_t mem_size)
 	retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 	close(fd);
 
+	hugepage_files = retval;
+	num_hugepage_files = mem_size / (sizeof(struct hugepage_file));
+
 	return retval;
 }
 
+int
+rte_eal_get_backfile_info(struct back_file **p)
+{
+	struct back_file *backfiles;
+	int i, num_backfiles = num_hugepage_files;
+
+	backfiles = malloc(sizeof(struct back_file) * num_backfiles);
+	for (i = 0; i < num_backfiles; ++i) {
+		backfiles[i].addr = hugepage_files[i].final_va;
+		backfiles[i].size = hugepage_files[i].size;
+		strcpy(backfiles[i].filepath, hugepage_files[i].filepath);
+	}
+
+	*p = backfiles;
+	return num_backfiles;
+}
+
 /*
  * this copies *active* hugepages from one hugepage table to another.
  * destination is typically the shared memory.
@@ -1157,6 +1180,20 @@  rte_eal_hugepage_init(void)
 		mcfg->memseg[0].len = internal_config.memory;
 		mcfg->memseg[0].socket_id = socket_id;
 
+		hugepage = create_shared_memory(eal_hugepage_info_path(),
+				sizeof(struct hugepage_file));
+		hugepage->orig_va = addr;
+		hugepage->final_va = addr;
+		hugepage->physaddr = rte_mem_virt2phy(addr);
+		hugepage->size = pagesize;
+		hugepage->socket_id = socket_id;
+		hugepage->file_id = 0;
+		hugepage->memseg_id = 0;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+		hugepage->repeated = internal_config.memory / pagesize;
+#endif
+		strncpy(hugepage->filepath, filepath, MAX_HUGEPAGE_PATH);
+
 		close(fd);
 
 		return 0;