[v2,2/2] vhost: enable IOMMU for async vhost
Checks
Commit Message
The use of IOMMU has many advantages, such as isolation and address
translation. This patch extends the capability of the DMA engine to use
IOMMU if the DMA engine is bound to vfio.
When the memory table is set, the guest memory will be mapped
into the default container of DPDK.
Signed-off-by: Xuan Ding <xuan.ding@intel.com>
---
lib/vhost/rte_vhost.h | 1 +
lib/vhost/vhost_user.c | 57 +++++++++++++++++++++++++++++++++++++++++-
2 files changed, 57 insertions(+), 1 deletion(-)
Comments
Hi Xuan,
> -----Original Message-----
> From: Ding, Xuan <xuan.ding@intel.com>
> Sent: Friday, September 17, 2021 1:26 PM
> To: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>;
> maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>;
> Richardson, Bruce <bruce.richardson@intel.com>; Pai G, Sunil
> <sunil.pai.g@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Yang,
> YvonneX <yvonnex.yang@intel.com>; Ding, Xuan <xuan.ding@intel.com>
> Subject: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
>
> The use of IOMMU has many advantages, such as isolation and address
> translation. This patch extends the capbility of DMA engine to use IOMMU if
> the DMA engine is bound to vfio.
>
> When set memory table, the guest memory will be mapped into the default
> container of DPDK.
>
> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> ---
> lib/vhost/rte_vhost.h | 1 +
> lib/vhost/vhost_user.c | 57
> +++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 57 insertions(+), 1 deletion(-)
>
> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
> 8d875e9322..e0537249f3 100644
> --- a/lib/vhost/rte_vhost.h
> +++ b/lib/vhost/rte_vhost.h
> @@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
> void *mmap_addr;
> uint64_t mmap_size;
> int fd;
> + uint64_t dma_map_success;
How about using bool for dma_map_success?
> };
>
> /**
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> 29a4c9af60..7d1d592b86 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -45,6 +45,8 @@
> #include <rte_common.h>
> #include <rte_malloc.h>
> #include <rte_log.h>
> +#include <rte_vfio.h>
> +#include <rte_errno.h>
>
> #include "iotlb.h"
> #include "vhost.h"
> @@ -141,6 +143,46 @@ get_blk_size(int fd)
> return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; }
>
> +static int
> +async_dma_map(struct rte_vhost_mem_region *region, bool do_map) {
> + int ret = 0;
> + uint64_t host_iova;
> + host_iova = rte_mem_virt2iova((void *)(uintptr_t)region-
> >host_user_addr);
> + if (do_map) {
> + /* Add mapped region into the default container of DPDK. */
> + ret =
> rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> + region->host_user_addr,
> + host_iova,
> + region->size);
> + region->dma_map_success = ret == 0;
> + if (ret) {
> + if (rte_errno != ENODEV && rte_errno != ENOTSUP) {
> + VHOST_LOG_CONFIG(ERR, "DMA engine map
> failed\n");
> + return ret;
> + }
> + return 0;
Why return 0, if ret is -1 here?
Thanks,
Jiayu
> + }
> + return ret;
> + } else {
> + /* No need to do vfio unmap if the map failed. */
> + if (!region->dma_map_success)
> + return 0;
> +
> + /* Remove mapped region from the default container of
> DPDK. */
> + ret =
> rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> + region->host_user_addr,
> + host_iova,
> + region->size);
> + if (ret) {
> + VHOST_LOG_CONFIG(ERR, "DMA engine unmap
> failed\n");
> + return ret;
> + }
> + region->dma_map_success = 0;
> + }
> + return ret;
> +}
> +
> static void
> free_mem_region(struct virtio_net *dev) { @@ -153,6 +195,9 @@
> free_mem_region(struct virtio_net *dev)
> for (i = 0; i < dev->mem->nregions; i++) {
> reg = &dev->mem->regions[i];
> if (reg->host_user_addr) {
> + if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> + async_dma_map(reg, false);
> +
> munmap(reg->mmap_addr, reg->mmap_size);
> close(reg->fd);
> }
> @@ -1157,6 +1202,7 @@ vhost_user_mmap_region(struct virtio_net *dev,
> uint64_t mmap_size;
> uint64_t alignment;
> int populate;
> + int ret;
>
> /* Check for memory_size + mmap_offset overflow */
> if (mmap_offset >= -region->size) {
> @@ -1210,13 +1256,22 @@ vhost_user_mmap_region(struct virtio_net *dev,
> region->mmap_size = mmap_size;
> region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
> mmap_offset;
>
> - if (dev->async_copy)
> + if (dev->async_copy) {
> if (add_guest_pages(dev, region, alignment) < 0) {
> VHOST_LOG_CONFIG(ERR,
> "adding guest pages to region
> failed.\n");
> return -1;
> }
>
> + if (rte_vfio_is_enabled("vfio")) {
> + ret = async_dma_map(region, true);
> + if (ret < 0) {
> + VHOST_LOG_CONFIG(ERR, "Configure
> IOMMU for DMA engine failed\n");
> + return -1;
> + }
> + }
> + }
> +
> VHOST_LOG_CONFIG(INFO,
> "guest memory region size: 0x%" PRIx64 "\n"
> "\t guest physical addr: 0x%" PRIx64 "\n"
> --
> 2.17.1
On 9/23/21 16:39, Hu, Jiayu wrote:
> Hi Xuan,
>
>> -----Original Message-----
>> From: Ding, Xuan <xuan.ding@intel.com>
>> Sent: Friday, September 17, 2021 1:26 PM
>> To: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>;
>> maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
>> Cc: Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>;
>> Richardson, Bruce <bruce.richardson@intel.com>; Pai G, Sunil
>> <sunil.pai.g@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Yang,
>> YvonneX <yvonnex.yang@intel.com>; Ding, Xuan <xuan.ding@intel.com>
>> Subject: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
>>
>> The use of IOMMU has many advantages, such as isolation and address
>> translation. This patch extends the capbility of DMA engine to use IOMMU if
>> the DMA engine is bound to vfio.
>>
>> When set memory table, the guest memory will be mapped into the default
>> container of DPDK.
>>
>> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
>> ---
>> lib/vhost/rte_vhost.h | 1 +
>> lib/vhost/vhost_user.c | 57
>> +++++++++++++++++++++++++++++++++++++++++-
>> 2 files changed, 57 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
>> 8d875e9322..e0537249f3 100644
>> --- a/lib/vhost/rte_vhost.h
>> +++ b/lib/vhost/rte_vhost.h
>> @@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
>> void *mmap_addr;
>> uint64_t mmap_size;
>> int fd;
>> + uint64_t dma_map_success;
>
> How about using bool for dma_map_success?
The bigger problem here is that you are breaking the ABI.
>> };
>>
>> /**
>> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
>> 29a4c9af60..7d1d592b86 100644
>> --- a/lib/vhost/vhost_user.c
>> +++ b/lib/vhost/vhost_user.c
>> @@ -45,6 +45,8 @@
>> #include <rte_common.h>
>> #include <rte_malloc.h>
>> #include <rte_log.h>
>> +#include <rte_vfio.h>
>> +#include <rte_errno.h>
>>
>> #include "iotlb.h"
>> #include "vhost.h"
>> @@ -141,6 +143,46 @@ get_blk_size(int fd)
>> return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; }
>>
>> +static int
>> +async_dma_map(struct rte_vhost_mem_region *region, bool do_map) {
>> + int ret = 0;
>> + uint64_t host_iova;
>> + host_iova = rte_mem_virt2iova((void *)(uintptr_t)region-
>>> host_user_addr);
>> + if (do_map) {
>> + /* Add mapped region into the default container of DPDK. */
>> + ret =
>> rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
>> + region->host_user_addr,
>> + host_iova,
>> + region->size);
>> + region->dma_map_success = ret == 0;
>> + if (ret) {
>> + if (rte_errno != ENODEV && rte_errno != ENOTSUP) {
>> + VHOST_LOG_CONFIG(ERR, "DMA engine map
>> failed\n");
>> + return ret;
>> + }
>> + return 0;
>
> Why return 0, if ret is -1 here?
>
> Thanks,
> Jiayu
>
>> + }
>> + return ret;
>> + } else {
>> + /* No need to do vfio unmap if the map failed. */
>> + if (!region->dma_map_success)
>> + return 0;
>> +
>> + /* Remove mapped region from the default container of
>> DPDK. */
>> + ret =
>> rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
>> + region->host_user_addr,
>> + host_iova,
>> + region->size);
>> + if (ret) {
>> + VHOST_LOG_CONFIG(ERR, "DMA engine unmap
>> failed\n");
>> + return ret;
>> + }
>> + region->dma_map_success = 0;
>> + }
>> + return ret;
>> +}
>> +
>> static void
>> free_mem_region(struct virtio_net *dev) { @@ -153,6 +195,9 @@
>> free_mem_region(struct virtio_net *dev)
>> for (i = 0; i < dev->mem->nregions; i++) {
>> reg = &dev->mem->regions[i];
>> if (reg->host_user_addr) {
>> + if (dev->async_copy && rte_vfio_is_enabled("vfio"))
>> + async_dma_map(reg, false);
>> +
>> munmap(reg->mmap_addr, reg->mmap_size);
>> close(reg->fd);
>> }
>> @@ -1157,6 +1202,7 @@ vhost_user_mmap_region(struct virtio_net *dev,
>> uint64_t mmap_size;
>> uint64_t alignment;
>> int populate;
>> + int ret;
>>
>> /* Check for memory_size + mmap_offset overflow */
>> if (mmap_offset >= -region->size) {
>> @@ -1210,13 +1256,22 @@ vhost_user_mmap_region(struct virtio_net *dev,
>> region->mmap_size = mmap_size;
>> region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
>> mmap_offset;
>>
>> - if (dev->async_copy)
>> + if (dev->async_copy) {
>> if (add_guest_pages(dev, region, alignment) < 0) {
>> VHOST_LOG_CONFIG(ERR,
>> "adding guest pages to region
>> failed.\n");
>> return -1;
>> }
>>
>> + if (rte_vfio_is_enabled("vfio")) {
>> + ret = async_dma_map(region, true);
>> + if (ret < 0) {
>> + VHOST_LOG_CONFIG(ERR, "Configure
>> IOMMU for DMA engine failed\n");
>> + return -1;
>> + }
>> + }
>> + }
>> +
>> VHOST_LOG_CONFIG(INFO,
>> "guest memory region size: 0x%" PRIx64 "\n"
>> "\t guest physical addr: 0x%" PRIx64 "\n"
>> --
>> 2.17.1
>
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Thursday, September 23, 2021 10:56 PM
> To: Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan <xuan.ding@intel.com>;
> dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>; Xia, Chenbo
> <chenbo.xia@intel.com>
> Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
> Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
> Subject: Re: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
>
>
>
> On 9/23/21 16:39, Hu, Jiayu wrote:
> > Hi Xuan,
> >
> >> -----Original Message-----
> >> From: Ding, Xuan <xuan.ding@intel.com>
> >> Sent: Friday, September 17, 2021 1:26 PM
> >> To: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>;
> >> maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> >> Cc: Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>;
> >> Richardson, Bruce <bruce.richardson@intel.com>; Pai G, Sunil
> >> <sunil.pai.g@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Yang,
> >> YvonneX <yvonnex.yang@intel.com>; Ding, Xuan <xuan.ding@intel.com>
> >> Subject: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> >>
> >> The use of IOMMU has many advantages, such as isolation and address
> >> translation. This patch extends the capbility of DMA engine to use IOMMU if
> >> the DMA engine is bound to vfio.
> >>
> >> When set memory table, the guest memory will be mapped into the default
> >> container of DPDK.
> >>
> >> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> >> ---
> >> lib/vhost/rte_vhost.h | 1 +
> >> lib/vhost/vhost_user.c | 57
> >> +++++++++++++++++++++++++++++++++++++++++-
> >> 2 files changed, 57 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
> >> 8d875e9322..e0537249f3 100644
> >> --- a/lib/vhost/rte_vhost.h
> >> +++ b/lib/vhost/rte_vhost.h
> >> @@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
> >> void *mmap_addr;
> >> uint64_t mmap_size;
> >> int fd;
> >> + uint64_t dma_map_success;
> >
> > How about using bool for dma_map_success?
>
> The bigger problem here is that you are breaking the ABI.
Maybe these kinds of driver-facing structs/functions should be removed
from the ABI, since we have been refactoring the DPDK ABI recently.
/Chenbo
>
> >> };
> >>
> >> /**
On 9/24/21 03:53, Xia, Chenbo wrote:
>> -----Original Message-----
>> From: Maxime Coquelin <maxime.coquelin@redhat.com>
>> Sent: Thursday, September 23, 2021 10:56 PM
>> To: Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan <xuan.ding@intel.com>;
>> dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>; Xia, Chenbo
>> <chenbo.xia@intel.com>
>> Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
>> <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
>> Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
>> Subject: Re: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
>>
>>
>>
>> On 9/23/21 16:39, Hu, Jiayu wrote:
>>> Hi Xuan,
>>>
>>>> -----Original Message-----
>>>> From: Ding, Xuan <xuan.ding@intel.com>
>>>> Sent: Friday, September 17, 2021 1:26 PM
>>>> To: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>;
>>>> maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
>>>> Cc: Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>;
>>>> Richardson, Bruce <bruce.richardson@intel.com>; Pai G, Sunil
>>>> <sunil.pai.g@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Yang,
>>>> YvonneX <yvonnex.yang@intel.com>; Ding, Xuan <xuan.ding@intel.com>
>>>> Subject: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
>>>>
>>>> The use of IOMMU has many advantages, such as isolation and address
>>>> translation. This patch extends the capbility of DMA engine to use IOMMU if
>>>> the DMA engine is bound to vfio.
>>>>
>>>> When set memory table, the guest memory will be mapped into the default
>>>> container of DPDK.
>>>>
>>>> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
>>>> ---
>>>> lib/vhost/rte_vhost.h | 1 +
>>>> lib/vhost/vhost_user.c | 57
>>>> +++++++++++++++++++++++++++++++++++++++++-
>>>> 2 files changed, 57 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
>>>> 8d875e9322..e0537249f3 100644
>>>> --- a/lib/vhost/rte_vhost.h
>>>> +++ b/lib/vhost/rte_vhost.h
>>>> @@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
>>>> void *mmap_addr;
>>>> uint64_t mmap_size;
>>>> int fd;
>>>> + uint64_t dma_map_success;
>>>
>>> How about using bool for dma_map_success?
>>
>> The bigger problem here is that you are breaking the ABI.
>
> Maybe this kind of driver-facing structs/functions should be removed
> from ABI, since we are refactoring DPDK ABI recently.
It has actually been exposed for SPDK, we cannot just remove it from
API.
Maxime
> /Chenbo
>
>>
>>>> };
>>>>
>>>> /**
>
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Friday, September 24, 2021 3:14 PM
> To: Xia, Chenbo <chenbo.xia@intel.com>; Hu, Jiayu <jiayu.hu@intel.com>; Ding,
> Xuan <xuan.ding@intel.com>; dev@dpdk.org; Burakov, Anatoly
> <anatoly.burakov@intel.com>
> Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
> Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
> Subject: Re: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
>
>
>
> On 9/24/21 03:53, Xia, Chenbo wrote:
> >> -----Original Message-----
> >> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> >> Sent: Thursday, September 23, 2021 10:56 PM
> >> To: Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan <xuan.ding@intel.com>;
> >> dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>; Xia, Chenbo
> >> <chenbo.xia@intel.com>
> >> Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
> >> <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
> >> Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
> >> Subject: Re: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> >>
> >>
> >>
> >> On 9/23/21 16:39, Hu, Jiayu wrote:
> >>> Hi Xuan,
> >>>
> >>>> -----Original Message-----
> >>>> From: Ding, Xuan <xuan.ding@intel.com>
> >>>> Sent: Friday, September 17, 2021 1:26 PM
> >>>> To: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>;
> >>>> maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> >>>> Cc: Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1
> <cheng1.jiang@intel.com>;
> >>>> Richardson, Bruce <bruce.richardson@intel.com>; Pai G, Sunil
> >>>> <sunil.pai.g@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Yang,
> >>>> YvonneX <yvonnex.yang@intel.com>; Ding, Xuan <xuan.ding@intel.com>
> >>>> Subject: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> >>>>
> >>>> The use of IOMMU has many advantages, such as isolation and address
> >>>> translation. This patch extends the capbility of DMA engine to use IOMMU
> if
> >>>> the DMA engine is bound to vfio.
> >>>>
> >>>> When set memory table, the guest memory will be mapped into the default
> >>>> container of DPDK.
> >>>>
> >>>> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> >>>> ---
> >>>> lib/vhost/rte_vhost.h | 1 +
> >>>> lib/vhost/vhost_user.c | 57
> >>>> +++++++++++++++++++++++++++++++++++++++++-
> >>>> 2 files changed, 57 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
> >>>> 8d875e9322..e0537249f3 100644
> >>>> --- a/lib/vhost/rte_vhost.h
> >>>> +++ b/lib/vhost/rte_vhost.h
> >>>> @@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
> >>>> void *mmap_addr;
> >>>> uint64_t mmap_size;
> >>>> int fd;
> >>>> + uint64_t dma_map_success;
> >>>
> >>> How about using bool for dma_map_success?
> >>
> >> The bigger problem here is that you are breaking the ABI.
> >
> > Maybe this kind of driver-facing structs/functions should be removed
> > from ABI, since we are refactoring DPDK ABI recently.
>
> It has actually been exposed for SPDK, we cannot just remove it from
> API.
'Exposed' does not mean it has to be part of the ABI. Like the
'driver_sdk_headers' in the ethdev lib, those headers can be exposed without
being part of the ABI. I see SPDK is using that for building its lib. I am not
sure whether, in this case, the SPDK vhost lib should be considered an application.
Thanks,
Chenbo
>
> Maxime
>
> > /Chenbo
> >
> >>
> >>>> };
> >>>>
> >>>> /**
> >
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Friday, September 24, 2021 3:36 PM
> To: Maxime Coquelin <maxime.coquelin@redhat.com>; Hu, Jiayu
> <jiayu.hu@intel.com>; Ding, Xuan <xuan.ding@intel.com>; dev@dpdk.org;
> Burakov, Anatoly <anatoly.burakov@intel.com>
> Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
> Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
> Subject: RE: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
>
> > -----Original Message-----
> > From: Maxime Coquelin <maxime.coquelin@redhat.com>
> > Sent: Friday, September 24, 2021 3:14 PM
> > To: Xia, Chenbo <chenbo.xia@intel.com>; Hu, Jiayu <jiayu.hu@intel.com>;
> Ding,
> > Xuan <xuan.ding@intel.com>; dev@dpdk.org; Burakov, Anatoly
> > <anatoly.burakov@intel.com>
> > Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
> > <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
> > Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
> > Subject: Re: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> >
> >
> >
> > On 9/24/21 03:53, Xia, Chenbo wrote:
> > >> -----Original Message-----
> > >> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> > >> Sent: Thursday, September 23, 2021 10:56 PM
> > >> To: Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan <xuan.ding@intel.com>;
> > >> dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>; Xia,
> Chenbo
> > >> <chenbo.xia@intel.com>
> > >> Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
> > >> <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
> > >> Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
> > >> Subject: Re: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> > >>
> > >>
> > >>
> > >> On 9/23/21 16:39, Hu, Jiayu wrote:
> > >>> Hi Xuan,
> > >>>
> > >>>> -----Original Message-----
> > >>>> From: Ding, Xuan <xuan.ding@intel.com>
> > >>>> Sent: Friday, September 17, 2021 1:26 PM
> > >>>> To: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>;
> > >>>> maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> > >>>> Cc: Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1
> > <cheng1.jiang@intel.com>;
> > >>>> Richardson, Bruce <bruce.richardson@intel.com>; Pai G, Sunil
> > >>>> <sunil.pai.g@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Yang,
> > >>>> YvonneX <yvonnex.yang@intel.com>; Ding, Xuan <xuan.ding@intel.com>
> > >>>> Subject: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> > >>>>
> > >>>> The use of IOMMU has many advantages, such as isolation and address
> > >>>> translation. This patch extends the capbility of DMA engine to use
> IOMMU
> > if
> > >>>> the DMA engine is bound to vfio.
> > >>>>
> > >>>> When set memory table, the guest memory will be mapped into the
> default
> > >>>> container of DPDK.
> > >>>>
> > >>>> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> > >>>> ---
> > >>>> lib/vhost/rte_vhost.h | 1 +
> > >>>> lib/vhost/vhost_user.c | 57
> > >>>> +++++++++++++++++++++++++++++++++++++++++-
> > >>>> 2 files changed, 57 insertions(+), 1 deletion(-)
> > >>>>
> > >>>> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
> > >>>> 8d875e9322..e0537249f3 100644
> > >>>> --- a/lib/vhost/rte_vhost.h
> > >>>> +++ b/lib/vhost/rte_vhost.h
> > >>>> @@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
> > >>>> void *mmap_addr;
> > >>>> uint64_t mmap_size;
> > >>>> int fd;
> > >>>> + uint64_t dma_map_success;
> > >>>
> > >>> How about using bool for dma_map_success?
> > >>
> > >> The bigger problem here is that you are breaking the ABI.
> > >
> > > Maybe this kind of driver-facing structs/functions should be removed
> > > from ABI, since we are refactoring DPDK ABI recently.
> >
> > It has actually been exposed for SPDK, we cannot just remove it from
> > API.
>
> 'exposed' does not mean it has to be ABI. Like 'driver_sdk_headers' in
> ethdev lib, those headers can be exposed but do not include ABI. I see
> SPDK is using that for building its lib. Not sure in this case, the SPDK
> Vhost lib should be considered as application.
>
> Thanks,
> Chenbo
Thanks for the discussion. Since any possible ABI change lies in the future,
I am considering adding dma_map_success to the virtio_net structure, to indicate
the map status of each region. This flag could even be removed if we do not need to
take the restrictions on users (kernel driver support) into account. Details will be provided in the next version of the patch.
I hope to get your insights. :)
Thanks,
Xuan
>
> >
> > Maxime
> >
> > > /Chenbo
> > >
> > >>
> > >>>> };
> > >>>>
> > >>>> /**
> > >
@@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
void *mmap_addr;
uint64_t mmap_size;
int fd;
+ uint64_t dma_map_success;
};
/**
@@ -45,6 +45,8 @@
#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_log.h>
+#include <rte_vfio.h>
+#include <rte_errno.h>
#include "iotlb.h"
#include "vhost.h"
@@ -141,6 +143,46 @@ get_blk_size(int fd)
return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
}
+static int
+async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
+{
+ int ret = 0;
+ uint64_t host_iova;
+ host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
+ if (do_map) {
+ /* Add mapped region into the default container of DPDK. */
+ ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+ region->host_user_addr,
+ host_iova,
+ region->size);
+ region->dma_map_success = ret == 0;
+ if (ret) {
+ if (rte_errno != ENODEV && rte_errno != ENOTSUP) {
+ VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
+ return ret;
+ }
+ return 0;
+ }
+ return ret;
+ } else {
+ /* No need to do vfio unmap if the map failed. */
+ if (!region->dma_map_success)
+ return 0;
+
+ /* Remove mapped region from the default container of DPDK. */
+ ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+ region->host_user_addr,
+ host_iova,
+ region->size);
+ if (ret) {
+ VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
+ return ret;
+ }
+ region->dma_map_success = 0;
+ }
+ return ret;
+}
+
static void
free_mem_region(struct virtio_net *dev)
{
@@ -153,6 +195,9 @@ free_mem_region(struct virtio_net *dev)
for (i = 0; i < dev->mem->nregions; i++) {
reg = &dev->mem->regions[i];
if (reg->host_user_addr) {
+ if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+ async_dma_map(reg, false);
+
munmap(reg->mmap_addr, reg->mmap_size);
close(reg->fd);
}
@@ -1157,6 +1202,7 @@ vhost_user_mmap_region(struct virtio_net *dev,
uint64_t mmap_size;
uint64_t alignment;
int populate;
+ int ret;
/* Check for memory_size + mmap_offset overflow */
if (mmap_offset >= -region->size) {
@@ -1210,13 +1256,22 @@ vhost_user_mmap_region(struct virtio_net *dev,
region->mmap_size = mmap_size;
region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + mmap_offset;
- if (dev->async_copy)
+ if (dev->async_copy) {
if (add_guest_pages(dev, region, alignment) < 0) {
VHOST_LOG_CONFIG(ERR,
"adding guest pages to region failed.\n");
return -1;
}
+ if (rte_vfio_is_enabled("vfio")) {
+ ret = async_dma_map(region, true);
+ if (ret < 0) {
+ VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
+ return -1;
+ }
+ }
+ }
+
VHOST_LOG_CONFIG(INFO,
"guest memory region size: 0x%" PRIx64 "\n"
"\t guest physical addr: 0x%" PRIx64 "\n"