From patchwork Fri Sep 6 04:16:13 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ajit Khaparde X-Patchwork-Id: 143682 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id D075C45917; Fri, 6 Sep 2024 06:16:27 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 9C82E42E6A; Fri, 6 Sep 2024 06:16:22 +0200 (CEST) Received: from mail-yb1-f181.google.com (mail-yb1-f181.google.com [209.85.219.181]) by mails.dpdk.org (Postfix) with ESMTP id 00B2B42E66 for ; Fri, 6 Sep 2024 06:16:21 +0200 (CEST) Received: by mail-yb1-f181.google.com with SMTP id 3f1490d57ef6-e1a74ee4c0cso1803568276.2 for ; Thu, 05 Sep 2024 21:16:21 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=broadcom.com; s=google; t=1725596181; x=1726200981; darn=dpdk.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=IIWekGJJy6o2veDE5hPs5UVcoZBdgrvq4ahinAJHD10=; b=W4NcyRmduA+H7WhhtJEIOHGgcuofYgodE+jUp/5dfMAPttFSCOS5mvMcA1Me3k8XlQ r9+4wnGGS7BJz0CBTGV3IRCk05CKPRnV2mCkCcpmBOW0FDr9g0mAz1gDJsvIv1y34ySD ciIDlAV31rhBs4vVGSAKE5wBDUNMq0Jkblksc= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1725596181; x=1726200981; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=IIWekGJJy6o2veDE5hPs5UVcoZBdgrvq4ahinAJHD10=; b=sqB5gPByJUx62eiq4XyKeMLWn/nl9HBw0+DjJgz488XzOFo/TAx87MZrHUHA5/Oe9E u7QhTNs7zRWgm/gbkcI7Mazk9FF8KMCSgAUDe1W5Rs7ZDHRsgJ2jLUc++WDVRnM1jrc6 Rl7N16AmJv/2+SUHvMNIfokB2Cj0IbPHT6N/ZQqu6z9+XWyPsbNiGSR3LXbh1mjfoW0I 
uA40M1nGbOIVp5vDQsgds4rPSX46VHlxbDL3EQvbnwbYAmPHXZyujJHdusu1n0V4p0Yx LLDXTG9lt4MLOKwBQYXowvemYw8/4TRn1kDjy0cB4b+6d4hoTUUDehtxfIjpaNGGeSd5 mNZw== X-Gm-Message-State: AOJu0YwsuEu+7PEEAP0+78vKSTLjyEkYC7RMI4QJPSwfCP9vPbBXUfZr bcwrGq9t9rlUPXrZ1I2Q6WiztGkn6zDR8GWwXokdnPyyp63tVTl0YVGihV78tf6Geng1KxWJKZg Agzd/A/VilvF6jcMPBnsSz5d5NTsuMtXiN28/qpcwndKtB7y319Nx/Xl4dBQaRzKhHfrVtKBJTI v0+GqreZSwcfnXFuw6UGSDYmh0O56p X-Google-Smtp-Source: AGHT+IF4JKknWdLpUTBv4wWbKhPe5mXb7dF7tu8qzsz6JaYcTjkZ04w+nGKPDA71wj0B97jysuu/mg== X-Received: by 2002:a05:6902:2701:b0:e1c:f49a:7415 with SMTP id 3f1490d57ef6-e1d34a37c54mr1461346276.46.1725596180330; Thu, 05 Sep 2024 21:16:20 -0700 (PDT) Received: from localhost.localdomain ([136.52.21.78]) by smtp.gmail.com with ESMTPSA id 6a1803df08f44-6c520419eb9sm13418276d6.122.2024.09.05.21.16.18 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Thu, 05 Sep 2024 21:16:19 -0700 (PDT) From: Ajit Khaparde To: dev@dpdk.org Cc: thomas@monjalon.net, eagostini@nvidia.com Subject: [PATCH 1/2] gpudev: add API to get GPU physical address Date: Thu, 5 Sep 2024 21:16:13 -0700 Message-Id: <20240906041614.36962-2-ajit.khaparde@broadcom.com> X-Mailer: git-send-email 2.39.3 (Apple Git-146) In-Reply-To: <20240906041614.36962-1-ajit.khaparde@broadcom.com> References: <20240906041614.36962-1-ajit.khaparde@broadcom.com> MIME-Version: 1.0 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Add API to get the physical address of the peer GPU. This should allow some NIC hardware to directly use the physical address for DMA instead of the CUDA Unified Memory provided by rte_gpu_mem_map. 
Signed-off-by: Ajit Khaparde --- lib/gpudev/gpudev.c | 61 ++++++++++++++++++++++++++++++++++++++ lib/gpudev/gpudev_driver.h | 6 ++++ lib/gpudev/rte_gpudev.h | 49 ++++++++++++++++++++++++++++++ lib/gpudev/version.map | 2 ++ 4 files changed, 118 insertions(+) diff --git a/lib/gpudev/gpudev.c b/lib/gpudev/gpudev.c index 1c2011b856..acbe39361b 100644 --- a/lib/gpudev/gpudev.c +++ b/lib/gpudev/gpudev.c @@ -683,6 +683,44 @@ rte_gpu_mem_cpu_map(int16_t dev_id, size_t size, void *ptr) } } +void * +rte_gpu_mem_dma_map(int16_t dev_id, size_t size, void *ptr) +{ + struct rte_gpu *dev; + void *ptr_out; + int ret; + + dev = gpu_get_by_id(dev_id); + if (dev == NULL) { + GPU_LOG(ERR, "mem DMA map for invalid device ID %d", dev_id); + rte_errno = ENODEV; + return NULL; + } + + if (dev->ops.mem_dma_map == NULL) { + GPU_LOG(ERR, "mem DMA map not supported"); + rte_errno = ENOTSUP; + return NULL; + } + + if (ptr == NULL || size == 0) /* dry-run */ + return NULL; + + ret = GPU_DRV_RET(dev->ops.mem_dma_map(dev, size, ptr, &ptr_out)); + + switch (ret) { + case 0: + return ptr_out; + case -ENOMEM: + case -E2BIG: + rte_errno = -ret; + return NULL; + default: + rte_errno = -EPERM; + return NULL; + } +} + int rte_gpu_mem_cpu_unmap(int16_t dev_id, void *ptr) { @@ -706,6 +744,29 @@ rte_gpu_mem_cpu_unmap(int16_t dev_id, void *ptr) return GPU_DRV_RET(dev->ops.mem_cpu_unmap(dev, ptr)); } +int +rte_gpu_mem_dma_unmap(int16_t dev_id, void *ptr) +{ + struct rte_gpu *dev; + + dev = gpu_get_by_id(dev_id); + if (dev == NULL) { + GPU_LOG(ERR, "dma_unmap mem for invalid device ID %d", dev_id); + rte_errno = ENODEV; + return -rte_errno; + } + + if (dev->ops.mem_dma_unmap == NULL) { + rte_errno = ENOTSUP; + return -rte_errno; + } + + if (ptr == NULL) /* dry-run */ + return 0; + + return GPU_DRV_RET(dev->ops.mem_dma_unmap(dev, ptr)); +} + int rte_gpu_wmb(int16_t dev_id) { diff --git a/lib/gpudev/gpudev_driver.h b/lib/gpudev/gpudev_driver.h index 37b6ae3149..ad3ab9e214 100644 --- a/lib/gpudev/gpudev_driver.h 
+++ b/lib/gpudev/gpudev_driver.h @@ -39,6 +39,8 @@ typedef int (rte_gpu_mem_unregister_t)(struct rte_gpu *dev, void *ptr); typedef int (rte_gpu_mem_cpu_map_t)(struct rte_gpu *dev, size_t size, void *ptr_in, void **ptr_out); typedef int (rte_gpu_mem_cpu_unmap_t)(struct rte_gpu *dev, void *ptr); typedef int (rte_gpu_wmb_t)(struct rte_gpu *dev); +typedef int (rte_gpu_mem_dma_map_t)(struct rte_gpu *dev, size_t size, void *ptr_in, void **ptr_out); +typedef int (rte_gpu_mem_dma_unmap_t)(struct rte_gpu *dev, void *ptr); struct rte_gpu_ops { /* Get device info. If NULL, info is just copied. */ @@ -59,6 +61,10 @@ struct rte_gpu_ops { rte_gpu_mem_cpu_unmap_t *mem_cpu_unmap; /* Enforce GPU write memory barrier. */ rte_gpu_wmb_t *wmb; + /* DMA address of GPU memory. */ + rte_gpu_mem_dma_map_t *mem_dma_map; + /* DMA unmap GPU memory. */ + rte_gpu_mem_dma_unmap_t *mem_dma_unmap; }; struct rte_gpu_mpshared { diff --git a/lib/gpudev/rte_gpudev.h b/lib/gpudev/rte_gpudev.h index 0a94a6abc4..ebc789880b 100644 --- a/lib/gpudev/rte_gpudev.h +++ b/lib/gpudev/rte_gpudev.h @@ -484,6 +484,34 @@ int rte_gpu_mem_unregister(int16_t dev_id, void *ptr); __rte_experimental void *rte_gpu_mem_cpu_map(int16_t dev_id, size_t size, void *ptr); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Map a chunk of GPU memory to make it accessible for DMA + * using the memory pointer returned by the function. + * GPU memory has to be allocated via rte_gpu_mem_alloc(). + * + * @param dev_id + * Device ID requiring mapped memory. + * @param size + * Number of bytes to map. + * Requesting 0 will do nothing. + * @param ptr + * Pointer to the GPU memory area to be mapped for DMA. + * NULL is a no-op accepted value. 
+ * + * @return + * The DMA-mapped address of the GPU memory usable by the device, otherwise NULL and rte_errno is set: + * - ENODEV if invalid dev_id + * - ENOTSUP if operation not supported by the driver + * - E2BIG if size is higher than limit + * - ENOMEM if out of space + * - EPERM if driver error + */ +__rte_experimental +void *rte_gpu_mem_dma_map(int16_t dev_id, size_t size, void *ptr); + /** * @warning * @b EXPERIMENTAL: this API may change without prior notice. @@ -505,6 +533,27 @@ void *rte_gpu_mem_cpu_map(int16_t dev_id, size_t size, void *ptr); __rte_experimental int rte_gpu_mem_cpu_unmap(int16_t dev_id, void *ptr); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Unmap a chunk of GPU memory previously mapped with rte_gpu_mem_dma_map(). + * + * @param dev_id + * Reference device ID. + * @param ptr + * Pointer to the GPU memory area to be unmapped. + * NULL is a no-op accepted value. + * + * @return + * 0 on success, -rte_errno otherwise: + * - ENODEV if invalid dev_id + * - ENOTSUP if operation not supported by the driver + * - EPERM if driver error + */ +__rte_experimental +int rte_gpu_mem_dma_unmap(int16_t dev_id, void *ptr); + /** * @warning * @b EXPERIMENTAL: this API may change without prior notice. 
diff --git a/lib/gpudev/version.map b/lib/gpudev/version.map index a2c8ce5759..f267b5e029 100644 --- a/lib/gpudev/version.map +++ b/lib/gpudev/version.map @@ -28,6 +28,8 @@ EXPERIMENTAL { rte_gpu_mem_cpu_unmap; rte_gpu_mem_unregister; rte_gpu_wmb; + rte_gpu_mem_dma_map; + rte_gpu_mem_dma_unmap; }; INTERNAL { From patchwork Fri Sep 6 04:16:14 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ajit Khaparde X-Patchwork-Id: 143683 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 3547145917; Fri, 6 Sep 2024 06:16:36 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 0307F42E8A; Fri, 6 Sep 2024 06:16:25 +0200 (CEST) Received: from mail-qv1-f50.google.com (mail-qv1-f50.google.com [209.85.219.50]) by mails.dpdk.org (Postfix) with ESMTP id C31EB42E8A for ; Fri, 6 Sep 2024 06:16:23 +0200 (CEST) Received: by mail-qv1-f50.google.com with SMTP id 6a1803df08f44-6c358b725feso8847866d6.1 for ; Thu, 05 Sep 2024 21:16:23 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=broadcom.com; s=google; t=1725596182; x=1726200982; darn=dpdk.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=vNlD4SAJ0ykyT4g7IQ95w89f5j8QR6SKaI5ARMprADs=; b=SsoClAbVrxebYZwZi+1b2QuyA6vHQwJPB+JPu+vgYaxeW4sCfHPSWPhflWqLjnytx5 i0um37YSMbEVwf4caEXruBtmTq9G2jvrzE1FKPagvPQXiEuGAgUTf6Gm/Q01yjFsAtMX CMW7R24HzVqDkGVVjgaCBGEE5AHECcSZaohV8= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1725596182; x=1726200982; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc 
:subject:date:message-id:reply-to; bh=vNlD4SAJ0ykyT4g7IQ95w89f5j8QR6SKaI5ARMprADs=; b=oxatQVSbzWGuSyhPxOts2qRRSa91GlfhS5PDzYmRCaf9vgxo/Xm7Rof5gg2qB6W12s 7TSoF9BP0msyPPPAyzn76vkD+v/oJDGRVw+RGBlJBoandLYjYhRuzLmcvbt+0W0tAU8u 5oVnKMphyTr0tjYbaPbw8EBXEoGu2xiu2KQHm64clEsrFHi6aekf1KLWmkZ5d9CB61d4 zFtRh/a02aYDqHGt8tJSq4wSeCANcvh97fPHoIl2q05Nm4ulAKiuoktxq3oAndTUKmKD CAxx063o0rHxeTDX2kzYaVsL07skfVExpRzhUfKoKWkmIpsdUiYZpOLRmaTvFK/QzWGq Nzug== X-Gm-Message-State: AOJu0YxifSoB+KDUem2FvNXT8qq4bTqWoNqJr4xX6iN9meiVKUAj7WiP 7mohfdEGdVQAF2EilXBZnDsJTqHLUPCuIiG72EWe2sAHjFA5CU+XDLhir2V9tAe5JcYoik8eZMA phuMLj+6lYptnQb/OqXWFye9su8r7E5iZ+slj1bnNVyp2JEVhOJg8ryeQB54Ty31h7OgZm/pDhb AfxjoGg282RVCdPjMMynt9Hts64naG X-Google-Smtp-Source: AGHT+IHwyceLvsK6r/OlThJgE0gzYUo313iUFUF75qgrRjNKaTU5mHIWr8iF0f0s1VqMg6SADtnCew== X-Received: by 2002:a05:6214:2b8c:b0:6c5:19fd:4158 with SMTP id 6a1803df08f44-6c519fd4234mr67699406d6.47.1725596182070; Thu, 05 Sep 2024 21:16:22 -0700 (PDT) Received: from localhost.localdomain ([136.52.21.78]) by smtp.gmail.com with ESMTPSA id 6a1803df08f44-6c520419eb9sm13418276d6.122.2024.09.05.21.16.20 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Thu, 05 Sep 2024 21:16:21 -0700 (PDT) From: Ajit Khaparde To: dev@dpdk.org Cc: thomas@monjalon.net, eagostini@nvidia.com Subject: [PATCH 2/2] gpu/cuda: extend cuda code to get PA of GPU Date: Thu, 5 Sep 2024 21:16:14 -0700 Message-Id: <20240906041614.36962-3-ajit.khaparde@broadcom.com> X-Mailer: git-send-email 2.39.3 (Apple Git-146) In-Reply-To: <20240906041614.36962-1-ajit.khaparde@broadcom.com> References: <20240906041614.36962-1-ajit.khaparde@broadcom.com> MIME-Version: 1.0 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Add code to get the physical address of the GPU memory. 
This should allow some NIC hardware to directly use the physical address for DMA instead of the CUDA Unified Memory provided by cuda_mem_map. Signed-off-by: Ajit Khaparde --- drivers/gpu/cuda/common.h | 2 + drivers/gpu/cuda/cuda.c | 117 ++++++++++++++++++++++++++++++------- drivers/gpu/cuda/gdrcopy.c | 50 ++++++++++++++++ 3 files changed, 147 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/cuda/common.h b/drivers/gpu/cuda/common.h index e809988503..5da5e7d176 100644 --- a/drivers/gpu/cuda/common.h +++ b/drivers/gpu/cuda/common.h @@ -33,6 +33,8 @@ extern int cuda_logtype; int gdrcopy_pin(gdr_t *gdrc_h, __rte_unused gdr_mh_t *mh, uint64_t d_addr, size_t size, void **h_addr); +int gdrcopy_dma(gdr_t *gdrc_h, __rte_unused gdr_mh_t *mh, + uint64_t d_addr, size_t size, void **h_addr, uint64_t *paddr); int gdrcopy_unpin(gdr_t gdrc_h, __rte_unused gdr_mh_t mh, void *d_addr, size_t size); diff --git a/drivers/gpu/cuda/cuda.c b/drivers/gpu/cuda/cuda.c index a552aabeb8..36cff7b3a5 100644 --- a/drivers/gpu/cuda/cuda.c +++ b/drivers/gpu/cuda/cuda.c @@ -959,6 +959,87 @@ cuda_mem_cpu_map(struct rte_gpu *dev, __rte_unused size_t size, void *ptr_in, vo return 0; } +static int +cuda_mem_dma_map(struct rte_gpu *dev, __rte_unused size_t size, void *ptr_in, void **ptr_out) +{ + struct mem_entry *mem_item; + cuda_ptr_key hk; + uint64_t paddr; + + if (dev == NULL) + return -ENODEV; + + hk = get_hash_from_ptr((void *)ptr_in); + + mem_item = mem_list_find_item(hk); + if (mem_item == NULL) { + rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory.", ptr_in); + rte_errno = EPERM; + return -rte_errno; + } + + if (mem_item->mtype != GPU_MEM) { + rte_cuda_log(ERR, "Memory address 0x%p is not GPU memory type.", ptr_in); + rte_errno = EPERM; + return -rte_errno; + } + + if (mem_item->size != size) + rte_cuda_log(WARNING, + "Can't expose memory area with size (%zd) different from original size (%zd).", + size, mem_item->size); + + if (gdrcopy_dma(&gdrc_h, &(mem_item->mh), 
(uint64_t)mem_item->ptr_d, + mem_item->size, &(mem_item->ptr_h), &paddr)) { + rte_cuda_log(ERR, "Error exposing GPU memory address 0x%p.", ptr_in); + rte_errno = EPERM; + return -rte_errno; + } + + mem_item->mtype = GPU_REGISTERED; + *ptr_out = (void *)paddr; + + return 0; +} + +static int +cuda_mem_free(struct rte_gpu *dev, void *ptr) +{ + CUresult res; + struct mem_entry *mem_item; + const char *err_string; + cuda_ptr_key hk; + + if (dev == NULL) + return -ENODEV; + + hk = get_hash_from_ptr((void *)ptr); + + mem_item = mem_list_find_item(hk); + if (mem_item == NULL) { + rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory", ptr); + rte_errno = EPERM; + return -rte_errno; + } + + if (mem_item->mtype == GPU_MEM) { + res = pfn_cuMemFree(mem_item->ptr_orig_d); + if (res != 0) { + pfn_cuGetErrorString(res, &(err_string)); + rte_cuda_log(ERR, "cuMemFree current failed with %s", + err_string); + rte_errno = EPERM; + return -rte_errno; + } + + return mem_list_del_item(hk); + } + + rte_cuda_log(ERR, "Memory type %d not supported", mem_item->mtype); + + return -EPERM; +} + static int cuda_mem_unregister(struct rte_gpu *dev, void *ptr) { @@ -1034,48 +1115,38 @@ cuda_mem_cpu_unmap(struct rte_gpu *dev, void *ptr_in) } static int -cuda_mem_free(struct rte_gpu *dev, void *ptr) +cuda_mem_dma_unmap(struct rte_gpu *dev, void *ptr_in) { - CUresult res; struct mem_entry *mem_item; - const char *err_string; cuda_ptr_key hk; if (dev == NULL) return -ENODEV; - hk = get_hash_from_ptr((void *)ptr); + hk = get_hash_from_ptr((void *)ptr_in); mem_item = mem_list_find_item(hk); if (mem_item == NULL) { - rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory", ptr); + rte_cuda_log(ERR, "Memory address 0x%p not found in driver memory.", ptr_in); rte_errno = EPERM; return -rte_errno; } - /* - * If a GPU memory area that's CPU mapped is being freed - * without calling cpu_unmap, force the unmapping. 
- */ - if (mem_item->mtype == GPU_REGISTERED) - cuda_mem_cpu_unmap(dev, ptr); - - if (mem_item->mtype == GPU_MEM) { - res = pfn_cuMemFree(mem_item->ptr_orig_d); - if (res != 0) { - pfn_cuGetErrorString(res, &(err_string)); - rte_cuda_log(ERR, "cuMemFree current failed with %s", - err_string); + if (mem_item->mtype == GPU_REGISTERED) { + if (gdrcopy_unpin(gdrc_h, mem_item->mh, (void *)mem_item->ptr_d, + mem_item->size)) { + rte_cuda_log(ERR, "Error unexposing GPU memory address 0x%p.", ptr_in); rte_errno = EPERM; return -rte_errno; } - return mem_list_del_item(hk); + mem_item->mtype = GPU_MEM; + } else { + rte_errno = EPERM; + return -rte_errno; } - rte_cuda_log(ERR, "Memory type %d not supported", mem_item->mtype); - - return -EPERM; + return 0; } static int @@ -1391,7 +1462,9 @@ cuda_gpu_probe(__rte_unused struct rte_pci_driver *pci_drv, struct rte_pci_devic dev->ops.mem_register = cuda_mem_register; dev->ops.mem_unregister = cuda_mem_unregister; dev->ops.mem_cpu_map = cuda_mem_cpu_map; + dev->ops.mem_dma_map = cuda_mem_dma_map; dev->ops.mem_cpu_unmap = cuda_mem_cpu_unmap; + dev->ops.mem_dma_unmap = cuda_mem_dma_unmap; dev->ops.wmb = cuda_wmb; rte_gpu_complete_new(dev); diff --git a/drivers/gpu/cuda/gdrcopy.c b/drivers/gpu/cuda/gdrcopy.c index bd56b73ce4..ac6b3d773a 100644 --- a/drivers/gpu/cuda/gdrcopy.c +++ b/drivers/gpu/cuda/gdrcopy.c @@ -12,6 +12,8 @@ static void *gdrclib; static gdr_t (*sym_gdr_open)(void); static int (*sym_gdr_pin_buffer)(gdr_t g, unsigned long addr, size_t size, uint64_t p2p_token, uint32_t va_space, gdr_mh_t *handle); +static int (*sym_gdr_p2p_dma_map_buffer)(gdr_t g, unsigned long addr, size_t size, + uint64_t p2p_token, uint32_t va_space, gdr_mh_t *handle, uint64_t *paddr); static int (*sym_gdr_unpin_buffer)(gdr_t g, gdr_mh_t handle); static int (*sym_gdr_map)(gdr_t g, gdr_mh_t handle, void **va, size_t size); static int (*sym_gdr_unmap)(gdr_t g, gdr_mh_t handle, void *va, size_t size); @@ -45,6 +47,13 @@ gdrcopy_loader(void) return -1; 
} + sym_gdr_p2p_dma_map_buffer = dlsym(gdrclib, "gdr_p2p_dma_map_buffer"); + if (sym_gdr_p2p_dma_map_buffer == NULL) { + rte_cuda_log(ERR, "Failed to load GDRCopy symbol gdr_p2p_dma_map_buffer\n"); + printf("Failed to load GDRCopy symbol gdr_p2p_dma_map_buffer\n"); + return -1; + } + sym_gdr_unpin_buffer = dlsym(gdrclib, "gdr_unpin_buffer"); if (sym_gdr_unpin_buffer == NULL) { rte_cuda_log(ERR, "Failed to load GDRCopy symbols\n"); @@ -119,6 +128,47 @@ gdrcopy_pin(__rte_unused gdr_t *gdrc_h, __rte_unused gdr_mh_t *mh, #endif } +int +gdrcopy_dma(__rte_unused gdr_t *gdrc_h, __rte_unused gdr_mh_t *mh, + __rte_unused uint64_t d_addr, __rte_unused size_t size, + __rte_unused void **h_addr, __rte_unused uint64_t *paddr) +{ +#ifdef DRIVERS_GPU_CUDA_GDRCOPY_H + uint64_t phys; + + if (*gdrc_h == NULL) { + if (gdrcopy_loader()) + return -ENOTSUP; + + if (gdrcopy_open(gdrc_h)) { + rte_cuda_log(ERR, + "GDRCopy gdrdrv kernel module not found. Can't CPU map GPU memory."); + return -EPERM; + } + } + + /* Pin the device buffer */ + if (sym_gdr_p2p_dma_map_buffer(*gdrc_h, d_addr, size, 0, 0, mh, &phys) != 0) { + rte_cuda_log(ERR, "GDRCopy p2p dma map buffer error."); + return -1; + } + *paddr = phys; + + /* Map the buffer to user space */ + if (sym_gdr_map(*gdrc_h, *mh, h_addr, size) != 0) { + rte_cuda_log(ERR, "GDRCopy map buffer error."); + sym_gdr_unpin_buffer(*gdrc_h, *mh); + return -1; + } + + return 0; +#else + rte_cuda_log(ERR, + "GDRCopy headers not provided at DPDK building time. Can't CPU map GPU memory."); + return -ENOTSUP; +#endif +} + int gdrcopy_unpin(gdr_t gdrc_h, __rte_unused gdr_mh_t mh, __rte_unused void *d_addr, __rte_unused size_t size)