From patchwork Wed Nov 17 07:05:44 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Rybchenko X-Patchwork-Id: 104435 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 996B3A0C41; Wed, 17 Nov 2021 08:06:21 +0100 (CET) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id CB6024111C; Wed, 17 Nov 2021 08:06:18 +0100 (CET) Received: from shelob.oktetlabs.ru (shelob.oktetlabs.ru [91.220.146.113]) by mails.dpdk.org (Postfix) with ESMTP id AEE5C41148 for ; Wed, 17 Nov 2021 08:06:14 +0100 (CET) Received: by shelob.oktetlabs.ru (Postfix, from userid 122) id 6A9EA7F4D4; Wed, 17 Nov 2021 10:06:14 +0300 (MSK) X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on shelob.oktetlabs.ru X-Spam-Level: X-Spam-Status: No, score=0.8 required=5.0 tests=ALL_TRUSTED, DKIM_ADSP_DISCARD, URIBL_BLOCKED autolearn=no autolearn_force=no version=3.4.2 Received: from aros.oktetlabs.ru (aros.oktetlabs.ru [192.168.38.17]) by shelob.oktetlabs.ru (Postfix) with ESMTP id D70C27F52B; Wed, 17 Nov 2021 10:05:58 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 shelob.oktetlabs.ru D70C27F52B Authentication-Results: shelob.oktetlabs.ru/D70C27F52B; dkim=none; dkim-atps=neutral From: Andrew Rybchenko To: dev@dpdk.org Cc: Thomas Monjalon , Ferruh Yigit , Andy Moreton Subject: [PATCH v2 1/2] common/sfc_efx/base: support NIC DMA memory regions API Date: Wed, 17 Nov 2021 10:05:44 +0300 Message-Id: <20211117070545.4004374-2-andrew.rybchenko@oktetlabs.ru> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20211117070545.4004374-1-andrew.rybchenko@oktetlabs.ru> References: <20211105083333.1960017-1-andrew.rybchenko@oktetlabs.ru> <20211117070545.4004374-1-andrew.rybchenko@oktetlabs.ru> MIME-Version: 1.0 
X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org The NIC DMA memory regions API allows establishing a mapping of DMA addresses used by the NIC to host IOVA understood by the host when an IOMMU is absent and the NIC cannot address the entire host IOVA space because its DMA mask is too small. The API does not allow addressing the entire host IOVA space, but allows addressing arbitrary regions of that space which are actually used for NIC DMA. A DMA region needs to be mapped in order to perform MCDI initialization. Since the NIC has not been probed at that point, its configuration cannot be accessed and therefore an UNKNOWN mapping type is assumed. Signed-off-by: Andrew Rybchenko Reviewed-by: Andy Moreton --- drivers/common/sfc_efx/base/ef10_nic.c | 51 +++ drivers/common/sfc_efx/base/efx.h | 46 +++ drivers/common/sfc_efx/base/efx_impl.h | 20 ++ drivers/common/sfc_efx/base/efx_mcdi.c | 204 +++++++++++ drivers/common/sfc_efx/base/efx_mcdi.h | 31 ++ drivers/common/sfc_efx/base/efx_nic.c | 460 ++++++++++++++++++++++++ drivers/common/sfc_efx/base/siena_nic.c | 2 + drivers/common/sfc_efx/version.map | 3 + 8 files changed, 817 insertions(+) diff --git a/drivers/common/sfc_efx/base/ef10_nic.c b/drivers/common/sfc_efx/base/ef10_nic.c index 72d2caadb8..355d274470 100644 --- a/drivers/common/sfc_efx/base/ef10_nic.c +++ b/drivers/common/sfc_efx/base/ef10_nic.c @@ -1854,6 +1854,51 @@ ef10_external_port_mapping( return (rc); } +static __checkReturn efx_rc_t +efx_mcdi_get_nic_addr_caps( + __in efx_nic_t *enp) +{ + efx_nic_cfg_t *encp = &(enp->en_nic_cfg); + uint32_t mapping_type; + efx_rc_t rc; + + rc = efx_mcdi_get_nic_addr_info(enp, &mapping_type); + if (rc != 0) { + if (rc == ENOTSUP) { + encp->enc_dma_mapping = EFX_NIC_DMA_MAPPING_FLAT; + goto out; + } + goto fail1; + } + + switch (mapping_type) { + case MC_CMD_GET_DESC_ADDR_INFO_OUT_MAPPING_FLAT: + 
encp->enc_dma_mapping = EFX_NIC_DMA_MAPPING_FLAT; + break; + case MC_CMD_GET_DESC_ADDR_INFO_OUT_MAPPING_REGIONED: + encp->enc_dma_mapping = EFX_NIC_DMA_MAPPING_REGIONED; + rc = efx_mcdi_get_nic_addr_regions(enp, + &enp->en_dma.end_u.endu_region_info); + if (rc != 0) + goto fail2; + break; + default: + goto fail3; + } + +out: + return (0); + +fail3: + EFSYS_PROBE(fail3); +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + __checkReturn efx_rc_t efx_mcdi_nic_board_cfg( __in efx_nic_t *enp) @@ -1994,8 +2039,14 @@ efx_mcdi_nic_board_cfg( encp->enc_intr_vec_base = base; encp->enc_intr_limit = nvec; + rc = efx_mcdi_get_nic_addr_caps(enp); + if (rc != 0) + goto fail12; + return (0); +fail12: + EFSYS_PROBE(fail12); fail11: EFSYS_PROBE(fail11); fail10: diff --git a/drivers/common/sfc_efx/base/efx.h b/drivers/common/sfc_efx/base/efx.h index f08a004536..4d3210f6b6 100644 --- a/drivers/common/sfc_efx/base/efx.h +++ b/drivers/common/sfc_efx/base/efx.h @@ -1444,6 +1444,14 @@ typedef enum efx_vi_window_shift_e { EFX_VI_WINDOW_SHIFT_64K = 16, } efx_vi_window_shift_t; +typedef enum efx_nic_dma_mapping_e { + EFX_NIC_DMA_MAPPING_UNKNOWN = 0, + EFX_NIC_DMA_MAPPING_FLAT, + EFX_NIC_DMA_MAPPING_REGIONED, + + EFX_NIC_DMA_MAPPING_NTYPES +} efx_nic_dma_mapping_t; + typedef struct efx_nic_cfg_s { uint32_t enc_board_type; uint32_t enc_phy_type; @@ -1633,6 +1641,8 @@ typedef struct efx_nic_cfg_s { uint32_t enc_filter_action_mark_max; /* Port assigned to this PCI function */ uint32_t enc_assigned_port; + /* NIC DMA mapping type */ + efx_nic_dma_mapping_t enc_dma_mapping; } efx_nic_cfg_t; #define EFX_PCI_VF_INVALID 0xffff @@ -4897,6 +4907,42 @@ efx_virtio_verify_features( #endif /* EFSYS_OPT_VIRTIO */ +LIBEFX_API +extern __checkReturn efx_rc_t +efx_nic_dma_config_add( + __in efx_nic_t *enp, + __in efsys_dma_addr_t trgt_addr, + __in size_t len, + __out_opt efsys_dma_addr_t *nic_basep, + __out_opt efsys_dma_addr_t *trgt_basep, + __out_opt size_t 
*map_lenp); + +LIBEFX_API +extern __checkReturn efx_rc_t +efx_nic_dma_reconfigure( + __in efx_nic_t *enp); + +typedef enum efx_nic_dma_addr_type_e { + EFX_NIC_DMA_ADDR_MCDI_BUF, + EFX_NIC_DMA_ADDR_MAC_STATS_BUF, + EFX_NIC_DMA_ADDR_EVENT_RING, + EFX_NIC_DMA_ADDR_RX_RING, + EFX_NIC_DMA_ADDR_TX_RING, + EFX_NIC_DMA_ADDR_RX_BUF, + EFX_NIC_DMA_ADDR_TX_BUF, + + EFX_NIC_DMA_ADDR_NTYPES +} efx_nic_dma_addr_type_t; + +LIBEFX_API +extern __checkReturn efx_rc_t +efx_nic_dma_map( + __in efx_nic_t *enp, + __in efx_nic_dma_addr_type_t addr_type, + __in efsys_dma_addr_t tgt_addr, + __in size_t len, + __out efsys_dma_addr_t *nic_addrp); + #ifdef __cplusplus } #endif diff --git a/drivers/common/sfc_efx/base/efx_impl.h b/drivers/common/sfc_efx/base/efx_impl.h index eda41b4be0..71c83515f7 100644 --- a/drivers/common/sfc_efx/base/efx_impl.h +++ b/drivers/common/sfc_efx/base/efx_impl.h @@ -428,6 +428,25 @@ typedef struct efx_nic_ops_s { #define EFX_RXQ_LIMIT_TARGET 512 #endif +typedef struct efx_nic_dma_region_s { + efsys_dma_addr_t endr_nic_base; + efsys_dma_addr_t endr_trgt_base; + unsigned int endr_window_log2; + unsigned int endr_align_log2; + boolean_t endr_inuse; +} efx_nic_dma_region_t; + +typedef struct efx_nic_dma_region_info_s { + unsigned int endri_count; + efx_nic_dma_region_t *endri_regions; +} efx_nic_dma_region_info_t; + +typedef struct efx_nic_dma_s { + union { + /* No configuration in the case flat mapping type */ + efx_nic_dma_region_info_t endu_region_info; + } end_u; +} efx_nic_dma_t; #if EFSYS_OPT_FILTER @@ -859,6 +878,7 @@ struct efx_nic_s { const efx_rx_ops_t *en_erxop; efx_fw_variant_t efv; char en_drv_version[EFX_DRV_VER_MAX]; + efx_nic_dma_t en_dma; #if EFSYS_OPT_FILTER efx_filter_t en_filter; const efx_filter_ops_t *en_efop; diff --git a/drivers/common/sfc_efx/base/efx_mcdi.c b/drivers/common/sfc_efx/base/efx_mcdi.c index cdf7181e0d..9189a7a8b3 100644 --- a/drivers/common/sfc_efx/base/efx_mcdi.c +++ b/drivers/common/sfc_efx/base/efx_mcdi.c @@ -3236,4 +3236,208 
@@ efx_mcdi_fini_txq( #endif /* EFSYS_OPT_RIVERHEAD || EFX_OPTS_EF10() */ + __checkReturn efx_rc_t +efx_mcdi_get_nic_addr_info( + __in efx_nic_t *enp, + __out uint32_t *mapping_typep) +{ + EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_DESC_ADDR_INFO_IN_LEN, + MC_CMD_GET_DESC_ADDR_INFO_OUT_LEN); + efx_mcdi_req_t req; + efx_rc_t rc; + + req.emr_cmd = MC_CMD_GET_DESC_ADDR_INFO; + req.emr_in_buf = payload; + req.emr_in_length = MC_CMD_GET_DESC_ADDR_INFO_IN_LEN; + req.emr_out_buf = payload; + req.emr_out_length = MC_CMD_GET_DESC_ADDR_INFO_OUT_LEN; + + efx_mcdi_execute_quiet(enp, &req); + + if (req.emr_rc != 0) { + rc = req.emr_rc; + goto fail1; + } + + if (req.emr_out_length_used < MC_CMD_GET_DESC_ADDR_INFO_OUT_LEN) { + rc = EMSGSIZE; + goto fail2; + } + + *mapping_typep = + MCDI_OUT_DWORD(req, GET_DESC_ADDR_INFO_OUT_MAPPING_TYPE); + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + + __checkReturn efx_rc_t +efx_mcdi_get_nic_addr_regions( + __in efx_nic_t *enp, + __out efx_nic_dma_region_info_t *endrip) +{ + EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_DESC_ADDR_REGIONS_IN_LEN, + MC_CMD_GET_DESC_ADDR_REGIONS_OUT_LENMAX_MCDI2); + efx_xword_t *regions; + efx_mcdi_req_t req; + efx_rc_t rc; + size_t alloc_size; + unsigned int nregions; + unsigned int i; + + req.emr_cmd = MC_CMD_GET_DESC_ADDR_REGIONS; + req.emr_in_buf = payload; + req.emr_in_length = MC_CMD_GET_DESC_ADDR_REGIONS_IN_LEN; + req.emr_out_buf = payload; + req.emr_out_length = MC_CMD_GET_DESC_ADDR_REGIONS_OUT_LENMAX_MCDI2; + + efx_mcdi_execute_quiet(enp, &req); + + if (req.emr_rc != 0) { + rc = req.emr_rc; + goto fail1; + } + + if (req.emr_out_length_used < + MC_CMD_GET_DESC_ADDR_REGIONS_OUT_LENMIN) { + rc = EMSGSIZE; + goto fail2; + } + + nregions = MC_CMD_GET_DESC_ADDR_REGIONS_OUT_REGIONS_NUM( + req.emr_out_length_used); + + EFX_STATIC_ASSERT(sizeof (*regions) == DESC_ADDR_REGION_LEN); + regions = MCDI_OUT2(req, efx_xword_t, + 
GET_DESC_ADDR_REGIONS_OUT_REGIONS); + + alloc_size = nregions * sizeof(endrip->endri_regions[0]); + if (alloc_size / sizeof (endrip->endri_regions[0]) != nregions) { + rc = ENOMEM; + goto fail3; + } + + EFSYS_KMEM_ALLOC(enp->en_esip, + alloc_size, + endrip->endri_regions); + if (endrip->endri_regions == NULL) { + rc = ENOMEM; + goto fail4; + } + + endrip->endri_count = nregions; + for (i = 0; i < nregions; ++i) { + efx_nic_dma_region_t *region_info; + + region_info = &endrip->endri_regions[i]; + + region_info->endr_inuse = B_FALSE; + + region_info->endr_nic_base = + MCDI_OUT_INDEXED_MEMBER_QWORD(req, + GET_DESC_ADDR_REGIONS_OUT_REGIONS, i, + DESC_ADDR_REGION_DESC_ADDR_BASE); + + region_info->endr_trgt_base = + MCDI_OUT_INDEXED_MEMBER_QWORD(req, + GET_DESC_ADDR_REGIONS_OUT_REGIONS, i, + DESC_ADDR_REGION_TRGT_ADDR_BASE); + + region_info->endr_window_log2 = + MCDI_OUT_INDEXED_MEMBER_DWORD(req, + GET_DESC_ADDR_REGIONS_OUT_REGIONS, i, + DESC_ADDR_REGION_WINDOW_SIZE_LOG2); + + region_info->endr_align_log2 = + MCDI_OUT_INDEXED_MEMBER_DWORD(req, + GET_DESC_ADDR_REGIONS_OUT_REGIONS, i, + DESC_ADDR_REGION_TRGT_ADDR_ALIGN_LOG2); + } + + return (0); + +fail4: + EFSYS_PROBE(fail4); +fail3: + EFSYS_PROBE(fail3); +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + + __checkReturn efx_rc_t +efx_mcdi_set_nic_addr_regions( + __in efx_nic_t *enp, + __in const efx_nic_dma_region_info_t *endrip) +{ + EFX_MCDI_DECLARE_BUF(payload, + MC_CMD_SET_DESC_ADDR_REGIONS_IN_LENMAX_MCDI2, + MC_CMD_SET_DESC_ADDR_REGIONS_OUT_LEN); + efx_qword_t *trgt_addr_base; + efx_mcdi_req_t req; + unsigned int i; + efx_rc_t rc; + + if (endrip->endri_count > + MC_CMD_SET_DESC_ADDR_REGIONS_IN_TRGT_ADDR_BASE_MAXNUM) { + rc = EINVAL; + goto fail1; + } + + req.emr_cmd = MC_CMD_SET_DESC_ADDR_REGIONS; + req.emr_in_buf = payload; + req.emr_in_length = + MC_CMD_SET_DESC_ADDR_REGIONS_IN_LEN(endrip->endri_count); + req.emr_out_buf = payload; + req.emr_out_length = 
MC_CMD_SET_DESC_ADDR_REGIONS_OUT_LEN; + + EFX_STATIC_ASSERT(sizeof (*trgt_addr_base) == + MC_CMD_SET_DESC_ADDR_REGIONS_IN_TRGT_ADDR_BASE_LEN); + trgt_addr_base = MCDI_OUT2(req, efx_qword_t, + SET_DESC_ADDR_REGIONS_IN_TRGT_ADDR_BASE); + + for (i = 0; i < endrip->endri_count; ++i) { + const efx_nic_dma_region_t *region_info; + + region_info = &endrip->endri_regions[i]; + + if (region_info->endr_inuse != B_TRUE) + continue; + + EFX_STATIC_ASSERT(sizeof (1U) * 8 >= + MC_CMD_SET_DESC_ADDR_REGIONS_IN_TRGT_ADDR_BASE_MAXNUM); + MCDI_IN_SET_DWORD(req, + SET_DESC_ADDR_REGIONS_IN_SET_REGION_MASK, 1U << i); + + MCDI_IN_SET_INDEXED_QWORD(req, + SET_DESC_ADDR_REGIONS_IN_TRGT_ADDR_BASE, i, + region_info->endr_trgt_base); + } + + efx_mcdi_execute_quiet(enp, &req); + + if (req.emr_rc != 0) { + rc = req.emr_rc; + goto fail2; + } + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + #endif /* EFSYS_OPT_MCDI */ diff --git a/drivers/common/sfc_efx/base/efx_mcdi.h b/drivers/common/sfc_efx/base/efx_mcdi.h index 96f237b1b0..c91ea41911 100644 --- a/drivers/common/sfc_efx/base/efx_mcdi.h +++ b/drivers/common/sfc_efx/base/efx_mcdi.h @@ -289,6 +289,26 @@ efx_mcdi_phy_module_get_info( __in size_t len, __out_bcount(len) uint8_t *data); +LIBEFX_INTERNAL +extern __checkReturn efx_rc_t +efx_mcdi_get_nic_addr_info( + __in efx_nic_t *enp, + __out uint32_t *mapping_typep); + +struct efx_nic_dma_region_info_s; + +LIBEFX_INTERNAL +extern __checkReturn efx_rc_t +efx_mcdi_get_nic_addr_regions( + __in efx_nic_t *enp, + __out struct efx_nic_dma_region_info_s *endrip); + +LIBEFX_INTERNAL +extern __checkReturn efx_rc_t +efx_mcdi_set_nic_addr_regions( + __in efx_nic_t *enp, + __in const struct efx_nic_dma_region_info_s *endrip); + #define MCDI_IN(_emr, _type, _ofst) \ ((_type *)((_emr).emr_in_buf + (_ofst))) @@ -315,6 +335,17 @@ efx_mcdi_phy_module_get_info( EFX_POPULATE_DWORD_1(*(MCDI_IN2(_emr, efx_dword_t, _ofst) + \ (_idx)), EFX_DWORD_0, _value) 
\ +#define MCDI_IN_SET_QWORD(_emr, _ofst, _value) \ + EFX_POPULATE_QWORD_2(*MCDI_IN2(_emr, efx_qword_t, _ofst), \ + EFX_DWORD_0, ((_value) & 0xffffffff), \ + EFX_DWORD_1, ((_value) >> 32)) + +#define MCDI_IN_SET_INDEXED_QWORD(_emr, _ofst, _idx, _value) \ + EFX_POPULATE_QWORD_2(*(MCDI_IN2(_emr, efx_qword_t, _ofst) + \ + (_idx)), \ + EFX_DWORD_0, ((_value) & 0xffffffff), \ + EFX_DWORD_1, ((_value) >> 32)) + #define MCDI_IN_POPULATE_DWORD_1(_emr, _ofst, _field1, _value1) \ EFX_POPULATE_DWORD_1(*MCDI_IN2(_emr, efx_dword_t, _ofst), \ MC_CMD_ ## _field1, _value1) diff --git a/drivers/common/sfc_efx/base/efx_nic.c b/drivers/common/sfc_efx/base/efx_nic.c index 9fe0933772..172488e083 100644 --- a/drivers/common/sfc_efx/base/efx_nic.c +++ b/drivers/common/sfc_efx/base/efx_nic.c @@ -1300,3 +1300,463 @@ efx_nic_check_pcie_link_speed( return (rc); } + +/* Required en_eslp lock held */ +static __checkReturn efx_rc_t +efx_nic_dma_config_regioned_find_region( + __in const efx_nic_t *enp, + __in efsys_dma_addr_t trgt_addr, + __in size_t len, + __out const efx_nic_dma_region_t **regionp) +{ + const efx_nic_dma_region_info_t *region_info; + const efx_nic_dma_region_t *region; + unsigned int i; + efx_rc_t rc; + + if (efx_nic_cfg_get(enp)->enc_dma_mapping != + EFX_NIC_DMA_MAPPING_REGIONED) { + rc = EINVAL; + goto fail1; + } + + region_info = &enp->en_dma.end_u.endu_region_info; + + for (i = 0; i < region_info->endri_count; ++i) { + efsys_dma_addr_t offset; + + region = ®ion_info->endri_regions[i]; + if (region->endr_inuse == B_FALSE) + continue; + + if (trgt_addr < region->endr_trgt_base) + continue; + + EFSYS_ASSERT3U(region->endr_window_log2, <, 64); + offset = trgt_addr - region->endr_trgt_base; + if (offset + len > (1ULL << region->endr_window_log2)) + continue; + + *regionp = region; + return (0); + } + + rc = ENOENT; + goto fail2; + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +static __checkReturn efx_rc_t 
+efx_nic_dma_config_regioned_add_region( + __in efx_nic_t *enp, + __in efsys_dma_addr_t trgt_addr, + __in size_t len, + __out const efx_nic_dma_region_t **regionp) +{ + efx_nic_dma_region_info_t *region_info; + efx_nic_dma_region_t *region; + unsigned int i; + efx_rc_t rc; + + if (efx_nic_cfg_get(enp)->enc_dma_mapping != + EFX_NIC_DMA_MAPPING_REGIONED) { + rc = EINVAL; + goto fail1; + } + + region_info = &enp->en_dma.end_u.endu_region_info; + + for (i = 0; i < region_info->endri_count; ++i) { + efsys_dma_addr_t trgt_base; + efsys_dma_addr_t offset; + + region = ®ion_info->endri_regions[i]; + if (region->endr_inuse == B_TRUE) + continue; + + /* + * Align target address base in accordance with + * the region requirements. + */ + EFSYS_ASSERT3U(region->endr_align_log2, <, 64); + trgt_base = EFX_P2ALIGN(efsys_dma_addr_t, trgt_addr, + (1ULL << region->endr_align_log2)); + + offset = trgt_addr - trgt_base; + + /* Check if region window is sufficient */ + EFSYS_ASSERT3U(region->endr_window_log2, <, 64); + if (offset + len > (1ULL << region->endr_window_log2)) + continue; + + region->endr_trgt_base = trgt_base; + region->endr_inuse = B_TRUE; + + *regionp = region; + return (0); + } + + /* No suitable free region found */ + rc = ENOMEM; + goto fail2; + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +static __checkReturn efx_rc_t +efx_nic_dma_config_regioned_add( + __in efx_nic_t *enp, + __in efsys_dma_addr_t trgt_addr, + __in size_t len, + __out_opt efsys_dma_addr_t *nic_basep, + __out_opt efsys_dma_addr_t *trgt_basep, + __out_opt size_t *map_lenp) +{ + const efx_nic_dma_region_t *region; + efsys_lock_state_t state; + efx_rc_t rc; + + EFSYS_LOCK(enp->en_eslp, state); + + rc = efx_nic_dma_config_regioned_find_region(enp, trgt_addr, len, + ®ion); + switch (rc) { + case 0: + /* Already covered by existing mapping */ + break; + case ENOENT: + /* No existing mapping found */ + rc = efx_nic_dma_config_regioned_add_region(enp, + 
trgt_addr, len, ®ion); + if (rc != 0) + goto fail1; + break; + default: + goto fail2; + } + + if (nic_basep != NULL) + *nic_basep = region->endr_nic_base; + if (trgt_basep != NULL) + *trgt_basep = region->endr_trgt_base; + if (map_lenp != NULL) + *map_lenp = 1ULL << region->endr_window_log2; + + EFSYS_UNLOCK(enp->en_eslp, state); + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + EFSYS_UNLOCK(enp->en_eslp, state); + + return (rc); +} + + __checkReturn efx_rc_t +efx_nic_dma_config_add( + __in efx_nic_t *enp, + __in efsys_dma_addr_t trgt_addr, + __in size_t len, + __out_opt efsys_dma_addr_t *nic_basep, + __out_opt efsys_dma_addr_t *trgt_basep, + __out_opt size_t *map_lenp) +{ + const efx_nic_cfg_t *encp = efx_nic_cfg_get(enp); + efx_rc_t rc; + + switch (encp->enc_dma_mapping) { + case EFX_NIC_DMA_MAPPING_FLAT: + /* No mapping is required */ + if (nic_basep != NULL) + *nic_basep = 0; + if (trgt_basep != NULL) + *trgt_basep = 0; + if (map_lenp != NULL) + *map_lenp = 0; + break; + case EFX_NIC_DMA_MAPPING_REGIONED: + rc = efx_nic_dma_config_regioned_add(enp, trgt_addr, len, + nic_basep, trgt_basep, map_lenp); + if (rc != 0) + goto fail1; + break; + case EFX_NIC_DMA_MAPPING_UNKNOWN: + default: + rc = ENOTSUP; + goto fail2; + } + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +static __checkReturn efx_rc_t +efx_nic_dma_reconfigure_regioned( + __in efx_nic_t *enp) +{ + efx_rc_t rc; + + rc = efx_mcdi_set_nic_addr_regions(enp, + &enp->en_dma.end_u.endu_region_info); + if (rc != 0) + goto fail1; + + return (0); + +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); + +} + + __checkReturn efx_rc_t +efx_nic_dma_reconfigure( + __in efx_nic_t *enp) +{ + const efx_nic_cfg_t *encp = efx_nic_cfg_get(enp); + efx_rc_t rc; + + switch (encp->enc_dma_mapping) { + case EFX_NIC_DMA_MAPPING_UNKNOWN: + case EFX_NIC_DMA_MAPPING_FLAT: + /* Nothing to do */ + break; + case 
EFX_NIC_DMA_MAPPING_REGIONED: + rc = efx_nic_dma_reconfigure_regioned(enp); + if (rc != 0) + goto fail1; + break; + default: + rc = ENOTSUP; + goto fail2; + } + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +static __checkReturn efx_rc_t +efx_nic_dma_unknown_map( + __in efx_nic_t *enp, + __in efx_nic_dma_addr_type_t addr_type, + __in efsys_dma_addr_t trgt_addr, + __in size_t len, + __out efsys_dma_addr_t *nic_addrp) +{ + efx_rc_t rc; + + /* This function may be called before the NIC has been probed. */ + if (enp->en_mod_flags & EFX_MOD_PROBE) { + EFSYS_ASSERT3U(efx_nic_cfg_get(enp)->enc_dma_mapping, ==, + EFX_NIC_DMA_MAPPING_UNKNOWN); + } + + switch (addr_type) { + case EFX_NIC_DMA_ADDR_MCDI_BUF: + /* + * MC cares about MCDI buffer mapping itself since it cannot + * be really mapped using MCDI because mapped MCDI + * buffer is required to execute MCDI commands. + */ + *nic_addrp = trgt_addr; + break; + + case EFX_NIC_DMA_ADDR_MAC_STATS_BUF: + case EFX_NIC_DMA_ADDR_EVENT_RING: + case EFX_NIC_DMA_ADDR_RX_RING: + case EFX_NIC_DMA_ADDR_TX_RING: + case EFX_NIC_DMA_ADDR_RX_BUF: + case EFX_NIC_DMA_ADDR_TX_BUF: + /* Mapping type must be discovered first */ + rc = EFAULT; + goto fail1; + + default: + rc = EINVAL; + goto fail2; + } + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +static __checkReturn efx_rc_t +efx_nic_dma_flat_map( + __in efx_nic_t *enp, + __in efx_nic_dma_addr_type_t addr_type, + __in efsys_dma_addr_t trgt_addr, + __in size_t len, + __out efsys_dma_addr_t *nic_addrp) +{ + _NOTE(ARGUNUSED(addr_type, len)) + + EFSYS_ASSERT3U(efx_nic_cfg_get(enp)->enc_dma_mapping, ==, + EFX_NIC_DMA_MAPPING_FLAT); + + /* No re-mapping is required */ + *nic_addrp = trgt_addr; + + return (0); +} + +static __checkReturn efx_rc_t +efx_nic_dma_regioned_map( + __in efx_nic_t *enp, + __in efx_nic_dma_addr_type_t addr_type, + __in efsys_dma_addr_t 
trgt_addr, + __in size_t len, + __out efsys_dma_addr_t *nic_addrp) +{ + const efx_nic_dma_region_t *region; + efsys_lock_state_t state; + efx_rc_t rc; + + if (efx_nic_cfg_get(enp)->enc_dma_mapping != + EFX_NIC_DMA_MAPPING_REGIONED) { + rc = EINVAL; + goto fail1; + } + + switch (addr_type) { + case EFX_NIC_DMA_ADDR_MCDI_BUF: + case EFX_NIC_DMA_ADDR_MAC_STATS_BUF: + /* + * MC cares about MCDI buffer mapping itself since it cannot + * be really mapped using MCDI because mapped MCDI buffer is + * required to execute MCDI commands. It is not a problem + * for MAC stats buffer, but since MC can care about mapping + * itself, it may be done for MAC stats buffer as well. + */ + *nic_addrp = trgt_addr; + goto out; + + case EFX_NIC_DMA_ADDR_EVENT_RING: + case EFX_NIC_DMA_ADDR_RX_RING: + case EFX_NIC_DMA_ADDR_TX_RING: + case EFX_NIC_DMA_ADDR_RX_BUF: + case EFX_NIC_DMA_ADDR_TX_BUF: + /* Rings and buffer addresses should be mapped */ + break; + + default: + rc = EINVAL; + goto fail2; + } + + EFSYS_LOCK(enp->en_eslp, state); + + rc = efx_nic_dma_config_regioned_find_region(enp, trgt_addr, len, + ®ion); + if (rc != 0) + goto fail3; + + *nic_addrp = region->endr_nic_base + + (trgt_addr - region->endr_trgt_base); + + EFSYS_UNLOCK(enp->en_eslp, state); + +out: + return (0); + +fail3: + EFSYS_PROBE(fail3); + EFSYS_UNLOCK(enp->en_eslp, state); +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + + __checkReturn efx_rc_t +efx_nic_dma_map( + __in efx_nic_t *enp, + __in efx_nic_dma_addr_type_t addr_type, + __in efsys_dma_addr_t trgt_addr, + __in size_t len, + __out efsys_dma_addr_t *nic_addrp) +{ + efx_nic_dma_mapping_t mapping; + efx_rc_t rc; + + /* + * We cannot check configuration of a NIC that hasn't been probed. + * Use EFX_NIC_DMA_MAPPING_UNKNOWN by default. 
+ */ + if ((enp->en_mod_flags & EFX_MOD_PROBE) == 0) + mapping = EFX_NIC_DMA_MAPPING_UNKNOWN; + else + mapping = efx_nic_cfg_get(enp)->enc_dma_mapping; + + switch (mapping) { + case EFX_NIC_DMA_MAPPING_UNKNOWN: + rc = efx_nic_dma_unknown_map(enp, addr_type, trgt_addr, + len, nic_addrp); + if (rc != 0) + goto fail1; + break; + case EFX_NIC_DMA_MAPPING_FLAT: + rc = efx_nic_dma_flat_map(enp, addr_type, trgt_addr, + len, nic_addrp); + if (rc != 0) + goto fail2; + break; + case EFX_NIC_DMA_MAPPING_REGIONED: + rc = efx_nic_dma_regioned_map(enp, addr_type, trgt_addr, + len, nic_addrp); + if (rc != 0) + goto fail3; + break; + default: + rc = ENOTSUP; + goto fail4; + } + + return (0); + +fail4: + EFSYS_PROBE(fail4); +fail3: + EFSYS_PROBE(fail3); +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} diff --git a/drivers/common/sfc_efx/base/siena_nic.c b/drivers/common/sfc_efx/base/siena_nic.c index 8b810d3ae3..e42599131a 100644 --- a/drivers/common/sfc_efx/base/siena_nic.c +++ b/drivers/common/sfc_efx/base/siena_nic.c @@ -199,6 +199,8 @@ siena_board_cfg( encp->enc_mae_supported = B_FALSE; encp->enc_mae_admin = B_FALSE; + encp->enc_dma_mapping = EFX_NIC_DMA_MAPPING_FLAT; + return (0); fail2: diff --git a/drivers/common/sfc_efx/version.map b/drivers/common/sfc_efx/version.map index 765ca39332..3e57791c99 100644 --- a/drivers/common/sfc_efx/version.map +++ b/drivers/common/sfc_efx/version.map @@ -160,6 +160,9 @@ INTERNAL { efx_nic_check_pcie_link_speed; efx_nic_create; efx_nic_destroy; + efx_nic_dma_config_add; + efx_nic_dma_map; + efx_nic_dma_reconfigure; efx_nic_fini; efx_nic_get_bar_region; efx_nic_get_board_info; From patchwork Wed Nov 17 07:05:45 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Rybchenko X-Patchwork-Id: 104434 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: 
patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 87541A0C41; Wed, 17 Nov 2021 08:06:14 +0100 (CET) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 6F84041104; Wed, 17 Nov 2021 08:06:14 +0100 (CET) Received: from shelob.oktetlabs.ru (shelob.oktetlabs.ru [91.220.146.113]) by mails.dpdk.org (Postfix) with ESMTP id A1BD740040 for ; Wed, 17 Nov 2021 08:06:13 +0100 (CET) Received: by shelob.oktetlabs.ru (Postfix, from userid 122) id 70E207F585; Wed, 17 Nov 2021 10:06:13 +0300 (MSK) X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on shelob.oktetlabs.ru X-Spam-Level: X-Spam-Status: No, score=0.8 required=5.0 tests=ALL_TRUSTED, DKIM_ADSP_DISCARD, URIBL_BLOCKED autolearn=no autolearn_force=no version=3.4.2 Received: from aros.oktetlabs.ru (aros.oktetlabs.ru [192.168.38.17]) by shelob.oktetlabs.ru (Postfix) with ESMTP id 2A6EF7F57A; Wed, 17 Nov 2021 10:05:59 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 shelob.oktetlabs.ru 2A6EF7F57A Authentication-Results: shelob.oktetlabs.ru/2A6EF7F57A; dkim=none; dkim-atps=neutral From: Andrew Rybchenko To: dev@dpdk.org Cc: Thomas Monjalon , Ferruh Yigit , Viacheslav Galaktionov , Ivan Malov Subject: [PATCH v2 2/2] net/sfc: support regioned NIC DMA memory mapping type Date: Wed, 17 Nov 2021 10:05:45 +0300 Message-Id: <20211117070545.4004374-3-andrew.rybchenko@oktetlabs.ru> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20211117070545.4004374-1-andrew.rybchenko@oktetlabs.ru> References: <20211105083333.1960017-1-andrew.rybchenko@oktetlabs.ru> <20211117070545.4004374-1-andrew.rybchenko@oktetlabs.ru> MIME-Version: 1.0 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org From: Viacheslav Galaktionov DMA on SN1022 SoC requires extra mapping of the 
memory via MCDI. Signed-off-by: Viacheslav Galaktionov Signed-off-by: Andrew Rybchenko Reviewed-by: Ivan Malov --- doc/guides/rel_notes/release_21_11.rst | 1 + drivers/net/sfc/meson.build | 1 + drivers/net/sfc/sfc.c | 25 +- drivers/net/sfc/sfc.h | 8 +- drivers/net/sfc/sfc_dp_rx.h | 4 + drivers/net/sfc/sfc_dp_tx.h | 4 + drivers/net/sfc/sfc_ef100_rx.c | 40 ++- drivers/net/sfc/sfc_ef100_tx.c | 111 +++++--- drivers/net/sfc/sfc_ef10_essb_rx.c | 5 + drivers/net/sfc/sfc_ef10_rx.c | 5 + drivers/net/sfc/sfc_ef10_tx.c | 5 + drivers/net/sfc/sfc_ethdev.c | 13 + drivers/net/sfc/sfc_ev.c | 1 + drivers/net/sfc/sfc_mcdi.c | 5 +- drivers/net/sfc/sfc_nic_dma.c | 335 +++++++++++++++++++++++++ drivers/net/sfc/sfc_nic_dma.h | 29 +++ drivers/net/sfc/sfc_nic_dma_dp.h | 57 +++++ drivers/net/sfc/sfc_port.c | 3 +- drivers/net/sfc/sfc_rx.c | 9 +- drivers/net/sfc/sfc_tx.c | 9 +- 20 files changed, 614 insertions(+), 56 deletions(-) create mode 100644 drivers/net/sfc/sfc_nic_dma.c create mode 100644 drivers/net/sfc/sfc_nic_dma.h create mode 100644 drivers/net/sfc/sfc_nic_dma_dp.h diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst index 4db771ce5c..550be456bd 100644 --- a/doc/guides/rel_notes/release_21_11.rst +++ b/doc/guides/rel_notes/release_21_11.rst @@ -239,6 +239,7 @@ New Features * Added flow API transfer proxy support * Added SN1000 virtual functions (VF) support * Added support for flow counters without service cores + * Added support for regioned DMA mapping required on SN1022 SoC * **Added power monitor API in vhost library.** diff --git a/drivers/net/sfc/meson.build b/drivers/net/sfc/meson.build index 33087330f9..46d94184b8 100644 --- a/drivers/net/sfc/meson.build +++ b/drivers/net/sfc/meson.build @@ -100,4 +100,5 @@ sources = files( 'sfc_service.c', 'sfc_repr_proxy.c', 'sfc_repr.c', + 'sfc_nic_dma.c', ) diff --git a/drivers/net/sfc/sfc.c b/drivers/net/sfc/sfc.c index 9c1be10ac9..ed714fe02f 100644 --- a/drivers/net/sfc/sfc.c +++ 
b/drivers/net/sfc/sfc.c @@ -26,6 +26,7 @@ #include "sfc_tweak.h" #include "sfc_sw_stats.h" #include "sfc_switch.h" +#include "sfc_nic_dma.h" bool sfc_repr_supported(const struct sfc_adapter *sa) @@ -53,10 +54,12 @@ sfc_repr_available(const struct sfc_adapter_shared *sas) } int -sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id, - size_t len, int socket_id, efsys_mem_t *esmp) +sfc_dma_alloc(struct sfc_adapter *sa, const char *name, uint16_t id, + efx_nic_dma_addr_type_t addr_type, size_t len, int socket_id, + efsys_mem_t *esmp) { const struct rte_memzone *mz; + int rc; sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d", name, id, len, socket_id); @@ -69,13 +72,17 @@ sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id, rte_strerror(rte_errno)); return ENOMEM; } - - esmp->esm_addr = mz->iova; - if (esmp->esm_addr == RTE_BAD_IOVA) { + if (mz->iova == RTE_BAD_IOVA) { (void)rte_memzone_free(mz); return EFAULT; } + rc = sfc_nic_dma_mz_map(sa, mz, addr_type, &esmp->esm_addr); + if (rc != 0) { + (void)rte_memzone_free(mz); + return rc; + } + esmp->esm_mz = mz; esmp->esm_base = mz->addr; @@ -457,6 +464,13 @@ sfc_try_start(struct sfc_adapter *sa) if (rc != 0) goto fail_nic_init; + sfc_log_init(sa, "reconfigure NIC DMA"); + rc = efx_nic_dma_reconfigure(sa->nic); + if (rc != 0) { + sfc_err(sa, "cannot reconfigure NIC DMA: %s", rte_strerror(rc)); + goto fail_nic_dma_reconfigure; + } + encp = efx_nic_cfg_get(sa->nic); /* @@ -525,6 +539,7 @@ sfc_try_start(struct sfc_adapter *sa) fail_intr_start: fail_tunnel_reconfigure: +fail_nic_dma_reconfigure: efx_nic_fini(sa->nic); fail_nic_init: diff --git a/drivers/net/sfc/sfc.h b/drivers/net/sfc/sfc.h index fee1738d1d..3337cb57e3 100644 --- a/drivers/net/sfc/sfc.h +++ b/drivers/net/sfc/sfc.h @@ -35,6 +35,7 @@ #include "sfc_repr_proxy.h" #include "sfc_service.h" #include "sfc_ethdev_state.h" +#include "sfc_nic_dma_dp.h" #ifdef __cplusplus extern "C" { @@ -145,6 +146,8 @@ struct 
sfc_adapter_shared { bool counters_rxq_allocated; unsigned int nb_repr_rxq; unsigned int nb_repr_txq; + + struct sfc_nic_dma_info nic_dma_info; }; /* Adapter process private data */ @@ -392,8 +395,9 @@ sfc_get_system_msecs(void) return rte_get_timer_cycles() * MS_PER_S / rte_get_timer_hz(); } -int sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id, - size_t len, int socket_id, efsys_mem_t *esmp); +int sfc_dma_alloc(struct sfc_adapter *sa, const char *name, uint16_t id, + efx_nic_dma_addr_type_t addr_type, size_t len, int socket_id, + efsys_mem_t *esmp); void sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp); uint32_t sfc_register_logtype(const struct rte_pci_addr *pci_addr, diff --git a/drivers/net/sfc/sfc_dp_rx.h b/drivers/net/sfc/sfc_dp_rx.h index 099f026a15..760540ba22 100644 --- a/drivers/net/sfc/sfc_dp_rx.h +++ b/drivers/net/sfc/sfc_dp_rx.h @@ -14,6 +14,7 @@ #include #include "sfc_dp.h" +#include "sfc_nic_dma_dp.h" #ifdef __cplusplus extern "C" { @@ -95,6 +96,9 @@ struct sfc_dp_rx_qcreate_info { /** Mask to extract user bits from Rx prefix mark field */ uint32_t user_mark_mask; + + /** NIC's DMA mapping information */ + const struct sfc_nic_dma_info *nic_dma_info; }; /** diff --git a/drivers/net/sfc/sfc_dp_tx.h b/drivers/net/sfc/sfc_dp_tx.h index 61cc0fac6e..aad3b06595 100644 --- a/drivers/net/sfc/sfc_dp_tx.h +++ b/drivers/net/sfc/sfc_dp_tx.h @@ -15,6 +15,7 @@ #include "sfc_dp.h" #include "sfc_debug.h" #include "sfc_tso.h" +#include "sfc_nic_dma_dp.h" #ifdef __cplusplus extern "C" { @@ -80,6 +81,9 @@ struct sfc_dp_tx_qcreate_info { uint32_t tso_max_payload_len; /** Maximum number of frames to be generated per TSOv3 transaction */ uint32_t tso_max_nb_outgoing_frames; + + /** NIC's DMA mapping information */ + const struct sfc_nic_dma_info *nic_dma_info; }; /** diff --git a/drivers/net/sfc/sfc_ef100_rx.c b/drivers/net/sfc/sfc_ef100_rx.c index 259290f14a..5d16bf281d 100644 --- a/drivers/net/sfc/sfc_ef100_rx.c +++ 
b/drivers/net/sfc/sfc_ef100_rx.c @@ -27,6 +27,7 @@ #include "sfc_dp_rx.h" #include "sfc_kvargs.h" #include "sfc_ef100.h" +#include "sfc_nic_dma_dp.h" #define sfc_ef100_rx_err(_rxq, ...) \ @@ -66,6 +67,7 @@ struct sfc_ef100_rxq { #define SFC_EF100_RXQ_FLAG_INTR_EN 0x40 #define SFC_EF100_RXQ_INGRESS_MPORT 0x80 #define SFC_EF100_RXQ_USER_FLAG 0x100 +#define SFC_EF100_RXQ_NIC_DMA_MAP 0x200 unsigned int ptr_mask; unsigned int evq_phase_bit_shift; unsigned int ready_pkts; @@ -92,6 +94,8 @@ struct sfc_ef100_rxq { /* Datapath receive queue anchor */ struct sfc_dp_rxq dp; + + const struct sfc_nic_dma_info *nic_dma_info; }; static inline struct sfc_ef100_rxq * @@ -150,7 +154,6 @@ sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq) SFC_ASSERT(bulks > 0); do { - unsigned int id; unsigned int i; if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs, @@ -170,17 +173,28 @@ sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq) break; } - for (i = 0, id = added & ptr_mask; - i < RTE_DIM(objs); - ++i, ++id) { + for (i = 0; i < RTE_DIM(objs); ++i) { struct rte_mbuf *m = objs[i]; struct sfc_ef100_rx_sw_desc *rxd; - rte_iova_t phys_addr; + rte_iova_t dma_addr; __rte_mbuf_raw_sanity_check(m); - SFC_ASSERT((id & ~ptr_mask) == 0); - rxd = &rxq->sw_ring[id]; + dma_addr = rte_mbuf_data_iova_default(m); + if (rxq->flags & SFC_EF100_RXQ_NIC_DMA_MAP) { + dma_addr = sfc_nic_dma_map(rxq->nic_dma_info, + dma_addr, + rte_pktmbuf_data_len(m)); + if (unlikely(dma_addr == RTE_BAD_IOVA)) { + sfc_ef100_rx_err(rxq, + "failed to map DMA address on Rx"); + /* Just skip buffer and try to continue */ + rte_mempool_put(rxq->refill_mb_pool, m); + continue; + } + } + + rxd = &rxq->sw_ring[added & ptr_mask]; rxd->mbuf = m; /* @@ -189,12 +203,10 @@ sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq) * structure members. 
*/ - phys_addr = rte_mbuf_data_iova_default(m); - EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id], - ESF_GZ_RX_BUF_ADDR, phys_addr); + EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[added & ptr_mask], + ESF_GZ_RX_BUF_ADDR, dma_addr); + added++; } - - added += RTE_DIM(objs); } while (--bulks > 0); SFC_ASSERT(rxq->added != added); @@ -794,6 +806,10 @@ sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id, info->fcw_offset + ER_GZ_EVQ_INT_PRIME_OFST; + rxq->nic_dma_info = info->nic_dma_info; + if (rxq->nic_dma_info->nb_regions > 0) + rxq->flags |= SFC_EF100_RXQ_NIC_DMA_MAP; + sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell); *dp_rxqp = &rxq->dp; diff --git a/drivers/net/sfc/sfc_ef100_tx.c b/drivers/net/sfc/sfc_ef100_tx.c index b41eddbcca..4c2205f7a4 100644 --- a/drivers/net/sfc/sfc_ef100_tx.c +++ b/drivers/net/sfc/sfc_ef100_tx.c @@ -24,6 +24,7 @@ #include "sfc_tweak.h" #include "sfc_kvargs.h" #include "sfc_ef100.h" +#include "sfc_nic_dma_dp.h" #define sfc_ef100_tx_err(_txq, ...) \ @@ -63,6 +64,7 @@ struct sfc_ef100_txq { #define SFC_EF100_TXQ_STARTED 0x1 #define SFC_EF100_TXQ_NOT_RUNNING 0x2 #define SFC_EF100_TXQ_EXCEPTION 0x4 +#define SFC_EF100_TXQ_NIC_DMA_MAP 0x8 unsigned int ptr_mask; unsigned int added; @@ -87,6 +89,8 @@ struct sfc_ef100_txq { /* Datapath transmit queue anchor */ struct sfc_dp_txq dp; + + const struct sfc_nic_dma_info *nic_dma_info; }; static inline struct sfc_ef100_txq * @@ -342,8 +346,31 @@ sfc_ef100_tx_qdesc_cso_inner_l3(uint64_t tx_tunnel) return inner_l3; } -static void -sfc_ef100_tx_qdesc_send_create(const struct rte_mbuf *m, efx_oword_t *tx_desc) +/* + * Translate a host IOVA into the address the NIC must use for DMA. + * The address is passed through unchanged unless regioned mapping is + * enabled for the queue. Returns 0 on success or EFAULT if the buffer + * is not covered by any known NIC DMA region. + */ +static int +sfc_ef100_tx_map(const struct sfc_ef100_txq *txq, rte_iova_t iova, size_t len, + rte_iova_t *dma_addr) +{ + if ((txq->flags & SFC_EF100_TXQ_NIC_DMA_MAP) == 0) { + *dma_addr = iova; + } else { + *dma_addr = sfc_nic_dma_map(txq->nic_dma_info, iova, len); + if (unlikely(*dma_addr == RTE_BAD_IOVA)) { + sfc_ef100_tx_err(txq, "failed to map DMA address on Tx"); + return EFAULT; + } + } + return 0; +} + +static int 
+sfc_ef100_tx_qdesc_send_create(const struct sfc_ef100_txq *txq, + const struct rte_mbuf *m, efx_oword_t *tx_desc) { bool outer_l3; bool outer_l4; @@ -351,6 +370,8 @@ sfc_ef100_tx_qdesc_send_create(const struct rte_mbuf *m, efx_oword_t *tx_desc) uint8_t partial_en; uint16_t part_cksum_w; uint16_t l4_offset_w; + rte_iova_t dma_addr; + int rc; if ((m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) == 0) { outer_l3 = (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM); @@ -384,8 +405,13 @@ sfc_ef100_tx_qdesc_send_create(const struct rte_mbuf *m, efx_oword_t *tx_desc) m->l2_len + m->l3_len) >> 1; } + rc = sfc_ef100_tx_map(txq, rte_mbuf_data_iova_default(m), + rte_pktmbuf_data_len(m), &dma_addr); + if (unlikely(rc != 0)) + return rc; + EFX_POPULATE_OWORD_10(*tx_desc, - ESF_GZ_TX_SEND_ADDR, rte_mbuf_data_iova(m), + ESF_GZ_TX_SEND_ADDR, dma_addr, ESF_GZ_TX_SEND_LEN, rte_pktmbuf_data_len(m), ESF_GZ_TX_SEND_NUM_SEGS, m->nb_segs, ESF_GZ_TX_SEND_CSO_PARTIAL_START_W, l4_offset_w, @@ -405,6 +431,8 @@ sfc_ef100_tx_qdesc_send_create(const struct rte_mbuf *m, efx_oword_t *tx_desc) EFX_OR_OWORD(*tx_desc, tx_desc_extra_fields); } + + return 0; } static void @@ -554,11 +582,11 @@ sfc_ef100_tx_pkt_descs_max(const struct rte_mbuf *m) return m->nb_segs + extra_descs; } -static struct rte_mbuf * +static int sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq, - struct rte_mbuf *m, unsigned int *added) + struct rte_mbuf **m, unsigned int *added) { - struct rte_mbuf *m_seg = m; + struct rte_mbuf *m_seg = *m; unsigned int nb_hdr_descs; unsigned int nb_pld_descs; unsigned int seg_split = 0; @@ -570,17 +598,19 @@ sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq, size_t tcph_off; size_t header_len; size_t remaining_hdr_len; + rte_iova_t dma_addr; + int rc; - if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) { - outer_iph_off = m->outer_l2_len; - outer_udph_off = outer_iph_off + m->outer_l3_len; + if (m_seg->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) { + outer_iph_off = m_seg->outer_l2_len; + outer_udph_off = 
outer_iph_off + m_seg->outer_l3_len; } else { outer_iph_off = 0; outer_udph_off = 0; } - iph_off = outer_udph_off + m->l2_len; - tcph_off = iph_off + m->l3_len; - header_len = tcph_off + m->l4_len; + iph_off = outer_udph_off + m_seg->l2_len; + tcph_off = iph_off + m_seg->l3_len; + header_len = tcph_off + m_seg->l4_len; /* * Remember ID of the TX_TSO descriptor to be filled in. @@ -592,11 +622,15 @@ sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq, remaining_hdr_len = header_len; do { + rc = sfc_ef100_tx_map(txq, rte_mbuf_data_iova(m_seg), + rte_pktmbuf_data_len(m_seg), &dma_addr); + if (unlikely(rc != 0)) + return rc; + id = (*added)++ & txq->ptr_mask; if (rte_pktmbuf_data_len(m_seg) <= remaining_hdr_len) { /* The segment is fully header segment */ - sfc_ef100_tx_qdesc_seg_create( - rte_mbuf_data_iova(m_seg), + sfc_ef100_tx_qdesc_seg_create(dma_addr, rte_pktmbuf_data_len(m_seg), &txq->txq_hw_ring[id]); remaining_hdr_len -= rte_pktmbuf_data_len(m_seg); @@ -605,15 +639,13 @@ sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq, * The segment must be split into header and * payload segments */ - sfc_ef100_tx_qdesc_seg_create( - rte_mbuf_data_iova(m_seg), - remaining_hdr_len, - &txq->txq_hw_ring[id]); - SFC_ASSERT(txq->sw_ring[id].mbuf == NULL); + sfc_ef100_tx_qdesc_seg_create(dma_addr, + remaining_hdr_len, &txq->txq_hw_ring[id]); + txq->sw_ring[id].mbuf = NULL; id = (*added)++ & txq->ptr_mask; sfc_ef100_tx_qdesc_seg_create( - rte_mbuf_data_iova(m_seg) + remaining_hdr_len, + dma_addr + remaining_hdr_len, rte_pktmbuf_data_len(m_seg) - remaining_hdr_len, &txq->txq_hw_ring[id]); remaining_hdr_len = 0; @@ -628,15 +660,16 @@ sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq, * pointer counts it twice and we should correct it. 
*/ nb_hdr_descs = ((id - tso_desc_id) & txq->ptr_mask) - seg_split; - nb_pld_descs = m->nb_segs - nb_hdr_descs + seg_split; + nb_pld_descs = (*m)->nb_segs - nb_hdr_descs + seg_split; - sfc_ef100_tx_qdesc_tso_create(m, nb_hdr_descs, nb_pld_descs, header_len, - rte_pktmbuf_pkt_len(m) - header_len, + sfc_ef100_tx_qdesc_tso_create(*m, nb_hdr_descs, nb_pld_descs, header_len, + rte_pktmbuf_pkt_len(*m) - header_len, outer_iph_off, outer_udph_off, iph_off, tcph_off, &txq->txq_hw_ring[tso_desc_id]); - return m_seg; + *m = m_seg; + return 0; } static uint16_t @@ -648,6 +681,8 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) bool reap_done; struct rte_mbuf **pktp; struct rte_mbuf **pktp_end; + rte_iova_t dma_addr; + int rc; if (unlikely(txq->flags & (SFC_EF100_TXQ_NOT_RUNNING | SFC_EF100_TXQ_EXCEPTION))) @@ -694,14 +729,15 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) id = added++ & txq->ptr_mask; sfc_ef100_tx_qdesc_prefix_create(m_seg, &txq->txq_hw_ring[id]); + txq->sw_ring[id].mbuf = NULL; } if (m_seg->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { - m_seg = sfc_ef100_xmit_tso_pkt(txq, m_seg, &added); + rc = sfc_ef100_xmit_tso_pkt(txq, &m_seg, &added); } else { id = added++ & txq->ptr_mask; - sfc_ef100_tx_qdesc_send_create(m_seg, - &txq->txq_hw_ring[id]); + rc = sfc_ef100_tx_qdesc_send_create(txq, m_seg, + &txq->txq_hw_ring[id]); /* * rte_pktmbuf_free() is commonly used in DPDK for @@ -722,22 +758,29 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) m_seg = m_seg->next; } - while (m_seg != NULL) { + while (likely(rc == 0) && m_seg != NULL) { RTE_BUILD_BUG_ON(SFC_MBUF_SEG_LEN_MAX > SFC_EF100_TX_SEG_DESC_LEN_MAX); id = added++ & txq->ptr_mask; - sfc_ef100_tx_qdesc_seg_create(rte_mbuf_data_iova(m_seg), + rc = sfc_ef100_tx_map(txq, rte_mbuf_data_iova(m_seg), + rte_pktmbuf_data_len(m_seg), + &dma_addr); + sfc_ef100_tx_qdesc_seg_create(dma_addr, rte_pktmbuf_data_len(m_seg), 
&txq->txq_hw_ring[id]); txq->sw_ring[id].mbuf = m_seg; m_seg = m_seg->next; } - dma_desc_space -= (added - pkt_start); + if (likely(rc == 0)) { + dma_desc_space -= (added - pkt_start); - sfc_pkts_bytes_add(&txq->dp.dpq.stats, 1, - rte_pktmbuf_pkt_len(*pktp)); + sfc_pkts_bytes_add(&txq->dp.dpq.stats, 1, + rte_pktmbuf_pkt_len(*pktp)); + } else { + added = pkt_start; + } } if (likely(added != txq->added)) { @@ -837,6 +880,10 @@ sfc_ef100_tx_qcreate(uint16_t port_id, uint16_t queue_id, txq->tso_max_payload_len = info->tso_max_payload_len; txq->tso_max_nb_outgoing_frames = info->tso_max_nb_outgoing_frames; + txq->nic_dma_info = info->nic_dma_info; + if (txq->nic_dma_info->nb_regions > 0) + txq->flags |= SFC_EF100_TXQ_NIC_DMA_MAP; + sfc_ef100_tx_debug(txq, "TxQ doorbell is %p", txq->doorbell); *dp_txqp = &txq->dp; diff --git a/drivers/net/sfc/sfc_ef10_essb_rx.c b/drivers/net/sfc/sfc_ef10_essb_rx.c index 4f7d712297..712c207617 100644 --- a/drivers/net/sfc/sfc_ef10_essb_rx.c +++ b/drivers/net/sfc/sfc_ef10_essb_rx.c @@ -573,6 +573,10 @@ sfc_ef10_essb_rx_qcreate(uint16_t port_id, uint16_t queue_id, struct sfc_ef10_essb_rxq *rxq; int rc; + rc = ENOTSUP; + if (info->nic_dma_info->nb_regions > 0) + goto fail_nic_dma; + rc = rte_mempool_ops_get_info(mp, &mp_info); if (rc != 0) { /* Positive errno is used in the driver */ @@ -641,6 +645,7 @@ sfc_ef10_essb_rx_qcreate(uint16_t port_id, uint16_t queue_id, fail_rxq_alloc: fail_no_block_dequeue: fail_get_contig_block_size: +fail_nic_dma: return rc; } diff --git a/drivers/net/sfc/sfc_ef10_rx.c b/drivers/net/sfc/sfc_ef10_rx.c index 8503c3c15f..7be224c9c4 100644 --- a/drivers/net/sfc/sfc_ef10_rx.c +++ b/drivers/net/sfc/sfc_ef10_rx.c @@ -651,6 +651,10 @@ sfc_ef10_rx_qcreate(uint16_t port_id, uint16_t queue_id, if (info->rxq_entries != info->evq_entries) goto fail_rxq_args; + rc = ENOTSUP; + if (info->nic_dma_info->nb_regions > 0) + goto fail_nic_dma; + rc = ENOMEM; rxq = rte_zmalloc_socket("sfc-ef10-rxq", sizeof(*rxq), 
RTE_CACHE_LINE_SIZE, socket_id); @@ -696,6 +700,7 @@ sfc_ef10_rx_qcreate(uint16_t port_id, uint16_t queue_id, rte_free(rxq); fail_rxq_alloc: +fail_nic_dma: fail_rxq_args: return rc; } diff --git a/drivers/net/sfc/sfc_ef10_tx.c b/drivers/net/sfc/sfc_ef10_tx.c index 2463c1423a..5403a60707 100644 --- a/drivers/net/sfc/sfc_ef10_tx.c +++ b/drivers/net/sfc/sfc_ef10_tx.c @@ -942,6 +942,10 @@ sfc_ef10_tx_qcreate(uint16_t port_id, uint16_t queue_id, if (info->txq_entries != info->evq_entries) goto fail_bad_args; + rc = ENOTSUP; + if (info->nic_dma_info->nb_regions > 0) + goto fail_nic_dma; + rc = ENOMEM; txq = rte_zmalloc_socket("sfc-ef10-txq", sizeof(*txq), RTE_CACHE_LINE_SIZE, socket_id); @@ -995,6 +999,7 @@ sfc_ef10_tx_qcreate(uint16_t port_id, uint16_t queue_id, rte_free(txq); fail_txq_alloc: +fail_nic_dma: fail_bad_args: return rc; } diff --git a/drivers/net/sfc/sfc_ethdev.c b/drivers/net/sfc/sfc_ethdev.c index a8b971a6c4..d4210b63dd 100644 --- a/drivers/net/sfc/sfc_ethdev.c +++ b/drivers/net/sfc/sfc_ethdev.c @@ -32,6 +32,7 @@ #include "sfc_repr.h" #include "sfc_sw_stats.h" #include "sfc_switch.h" +#include "sfc_nic_dma.h" #define SFC_XSTAT_ID_INVALID_VAL UINT64_MAX #define SFC_XSTAT_ID_INVALID_NAME '\0' @@ -375,6 +376,7 @@ sfc_dev_close(struct rte_eth_dev *dev) sfc_eth_dev_clear_ops(dev); + sfc_nic_dma_detach(sa); sfc_detach(sa); sfc_unprobe(sa); @@ -2840,11 +2842,22 @@ sfc_eth_dev_init(struct rte_eth_dev *dev, void *init_params) from = (const struct rte_ether_addr *)(encp->enc_mac_addr); rte_ether_addr_copy(from, &dev->data->mac_addrs[0]); + /* + * Setup the NIC DMA mapping handler. All internal mempools + * MUST be created on attach before this point, and the + * adapter MUST NOT create mempools with the adapter lock + * held after this point. 
+ */ + rc = sfc_nic_dma_attach(sa); + if (rc != 0) + goto fail_nic_dma_attach; + sfc_adapter_unlock(sa); sfc_log_init(sa, "done"); return 0; +fail_nic_dma_attach: fail_switchdev_no_mae: sfc_detach(sa); diff --git a/drivers/net/sfc/sfc_ev.c b/drivers/net/sfc/sfc_ev.c index ba4409369a..f949abbfc3 100644 --- a/drivers/net/sfc/sfc_ev.c +++ b/drivers/net/sfc/sfc_ev.c @@ -911,6 +911,7 @@ sfc_ev_qinit(struct sfc_adapter *sa, /* Allocate DMA space */ rc = sfc_dma_alloc(sa, sfc_evq_type2str(type), type_index, + EFX_NIC_DMA_ADDR_EVENT_RING, efx_evq_size(sa->nic, evq->entries, sa->evq_flags), socket_id, &evq->mem); if (rc != 0) diff --git a/drivers/net/sfc/sfc_mcdi.c b/drivers/net/sfc/sfc_mcdi.c index 1a237650cd..65e5e3817c 100644 --- a/drivers/net/sfc/sfc_mcdi.c +++ b/drivers/net/sfc/sfc_mcdi.c @@ -19,9 +19,10 @@ static int sfc_mcdi_dma_alloc(void *cookie, const char *name, size_t len, efsys_mem_t *esmp) { - const struct sfc_adapter *sa = cookie; + struct sfc_adapter *sa = cookie; - return sfc_dma_alloc(sa, name, 0, len, sa->socket_id, esmp); + return sfc_dma_alloc(sa, name, 0, EFX_NIC_DMA_ADDR_MCDI_BUF, len, + sa->socket_id, esmp); } static sfc_efx_mcdi_dma_free_cb sfc_mcdi_dma_free; diff --git a/drivers/net/sfc/sfc_nic_dma.c b/drivers/net/sfc/sfc_nic_dma.c new file mode 100644 index 0000000000..59bc113939 --- /dev/null +++ b/drivers/net/sfc/sfc_nic_dma.c @@ -0,0 +1,335 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2021 Xilinx, Inc. 
+ */ + +#include <rte_mempool.h> +#include <rte_memzone.h> + +#include "efx.h" + +#include "sfc_log.h" +#include "sfc.h" +#include "sfc_nic_dma.h" + +static int +sfc_nic_dma_add_region(struct sfc_nic_dma_info *nic_dma_info, + rte_iova_t nic_base, rte_iova_t trgt_base, + size_t map_len) +{ + struct sfc_nic_dma_region *region; + + if (nic_dma_info->nb_regions >= RTE_DIM(nic_dma_info->regions)) + return ENOMEM; + + region = &nic_dma_info->regions[nic_dma_info->nb_regions]; + region->nic_base = nic_base; + region->trgt_base = trgt_base; + region->trgt_end = trgt_base + map_len; + + nic_dma_info->nb_regions++; + return 0; +} + +/* + * Register mapping for all IOVA mempools at the time of creation to + * have mapping for all mbufs. + */ + +struct sfc_nic_dma_register_mempool_data { + struct sfc_adapter *sa; + int rc; +}; + +static void +sfc_nic_dma_register_mempool_chunk(struct rte_mempool *mp __rte_unused, + void *opaque, + struct rte_mempool_memhdr *memhdr, + unsigned mem_idx __rte_unused) +{ + struct sfc_nic_dma_register_mempool_data *register_data = opaque; + struct sfc_adapter *sa = register_data->sa; + struct sfc_adapter_shared *sas = sfc_sa2shared(sa); + efsys_dma_addr_t nic_base; + efsys_dma_addr_t trgt_base; + size_t map_len; + int rc; + + if (memhdr->iova == RTE_BAD_IOVA) + return; + + /* + * Check if the memory chunk is mapped already. In that case, there's + * nothing left to do. 
+ */ + nic_base = sfc_nic_dma_map(&sas->nic_dma_info, memhdr->iova, + memhdr->len); + if (nic_base != RTE_BAD_IOVA) + return; + + rc = efx_nic_dma_config_add(sa->nic, memhdr->iova, memhdr->len, + &nic_base, &trgt_base, &map_len); + if (rc != 0) { + sfc_err(sa, + "cannot handle memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s", + memhdr->addr, (uint64_t)memhdr->iova, memhdr->len, + rte_strerror(rc)); + register_data->rc = rc; + return; + } + + sfc_info(sa, + "registered memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 " -> NIC_BASE=%" PRIx64 " TRGT_BASE=%" PRIx64 " MAP_LEN=%" PRIx64, + memhdr->addr, (uint64_t)memhdr->iova, memhdr->len, + (uint64_t)nic_base, (uint64_t)trgt_base, (uint64_t)map_len); + + rc = sfc_nic_dma_add_region(&sas->nic_dma_info, nic_base, trgt_base, + map_len); + if (rc != 0) { + sfc_err(sa, "failed to add regioned NIC DMA mapping: %s", + rte_strerror(rc)); + register_data->rc = rc; + } +} + +static int +sfc_nic_dma_register_mempool(struct sfc_adapter *sa, struct rte_mempool *mp) +{ + struct sfc_nic_dma_register_mempool_data register_data = { + .sa = sa, + }; + uint32_t iters; + int result = 0; + int rc; + + SFC_ASSERT(sfc_adapter_is_locked(sa)); + + if (mp->flags & RTE_MEMPOOL_F_NON_IO) + return 0; + + iters = rte_mempool_mem_iter(mp, sfc_nic_dma_register_mempool_chunk, + &register_data); + if (iters != mp->nb_mem_chunks) { + sfc_err(sa, + "failed to iterate over memory chunks, some mbufs may be unusable"); + result = EFAULT; + /* + * Return an error, but try to continue if error is + * async and cannot be handled properly. + */ + } + + if (register_data.rc != 0) { + sfc_err(sa, + "failed to map some memory chunks (%s), some mbufs may be unusable", + rte_strerror(register_data.rc)); + result = register_data.rc; + /* Try to continue */ + } + + /* + * There is no point to apply mapping changes triggered by mempool + * registration. Configuration will be propagated on start and + * mbufs mapping is required in started state only. 
+ */ + if (sa->state == SFC_ETHDEV_STARTED) { + /* + * It's safe to reconfigure the DMA mapping even if no changes + * have been made during memory chunks iteration. In that case, + * this operation will not change anything either. + */ + rc = efx_nic_dma_reconfigure(sa->nic); + if (rc != 0) { + sfc_err(sa, "cannot reconfigure NIC DMA: %s", + rte_strerror(rc)); + result = rc; + } + } + + return result; +} + +static void +sfc_mempool_event_cb(enum rte_mempool_event event, struct rte_mempool *mp, + void *user_data) +{ + struct sfc_adapter *sa = user_data; + + if (event != RTE_MEMPOOL_EVENT_READY) + return; + + sfc_adapter_lock(sa); + + (void)sfc_nic_dma_register_mempool(sa, mp); + + sfc_adapter_unlock(sa); +} + +struct sfc_mempool_walk_data { + struct sfc_adapter *sa; + int rc; +}; + +static void +sfc_mempool_walk_cb(struct rte_mempool *mp, void *arg) +{ + struct sfc_mempool_walk_data *walk_data = arg; + int rc; + + rc = sfc_nic_dma_register_mempool(walk_data->sa, mp); + if (rc != 0) + walk_data->rc = rc; +} + +static int +sfc_nic_dma_attach_regioned(struct sfc_adapter *sa) +{ + struct sfc_adapter_shared *sas = sfc_sa2shared(sa); + struct sfc_mempool_walk_data walk_data = { + .sa = sa, + }; + int rc; + + rc = rte_mempool_event_callback_register(sfc_mempool_event_cb, sa); + if (rc != 0) { + sfc_err(sa, "failed to register mempool event callback"); + rc = EFAULT; + goto fail_mempool_event_callback_register; + } + + rte_mempool_walk(sfc_mempool_walk_cb, &walk_data); + if (walk_data.rc != 0) { + rc = walk_data.rc; + goto fail_mempool_walk; + } + + return 0; + +fail_mempool_walk: + rte_mempool_event_callback_unregister(sfc_mempool_event_cb, sa); + sas->nic_dma_info.nb_regions = 0; + +fail_mempool_event_callback_register: + return rc; +} + +static void +sfc_nic_dma_detach_regioned(struct sfc_adapter *sa) +{ + struct sfc_adapter_shared *sas = sfc_sa2shared(sa); + + rte_mempool_event_callback_unregister(sfc_mempool_event_cb, sa); + sas->nic_dma_info.nb_regions = 0; +} + +int 
+sfc_nic_dma_attach(struct sfc_adapter *sa) +{ + const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); + int rc; + + sfc_log_init(sa, "dma_mapping_type=%u", encp->enc_dma_mapping); + + switch (encp->enc_dma_mapping) { + case EFX_NIC_DMA_MAPPING_FLAT: + /* No mapping required */ + rc = 0; + break; + case EFX_NIC_DMA_MAPPING_REGIONED: + rc = sfc_nic_dma_attach_regioned(sa); + break; + default: + rc = ENOTSUP; + break; + } + + sfc_log_init(sa, "done: %s", rte_strerror(rc)); + return rc; +} + +void +sfc_nic_dma_detach(struct sfc_adapter *sa) +{ + const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); + + sfc_log_init(sa, "dma_mapping_type=%u", encp->enc_dma_mapping); + + switch (encp->enc_dma_mapping) { + case EFX_NIC_DMA_MAPPING_FLAT: + /* Nothing to do here */ + break; + case EFX_NIC_DMA_MAPPING_REGIONED: + sfc_nic_dma_detach_regioned(sa); + break; + default: + break; + } + + sfc_log_init(sa, "done"); +} + +int +sfc_nic_dma_mz_map(struct sfc_adapter *sa, const struct rte_memzone *mz, + efx_nic_dma_addr_type_t addr_type, + efsys_dma_addr_t *dma_addr) +{ + efsys_dma_addr_t nic_base; + efsys_dma_addr_t trgt_base; + size_t map_len; + int rc; + + /* + * Check if the memzone can be mapped already without changing the DMA + * configuration. + * libefx is used instead of the driver cache since it can take the type + * of the buffer into account and make a better decision when it comes + * to buffers that are mapped by the FW itself. 
+ */ + rc = efx_nic_dma_map(sa->nic, addr_type, mz->iova, mz->len, dma_addr); + if (rc == 0) + return 0; + + if (rc != ENOENT) { + sfc_err(sa, + "failed to map memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s", + mz->addr, (uint64_t)mz->iova, mz->len, + rte_strerror(rc)); + return rc; + } + + rc = efx_nic_dma_config_add(sa->nic, mz->iova, mz->len, + &nic_base, &trgt_base, &map_len); + if (rc != 0) { + sfc_err(sa, + "cannot handle memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s", + mz->addr, (uint64_t)mz->iova, mz->len, + rte_strerror(rc)); + return EFAULT; + } + + rc = sfc_nic_dma_add_region(&sfc_sa2shared(sa)->nic_dma_info, + nic_base, trgt_base, map_len); + if (rc != 0) { + sfc_err(sa, + "failed to add DMA region VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s", + mz->addr, (uint64_t)mz->iova, mz->len, + rte_strerror(rc)); + return rc; + } + + rc = efx_nic_dma_reconfigure(sa->nic); + if (rc != 0) { + sfc_err(sa, "failed to reconfigure DMA"); + return rc; + } + + rc = efx_nic_dma_map(sa->nic, addr_type, mz->iova, mz->len, dma_addr); + if (rc != 0) { + sfc_err(sa, + "failed to map memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s", + mz->addr, (uint64_t)mz->iova, mz->len, + rte_strerror(rc)); + return rc; + } + + return 0; +} diff --git a/drivers/net/sfc/sfc_nic_dma.h b/drivers/net/sfc/sfc_nic_dma.h new file mode 100644 index 0000000000..46ce74ecac --- /dev/null +++ b/drivers/net/sfc/sfc_nic_dma.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2021 Xilinx, Inc. 
+ */ + +#ifndef _SFC_NIC_DMA_H +#define _SFC_NIC_DMA_H + +#include + +#include "efx.h" + +#include "sfc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int sfc_nic_dma_attach(struct sfc_adapter *sa); +void sfc_nic_dma_detach(struct sfc_adapter *sa); + +int sfc_nic_dma_mz_map(struct sfc_adapter *sa, const struct rte_memzone *mz, + efx_nic_dma_addr_type_t addr_type, + efsys_dma_addr_t *dma_addr); + +#ifdef __cplusplus +} +#endif + +#endif /* _SFC_NIC_DMA_H */ diff --git a/drivers/net/sfc/sfc_nic_dma_dp.h b/drivers/net/sfc/sfc_nic_dma_dp.h new file mode 100644 index 0000000000..b7e52ec9b4 --- /dev/null +++ b/drivers/net/sfc/sfc_nic_dma_dp.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2021 Xilinx, Inc. + */ + +#ifndef _SFC_NIC_DMA_DP_H +#define _SFC_NIC_DMA_DP_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define SFC_NIC_DMA_REGIONS_MAX 2 + +struct sfc_nic_dma_region { + rte_iova_t nic_base; + rte_iova_t trgt_base; + rte_iova_t trgt_end; +}; + +/** Driver cache for NIC DMA regions */ +struct sfc_nic_dma_info { + struct sfc_nic_dma_region regions[SFC_NIC_DMA_REGIONS_MAX]; + unsigned int nb_regions; +}; + +static inline rte_iova_t +sfc_nic_dma_map(const struct sfc_nic_dma_info *nic_dma_info, + rte_iova_t trgt_addr, size_t len) +{ + unsigned int i; + + for (i = 0; i < nic_dma_info->nb_regions; i++) { + const struct sfc_nic_dma_region *region; + + region = &nic_dma_info->regions[i]; + /* + * Do not sum trgt_addr and len to avoid overflow + * checking. 
+ */ + if (region->trgt_base <= trgt_addr && + trgt_addr <= region->trgt_end && + len <= region->trgt_end - trgt_addr) { + return region->nic_base + + (trgt_addr - region->trgt_base); + } + } + + return RTE_BAD_IOVA; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _SFC_NIC_DMA_DP_H */ diff --git a/drivers/net/sfc/sfc_port.c b/drivers/net/sfc/sfc_port.c index 27b02b1119..91139375ea 100644 --- a/drivers/net/sfc/sfc_port.c +++ b/drivers/net/sfc/sfc_port.c @@ -440,7 +440,8 @@ sfc_port_attach(struct sfc_adapter *sa) mac_nstats = efx_nic_cfg_get(sa->nic)->enc_mac_stats_nstats; mac_stats_size = RTE_ALIGN(mac_nstats * sizeof(uint64_t), EFX_BUF_SIZE); - rc = sfc_dma_alloc(sa, "mac_stats", 0, mac_stats_size, + rc = sfc_dma_alloc(sa, "mac_stats", 0, EFX_NIC_DMA_ADDR_MAC_STATS_BUF, + mac_stats_size, sa->socket_id, &port->mac_stats_dma_mem); if (rc != 0) goto fail_mac_stats_dma_alloc; diff --git a/drivers/net/sfc/sfc_rx.c b/drivers/net/sfc/sfc_rx.c index 17ff2aa67a..7104284106 100644 --- a/drivers/net/sfc/sfc_rx.c +++ b/drivers/net/sfc/sfc_rx.c @@ -483,6 +483,10 @@ sfc_efx_rx_qcreate(uint16_t port_id, uint16_t queue_id, struct sfc_efx_rxq *rxq; int rc; + rc = ENOTSUP; + if (info->nic_dma_info->nb_regions > 0) + goto fail_nic_dma; + rc = ENOMEM; rxq = rte_zmalloc_socket("sfc-efx-rxq", sizeof(*rxq), RTE_CACHE_LINE_SIZE, socket_id); @@ -518,6 +522,7 @@ sfc_efx_rx_qcreate(uint16_t port_id, uint16_t queue_id, rte_free(rxq); fail_rxq_alloc: +fail_nic_dma: return rc; } @@ -1218,7 +1223,7 @@ sfc_rx_qinit(struct sfc_adapter *sa, sfc_sw_index_t sw_index, rxq->buf_size = buf_size; - rc = sfc_dma_alloc(sa, "rxq", sw_index, + rc = sfc_dma_alloc(sa, "rxq", sw_index, EFX_NIC_DMA_ADDR_RX_RING, efx_rxq_size(sa->nic, rxq_info->entries), socket_id, &rxq->mem); if (rc != 0) @@ -1248,6 +1253,8 @@ sfc_rx_qinit(struct sfc_adapter *sa, sfc_sw_index_t sw_index, info.vi_window_shift = encp->enc_vi_window_shift; info.fcw_offset = sa->fcw_offset; + info.nic_dma_info = &sas->nic_dma_info; + rc = 
sa->priv.dp_rx->qcreate(sa->eth_dev->data->port_id, sw_index, &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr, socket_id, &info, &rxq_info->dp); diff --git a/drivers/net/sfc/sfc_tx.c b/drivers/net/sfc/sfc_tx.c index d59a1af3af..0dccf21f7c 100644 --- a/drivers/net/sfc/sfc_tx.c +++ b/drivers/net/sfc/sfc_tx.c @@ -194,7 +194,7 @@ sfc_tx_qinit(struct sfc_adapter *sa, sfc_sw_index_t sw_index, SFC_TX_DEFAULT_FREE_THRESH; txq_info->offloads = offloads; - rc = sfc_dma_alloc(sa, "txq", sw_index, + rc = sfc_dma_alloc(sa, "txq", sw_index, EFX_NIC_DMA_ADDR_TX_RING, efx_txq_size(sa->nic, txq_info->entries), socket_id, &txq->mem); if (rc != 0) @@ -226,6 +226,8 @@ sfc_tx_qinit(struct sfc_adapter *sa, sfc_sw_index_t sw_index, info.tso_max_payload_len = encp->enc_tx_tso_max_payload_length; info.tso_max_nb_outgoing_frames = encp->enc_tx_tso_max_nframes; + info.nic_dma_info = &sas->nic_dma_info; + rc = sa->priv.dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index, &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr, socket_id, &info, &txq_info->dp); @@ -1082,6 +1084,10 @@ sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id, struct sfc_txq *ctrl_txq; int rc; + rc = ENOTSUP; + if (info->nic_dma_info->nb_regions > 0) + goto fail_nic_dma; + rc = ENOMEM; txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq), RTE_CACHE_LINE_SIZE, socket_id); @@ -1133,6 +1139,7 @@ sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id, rte_free(txq); fail_txq_alloc: +fail_nic_dma: return rc; }