From patchwork Thu Oct 5 02:18:51 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Chautru, Nicolas" X-Patchwork-Id: 132317 X-Patchwork-Delegate: maxime.coquelin@redhat.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 99BA6426BB; Thu, 5 Oct 2023 04:27:10 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id AEE6D40A6E; Thu, 5 Oct 2023 04:26:09 +0200 (CEST) Received: from mgamail.intel.com (mgamail.intel.com [192.55.52.151]) by mails.dpdk.org (Postfix) with ESMTP id 99FE6402CE for ; Thu, 5 Oct 2023 04:25:59 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1696472759; x=1728008759; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=XgzGLAJwW8oL7Tkul7/4gwcuGaIzEyx7KvRV4pP8NCY=; b=fnuQqUdvLUvgKWyoDHVNP+KG46SikXWSdpPQeXTFmMri+Y1JeDIg4hfn 8yeor+OybTKs1DHZaiVSitjnoqmBF0LJMxObYFphYy0wcpsu7PedUPp5f 6IVBQyuqEii0V6oyBhNUqwCw1yiePbZ8uaKNS5ufgnNzmcUfCRk89qAW2 EXYH8SVkx465UPUV+Ix5s25kH8OYizDWu00UR5Ts0kPkiwk/tOF2gwNgi DKFVYhm0DKSfZd0w2n18TT7PLdCG4zseoaoJ9nXzg3eqm3z5Rr58dJDNH B2MfoLhjPAQENPIVzM0i9JUb3F8B4+5AmhgnaP9RxmUr7jhn3gi1V4GF+ w==; X-IronPort-AV: E=McAfee;i="6600,9927,10853"; a="363657694" X-IronPort-AV: E=Sophos;i="6.03,201,1694761200"; d="scan'208";a="363657694" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by fmsmga107.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 04 Oct 2023 19:25:58 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10853"; a="781063042" X-IronPort-AV: E=Sophos;i="6.03,201,1694761200"; d="scan'208";a="781063042" Received: from spr-npg-bds1-eec2.sn.intel.com (HELO spr-npg-bds1-eec2..) 
([10.233.181.123]) by orsmga008.jf.intel.com with ESMTP; 04 Oct 2023 19:25:57 -0700 From: Nicolas Chautru To: dev@dpdk.org, maxime.coquelin@redhat.com Cc: hemant.agrawal@nxp.com, david.marchand@redhat.com, hernan.vargas@intel.com, Nicolas Chautru Subject: [PATCH v4 09/12] baseband/acc: add FFT support to VRB2 variant Date: Thu, 5 Oct 2023 02:18:51 +0000 Message-Id: <20231005021854.109096-10-nicolas.chautru@intel.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20231005021854.109096-1-nicolas.chautru@intel.com> References: <20231005021854.109096-1-nicolas.chautru@intel.com> MIME-Version: 1.0 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Add support for the FFT processing specific to the VRB2 variant. Signed-off-by: Nicolas Chautru Reviewed-by: Maxime Coquelin --- drivers/baseband/acc/rte_vrb_pmd.c | 137 +++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 8 deletions(-) diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c index 3af83b7fe4..c47416a443 100644 --- a/drivers/baseband/acc/rte_vrb_pmd.c +++ b/drivers/baseband/acc/rte_vrb_pmd.c @@ -900,6 +900,9 @@ vrb_queue_setup(struct rte_bbdev *dev, uint16_t queue_id, ACC_FCW_LD_BLEN : (conf->op_type == RTE_BBDEV_OP_FFT ? 
ACC_FCW_FFT_BLEN : ACC_FCW_MLDTS_BLEN)))); + if ((q->d->device_variant == VRB2_VARIANT) && (conf->op_type == RTE_BBDEV_OP_FFT)) + fcw_len = ACC_FCW_FFT_BLEN_3; + for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) { desc = q->ring_addr + desc_idx; desc->req.word0 = ACC_DMA_DESC_TYPE; @@ -1222,10 +1225,8 @@ vrb_dev_info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info) RTE_BBDEV_FFT_DFT_BYPASS | RTE_BBDEV_FFT_IDFT_BYPASS | RTE_BBDEV_FFT_WINDOWING_BYPASS, - .num_buffers_src = - RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, - .num_buffers_dst = - RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, + .num_buffers_src = 1, + .num_buffers_dst = 1, .fft_windows_num = ACC_MAX_FFT_WIN, } }, @@ -1321,6 +1322,23 @@ vrb_dev_info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info) .num_buffers_soft_out = 0, } }, + { + .type = RTE_BBDEV_OP_FFT, + .cap.fft = { + .capability_flags = + RTE_BBDEV_FFT_WINDOWING | + RTE_BBDEV_FFT_CS_ADJUSTMENT | + RTE_BBDEV_FFT_DFT_BYPASS | + RTE_BBDEV_FFT_IDFT_BYPASS | + RTE_BBDEV_FFT_FP16_INPUT | + RTE_BBDEV_FFT_FP16_OUTPUT | + RTE_BBDEV_FFT_POWER_MEAS | + RTE_BBDEV_FFT_WINDOWING_BYPASS, + .num_buffers_src = 1, + .num_buffers_dst = 1, + .fft_windows_num = ACC_MAX_FFT_WIN, + } + }, RTE_BBDEV_END_OF_CAPABILITIES_LIST() }; @@ -3607,6 +3625,47 @@ vrb1_fcw_fft_fill(struct rte_bbdev_fft_op *op, struct acc_fcw_fft *fcw) fcw->bypass = 0; } +/* Fill in a frame control word for FFT processing. 
*/ +static inline void +vrb2_fcw_fft_fill(struct rte_bbdev_fft_op *op, struct acc_fcw_fft_3 *fcw) +{ + fcw->in_frame_size = op->fft.input_sequence_size; + fcw->leading_pad_size = op->fft.input_leading_padding; + fcw->out_frame_size = op->fft.output_sequence_size; + fcw->leading_depad_size = op->fft.output_leading_depadding; + fcw->cs_window_sel = op->fft.window_index[0] + + (op->fft.window_index[1] << 8) + + (op->fft.window_index[2] << 16) + + (op->fft.window_index[3] << 24); + fcw->cs_window_sel2 = op->fft.window_index[4] + + (op->fft.window_index[5] << 8); + fcw->cs_enable_bmap = op->fft.cs_bitmap; + fcw->num_antennas = op->fft.num_antennas_log2; + fcw->idft_size = op->fft.idft_log2; + fcw->dft_size = op->fft.dft_log2; + fcw->cs_offset = op->fft.cs_time_adjustment; + fcw->idft_shift = op->fft.idft_shift; + fcw->dft_shift = op->fft.dft_shift; + fcw->cs_multiplier = op->fft.ncs_reciprocal; + fcw->power_shift = op->fft.power_shift; + fcw->exp_adj = op->fft.fp16_exp_adjust; + fcw->fp16_in = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_FP16_INPUT); + fcw->fp16_out = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_FP16_OUTPUT); + fcw->power_en = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_POWER_MEAS); + if (check_bit(op->fft.op_flags, + RTE_BBDEV_FFT_IDFT_BYPASS)) { + if (check_bit(op->fft.op_flags, + RTE_BBDEV_FFT_WINDOWING_BYPASS)) + fcw->bypass = 2; + else + fcw->bypass = 1; + } else if (check_bit(op->fft.op_flags, + RTE_BBDEV_FFT_DFT_BYPASS)) + fcw->bypass = 3; + else + fcw->bypass = 0; +} + static inline int vrb1_dma_desc_fft_fill(struct rte_bbdev_fft_op *op, struct acc_dma_req_desc *desc, @@ -3640,6 +3699,58 @@ vrb1_dma_desc_fft_fill(struct rte_bbdev_fft_op *op, return 0; } +static inline int +vrb2_dma_desc_fft_fill(struct rte_bbdev_fft_op *op, + struct acc_dma_req_desc *desc, + struct rte_mbuf *input, struct rte_mbuf *output, struct rte_mbuf *win_input, + struct rte_mbuf *pwr, uint32_t *in_offset, uint32_t *out_offset, + uint32_t *win_offset, uint32_t *pwr_offset) +{ + 
bool pwr_en = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_POWER_MEAS); + bool win_en = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_DEWINDOWING); + int num_cs = 0, i, bd_idx = 1; + + /* FCW already done */ + acc_header_init(desc); + + RTE_SET_USED(win_input); + RTE_SET_USED(win_offset); + + desc->data_ptrs[bd_idx].address = rte_pktmbuf_iova_offset(input, *in_offset); + desc->data_ptrs[bd_idx].blen = op->fft.input_sequence_size * ACC_IQ_SIZE; + desc->data_ptrs[bd_idx].blkid = ACC_DMA_BLKID_IN; + desc->data_ptrs[bd_idx].last = 1; + desc->data_ptrs[bd_idx].dma_ext = 0; + bd_idx++; + + desc->data_ptrs[bd_idx].address = rte_pktmbuf_iova_offset(output, *out_offset); + desc->data_ptrs[bd_idx].blen = op->fft.output_sequence_size * ACC_IQ_SIZE; + desc->data_ptrs[bd_idx].blkid = ACC_DMA_BLKID_OUT_HARD; + desc->data_ptrs[bd_idx].last = pwr_en ? 0 : 1; + desc->data_ptrs[bd_idx].dma_ext = 0; + desc->m2dlen = win_en ? 3 : 2; + desc->d2mlen = pwr_en ? 2 : 1; + desc->ib_ant_offset = op->fft.input_sequence_size; + desc->num_ant = op->fft.num_antennas_log2 - 3; + + for (i = 0; i < RTE_BBDEV_MAX_CS; i++) + if (check_bit(op->fft.cs_bitmap, 1 << i)) + num_cs++; + desc->num_cs = num_cs; + + if (pwr_en && pwr) { + bd_idx++; + desc->data_ptrs[bd_idx].address = rte_pktmbuf_iova_offset(pwr, *pwr_offset); + desc->data_ptrs[bd_idx].blen = num_cs * (1 << op->fft.num_antennas_log2) * 4; + desc->data_ptrs[bd_idx].blkid = ACC_DMA_BLKID_OUT_SOFT; + desc->data_ptrs[bd_idx].last = 1; + desc->data_ptrs[bd_idx].dma_ext = 0; + } + desc->ob_cyc_offset = op->fft.output_sequence_size; + desc->ob_ant_offset = op->fft.output_sequence_size * num_cs; + desc->op_addr = op; + return 0; +} /** Enqueue one FFT operation for device. 
*/ static inline int @@ -3647,22 +3758,32 @@ vrb_enqueue_fft_one_op(struct acc_queue *q, struct rte_bbdev_fft_op *op, uint16_t total_enqueued_cbs) { union acc_dma_desc *desc; - struct rte_mbuf *input, *output; - uint32_t in_offset, out_offset; + struct rte_mbuf *input, *output, *pwr, *win; + uint32_t in_offset, out_offset, pwr_offset, win_offset; struct acc_fcw_fft *fcw; desc = acc_desc(q, total_enqueued_cbs); input = op->fft.base_input.data; output = op->fft.base_output.data; + pwr = op->fft.power_meas_output.data; + win = op->fft.dewindowing_input.data; in_offset = op->fft.base_input.offset; out_offset = op->fft.base_output.offset; + pwr_offset = op->fft.power_meas_output.offset; + win_offset = op->fft.dewindowing_input.offset; fcw = (struct acc_fcw_fft *) (q->fcw_ring + ((q->sw_ring_head + total_enqueued_cbs) & q->sw_ring_wrap_mask) * ACC_MAX_FCW_SIZE); - vrb1_fcw_fft_fill(op, fcw); - vrb1_dma_desc_fft_fill(op, &desc->req, input, output, &in_offset, &out_offset); + if (q->d->device_variant == VRB1_VARIANT) { + vrb1_fcw_fft_fill(op, fcw); + vrb1_dma_desc_fft_fill(op, &desc->req, input, output, &in_offset, &out_offset); + } else { + vrb2_fcw_fft_fill(op, (struct acc_fcw_fft_3 *) fcw); + vrb2_dma_desc_fft_fill(op, &desc->req, input, output, win, pwr, + &in_offset, &out_offset, &win_offset, &pwr_offset); + } #ifdef RTE_LIBRTE_BBDEV_DEBUG rte_memdump(stderr, "FCW", &desc->req.fcw_fft, sizeof(desc->req.fcw_fft));