[v3,04/12] baseband/acc: allocate FCW memory separately
Checks
Commit Message
This allows more flexibility to the FCW size for the
unified driver. No actual functional change.
Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
---
drivers/baseband/acc/acc_common.h | 4 +++-
drivers/baseband/acc/rte_vrb_pmd.c | 25 ++++++++++++++++++++++++-
2 files changed, 27 insertions(+), 2 deletions(-)
Comments
On 9/29/23 18:35, Nicolas Chautru wrote:
> This allows more flexibility to the FCW size for the
> unified driver. No actual functional change.
>
> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> ---
> drivers/baseband/acc/acc_common.h | 4 +++-
> drivers/baseband/acc/rte_vrb_pmd.c | 25 ++++++++++++++++++++++++-
> 2 files changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/baseband/acc/acc_common.h b/drivers/baseband/acc/acc_common.h
> index 7d24c644c0..2c7425e524 100644
> --- a/drivers/baseband/acc/acc_common.h
> +++ b/drivers/baseband/acc/acc_common.h
> @@ -101,6 +101,7 @@
> #define ACC_NUM_QGRPS_PER_WORD 8
> #define ACC_MAX_NUM_QGRPS 32
> #define ACC_RING_SIZE_GRANULARITY 64
> +#define ACC_MAX_FCW_SIZE 128
>
> /* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
> #define ACC_N_ZC_1 66 /* N = 66 Zc for BG 1 */
> @@ -584,13 +585,14 @@ struct __rte_cache_aligned acc_queue {
> uint32_t aq_enqueued; /* Count how many "batches" have been enqueued */
> uint32_t aq_dequeued; /* Count how many "batches" have been dequeued */
> uint32_t irq_enable; /* Enable ops dequeue interrupts if set to 1 */
> - struct rte_mempool *fcw_mempool; /* FCW mempool */
> enum rte_bbdev_op_type op_type; /* Type of this Queue: TE or TD */
> /* Internal Buffers for loopback input */
> uint8_t *lb_in;
> uint8_t *lb_out;
> + uint8_t *fcw_ring;
> rte_iova_t lb_in_addr_iova;
> rte_iova_t lb_out_addr_iova;
> + rte_iova_t fcw_ring_addr_iova;
> int8_t *derm_buffer; /* interim buffer for de-rm in SDK */
> struct acc_device *d;
> };
> diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
> index f11882f90e..cf0551c0c7 100644
> --- a/drivers/baseband/acc/rte_vrb_pmd.c
> +++ b/drivers/baseband/acc/rte_vrb_pmd.c
> @@ -890,6 +890,25 @@ vrb_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
> goto free_companion_ring_addr;
> }
>
> + q->fcw_ring = rte_zmalloc_socket(dev->device->driver->name,
> + ACC_MAX_FCW_SIZE * d->sw_ring_max_depth,
> + RTE_CACHE_LINE_SIZE, conf->socket);
> + if (q->fcw_ring == NULL) {
> + rte_bbdev_log(ERR, "Failed to allocate fcw_ring memory");
> + ret = -ENOMEM;
> + goto free_companion_ring_addr;
> + }
> + q->fcw_ring_addr_iova = rte_malloc_virt2iova(q->fcw_ring);
> +
> + /* For FFT we need to store the FCW separately */
> + if (conf->op_type == RTE_BBDEV_OP_FFT) {
> + for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) {
> + desc = q->ring_addr + desc_idx;
> + desc->req.data_ptrs[0].address = q->fcw_ring_addr_iova +
> + desc_idx * ACC_MAX_FCW_SIZE;
> + }
> + }
> +
> q->qgrp_id = (q_idx >> VRB1_GRP_ID_SHIFT) & 0xF;
> q->vf_id = (q_idx >> VRB1_VF_ID_SHIFT) & 0x3F;
> q->aq_id = q_idx & 0xF;
> @@ -1001,6 +1020,7 @@ vrb_queue_release(struct rte_bbdev *dev, uint16_t q_id)
> if (q != NULL) {
> /* Mark the Queue as un-assigned. */
> d->q_assigned_bit_map[q->qgrp_id] &= (~0ULL - (1 << (uint64_t) q->aq_id));
> + rte_free(q->fcw_ring);
> rte_free(q->companion_ring_addr);
> rte_free(q->lb_in);
> rte_free(q->lb_out);
> @@ -3234,7 +3254,10 @@ vrb_enqueue_fft_one_op(struct acc_queue *q, struct rte_bbdev_fft_op *op,
> output = op->fft.base_output.data;
> in_offset = op->fft.base_input.offset;
> out_offset = op->fft.base_output.offset;
> - fcw = &desc->req.fcw_fft;
> +
> + fcw = (struct acc_fcw_fft *) (q->fcw_ring +
> + ((q->sw_ring_head + total_enqueued_cbs) & q->sw_ring_wrap_mask)
> + * ACC_MAX_FCW_SIZE);
>
> vrb1_fcw_fft_fill(op, fcw);
> vrb1_dma_desc_fft_fill(op, &desc->req, input, output, &in_offset, &out_offset);
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Thanks,
Maxime
@@ -101,6 +101,7 @@
#define ACC_NUM_QGRPS_PER_WORD 8
#define ACC_MAX_NUM_QGRPS 32
#define ACC_RING_SIZE_GRANULARITY 64
+#define ACC_MAX_FCW_SIZE 128
/* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
#define ACC_N_ZC_1 66 /* N = 66 Zc for BG 1 */
@@ -584,13 +585,14 @@ struct __rte_cache_aligned acc_queue {
uint32_t aq_enqueued; /* Count how many "batches" have been enqueued */
uint32_t aq_dequeued; /* Count how many "batches" have been dequeued */
uint32_t irq_enable; /* Enable ops dequeue interrupts if set to 1 */
- struct rte_mempool *fcw_mempool; /* FCW mempool */
enum rte_bbdev_op_type op_type; /* Type of this Queue: TE or TD */
/* Internal Buffers for loopback input */
uint8_t *lb_in;
uint8_t *lb_out;
+ uint8_t *fcw_ring;
rte_iova_t lb_in_addr_iova;
rte_iova_t lb_out_addr_iova;
+ rte_iova_t fcw_ring_addr_iova;
int8_t *derm_buffer; /* interim buffer for de-rm in SDK */
struct acc_device *d;
};
@@ -890,6 +890,25 @@ vrb_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
goto free_companion_ring_addr;
}
+ q->fcw_ring = rte_zmalloc_socket(dev->device->driver->name,
+ ACC_MAX_FCW_SIZE * d->sw_ring_max_depth,
+ RTE_CACHE_LINE_SIZE, conf->socket);
+ if (q->fcw_ring == NULL) {
+ rte_bbdev_log(ERR, "Failed to allocate fcw_ring memory");
+ ret = -ENOMEM;
+ goto free_companion_ring_addr;
+ }
+ q->fcw_ring_addr_iova = rte_malloc_virt2iova(q->fcw_ring);
+
+ /* For FFT we need to store the FCW separately */
+ if (conf->op_type == RTE_BBDEV_OP_FFT) {
+ for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) {
+ desc = q->ring_addr + desc_idx;
+ desc->req.data_ptrs[0].address = q->fcw_ring_addr_iova +
+ desc_idx * ACC_MAX_FCW_SIZE;
+ }
+ }
+
q->qgrp_id = (q_idx >> VRB1_GRP_ID_SHIFT) & 0xF;
q->vf_id = (q_idx >> VRB1_VF_ID_SHIFT) & 0x3F;
q->aq_id = q_idx & 0xF;
@@ -1001,6 +1020,7 @@ vrb_queue_release(struct rte_bbdev *dev, uint16_t q_id)
if (q != NULL) {
/* Mark the Queue as un-assigned. */
d->q_assigned_bit_map[q->qgrp_id] &= (~0ULL - (1 << (uint64_t) q->aq_id));
+ rte_free(q->fcw_ring);
rte_free(q->companion_ring_addr);
rte_free(q->lb_in);
rte_free(q->lb_out);
@@ -3234,7 +3254,10 @@ vrb_enqueue_fft_one_op(struct acc_queue *q, struct rte_bbdev_fft_op *op,
output = op->fft.base_output.data;
in_offset = op->fft.base_input.offset;
out_offset = op->fft.base_output.offset;
- fcw = &desc->req.fcw_fft;
+
+ fcw = (struct acc_fcw_fft *) (q->fcw_ring +
+ ((q->sw_ring_head + total_enqueued_cbs) & q->sw_ring_wrap_mask)
+ * ACC_MAX_FCW_SIZE);
vrb1_fcw_fft_fill(op, fcw);
vrb1_dma_desc_fft_fill(op, &desc->req, input, output, &in_offset, &out_offset);