[v3,08/12] baseband/acc: add FEC capabilities for the VRB2 variant

Message ID 20230929163516.3636499-9-nicolas.chautru@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers
Series VRB2 bbdev PMD introduction |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Chautru, Nicolas Sept. 29, 2023, 4:35 p.m. UTC
  New implementation for some of the FEC features
specific to the VRB2 variant.

Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
---
 drivers/baseband/acc/rte_vrb_pmd.c | 567 ++++++++++++++++++++++++++++-
 1 file changed, 548 insertions(+), 19 deletions(-)
  

Comments

Maxime Coquelin Oct. 3, 2023, 2:28 p.m. UTC | #1
On 9/29/23 18:35, Nicolas Chautru wrote:
> New implementation for some of the FEC features
> specific to the VRB2 variant.
> 
> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> ---
>   drivers/baseband/acc/rte_vrb_pmd.c | 567 ++++++++++++++++++++++++++++-
>   1 file changed, 548 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
> index 48e779ce77..93add82947 100644
> --- a/drivers/baseband/acc/rte_vrb_pmd.c
> +++ b/drivers/baseband/acc/rte_vrb_pmd.c
> @@ -1235,6 +1235,94 @@ vrb_dev_info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
>   	};
>   
>   	static const struct rte_bbdev_op_cap vrb2_bbdev_capabilities[] = {
> +		{
> +			.type = RTE_BBDEV_OP_TURBO_DEC,
> +			.cap.turbo_dec = {
> +				.capability_flags =
> +					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
> +					RTE_BBDEV_TURBO_CRC_TYPE_24B |
> +					RTE_BBDEV_TURBO_DEC_CRC_24B_DROP |
> +					RTE_BBDEV_TURBO_EQUALIZER |
> +					RTE_BBDEV_TURBO_SOFT_OUT_SATURATE |
> +					RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
> +					RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH |
> +					RTE_BBDEV_TURBO_SOFT_OUTPUT |
> +					RTE_BBDEV_TURBO_EARLY_TERMINATION |
> +					RTE_BBDEV_TURBO_DEC_INTERRUPTS |
> +					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
> +					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT |
> +					RTE_BBDEV_TURBO_MAP_DEC |
> +					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
> +					RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
> +				.max_llr_modulus = INT8_MAX,
> +				.num_buffers_src =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +				.num_buffers_hard_out =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +				.num_buffers_soft_out =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +			}
> +		},
> +		{
> +			.type = RTE_BBDEV_OP_TURBO_ENC,
> +			.cap.turbo_enc = {
> +				.capability_flags =
> +					RTE_BBDEV_TURBO_CRC_24B_ATTACH |
> +					RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
> +					RTE_BBDEV_TURBO_RATE_MATCH |
> +					RTE_BBDEV_TURBO_ENC_INTERRUPTS |
> +					RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
> +				.num_buffers_src =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +				.num_buffers_dst =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +			}
> +		},
> +		{
> +			.type   = RTE_BBDEV_OP_LDPC_ENC,
> +			.cap.ldpc_enc = {
> +				.capability_flags =
> +					RTE_BBDEV_LDPC_RATE_MATCH |
> +					RTE_BBDEV_LDPC_CRC_24B_ATTACH |
> +					RTE_BBDEV_LDPC_INTERLEAVER_BYPASS |
> +					RTE_BBDEV_LDPC_ENC_INTERRUPTS |
> +					RTE_BBDEV_LDPC_ENC_SCATTER_GATHER |
> +					RTE_BBDEV_LDPC_ENC_CONCATENATION,
> +				.num_buffers_src =
> +						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
> +				.num_buffers_dst =
> +						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
> +			}
> +		},
> +		{
> +			.type   = RTE_BBDEV_OP_LDPC_DEC,
> +			.cap.ldpc_dec = {
> +			.capability_flags =
> +				RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
> +				RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
> +				RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK |
> +				RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK |
> +				RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
> +				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
> +				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE |
> +				RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS |
> +				RTE_BBDEV_LDPC_DEC_SCATTER_GATHER |
> +				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION |
> +				RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION |
> +				RTE_BBDEV_LDPC_LLR_COMPRESSION |
> +				RTE_BBDEV_LDPC_SOFT_OUT_ENABLE |
> +				RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS |
> +				RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS |
> +				RTE_BBDEV_LDPC_DEC_INTERRUPTS,
> +			.llr_size = 8,
> +			.llr_decimals = 2,
> +			.num_buffers_src =
> +					RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
> +			.num_buffers_hard_out =
> +					RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
> +			.num_buffers_soft_out = 0,
> +			}
> +		},
>   		RTE_BBDEV_END_OF_CAPABILITIES_LIST()
>   	};
>   
> @@ -1774,6 +1862,141 @@ vrb1_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
>   	return 0;
>   }
>   
> +/* Fill in a frame control word for LDPC decoding. */
> +static inline void
> +vrb2_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
> +		union acc_harq_layout_data *harq_layout)
> +{
> +	uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, parity_offset;
> +	uint32_t harq_index;
> +	uint32_t l;


This is so similar with vrb1_fcw_ld_fill() that it does not make sense
to duplicate so much code.

Do you confirm there are no other difference than the SOFT_OUT stuff,
and reusing vrb2_fcw_ld_fill on VRB1 would just work as the op_flags are
checked (and they should not be set if capability is not advertized)?

> +	fcw->qm = op->ldpc_dec.q_m;
> +	fcw->nfiller = op->ldpc_dec.n_filler;
> +	fcw->BG = (op->ldpc_dec.basegraph - 1);
> +	fcw->Zc = op->ldpc_dec.z_c;
> +	fcw->ncb = op->ldpc_dec.n_cb;
> +	fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_dec.basegraph,
> +			op->ldpc_dec.rv_index);
> +	if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK)
> +		fcw->rm_e = op->ldpc_dec.cb_params.e;
> +	else
> +		fcw->rm_e = (op->ldpc_dec.tb_params.r <
> +				op->ldpc_dec.tb_params.cab) ?
> +						op->ldpc_dec.tb_params.ea :
> +						op->ldpc_dec.tb_params.eb;
> +
> +	if (unlikely(check_bit(op->ldpc_dec.op_flags,
> +			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) &&
> +			(op->ldpc_dec.harq_combined_input.length == 0))) {
> +		rte_bbdev_log(WARNING, "Null HARQ input size provided");
> +		/* Disable HARQ input in that case to carry forward. */
> +		op->ldpc_dec.op_flags ^= RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
> +	}
> +	if (unlikely(fcw->rm_e == 0)) {
> +		rte_bbdev_log(WARNING, "Null E input provided");
> +		fcw->rm_e = 2;
> +	}
> +
> +	fcw->hcin_en = check_bit(op->ldpc_dec.op_flags,
> +			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
> +	fcw->hcout_en = check_bit(op->ldpc_dec.op_flags,
> +			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
> +	fcw->crc_select = check_bit(op->ldpc_dec.op_flags,
> +			RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK);
> +	fcw->so_en = check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_SOFT_OUT_ENABLE);
> +	fcw->so_bypass_intlv = check_bit(op->ldpc_dec.op_flags,
> +			RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS);
> +	fcw->so_bypass_rm = check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS);
> +	fcw->bypass_dec = 0;
> +	fcw->bypass_intlv = check_bit(op->ldpc_dec.op_flags,
> +			RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS);
> +	if (op->ldpc_dec.q_m == 1) {
> +		fcw->bypass_intlv = 1;
> +		fcw->qm = 2;
> +	}
> +	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION)) {
> +		fcw->hcin_decomp_mode = 1;
> +		fcw->hcout_comp_mode = 1;
> +	} else if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION)) {
> +		fcw->hcin_decomp_mode = 4;
> +		fcw->hcout_comp_mode = 4;
> +	} else {
> +		fcw->hcin_decomp_mode = 0;
> +		fcw->hcout_comp_mode = 0;
> +	}
> +
> +	fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags,
> +			RTE_BBDEV_LDPC_LLR_COMPRESSION);
> +	harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset);
> +	if (fcw->hcin_en > 0) {
> +		harq_in_length = op->ldpc_dec.harq_combined_input.length;
> +		if (fcw->hcin_decomp_mode == 1)
> +			harq_in_length = harq_in_length * 8 / 6;
> +		else if (fcw->hcin_decomp_mode == 4)
> +			harq_in_length = harq_in_length * 2;
> +		harq_in_length = RTE_MIN(harq_in_length, op->ldpc_dec.n_cb
> +				- op->ldpc_dec.n_filler);
> +		harq_in_length = RTE_ALIGN_CEIL(harq_in_length, 64);
> +		fcw->hcin_size0 = harq_in_length;
> +		fcw->hcin_offset = 0;
> +		fcw->hcin_size1 = 0;
> +	} else {
> +		fcw->hcin_size0 = 0;
> +		fcw->hcin_offset = 0;
> +		fcw->hcin_size1 = 0;
> +	}
> +
> +	fcw->itmax = op->ldpc_dec.iter_max;
> +	fcw->so_it = op->ldpc_dec.iter_max;
> +	fcw->itstop = check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
> +	fcw->cnu_algo = ACC_ALGO_MSA;
> +	fcw->synd_precoder = fcw->itstop;
> +
> +	fcw->minsum_offset = 1;
> +	fcw->dec_llrclip   = 2;
> +
> +	/*
> +	 * These are all implicitly set
> +	 * fcw->synd_post = 0;
> +	 * fcw->dec_convllr = 0;
> +	 * fcw->hcout_convllr = 0;
> +	 * fcw->hcout_size1 = 0;
> +	 * fcw->hcout_offset = 0;
> +	 * fcw->negstop_th = 0;
> +	 * fcw->negstop_it = 0;
> +	 * fcw->negstop_en = 0;
> +	 * fcw->gain_i = 1;
> +	 * fcw->gain_h = 1;
> +	 */
> +	if (fcw->hcout_en > 0) {
> +		parity_offset = (op->ldpc_dec.basegraph == 1 ? 20 : 8)
> +			* op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
> +		k0_p = (fcw->k0 > parity_offset) ?
> +				fcw->k0 - op->ldpc_dec.n_filler : fcw->k0;
> +		ncb_p = fcw->ncb - op->ldpc_dec.n_filler;
> +		l = k0_p + fcw->rm_e;
> +		harq_out_length = (uint16_t) fcw->hcin_size0;
> +		harq_out_length = RTE_MIN(RTE_MAX(harq_out_length, l), ncb_p);
> +		harq_out_length = RTE_ALIGN_CEIL(harq_out_length, 64);
> +		fcw->hcout_size0 = harq_out_length;
> +		fcw->hcout_size1 = 0;
> +		fcw->hcout_offset = 0;
> +		harq_layout[harq_index].offset = fcw->hcout_offset;
> +		harq_layout[harq_index].size0 = fcw->hcout_size0;
> +	} else {
> +		fcw->hcout_size0 = 0;
> +		fcw->hcout_size1 = 0;
> +		fcw->hcout_offset = 0;
> +	}
> +
> +	fcw->tb_crc_select = 0;
> +	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK))
> +		fcw->tb_crc_select = 2;
> +	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK))
> +		fcw->tb_crc_select = 1;
> +}
> +
>   static inline void
>   vrb_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
>   		struct acc_dma_req_desc *desc,
> @@ -1817,6 +2040,139 @@ vrb_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
>   	desc->op_addr = op;
>   }
>   
> +static inline int
> +vrb2_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
> +		struct acc_dma_req_desc *desc,
> +		struct rte_mbuf **input, struct rte_mbuf *h_output,
> +		uint32_t *in_offset, uint32_t *h_out_offset,
> +		uint32_t *h_out_length, uint32_t *mbuf_total_left,
> +		uint32_t *seg_total_left, struct acc_fcw_ld *fcw)
> +{
Same here.

I compared with vrb1_dma_desc_ld_fill(), and I don't see why we need two 
functions.

The only differences are either backed by capability checks, and vrb1 
already sets fcw->hcin_decomp_mode, so this code should work as-is on 
vrb1 if I'm not mistaken.

> +	struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
> +	int next_triplet = 1; /* FCW already done. */
> +	uint32_t input_length;
> +	uint16_t output_length, crc24_overlap = 0;
> +	uint16_t sys_cols, K, h_p_size, h_np_size;
> +
> +	acc_header_init(desc);
> +
> +	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
> +		crc24_overlap = 24;
> +
> +	/* Compute some LDPC BG lengths. */
> +	input_length = fcw->rm_e;
> +	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_LLR_COMPRESSION))
> +		input_length = (input_length * 3 + 3) / 4;
> +	sys_cols = (dec->basegraph == 1) ? 22 : 10;
> +	K = sys_cols * dec->z_c;
> +	output_length = K - dec->n_filler - crc24_overlap;
> +
> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < input_length))) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
> +				*mbuf_total_left, input_length);
> +		return -1;
> +	}
> +
> +	next_triplet = acc_dma_fill_blk_type_in(desc, input,
> +			in_offset, input_length,
> +			seg_total_left, next_triplet,
> +			check_bit(op->ldpc_dec.op_flags,
> +			RTE_BBDEV_LDPC_DEC_SCATTER_GATHER));
> +
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +
> +	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
> +		if (op->ldpc_dec.harq_combined_input.data == 0) {
> +			rte_bbdev_log(ERR, "HARQ input is not defined");
> +			return -1;
> +		}
> +		h_p_size = fcw->hcin_size0 + fcw->hcin_size1;
> +		if (fcw->hcin_decomp_mode == 1)
> +			h_p_size = (h_p_size * 3 + 3) / 4;
> +		else if (fcw->hcin_decomp_mode == 4)
> +			h_p_size = h_p_size / 2;
> +		if (op->ldpc_dec.harq_combined_input.data == 0) {
> +			rte_bbdev_log(ERR, "HARQ input is not defined");
> +			return -1;
> +		}
> +		acc_dma_fill_blk_type(
> +				desc,
> +				op->ldpc_dec.harq_combined_input.data,
> +				op->ldpc_dec.harq_combined_input.offset,
> +				h_p_size,
> +				next_triplet,
> +				ACC_DMA_BLKID_IN_HARQ);
> +		next_triplet++;
> +	}
> +
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->m2dlen = next_triplet;
> +	*mbuf_total_left -= input_length;
> +
> +	next_triplet = acc_dma_fill_blk_type(desc, h_output,
> +			*h_out_offset, output_length >> 3, next_triplet,
> +			ACC_DMA_BLKID_OUT_HARD);
> +
> +	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)) {
> +		if (op->ldpc_dec.soft_output.data == 0) {
> +			rte_bbdev_log(ERR, "Soft output is not defined");
> +			return -1;
> +		}
> +		dec->soft_output.length = fcw->rm_e;
> +		acc_dma_fill_blk_type(desc, dec->soft_output.data, dec->soft_output.offset,
> +				fcw->rm_e, next_triplet, ACC_DMA_BLKID_OUT_SOFT);
> +		next_triplet++;
> +	}
> +
> +	if (check_bit(op->ldpc_dec.op_flags,
> +				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
> +		if (op->ldpc_dec.harq_combined_output.data == 0) {
> +			rte_bbdev_log(ERR, "HARQ output is not defined");
> +			return -1;
> +		}
> +
> +		/* Pruned size of the HARQ */
> +		h_p_size = fcw->hcout_size0 + fcw->hcout_size1;
> +		/* Non-Pruned size of the HARQ */
> +		h_np_size = fcw->hcout_offset > 0 ?
> +				fcw->hcout_offset + fcw->hcout_size1 :
> +				h_p_size;
> +		if (fcw->hcin_decomp_mode == 1) {
> +			h_np_size = (h_np_size * 3 + 3) / 4;
> +			h_p_size = (h_p_size * 3 + 3) / 4;
> +		} else if (fcw->hcin_decomp_mode == 4) {
> +			h_np_size = h_np_size / 2;
> +			h_p_size = h_p_size / 2;
> +		}
> +		dec->harq_combined_output.length = h_np_size;
> +		acc_dma_fill_blk_type(
> +				desc,
> +				dec->harq_combined_output.data,
> +				dec->harq_combined_output.offset,
> +				h_p_size,
> +				next_triplet,
> +				ACC_DMA_BLKID_OUT_HARQ);
> +
> +		next_triplet++;
> +	}
> +
> +	*h_out_length = output_length >> 3;
> +	dec->hard_output.length += *h_out_length;
> +	*h_out_offset += *h_out_length;
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->d2mlen = next_triplet - desc->m2dlen;
> +
> +	desc->op_addr = op;
> +
> +	return 0;
> +}
> +
>   /* Enqueue one encode operations for device in CB mode. */
>   static inline int
>   enqueue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
> @@ -1877,6 +2233,7 @@ enqueue_ldpc_enc_n_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op **ops,
>   	/** This could be done at polling. */
>   	acc_header_init(&desc->req);
>   	desc->req.numCBs = num;
> +	desc->req.dltb = 0;
>   
>   	in_length_in_bytes = ops[0]->ldpc_enc.input.data->data_len;
>   	out_length = (enc->cb_params.e + 7) >> 3;
> @@ -2102,6 +2459,105 @@ vrb1_enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op
>   	return return_descs;
>   }
>   
> +/* Fill in a frame control word for LDPC encoding. */
> +static inline void
> +vrb2_fcw_letb_fill(const struct rte_bbdev_enc_op *op, struct acc_fcw_le *fcw)
> +{
> +	fcw->qm = op->ldpc_enc.q_m;
> +	fcw->nfiller = op->ldpc_enc.n_filler;
> +	fcw->BG = (op->ldpc_enc.basegraph - 1);
> +	fcw->Zc = op->ldpc_enc.z_c;
> +	fcw->ncb = op->ldpc_enc.n_cb;
> +	fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_enc.basegraph,
> +			op->ldpc_enc.rv_index);
> +	fcw->rm_e = op->ldpc_enc.tb_params.ea;
> +	fcw->rm_e_b = op->ldpc_enc.tb_params.eb;
> +	fcw->crc_select = check_bit(op->ldpc_enc.op_flags,
> +			RTE_BBDEV_LDPC_CRC_24B_ATTACH);
> +	fcw->bypass_intlv = 0;
> +	if (op->ldpc_enc.tb_params.c > 1) {
> +		fcw->mcb_count = 0;
> +		fcw->C = op->ldpc_enc.tb_params.c;
> +		fcw->Cab = op->ldpc_enc.tb_params.cab;
> +	} else {
> +		fcw->mcb_count = 1;
> +		fcw->C = 0;
> +	}
> +}
> +
> +/* Enqueue one encode operations for device in TB mode.
> + * returns the number of descs used.
> + */
> +static inline int
> +vrb2_enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
> +		uint16_t enq_descs)
> +{
> +	union acc_dma_desc *desc = NULL;
> +	uint32_t in_offset, out_offset, out_length, seg_total_left;
> +	struct rte_mbuf *input, *output_head, *output;
> +
> +	uint16_t desc_idx = ((q->sw_ring_head + enq_descs) & q->sw_ring_wrap_mask);
> +	desc = q->ring_addr + desc_idx;

Use acc_desc()?

> +	vrb2_fcw_letb_fill(op, &desc->req.fcw_le);
> +	struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
> +	int next_triplet = 1; /* FCW already done */
> +	uint32_t in_length_in_bytes;
> +	uint16_t K, in_length_in_bits;
> +
> +	input = enc->input.data;
> +	output_head = output = enc->output.data;
> +	in_offset = enc->input.offset;
> +	out_offset = enc->output.offset;
> +	seg_total_left = rte_pktmbuf_data_len(enc->input.data) - in_offset;
> +
> +	acc_header_init(&desc->req);
> +	K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
> +	in_length_in_bits = K - enc->n_filler;
> +	if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) ||
> +			(enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH))
> +		in_length_in_bits -= 24;
> +	in_length_in_bytes = (in_length_in_bits >> 3) * enc->tb_params.c;
> +
> +	next_triplet = acc_dma_fill_blk_type_in(&desc->req, &input, &in_offset,
> +			in_length_in_bytes, &seg_total_left, next_triplet,
> +			check_bit(enc->op_flags, RTE_BBDEV_LDPC_ENC_SCATTER_GATHER));
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +	desc->req.data_ptrs[next_triplet - 1].last = 1;
> +	desc->req.m2dlen = next_triplet;
> +
> +	/* Set output length */
> +	/* Integer round up division by 8 */
> +	out_length = (enc->tb_params.ea * enc->tb_params.cab +
> +			enc->tb_params.eb * (enc->tb_params.c - enc->tb_params.cab)  + 7) >> 3;
> +
> +	next_triplet = acc_dma_fill_blk_type(&desc->req, output, out_offset,
> +			out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC);
> +	enc->output.length = out_length;
> +	out_offset += out_length;
> +	desc->req.data_ptrs[next_triplet - 1].last = 1;
> +	desc->req.data_ptrs[next_triplet - 1].dma_ext = 0;
> +	desc->req.d2mlen = next_triplet - desc->req.m2dlen;
> +	desc->req.numCBs = enc->tb_params.c;
> +	if (desc->req.numCBs > 1)
> +		desc->req.dltb = 1;
> +	desc->req.op_addr = op;
> +
> +	if (out_length < ACC_MAX_E_MBUF)
> +		mbuf_append(output_head, output, out_length);
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	rte_memdump(stderr, "FCW", &desc->req.fcw_le, sizeof(desc->req.fcw_le));
> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> +#endif
> +	/* One CB (one op) was successfully prepared to enqueue */
> +	return 1;

This function is quite different from the VRB1 variant.
Is the underlying hardware completely different, or just a different
implementation?

> +}
> +
>   /** Enqueue one decode operations for device in CB mode. */
>   static inline int
>   enqueue_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
> @@ -2215,10 +2671,16 @@ vrb_enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
>   		else
>   			seg_total_left = fcw->rm_e;
>   
> -		ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input, h_output,
> -				&in_offset, &h_out_offset,
> -				&h_out_length, &mbuf_total_left,
> -				&seg_total_left, fcw);
> +		if (q->d->device_variant == VRB1_VARIANT)
> +			ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input, h_output,
> +					&in_offset, &h_out_offset,
> +					&h_out_length, &mbuf_total_left,
> +					&seg_total_left, fcw);
> +		else
> +			ret = vrb2_dma_desc_ld_fill(op, &desc->req, &input, h_output,
> +					&in_offset, &h_out_offset,
> +					&h_out_length, &mbuf_total_left,
> +					&seg_total_left, fcw);
>   		if (unlikely(ret < 0))
>   			return ret;
>   	}
> @@ -2308,11 +2770,18 @@ vrb_enqueue_ldpc_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
>   		rte_memcpy(&desc->req.fcw_ld, &desc_first->req.fcw_ld, ACC_FCW_LD_BLEN);
>   		desc->req.fcw_ld.tb_trailer_size = (c - r - 1) * trail_len;
>   
> -		ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
> -				h_output, &in_offset, &h_out_offset,
> -				&h_out_length,
> -				&mbuf_total_left, &seg_total_left,
> -				&desc->req.fcw_ld);
> +		if (q->d->device_variant == VRB1_VARIANT)
> +			ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
> +					h_output, &in_offset, &h_out_offset,
> +					&h_out_length,
> +					&mbuf_total_left, &seg_total_left,
> +					&desc->req.fcw_ld);
> +		else
> +			ret = vrb2_dma_desc_ld_fill(op, &desc->req, &input,
> +					h_output, &in_offset, &h_out_offset,
> +					&h_out_length,
> +					&mbuf_total_left, &seg_total_left,
> +					&desc->req.fcw_ld);
>   
>   		if (unlikely(ret < 0))
>   			return ret;
> @@ -2576,14 +3045,22 @@ vrb_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data,
>   	int descs_used;
>   
>   	for (i = 0; i < num; ++i) {
> -		cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]->ldpc_enc);
> -		/* Check if there are available space for further processing. */
> -		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
> -			acc_enqueue_ring_full(q_data);
> -			break;
> +		if (q->d->device_variant == VRB1_VARIANT) {
> +			cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]->ldpc_enc);
> +			/* Check if there are available space for further processing. */
> +			if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
> +				acc_enqueue_ring_full(q_data);
> +				break;
> +			}
> +			descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q, ops[i],
> +					enqueued_descs, cbs_in_tb);
> +		} else {
> +			if (unlikely(avail < 1)) {
> +				acc_enqueue_ring_full(q_data);
> +				break;
> +			}
> +			descs_used = vrb2_enqueue_ldpc_enc_one_op_tb(q, ops[i], enqueued_descs);
>   		}
> -
> -		descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q, ops[i], enqueued_descs, cbs_in_tb);
>   		if (descs_used < 0) {
>   			acc_enqueue_invalid(q_data);
>   			break;
> @@ -2865,6 +3342,52 @@ vrb_dequeue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op **ref_op,
>   	return desc->req.numCBs;
>   }
>   
> +/* Dequeue one LDPC encode operations from VRB2 device in TB mode. */
> +static inline int
> +vrb2_dequeue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op **ref_op,
> +		uint16_t *dequeued_ops, uint32_t *aq_dequeued,
> +		uint16_t *dequeued_descs)
> +{
> +	union acc_dma_desc *desc, atom_desc;
> +	union acc_dma_rsp_desc rsp;
> +	struct rte_bbdev_enc_op *op;
> +	int desc_idx = ((q->sw_ring_tail + *dequeued_descs) & q->sw_ring_wrap_mask);
> +
> +	desc = q->ring_addr + desc_idx;
> +	atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc, __ATOMIC_RELAXED);
> +
> +	/* Check fdone bit. */
> +	if (!(atom_desc.rsp.val & ACC_FDONE))
> +		return -1;
> +
> +	rsp.val = atom_desc.rsp.val;
> +	rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val);
> +
> +	/* Dequeue. */
> +	op = desc->req.op_addr;
> +
> +	/* Clearing status, it will be set based on response. */
> +	op->status = 0;
> +	op->status |= rsp.input_err << RTE_BBDEV_DATA_ERROR;
> +	op->status |= rsp.dma_err << RTE_BBDEV_DRV_ERROR;
> +	op->status |= rsp.fcw_err << RTE_BBDEV_DRV_ERROR;
> +	op->status |= rsp.engine_hung << RTE_BBDEV_ENGINE_ERROR;
> +
> +	if (desc->req.last_desc_in_batch) {
> +		(*aq_dequeued)++;
> +		desc->req.last_desc_in_batch = 0;
> +	}
> +	desc->rsp.val = ACC_DMA_DESC_TYPE;
> +	desc->rsp.add_info_0 = 0; /* Reserved bits. */
> +	desc->rsp.add_info_1 = 0; /* Reserved bits. */
> +
> +	/* One op was successfully dequeued */
> +	ref_op[0] = op;
> +	(*dequeued_descs)++;
> +	(*dequeued_ops)++;
> +	return 1;
> +}
> +
>   /* Dequeue one LDPC encode operations from device in TB mode.
>    * That operation may cover multiple descriptors.
>    */
> @@ -3189,9 +3712,14 @@ vrb_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
>   
>   	for (i = 0; i < avail; i++) {
>   		if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
> -			ret = vrb_dequeue_enc_one_op_tb(q, &ops[dequeued_ops],
> -					&dequeued_ops, &aq_dequeued,
> -					&dequeued_descs, num);
> +			if (q->d->device_variant == VRB1_VARIANT)
> +				ret = vrb_dequeue_enc_one_op_tb(q, &ops[dequeued_ops],
> +						&dequeued_ops, &aq_dequeued,
> +						&dequeued_descs, num);
> +			else
> +				ret = vrb2_dequeue_ldpc_enc_one_op_tb(q, &ops[dequeued_ops],
> +						&dequeued_ops, &aq_dequeued,
> +						&dequeued_descs);
>   		else
>   			ret = vrb_dequeue_enc_one_op_cb(q, &ops[dequeued_ops],
>   					&dequeued_ops, &aq_dequeued,
> @@ -3536,6 +4064,7 @@ vrb_bbdev_init(struct rte_bbdev *dev, struct rte_pci_driver *drv)
>   	} else {
>   		d->device_variant = VRB2_VARIANT;
>   		d->queue_offset = vrb2_queue_offset;
> +		d->fcw_ld_fill = vrb2_fcw_ld_fill;
>   		d->num_qgroups = VRB2_NUM_QGRPS;
>   		d->num_aqs = VRB2_NUM_AQS;
>   		if (d->pf_device)


It looks like most (60%+) of the code in this patch could be removed if
duplication was avoided.

Thanks,
Maxime
  
Chautru, Nicolas Oct. 4, 2023, 9:11 p.m. UTC | #2
Hi Maxime, 

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Tuesday, October 3, 2023 7:28 AM
> To: Chautru, Nicolas <nicolas.chautru@intel.com>; dev@dpdk.org
> Cc: hemant.agrawal@nxp.com; david.marchand@redhat.com; Vargas, Hernan
> <hernan.vargas@intel.com>
> Subject: Re: [PATCH v3 08/12] baseband/acc: add FEC capabilities for the VRB2
> variant
> 
> 
> 
> On 9/29/23 18:35, Nicolas Chautru wrote:
> > New implementation for some of the FEC features specific to the VRB2
> > variant.
> >
> > Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> > ---
> >   drivers/baseband/acc/rte_vrb_pmd.c | 567
> ++++++++++++++++++++++++++++-
> >   1 file changed, 548 insertions(+), 19 deletions(-)
> >
> > diff --git a/drivers/baseband/acc/rte_vrb_pmd.c
> > b/drivers/baseband/acc/rte_vrb_pmd.c
> > index 48e779ce77..93add82947 100644
> > --- a/drivers/baseband/acc/rte_vrb_pmd.c
> > +++ b/drivers/baseband/acc/rte_vrb_pmd.c
> > @@ -1235,6 +1235,94 @@ vrb_dev_info_get(struct rte_bbdev *dev, struct
> rte_bbdev_driver_info *dev_info)
> >   	};
> >
> >   	static const struct rte_bbdev_op_cap vrb2_bbdev_capabilities[] = {
> > +		{
> > +			.type = RTE_BBDEV_OP_TURBO_DEC,
> > +			.cap.turbo_dec = {
> > +				.capability_flags =
> > +
> 	RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
> > +					RTE_BBDEV_TURBO_CRC_TYPE_24B |
> > +
> 	RTE_BBDEV_TURBO_DEC_CRC_24B_DROP |
> > +					RTE_BBDEV_TURBO_EQUALIZER |
> > +
> 	RTE_BBDEV_TURBO_SOFT_OUT_SATURATE |
> > +
> 	RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
> > +
> 	RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH |
> > +					RTE_BBDEV_TURBO_SOFT_OUTPUT |
> > +
> 	RTE_BBDEV_TURBO_EARLY_TERMINATION |
> > +
> 	RTE_BBDEV_TURBO_DEC_INTERRUPTS |
> > +
> 	RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
> > +
> 	RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT |
> > +					RTE_BBDEV_TURBO_MAP_DEC |
> > +
> 	RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
> > +
> 	RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
> > +				.max_llr_modulus = INT8_MAX,
> > +				.num_buffers_src =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +				.num_buffers_hard_out =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +				.num_buffers_soft_out =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +			}
> > +		},
> > +		{
> > +			.type = RTE_BBDEV_OP_TURBO_ENC,
> > +			.cap.turbo_enc = {
> > +				.capability_flags =
> > +
> 	RTE_BBDEV_TURBO_CRC_24B_ATTACH |
> > +
> 	RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
> > +					RTE_BBDEV_TURBO_RATE_MATCH |
> > +
> 	RTE_BBDEV_TURBO_ENC_INTERRUPTS |
> > +
> 	RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
> > +				.num_buffers_src =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +				.num_buffers_dst =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +			}
> > +		},
> > +		{
> > +			.type   = RTE_BBDEV_OP_LDPC_ENC,
> > +			.cap.ldpc_enc = {
> > +				.capability_flags =
> > +					RTE_BBDEV_LDPC_RATE_MATCH |
> > +					RTE_BBDEV_LDPC_CRC_24B_ATTACH
> |
> > +
> 	RTE_BBDEV_LDPC_INTERLEAVER_BYPASS |
> > +					RTE_BBDEV_LDPC_ENC_INTERRUPTS
> |
> > +
> 	RTE_BBDEV_LDPC_ENC_SCATTER_GATHER |
> > +
> 	RTE_BBDEV_LDPC_ENC_CONCATENATION,
> > +				.num_buffers_src =
> > +
> 	RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
> > +				.num_buffers_dst =
> > +
> 	RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
> > +			}
> > +		},
> > +		{
> > +			.type   = RTE_BBDEV_OP_LDPC_DEC,
> > +			.cap.ldpc_dec = {
> > +			.capability_flags =
> > +				RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
> > +				RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
> > +				RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK |
> > +				RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK |
> > +				RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE
> |
> > +
> 	RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
> > +				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE
> |
> > +				RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS |
> > +				RTE_BBDEV_LDPC_DEC_SCATTER_GATHER |
> > +
> 	RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION |
> > +
> 	RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION |
> > +				RTE_BBDEV_LDPC_LLR_COMPRESSION |
> > +				RTE_BBDEV_LDPC_SOFT_OUT_ENABLE |
> > +				RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS |
> > +
> 	RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS |
> > +				RTE_BBDEV_LDPC_DEC_INTERRUPTS,
> > +			.llr_size = 8,
> > +			.llr_decimals = 2,
> > +			.num_buffers_src =
> > +
> 	RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
> > +			.num_buffers_hard_out =
> > +
> 	RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
> > +			.num_buffers_soft_out = 0,
> > +			}
> > +		},
> >   		RTE_BBDEV_END_OF_CAPABILITIES_LIST()
> >   	};
> >
> > @@ -1774,6 +1862,141 @@ vrb1_dma_desc_ld_fill(struct rte_bbdev_dec_op
> *op,
> >   	return 0;
> >   }
> >
> > +/* Fill in a frame control word for LDPC decoding. */ static inline
> > +void vrb2_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld
> > +*fcw,
> > +		union acc_harq_layout_data *harq_layout) {
> > +	uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, parity_offset;
> > +	uint32_t harq_index;
> > +	uint32_t l;
> 
> 
> This is so similar with vrb1_fcw_ld_fill() that it does not make sense
> to duplicate so much code.
> 
> Do you confirm there are no other difference than the SOFT_OUT stuff,
> and reusing vrb2_fcw_ld_fill on VRB1 would just work as the op_flags are
> checked (and they should not be set if capability is not advertized)?

There are quite of lot of difference to the fundamental underlying IP, the  IP decoder is different with different tuning point, the SO and HARQ support are different. 
Still I believe we can support both in the same function without being a too much a problem moving forward. Doing this in v4. 


> 
> > +	fcw->qm = op->ldpc_dec.q_m;
> > +	fcw->nfiller = op->ldpc_dec.n_filler;
> > +	fcw->BG = (op->ldpc_dec.basegraph - 1);
> > +	fcw->Zc = op->ldpc_dec.z_c;
> > +	fcw->ncb = op->ldpc_dec.n_cb;
> > +	fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_dec.basegraph,
> > +			op->ldpc_dec.rv_index);
> > +	if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK)
> > +		fcw->rm_e = op->ldpc_dec.cb_params.e;
> > +	else
> > +		fcw->rm_e = (op->ldpc_dec.tb_params.r <
> > +				op->ldpc_dec.tb_params.cab) ?
> > +						op->ldpc_dec.tb_params.ea :
> > +						op->ldpc_dec.tb_params.eb;
> > +
> > +	if (unlikely(check_bit(op->ldpc_dec.op_flags,
> > +			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) &&
> > +			(op->ldpc_dec.harq_combined_input.length == 0))) {
> > +		rte_bbdev_log(WARNING, "Null HARQ input size provided");
> > +		/* Disable HARQ input in that case to carry forward. */
> > +		op->ldpc_dec.op_flags ^=
> RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
> > +	}
> > +	if (unlikely(fcw->rm_e == 0)) {
> > +		rte_bbdev_log(WARNING, "Null E input provided");
> > +		fcw->rm_e = 2;
> > +	}
> > +
> > +	fcw->hcin_en = check_bit(op->ldpc_dec.op_flags,
> > +			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
> > +	fcw->hcout_en = check_bit(op->ldpc_dec.op_flags,
> > +			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
> > +	fcw->crc_select = check_bit(op->ldpc_dec.op_flags,
> > +			RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK);
> > +	fcw->so_en = check_bit(op->ldpc_dec.op_flags,
> RTE_BBDEV_LDPC_SOFT_OUT_ENABLE);
> > +	fcw->so_bypass_intlv = check_bit(op->ldpc_dec.op_flags,
> > +
> 	RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS);
> > +	fcw->so_bypass_rm = check_bit(op->ldpc_dec.op_flags,
> RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS);
> > +	fcw->bypass_dec = 0;
> > +	fcw->bypass_intlv = check_bit(op->ldpc_dec.op_flags,
> > +			RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS);
> > +	if (op->ldpc_dec.q_m == 1) {
> > +		fcw->bypass_intlv = 1;
> > +		fcw->qm = 2;
> > +	}
> > +	if (check_bit(op->ldpc_dec.op_flags,
> RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION)) {
> > +		fcw->hcin_decomp_mode = 1;
> > +		fcw->hcout_comp_mode = 1;
> > +	} else if (check_bit(op->ldpc_dec.op_flags,
> RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION)) {
> > +		fcw->hcin_decomp_mode = 4;
> > +		fcw->hcout_comp_mode = 4;
> > +	} else {
> > +		fcw->hcin_decomp_mode = 0;
> > +		fcw->hcout_comp_mode = 0;
> > +	}
> > +
> > +	fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags,
> > +			RTE_BBDEV_LDPC_LLR_COMPRESSION);
> > +	harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset);
> > +	if (fcw->hcin_en > 0) {
> > +		harq_in_length = op->ldpc_dec.harq_combined_input.length;
> > +		if (fcw->hcin_decomp_mode == 1)
> > +			harq_in_length = harq_in_length * 8 / 6;
> > +		else if (fcw->hcin_decomp_mode == 4)
> > +			harq_in_length = harq_in_length * 2;
> > +		harq_in_length = RTE_MIN(harq_in_length, op->ldpc_dec.n_cb
> > +				- op->ldpc_dec.n_filler);
> > +		harq_in_length = RTE_ALIGN_CEIL(harq_in_length, 64);
> > +		fcw->hcin_size0 = harq_in_length;
> > +		fcw->hcin_offset = 0;
> > +		fcw->hcin_size1 = 0;
> > +	} else {
> > +		fcw->hcin_size0 = 0;
> > +		fcw->hcin_offset = 0;
> > +		fcw->hcin_size1 = 0;
> > +	}
> > +
> > +	fcw->itmax = op->ldpc_dec.iter_max;
> > +	fcw->so_it = op->ldpc_dec.iter_max;
> > +	fcw->itstop = check_bit(op->ldpc_dec.op_flags,
> RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
> > +	fcw->cnu_algo = ACC_ALGO_MSA;
> > +	fcw->synd_precoder = fcw->itstop;
> > +
> > +	fcw->minsum_offset = 1;
> > +	fcw->dec_llrclip   = 2;
> > +
> > +	/*
> > +	 * These are all implicitly set
> > +	 * fcw->synd_post = 0;
> > +	 * fcw->dec_convllr = 0;
> > +	 * fcw->hcout_convllr = 0;
> > +	 * fcw->hcout_size1 = 0;
> > +	 * fcw->hcout_offset = 0;
> > +	 * fcw->negstop_th = 0;
> > +	 * fcw->negstop_it = 0;
> > +	 * fcw->negstop_en = 0;
> > +	 * fcw->gain_i = 1;
> > +	 * fcw->gain_h = 1;
> > +	 */
> > +	if (fcw->hcout_en > 0) {
> > +		parity_offset = (op->ldpc_dec.basegraph == 1 ? 20 : 8)
> > +			* op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
> > +		k0_p = (fcw->k0 > parity_offset) ?
> > +				fcw->k0 - op->ldpc_dec.n_filler : fcw->k0;
> > +		ncb_p = fcw->ncb - op->ldpc_dec.n_filler;
> > +		l = k0_p + fcw->rm_e;
> > +		harq_out_length = (uint16_t) fcw->hcin_size0;
> > +		harq_out_length = RTE_MIN(RTE_MAX(harq_out_length, l),
> ncb_p);
> > +		harq_out_length = RTE_ALIGN_CEIL(harq_out_length, 64);
> > +		fcw->hcout_size0 = harq_out_length;
> > +		fcw->hcout_size1 = 0;
> > +		fcw->hcout_offset = 0;
> > +		harq_layout[harq_index].offset = fcw->hcout_offset;
> > +		harq_layout[harq_index].size0 = fcw->hcout_size0;
> > +	} else {
> > +		fcw->hcout_size0 = 0;
> > +		fcw->hcout_size1 = 0;
> > +		fcw->hcout_offset = 0;
> > +	}
> > +
> > +	fcw->tb_crc_select = 0;
> > +	if (check_bit(op->ldpc_dec.op_flags,
> RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK))
> > +		fcw->tb_crc_select = 2;
> > +	if (check_bit(op->ldpc_dec.op_flags,
> RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK))
> > +		fcw->tb_crc_select = 1;
> > +}
> > +
> >   static inline void
> >   vrb_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
> >   		struct acc_dma_req_desc *desc,
> > @@ -1817,6 +2040,139 @@ vrb_dma_desc_ld_update(struct
> rte_bbdev_dec_op *op,
> >   	desc->op_addr = op;
> >   }
> >
> > +static inline int
> > +vrb2_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
> > +		struct acc_dma_req_desc *desc,
> > +		struct rte_mbuf **input, struct rte_mbuf *h_output,
> > +		uint32_t *in_offset, uint32_t *h_out_offset,
> > +		uint32_t *h_out_length, uint32_t *mbuf_total_left,
> > +		uint32_t *seg_total_left, struct acc_fcw_ld *fcw)
> > +{
> Same here.
> 
> I compared with vrb1_dma_desc_ld_fill(), and I don't see why we need two
> functions.
> 
> The only differences are either backed by capability checks, and vrb1
> already sets fcw->hcin_decomp_mode, so this code should work as-is on
> vrb1 if I'm not mistaken.

Yes fair enough, doing this in v3. 

> 
> > +	struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
> > +	int next_triplet = 1; /* FCW already done. */
> > +	uint32_t input_length;
> > +	uint16_t output_length, crc24_overlap = 0;
> > +	uint16_t sys_cols, K, h_p_size, h_np_size;
> > +
> > +	acc_header_init(desc);
> > +
> > +	if (check_bit(op->ldpc_dec.op_flags,
> RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
> > +		crc24_overlap = 24;
> > +
> > +	/* Compute some LDPC BG lengths. */
> > +	input_length = fcw->rm_e;
> > +	if (check_bit(op->ldpc_dec.op_flags,
> RTE_BBDEV_LDPC_LLR_COMPRESSION))
> > +		input_length = (input_length * 3 + 3) / 4;
> > +	sys_cols = (dec->basegraph == 1) ? 22 : 10;
> > +	K = sys_cols * dec->z_c;
> > +	output_length = K - dec->n_filler - crc24_overlap;
> > +
> > +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left <
> input_length))) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between mbuf length and included
> CB sizes: mbuf len %u, cb len %u",
> > +				*mbuf_total_left, input_length);
> > +		return -1;
> > +	}
> > +
> > +	next_triplet = acc_dma_fill_blk_type_in(desc, input,
> > +			in_offset, input_length,
> > +			seg_total_left, next_triplet,
> > +			check_bit(op->ldpc_dec.op_flags,
> > +			RTE_BBDEV_LDPC_DEC_SCATTER_GATHER));
> > +
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and mbuf
> data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +
> > +	if (check_bit(op->ldpc_dec.op_flags,
> RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
> > +		if (op->ldpc_dec.harq_combined_input.data == 0) {
> > +			rte_bbdev_log(ERR, "HARQ input is not defined");
> > +			return -1;
> > +		}
> > +		h_p_size = fcw->hcin_size0 + fcw->hcin_size1;
> > +		if (fcw->hcin_decomp_mode == 1)
> > +			h_p_size = (h_p_size * 3 + 3) / 4;
> > +		else if (fcw->hcin_decomp_mode == 4)
> > +			h_p_size = h_p_size / 2;
> > +		if (op->ldpc_dec.harq_combined_input.data == 0) {
> > +			rte_bbdev_log(ERR, "HARQ input is not defined");
> > +			return -1;
> > +		}
> > +		acc_dma_fill_blk_type(
> > +				desc,
> > +				op->ldpc_dec.harq_combined_input.data,
> > +				op->ldpc_dec.harq_combined_input.offset,
> > +				h_p_size,
> > +				next_triplet,
> > +				ACC_DMA_BLKID_IN_HARQ);
> > +		next_triplet++;
> > +	}
> > +
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->m2dlen = next_triplet;
> > +	*mbuf_total_left -= input_length;
> > +
> > +	next_triplet = acc_dma_fill_blk_type(desc, h_output,
> > +			*h_out_offset, output_length >> 3, next_triplet,
> > +			ACC_DMA_BLKID_OUT_HARD);
> > +
> > +	if (check_bit(op->ldpc_dec.op_flags,
> RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)) {
> > +		if (op->ldpc_dec.soft_output.data == 0) {
> > +			rte_bbdev_log(ERR, "Soft output is not defined");
> > +			return -1;
> > +		}
> > +		dec->soft_output.length = fcw->rm_e;
> > +		acc_dma_fill_blk_type(desc, dec->soft_output.data, dec-
> >soft_output.offset,
> > +				fcw->rm_e, next_triplet,
> ACC_DMA_BLKID_OUT_SOFT);
> > +		next_triplet++;
> > +	}
> > +
> > +	if (check_bit(op->ldpc_dec.op_flags,
> > +
> 	RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
> > +		if (op->ldpc_dec.harq_combined_output.data == 0) {
> > +			rte_bbdev_log(ERR, "HARQ output is not defined");
> > +			return -1;
> > +		}
> > +
> > +		/* Pruned size of the HARQ */
> > +		h_p_size = fcw->hcout_size0 + fcw->hcout_size1;
> > +		/* Non-Pruned size of the HARQ */
> > +		h_np_size = fcw->hcout_offset > 0 ?
> > +				fcw->hcout_offset + fcw->hcout_size1 :
> > +				h_p_size;
> > +		if (fcw->hcin_decomp_mode == 1) {
> > +			h_np_size = (h_np_size * 3 + 3) / 4;
> > +			h_p_size = (h_p_size * 3 + 3) / 4;
> > +		} else if (fcw->hcin_decomp_mode == 4) {
> > +			h_np_size = h_np_size / 2;
> > +			h_p_size = h_p_size / 2;
> > +		}
> > +		dec->harq_combined_output.length = h_np_size;
> > +		acc_dma_fill_blk_type(
> > +				desc,
> > +				dec->harq_combined_output.data,
> > +				dec->harq_combined_output.offset,
> > +				h_p_size,
> > +				next_triplet,
> > +				ACC_DMA_BLKID_OUT_HARQ);
> > +
> > +		next_triplet++;
> > +	}
> > +
> > +	*h_out_length = output_length >> 3;
> > +	dec->hard_output.length += *h_out_length;
> > +	*h_out_offset += *h_out_length;
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->d2mlen = next_triplet - desc->m2dlen;
> > +
> > +	desc->op_addr = op;
> > +
> > +	return 0;
> > +}
> > +
> >   /* Enqueue one encode operations for device in CB mode. */
> >   static inline int
> >   enqueue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op
> *op,
> > @@ -1877,6 +2233,7 @@ enqueue_ldpc_enc_n_op_cb(struct acc_queue *q,
> struct rte_bbdev_enc_op **ops,
> >   	/** This could be done at polling. */
> >   	acc_header_init(&desc->req);
> >   	desc->req.numCBs = num;
> > +	desc->req.dltb = 0;
> >
> >   	in_length_in_bytes = ops[0]->ldpc_enc.input.data->data_len;
> >   	out_length = (enc->cb_params.e + 7) >> 3;
> > @@ -2102,6 +2459,105 @@ vrb1_enqueue_ldpc_enc_one_op_tb(struct
> acc_queue *q, struct rte_bbdev_enc_op *op
> >   	return return_descs;
> >   }
> >
> > +/* Fill in a frame control word for LDPC encoding. */
> > +static inline void
> > +vrb2_fcw_letb_fill(const struct rte_bbdev_enc_op *op, struct acc_fcw_le
> *fcw)
> > +{
> > +	fcw->qm = op->ldpc_enc.q_m;
> > +	fcw->nfiller = op->ldpc_enc.n_filler;
> > +	fcw->BG = (op->ldpc_enc.basegraph - 1);
> > +	fcw->Zc = op->ldpc_enc.z_c;
> > +	fcw->ncb = op->ldpc_enc.n_cb;
> > +	fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_enc.basegraph,
> > +			op->ldpc_enc.rv_index);
> > +	fcw->rm_e = op->ldpc_enc.tb_params.ea;
> > +	fcw->rm_e_b = op->ldpc_enc.tb_params.eb;
> > +	fcw->crc_select = check_bit(op->ldpc_enc.op_flags,
> > +			RTE_BBDEV_LDPC_CRC_24B_ATTACH);
> > +	fcw->bypass_intlv = 0;
> > +	if (op->ldpc_enc.tb_params.c > 1) {
> > +		fcw->mcb_count = 0;
> > +		fcw->C = op->ldpc_enc.tb_params.c;
> > +		fcw->Cab = op->ldpc_enc.tb_params.cab;
> > +	} else {
> > +		fcw->mcb_count = 1;
> > +		fcw->C = 0;
> > +	}
> > +}
> > +
> > +/* Enqueue one encode operations for device in TB mode.
> > + * returns the number of descs used.
> > + */
> > +static inline int
> > +vrb2_enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct
> rte_bbdev_enc_op *op,
> > +		uint16_t enq_descs)
> > +{
> > +	union acc_dma_desc *desc = NULL;
> > +	uint32_t in_offset, out_offset, out_length, seg_total_left;
> > +	struct rte_mbuf *input, *output_head, *output;
> > +
> > +	uint16_t desc_idx = ((q->sw_ring_head + enq_descs) & q-
> >sw_ring_wrap_mask);
> > +	desc = q->ring_addr + desc_idx;
> 
> Use acc_desc()?

thanks

> 
> > +	vrb2_fcw_letb_fill(op, &desc->req.fcw_le);
> > +	struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
> > +	int next_triplet = 1; /* FCW already done */
> > +	uint32_t in_length_in_bytes;
> > +	uint16_t K, in_length_in_bits;
> > +
> > +	input = enc->input.data;
> > +	output_head = output = enc->output.data;
> > +	in_offset = enc->input.offset;
> > +	out_offset = enc->output.offset;
> > +	seg_total_left = rte_pktmbuf_data_len(enc->input.data) - in_offset;
> > +
> > +	acc_header_init(&desc->req);
> > +	K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
> > +	in_length_in_bits = K - enc->n_filler;
> > +	if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) ||
> > +			(enc->op_flags &
> RTE_BBDEV_LDPC_CRC_24B_ATTACH))
> > +		in_length_in_bits -= 24;
> > +	in_length_in_bytes = (in_length_in_bits >> 3) * enc->tb_params.c;
> > +
> > +	next_triplet = acc_dma_fill_blk_type_in(&desc->req, &input,
> &in_offset,
> > +			in_length_in_bytes, &seg_total_left, next_triplet,
> > +			check_bit(enc->op_flags,
> RTE_BBDEV_LDPC_ENC_SCATTER_GATHER));
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and mbuf
> data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +	desc->req.data_ptrs[next_triplet - 1].last = 1;
> > +	desc->req.m2dlen = next_triplet;
> > +
> > +	/* Set output length */
> > +	/* Integer round up division by 8 */
> > +	out_length = (enc->tb_params.ea * enc->tb_params.cab +
> > +			enc->tb_params.eb * (enc->tb_params.c - enc-
> >tb_params.cab)  + 7) >> 3;
> > +
> > +	next_triplet = acc_dma_fill_blk_type(&desc->req, output, out_offset,
> > +			out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC);
> > +	enc->output.length = out_length;
> > +	out_offset += out_length;
> > +	desc->req.data_ptrs[next_triplet - 1].last = 1;
> > +	desc->req.data_ptrs[next_triplet - 1].dma_ext = 0;
> > +	desc->req.d2mlen = next_triplet - desc->req.m2dlen;
> > +	desc->req.numCBs = enc->tb_params.c;
> > +	if (desc->req.numCBs > 1)
> > +		desc->req.dltb = 1;
> > +	desc->req.op_addr = op;
> > +
> > +	if (out_length < ACC_MAX_E_MBUF)
> > +		mbuf_append(output_head, output, out_length);
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	rte_memdump(stderr, "FCW", &desc->req.fcw_le, sizeof(desc-
> >req.fcw_le));
> > +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> > +#endif
> > +	/* One CB (one op) was successfully prepared to enqueue */
> > +	return 1;
> 
> This function is quite different from the VRB1 variant.
> Is the underlying hardware completely different, or just a different
> implementation?

The underlying HW is different in this mode of operation, notably as it
supports RTE_BBDEV_LDPC_ENC_CONCATENATION hence more of true TB
implementation. 
Kept separate on purpose. 

> 
> > +}
> > +
> >   /** Enqueue one decode operations for device in CB mode. */
> >   static inline int
> >   enqueue_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op
> *op,
> > @@ -2215,10 +2671,16 @@ vrb_enqueue_ldpc_dec_one_op_cb(struct
> acc_queue *q, struct rte_bbdev_dec_op *op,
> >   		else
> >   			seg_total_left = fcw->rm_e;
> >
> > -		ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input, h_output,
> > -				&in_offset, &h_out_offset,
> > -				&h_out_length, &mbuf_total_left,
> > -				&seg_total_left, fcw);
> > +		if (q->d->device_variant == VRB1_VARIANT)
> > +			ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
> h_output,
> > +					&in_offset, &h_out_offset,
> > +					&h_out_length, &mbuf_total_left,
> > +					&seg_total_left, fcw);
> > +		else
> > +			ret = vrb2_dma_desc_ld_fill(op, &desc->req, &input,
> h_output,
> > +					&in_offset, &h_out_offset,
> > +					&h_out_length, &mbuf_total_left,
> > +					&seg_total_left, fcw);
> >   		if (unlikely(ret < 0))
> >   			return ret;
> >   	}
> > @@ -2308,11 +2770,18 @@ vrb_enqueue_ldpc_dec_one_op_tb(struct
> acc_queue *q, struct rte_bbdev_dec_op *op,
> >   		rte_memcpy(&desc->req.fcw_ld, &desc_first->req.fcw_ld,
> ACC_FCW_LD_BLEN);
> >   		desc->req.fcw_ld.tb_trailer_size = (c - r - 1) * trail_len;
> >
> > -		ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
> > -				h_output, &in_offset, &h_out_offset,
> > -				&h_out_length,
> > -				&mbuf_total_left, &seg_total_left,
> > -				&desc->req.fcw_ld);
> > +		if (q->d->device_variant == VRB1_VARIANT)
> > +			ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
> > +					h_output, &in_offset, &h_out_offset,
> > +					&h_out_length,
> > +					&mbuf_total_left, &seg_total_left,
> > +					&desc->req.fcw_ld);
> > +		else
> > +			ret = vrb2_dma_desc_ld_fill(op, &desc->req, &input,
> > +					h_output, &in_offset, &h_out_offset,
> > +					&h_out_length,
> > +					&mbuf_total_left, &seg_total_left,
> > +					&desc->req.fcw_ld);
> >
> >   		if (unlikely(ret < 0))
> >   			return ret;
> > @@ -2576,14 +3045,22 @@ vrb_enqueue_ldpc_enc_tb(struct
> rte_bbdev_queue_data *q_data,
> >   	int descs_used;
> >
> >   	for (i = 0; i < num; ++i) {
> > -		cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]->ldpc_enc);
> > -		/* Check if there are available space for further processing. */
> > -		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
> > -			acc_enqueue_ring_full(q_data);
> > -			break;
> > +		if (q->d->device_variant == VRB1_VARIANT) {
> > +			cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]-
> >ldpc_enc);
> > +			/* Check if there are available space for further
> processing. */
> > +			if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
> > +				acc_enqueue_ring_full(q_data);
> > +				break;
> > +			}
> > +			descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q,
> ops[i],
> > +					enqueued_descs, cbs_in_tb);
> > +		} else {
> > +			if (unlikely(avail < 1)) {
> > +				acc_enqueue_ring_full(q_data);
> > +				break;
> > +			}
> > +			descs_used = vrb2_enqueue_ldpc_enc_one_op_tb(q,
> ops[i], enqueued_descs);
> >   		}
> > -
> > -		descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q, ops[i],
> enqueued_descs, cbs_in_tb);
> >   		if (descs_used < 0) {
> >   			acc_enqueue_invalid(q_data);
> >   			break;
> > @@ -2865,6 +3342,52 @@ vrb_dequeue_enc_one_op_cb(struct acc_queue
> *q, struct rte_bbdev_enc_op **ref_op,
> >   	return desc->req.numCBs;
> >   }
> >
> > +/* Dequeue one LDPC encode operations from VRB2 device in TB mode. */
> > +static inline int
> > +vrb2_dequeue_ldpc_enc_one_op_tb(struct acc_queue *q, struct
> rte_bbdev_enc_op **ref_op,
> > +		uint16_t *dequeued_ops, uint32_t *aq_dequeued,
> > +		uint16_t *dequeued_descs)
> > +{
> > +	union acc_dma_desc *desc, atom_desc;
> > +	union acc_dma_rsp_desc rsp;
> > +	struct rte_bbdev_enc_op *op;
> > +	int desc_idx = ((q->sw_ring_tail + *dequeued_descs) & q-
> >sw_ring_wrap_mask);
> > +
> > +	desc = q->ring_addr + desc_idx;
> > +	atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
> __ATOMIC_RELAXED);
> > +
> > +	/* Check fdone bit. */
> > +	if (!(atom_desc.rsp.val & ACC_FDONE))
> > +		return -1;
> > +
> > +	rsp.val = atom_desc.rsp.val;
> > +	rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val);
> > +
> > +	/* Dequeue. */
> > +	op = desc->req.op_addr;
> > +
> > +	/* Clearing status, it will be set based on response. */
> > +	op->status = 0;
> > +	op->status |= rsp.input_err << RTE_BBDEV_DATA_ERROR;
> > +	op->status |= rsp.dma_err << RTE_BBDEV_DRV_ERROR;
> > +	op->status |= rsp.fcw_err << RTE_BBDEV_DRV_ERROR;
> > +	op->status |= rsp.engine_hung << RTE_BBDEV_ENGINE_ERROR;
> > +
> > +	if (desc->req.last_desc_in_batch) {
> > +		(*aq_dequeued)++;
> > +		desc->req.last_desc_in_batch = 0;
> > +	}
> > +	desc->rsp.val = ACC_DMA_DESC_TYPE;
> > +	desc->rsp.add_info_0 = 0; /* Reserved bits. */
> > +	desc->rsp.add_info_1 = 0; /* Reserved bits. */
> > +
> > +	/* One op was successfully dequeued */
> > +	ref_op[0] = op;
> > +	(*dequeued_descs)++;
> > +	(*dequeued_ops)++;
> > +	return 1;
> > +}
> > +
> >   /* Dequeue one LDPC encode operations from device in TB mode.
> >    * That operation may cover multiple descriptors.
> >    */
> > @@ -3189,9 +3712,14 @@ vrb_dequeue_ldpc_enc(struct
> rte_bbdev_queue_data *q_data,
> >
> >   	for (i = 0; i < avail; i++) {
> >   		if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
> > -			ret = vrb_dequeue_enc_one_op_tb(q,
> &ops[dequeued_ops],
> > -					&dequeued_ops, &aq_dequeued,
> > -					&dequeued_descs, num);
> > +			if (q->d->device_variant == VRB1_VARIANT)
> > +				ret = vrb_dequeue_enc_one_op_tb(q,
> &ops[dequeued_ops],
> > +						&dequeued_ops,
> &aq_dequeued,
> > +						&dequeued_descs, num);
> > +			else
> > +				ret = vrb2_dequeue_ldpc_enc_one_op_tb(q,
> &ops[dequeued_ops],
> > +						&dequeued_ops,
> &aq_dequeued,
> > +						&dequeued_descs);
> >   		else
> >   			ret = vrb_dequeue_enc_one_op_cb(q,
> &ops[dequeued_ops],
> >   					&dequeued_ops, &aq_dequeued,
> > @@ -3536,6 +4064,7 @@ vrb_bbdev_init(struct rte_bbdev *dev, struct
> rte_pci_driver *drv)
> >   	} else {
> >   		d->device_variant = VRB2_VARIANT;
> >   		d->queue_offset = vrb2_queue_offset;
> > +		d->fcw_ld_fill = vrb2_fcw_ld_fill;
> >   		d->num_qgroups = VRB2_NUM_QGRPS;
> >   		d->num_aqs = VRB2_NUM_AQS;
> >   		if (d->pf_device)
> 
> 
> It looks like most (60%+) of the code in this patch could be removed if
> duplication was avoided.
> 
> Thanks,
> Maxime
  
Maxime Coquelin Oct. 5, 2023, 2:36 p.m. UTC | #3
On 10/4/23 23:11, Chautru, Nicolas wrote:
> Hi Maxime,
> 
>> -----Original Message-----
>> From: Maxime Coquelin <maxime.coquelin@redhat.com>
>> Sent: Tuesday, October 3, 2023 7:28 AM
>> To: Chautru, Nicolas <nicolas.chautru@intel.com>; dev@dpdk.org
>> Cc: hemant.agrawal@nxp.com; david.marchand@redhat.com; Vargas, Hernan
>> <hernan.vargas@intel.com>
>> Subject: Re: [PATCH v3 08/12] baseband/acc: add FEC capabilities for the VRB2
>> variant
>>
>>
>>
>> On 9/29/23 18:35, Nicolas Chautru wrote:
>>> New implementation for some of the FEC features specific to the VRB2
>>> variant.
>>>
>>> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
>>> ---
>>>    drivers/baseband/acc/rte_vrb_pmd.c | 567
>> ++++++++++++++++++++++++++++-
>>>    1 file changed, 548 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/drivers/baseband/acc/rte_vrb_pmd.c
>>> b/drivers/baseband/acc/rte_vrb_pmd.c
>>> index 48e779ce77..93add82947 100644
>>> --- a/drivers/baseband/acc/rte_vrb_pmd.c
>>> +++ b/drivers/baseband/acc/rte_vrb_pmd.c
>>> @@ -1235,6 +1235,94 @@ vrb_dev_info_get(struct rte_bbdev *dev, struct
>> rte_bbdev_driver_info *dev_info)
>>>    	};
>>>
>>>    	static const struct rte_bbdev_op_cap vrb2_bbdev_capabilities[] = {
>>> +		{
>>> +			.type = RTE_BBDEV_OP_TURBO_DEC,
>>> +			.cap.turbo_dec = {
>>> +				.capability_flags =
>>> +
>> 	RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
>>> +					RTE_BBDEV_TURBO_CRC_TYPE_24B |
>>> +
>> 	RTE_BBDEV_TURBO_DEC_CRC_24B_DROP |
>>> +					RTE_BBDEV_TURBO_EQUALIZER |
>>> +
>> 	RTE_BBDEV_TURBO_SOFT_OUT_SATURATE |
>>> +
>> 	RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
>>> +
>> 	RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH |
>>> +					RTE_BBDEV_TURBO_SOFT_OUTPUT |
>>> +
>> 	RTE_BBDEV_TURBO_EARLY_TERMINATION |
>>> +
>> 	RTE_BBDEV_TURBO_DEC_INTERRUPTS |
>>> +
>> 	RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
>>> +
>> 	RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT |
>>> +					RTE_BBDEV_TURBO_MAP_DEC |
>>> +
>> 	RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
>>> +
>> 	RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
>>> +				.max_llr_modulus = INT8_MAX,
>>> +				.num_buffers_src =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +				.num_buffers_hard_out =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +				.num_buffers_soft_out =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +			}
>>> +		},
>>> +		{
>>> +			.type = RTE_BBDEV_OP_TURBO_ENC,
>>> +			.cap.turbo_enc = {
>>> +				.capability_flags =
>>> +
>> 	RTE_BBDEV_TURBO_CRC_24B_ATTACH |
>>> +
>> 	RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
>>> +					RTE_BBDEV_TURBO_RATE_MATCH |
>>> +
>> 	RTE_BBDEV_TURBO_ENC_INTERRUPTS |
>>> +
>> 	RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
>>> +				.num_buffers_src =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +				.num_buffers_dst =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +			}
>>> +		},
>>> +		{
>>> +			.type   = RTE_BBDEV_OP_LDPC_ENC,
>>> +			.cap.ldpc_enc = {
>>> +				.capability_flags =
>>> +					RTE_BBDEV_LDPC_RATE_MATCH |
>>> +					RTE_BBDEV_LDPC_CRC_24B_ATTACH
>> |
>>> +
>> 	RTE_BBDEV_LDPC_INTERLEAVER_BYPASS |
>>> +					RTE_BBDEV_LDPC_ENC_INTERRUPTS
>> |
>>> +
>> 	RTE_BBDEV_LDPC_ENC_SCATTER_GATHER |
>>> +
>> 	RTE_BBDEV_LDPC_ENC_CONCATENATION,
>>> +				.num_buffers_src =
>>> +
>> 	RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
>>> +				.num_buffers_dst =
>>> +
>> 	RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
>>> +			}
>>> +		},
>>> +		{
>>> +			.type   = RTE_BBDEV_OP_LDPC_DEC,
>>> +			.cap.ldpc_dec = {
>>> +			.capability_flags =
>>> +				RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
>>> +				RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
>>> +				RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK |
>>> +				RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK |
>>> +				RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE
>> |
>>> +
>> 	RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
>>> +				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE
>> |
>>> +				RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS |
>>> +				RTE_BBDEV_LDPC_DEC_SCATTER_GATHER |
>>> +
>> 	RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION |
>>> +
>> 	RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION |
>>> +				RTE_BBDEV_LDPC_LLR_COMPRESSION |
>>> +				RTE_BBDEV_LDPC_SOFT_OUT_ENABLE |
>>> +				RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS |
>>> +
>> 	RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS |
>>> +				RTE_BBDEV_LDPC_DEC_INTERRUPTS,
>>> +			.llr_size = 8,
>>> +			.llr_decimals = 2,
>>> +			.num_buffers_src =
>>> +
>> 	RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
>>> +			.num_buffers_hard_out =
>>> +
>> 	RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
>>> +			.num_buffers_soft_out = 0,
>>> +			}
>>> +		},
>>>    		RTE_BBDEV_END_OF_CAPABILITIES_LIST()
>>>    	};
>>>
>>> @@ -1774,6 +1862,141 @@ vrb1_dma_desc_ld_fill(struct rte_bbdev_dec_op
>> *op,
>>>    	return 0;
>>>    }
>>>
>>> +/* Fill in a frame control word for LDPC decoding. */ static inline
>>> +void vrb2_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld
>>> +*fcw,
>>> +		union acc_harq_layout_data *harq_layout) {
>>> +	uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, parity_offset;
>>> +	uint32_t harq_index;
>>> +	uint32_t l;
>>
>>
>> This is so similar with vrb1_fcw_ld_fill() that it does not make sense
>> to duplicate so much code.
>>
>> Do you confirm there are no other difference than the SOFT_OUT stuff,
>> and reusing vrb2_fcw_ld_fill on VRB1 would just work as the op_flags are
>> checked (and they should not be set if capability is not advertized)?
> 
> There are quite of lot of difference to the fundamental underlying IP, the  IP decoder is different with different tuning point, the SO and HARQ support are different.
> Still I believe we can support both in the same function without being a too much a problem moving forward. Doing this in v4.

Thanks,


> 
> 
>>
>>> +	fcw->qm = op->ldpc_dec.q_m;
>>> +	fcw->nfiller = op->ldpc_dec.n_filler;
>>> +	fcw->BG = (op->ldpc_dec.basegraph - 1);
>>> +	fcw->Zc = op->ldpc_dec.z_c;
>>> +	fcw->ncb = op->ldpc_dec.n_cb;
>>> +	fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_dec.basegraph,
>>> +			op->ldpc_dec.rv_index);
>>> +	if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK)
>>> +		fcw->rm_e = op->ldpc_dec.cb_params.e;
>>> +	else
>>> +		fcw->rm_e = (op->ldpc_dec.tb_params.r <
>>> +				op->ldpc_dec.tb_params.cab) ?
>>> +						op->ldpc_dec.tb_params.ea :
>>> +						op->ldpc_dec.tb_params.eb;
>>> +
>>> +	if (unlikely(check_bit(op->ldpc_dec.op_flags,
>>> +			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) &&
>>> +			(op->ldpc_dec.harq_combined_input.length == 0))) {
>>> +		rte_bbdev_log(WARNING, "Null HARQ input size provided");
>>> +		/* Disable HARQ input in that case to carry forward. */
>>> +		op->ldpc_dec.op_flags ^=
>> RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
>>> +	}
>>> +	if (unlikely(fcw->rm_e == 0)) {
>>> +		rte_bbdev_log(WARNING, "Null E input provided");
>>> +		fcw->rm_e = 2;
>>> +	}
>>> +
>>> +	fcw->hcin_en = check_bit(op->ldpc_dec.op_flags,
>>> +			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
>>> +	fcw->hcout_en = check_bit(op->ldpc_dec.op_flags,
>>> +			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
>>> +	fcw->crc_select = check_bit(op->ldpc_dec.op_flags,
>>> +			RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK);
>>> +	fcw->so_en = check_bit(op->ldpc_dec.op_flags,
>> RTE_BBDEV_LDPC_SOFT_OUT_ENABLE);
>>> +	fcw->so_bypass_intlv = check_bit(op->ldpc_dec.op_flags,
>>> +
>> 	RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS);
>>> +	fcw->so_bypass_rm = check_bit(op->ldpc_dec.op_flags,
>> RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS);
>>> +	fcw->bypass_dec = 0;
>>> +	fcw->bypass_intlv = check_bit(op->ldpc_dec.op_flags,
>>> +			RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS);
>>> +	if (op->ldpc_dec.q_m == 1) {
>>> +		fcw->bypass_intlv = 1;
>>> +		fcw->qm = 2;
>>> +	}
>>> +	if (check_bit(op->ldpc_dec.op_flags,
>> RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION)) {
>>> +		fcw->hcin_decomp_mode = 1;
>>> +		fcw->hcout_comp_mode = 1;
>>> +	} else if (check_bit(op->ldpc_dec.op_flags,
>> RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION)) {
>>> +		fcw->hcin_decomp_mode = 4;
>>> +		fcw->hcout_comp_mode = 4;
>>> +	} else {
>>> +		fcw->hcin_decomp_mode = 0;
>>> +		fcw->hcout_comp_mode = 0;
>>> +	}
>>> +
>>> +	fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags,
>>> +			RTE_BBDEV_LDPC_LLR_COMPRESSION);
>>> +	harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset);
>>> +	if (fcw->hcin_en > 0) {
>>> +		harq_in_length = op->ldpc_dec.harq_combined_input.length;
>>> +		if (fcw->hcin_decomp_mode == 1)
>>> +			harq_in_length = harq_in_length * 8 / 6;
>>> +		else if (fcw->hcin_decomp_mode == 4)
>>> +			harq_in_length = harq_in_length * 2;
>>> +		harq_in_length = RTE_MIN(harq_in_length, op->ldpc_dec.n_cb
>>> +				- op->ldpc_dec.n_filler);
>>> +		harq_in_length = RTE_ALIGN_CEIL(harq_in_length, 64);
>>> +		fcw->hcin_size0 = harq_in_length;
>>> +		fcw->hcin_offset = 0;
>>> +		fcw->hcin_size1 = 0;
>>> +	} else {
>>> +		fcw->hcin_size0 = 0;
>>> +		fcw->hcin_offset = 0;
>>> +		fcw->hcin_size1 = 0;
>>> +	}
>>> +
>>> +	fcw->itmax = op->ldpc_dec.iter_max;
>>> +	fcw->so_it = op->ldpc_dec.iter_max;
>>> +	fcw->itstop = check_bit(op->ldpc_dec.op_flags,
>> RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
>>> +	fcw->cnu_algo = ACC_ALGO_MSA;
>>> +	fcw->synd_precoder = fcw->itstop;
>>> +
>>> +	fcw->minsum_offset = 1;
>>> +	fcw->dec_llrclip   = 2;
>>> +
>>> +	/*
>>> +	 * These are all implicitly set
>>> +	 * fcw->synd_post = 0;
>>> +	 * fcw->dec_convllr = 0;
>>> +	 * fcw->hcout_convllr = 0;
>>> +	 * fcw->hcout_size1 = 0;
>>> +	 * fcw->hcout_offset = 0;
>>> +	 * fcw->negstop_th = 0;
>>> +	 * fcw->negstop_it = 0;
>>> +	 * fcw->negstop_en = 0;
>>> +	 * fcw->gain_i = 1;
>>> +	 * fcw->gain_h = 1;
>>> +	 */
>>> +	if (fcw->hcout_en > 0) {
>>> +		parity_offset = (op->ldpc_dec.basegraph == 1 ? 20 : 8)
>>> +			* op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
>>> +		k0_p = (fcw->k0 > parity_offset) ?
>>> +				fcw->k0 - op->ldpc_dec.n_filler : fcw->k0;
>>> +		ncb_p = fcw->ncb - op->ldpc_dec.n_filler;
>>> +		l = k0_p + fcw->rm_e;
>>> +		harq_out_length = (uint16_t) fcw->hcin_size0;
>>> +		harq_out_length = RTE_MIN(RTE_MAX(harq_out_length, l),
>> ncb_p);
>>> +		harq_out_length = RTE_ALIGN_CEIL(harq_out_length, 64);
>>> +		fcw->hcout_size0 = harq_out_length;
>>> +		fcw->hcout_size1 = 0;
>>> +		fcw->hcout_offset = 0;
>>> +		harq_layout[harq_index].offset = fcw->hcout_offset;
>>> +		harq_layout[harq_index].size0 = fcw->hcout_size0;
>>> +	} else {
>>> +		fcw->hcout_size0 = 0;
>>> +		fcw->hcout_size1 = 0;
>>> +		fcw->hcout_offset = 0;
>>> +	}
>>> +
>>> +	fcw->tb_crc_select = 0;
>>> +	if (check_bit(op->ldpc_dec.op_flags,
>> RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK))
>>> +		fcw->tb_crc_select = 2;
>>> +	if (check_bit(op->ldpc_dec.op_flags,
>> RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK))
>>> +		fcw->tb_crc_select = 1;
>>> +}
>>> +
>>>    static inline void
>>>    vrb_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
>>>    		struct acc_dma_req_desc *desc,
>>> @@ -1817,6 +2040,139 @@ vrb_dma_desc_ld_update(struct
>> rte_bbdev_dec_op *op,
>>>    	desc->op_addr = op;
>>>    }
>>>
>>> +static inline int
>>> +vrb2_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
>>> +		struct acc_dma_req_desc *desc,
>>> +		struct rte_mbuf **input, struct rte_mbuf *h_output,
>>> +		uint32_t *in_offset, uint32_t *h_out_offset,
>>> +		uint32_t *h_out_length, uint32_t *mbuf_total_left,
>>> +		uint32_t *seg_total_left, struct acc_fcw_ld *fcw)
>>> +{
>> Same here.
>>
>> I compared with vrb1_dma_desc_ld_fill(), and I don't see why we need two
>> functions.
>>
>> The only differences are either backed by capability checks, and vrb1
>> already sets fcw->hcin_decomp_mode, so this code should work as-is on
>> vrb1 if I'm not mistaken.
> 
> Yes fair enough, doing this in v3.

Thanks.

> 
>>
>>> +	struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
>>> +	int next_triplet = 1; /* FCW already done. */
>>> +	uint32_t input_length;
>>> +	uint16_t output_length, crc24_overlap = 0;
>>> +	uint16_t sys_cols, K, h_p_size, h_np_size;
>>> +
>>> +	acc_header_init(desc);
>>> +
>>> +	if (check_bit(op->ldpc_dec.op_flags,
>> RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
>>> +		crc24_overlap = 24;
>>> +
>>> +	/* Compute some LDPC BG lengths. */
>>> +	input_length = fcw->rm_e;
>>> +	if (check_bit(op->ldpc_dec.op_flags,
>> RTE_BBDEV_LDPC_LLR_COMPRESSION))
>>> +		input_length = (input_length * 3 + 3) / 4;
>>> +	sys_cols = (dec->basegraph == 1) ? 22 : 10;
>>> +	K = sys_cols * dec->z_c;
>>> +	output_length = K - dec->n_filler - crc24_overlap;
>>> +
>>> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left <
>> input_length))) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between mbuf length and included
>> CB sizes: mbuf len %u, cb len %u",
>>> +				*mbuf_total_left, input_length);
>>> +		return -1;
>>> +	}
>>> +
>>> +	next_triplet = acc_dma_fill_blk_type_in(desc, input,
>>> +			in_offset, input_length,
>>> +			seg_total_left, next_triplet,
>>> +			check_bit(op->ldpc_dec.op_flags,
>>> +			RTE_BBDEV_LDPC_DEC_SCATTER_GATHER));
>>> +
>>> +	if (unlikely(next_triplet < 0)) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between data to process and mbuf
>> data length in bbdev_op: %p",
>>> +				op);
>>> +		return -1;
>>> +	}
>>> +
>>> +	if (check_bit(op->ldpc_dec.op_flags,
>> RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
>>> +		if (op->ldpc_dec.harq_combined_input.data == 0) {
>>> +			rte_bbdev_log(ERR, "HARQ input is not defined");
>>> +			return -1;
>>> +		}
>>> +		h_p_size = fcw->hcin_size0 + fcw->hcin_size1;
>>> +		if (fcw->hcin_decomp_mode == 1)
>>> +			h_p_size = (h_p_size * 3 + 3) / 4;
>>> +		else if (fcw->hcin_decomp_mode == 4)
>>> +			h_p_size = h_p_size / 2;
>>> +		if (op->ldpc_dec.harq_combined_input.data == 0) {
>>> +			rte_bbdev_log(ERR, "HARQ input is not defined");
>>> +			return -1;
>>> +		}
>>> +		acc_dma_fill_blk_type(
>>> +				desc,
>>> +				op->ldpc_dec.harq_combined_input.data,
>>> +				op->ldpc_dec.harq_combined_input.offset,
>>> +				h_p_size,
>>> +				next_triplet,
>>> +				ACC_DMA_BLKID_IN_HARQ);
>>> +		next_triplet++;
>>> +	}
>>> +
>>> +	desc->data_ptrs[next_triplet - 1].last = 1;
>>> +	desc->m2dlen = next_triplet;
>>> +	*mbuf_total_left -= input_length;
>>> +
>>> +	next_triplet = acc_dma_fill_blk_type(desc, h_output,
>>> +			*h_out_offset, output_length >> 3, next_triplet,
>>> +			ACC_DMA_BLKID_OUT_HARD);
>>> +
>>> +	if (check_bit(op->ldpc_dec.op_flags,
>> RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)) {
>>> +		if (op->ldpc_dec.soft_output.data == 0) {
>>> +			rte_bbdev_log(ERR, "Soft output is not defined");
>>> +			return -1;
>>> +		}
>>> +		dec->soft_output.length = fcw->rm_e;
>>> +		acc_dma_fill_blk_type(desc, dec->soft_output.data, dec-
>>> soft_output.offset,
>>> +				fcw->rm_e, next_triplet,
>> ACC_DMA_BLKID_OUT_SOFT);
>>> +		next_triplet++;
>>> +	}
>>> +
>>> +	if (check_bit(op->ldpc_dec.op_flags,
>>> +
>> 	RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
>>> +		if (op->ldpc_dec.harq_combined_output.data == 0) {
>>> +			rte_bbdev_log(ERR, "HARQ output is not defined");
>>> +			return -1;
>>> +		}
>>> +
>>> +		/* Pruned size of the HARQ */
>>> +		h_p_size = fcw->hcout_size0 + fcw->hcout_size1;
>>> +		/* Non-Pruned size of the HARQ */
>>> +		h_np_size = fcw->hcout_offset > 0 ?
>>> +				fcw->hcout_offset + fcw->hcout_size1 :
>>> +				h_p_size;
>>> +		if (fcw->hcin_decomp_mode == 1) {
>>> +			h_np_size = (h_np_size * 3 + 3) / 4;
>>> +			h_p_size = (h_p_size * 3 + 3) / 4;
>>> +		} else if (fcw->hcin_decomp_mode == 4) {
>>> +			h_np_size = h_np_size / 2;
>>> +			h_p_size = h_p_size / 2;
>>> +		}
>>> +		dec->harq_combined_output.length = h_np_size;
>>> +		acc_dma_fill_blk_type(
>>> +				desc,
>>> +				dec->harq_combined_output.data,
>>> +				dec->harq_combined_output.offset,
>>> +				h_p_size,
>>> +				next_triplet,
>>> +				ACC_DMA_BLKID_OUT_HARQ);
>>> +
>>> +		next_triplet++;
>>> +	}
>>> +
>>> +	*h_out_length = output_length >> 3;
>>> +	dec->hard_output.length += *h_out_length;
>>> +	*h_out_offset += *h_out_length;
>>> +	desc->data_ptrs[next_triplet - 1].last = 1;
>>> +	desc->d2mlen = next_triplet - desc->m2dlen;
>>> +
>>> +	desc->op_addr = op;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>    /* Enqueue one encode operations for device in CB mode. */
>>>    static inline int
>>>    enqueue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op
>> *op,
>>> @@ -1877,6 +2233,7 @@ enqueue_ldpc_enc_n_op_cb(struct acc_queue *q,
>> struct rte_bbdev_enc_op **ops,
>>>    	/** This could be done at polling. */
>>>    	acc_header_init(&desc->req);
>>>    	desc->req.numCBs = num;
>>> +	desc->req.dltb = 0;
>>>
>>>    	in_length_in_bytes = ops[0]->ldpc_enc.input.data->data_len;
>>>    	out_length = (enc->cb_params.e + 7) >> 3;
>>> @@ -2102,6 +2459,105 @@ vrb1_enqueue_ldpc_enc_one_op_tb(struct
>> acc_queue *q, struct rte_bbdev_enc_op *op
>>>    	return return_descs;
>>>    }
>>>
>>> +/* Fill in a frame control word for LDPC encoding. */
>>> +static inline void
>>> +vrb2_fcw_letb_fill(const struct rte_bbdev_enc_op *op, struct acc_fcw_le
>> *fcw)
>>> +{
>>> +	fcw->qm = op->ldpc_enc.q_m;
>>> +	fcw->nfiller = op->ldpc_enc.n_filler;
>>> +	fcw->BG = (op->ldpc_enc.basegraph - 1);
>>> +	fcw->Zc = op->ldpc_enc.z_c;
>>> +	fcw->ncb = op->ldpc_enc.n_cb;
>>> +	fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_enc.basegraph,
>>> +			op->ldpc_enc.rv_index);
>>> +	fcw->rm_e = op->ldpc_enc.tb_params.ea;
>>> +	fcw->rm_e_b = op->ldpc_enc.tb_params.eb;
>>> +	fcw->crc_select = check_bit(op->ldpc_enc.op_flags,
>>> +			RTE_BBDEV_LDPC_CRC_24B_ATTACH);
>>> +	fcw->bypass_intlv = 0;
>>> +	if (op->ldpc_enc.tb_params.c > 1) {
>>> +		fcw->mcb_count = 0;
>>> +		fcw->C = op->ldpc_enc.tb_params.c;
>>> +		fcw->Cab = op->ldpc_enc.tb_params.cab;
>>> +	} else {
>>> +		fcw->mcb_count = 1;
>>> +		fcw->C = 0;
>>> +	}
>>> +}
>>> +
>>> +/* Enqueue one encode operations for device in TB mode.
>>> + * returns the number of descs used.
>>> + */
>>> +static inline int
>>> +vrb2_enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct
>> rte_bbdev_enc_op *op,
>>> +		uint16_t enq_descs)
>>> +{
>>> +	union acc_dma_desc *desc = NULL;
>>> +	uint32_t in_offset, out_offset, out_length, seg_total_left;
>>> +	struct rte_mbuf *input, *output_head, *output;
>>> +
>>> +	uint16_t desc_idx = ((q->sw_ring_head + enq_descs) & q-
>>> sw_ring_wrap_mask);
>>> +	desc = q->ring_addr + desc_idx;
>>
>> Use acc_desc()?
> 
> thanks
> 
>>
>>> +	vrb2_fcw_letb_fill(op, &desc->req.fcw_le);
>>> +	struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
>>> +	int next_triplet = 1; /* FCW already done */
>>> +	uint32_t in_length_in_bytes;
>>> +	uint16_t K, in_length_in_bits;
>>> +
>>> +	input = enc->input.data;
>>> +	output_head = output = enc->output.data;
>>> +	in_offset = enc->input.offset;
>>> +	out_offset = enc->output.offset;
>>> +	seg_total_left = rte_pktmbuf_data_len(enc->input.data) - in_offset;
>>> +
>>> +	acc_header_init(&desc->req);
>>> +	K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
>>> +	in_length_in_bits = K - enc->n_filler;
>>> +	if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) ||
>>> +			(enc->op_flags &
>> RTE_BBDEV_LDPC_CRC_24B_ATTACH))
>>> +		in_length_in_bits -= 24;
>>> +	in_length_in_bytes = (in_length_in_bits >> 3) * enc->tb_params.c;
>>> +
>>> +	next_triplet = acc_dma_fill_blk_type_in(&desc->req, &input,
>> &in_offset,
>>> +			in_length_in_bytes, &seg_total_left, next_triplet,
>>> +			check_bit(enc->op_flags,
>> RTE_BBDEV_LDPC_ENC_SCATTER_GATHER));
>>> +	if (unlikely(next_triplet < 0)) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between data to process and mbuf
>> data length in bbdev_op: %p",
>>> +				op);
>>> +		return -1;
>>> +	}
>>> +	desc->req.data_ptrs[next_triplet - 1].last = 1;
>>> +	desc->req.m2dlen = next_triplet;
>>> +
>>> +	/* Set output length */
>>> +	/* Integer round up division by 8 */
>>> +	out_length = (enc->tb_params.ea * enc->tb_params.cab +
>>> +			enc->tb_params.eb * (enc->tb_params.c - enc-
>>> tb_params.cab)  + 7) >> 3;
>>> +
>>> +	next_triplet = acc_dma_fill_blk_type(&desc->req, output, out_offset,
>>> +			out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC);
>>> +	enc->output.length = out_length;
>>> +	out_offset += out_length;
>>> +	desc->req.data_ptrs[next_triplet - 1].last = 1;
>>> +	desc->req.data_ptrs[next_triplet - 1].dma_ext = 0;
>>> +	desc->req.d2mlen = next_triplet - desc->req.m2dlen;
>>> +	desc->req.numCBs = enc->tb_params.c;
>>> +	if (desc->req.numCBs > 1)
>>> +		desc->req.dltb = 1;
>>> +	desc->req.op_addr = op;
>>> +
>>> +	if (out_length < ACC_MAX_E_MBUF)
>>> +		mbuf_append(output_head, output, out_length);
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	rte_memdump(stderr, "FCW", &desc->req.fcw_le, sizeof(desc-
>>> req.fcw_le));
>>> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
>>> +#endif
>>> +	/* One CB (one op) was successfully prepared to enqueue */
>>> +	return 1;
>>
>> This function is quite different from the VRB1 variant.
>> Is the underlying hardware completely different, or just a different
>> implementation?
> 
> The underlying HW is different in this mode of operation, notably as it
> supports RTE_BBDEV_LDPC_ENC_CONCATENATION hence more of true TB
> implementation.
> Kept separate on purpose.

Ack, makes sense here.

>>
>>> +}
>>> +
>>>    /** Enqueue one decode operations for device in CB mode. */
>>>    static inline int
>>>    enqueue_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op
>> *op,
>>> @@ -2215,10 +2671,16 @@ vrb_enqueue_ldpc_dec_one_op_cb(struct
>> acc_queue *q, struct rte_bbdev_dec_op *op,
>>>    		else
>>>    			seg_total_left = fcw->rm_e;
>>>
>>> -		ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input, h_output,
>>> -				&in_offset, &h_out_offset,
>>> -				&h_out_length, &mbuf_total_left,
>>> -				&seg_total_left, fcw);
>>> +		if (q->d->device_variant == VRB1_VARIANT)
>>> +			ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
>> h_output,
>>> +					&in_offset, &h_out_offset,
>>> +					&h_out_length, &mbuf_total_left,
>>> +					&seg_total_left, fcw);
>>> +		else
>>> +			ret = vrb2_dma_desc_ld_fill(op, &desc->req, &input,
>> h_output,
>>> +					&in_offset, &h_out_offset,
>>> +					&h_out_length, &mbuf_total_left,
>>> +					&seg_total_left, fcw);
>>>    		if (unlikely(ret < 0))
>>>    			return ret;
>>>    	}
>>> @@ -2308,11 +2770,18 @@ vrb_enqueue_ldpc_dec_one_op_tb(struct
>> acc_queue *q, struct rte_bbdev_dec_op *op,
>>>    		rte_memcpy(&desc->req.fcw_ld, &desc_first->req.fcw_ld,
>> ACC_FCW_LD_BLEN);
>>>    		desc->req.fcw_ld.tb_trailer_size = (c - r - 1) * trail_len;
>>>
>>> -		ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
>>> -				h_output, &in_offset, &h_out_offset,
>>> -				&h_out_length,
>>> -				&mbuf_total_left, &seg_total_left,
>>> -				&desc->req.fcw_ld);
>>> +		if (q->d->device_variant == VRB1_VARIANT)
>>> +			ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
>>> +					h_output, &in_offset, &h_out_offset,
>>> +					&h_out_length,
>>> +					&mbuf_total_left, &seg_total_left,
>>> +					&desc->req.fcw_ld);
>>> +		else
>>> +			ret = vrb2_dma_desc_ld_fill(op, &desc->req, &input,
>>> +					h_output, &in_offset, &h_out_offset,
>>> +					&h_out_length,
>>> +					&mbuf_total_left, &seg_total_left,
>>> +					&desc->req.fcw_ld);
>>>
>>>    		if (unlikely(ret < 0))
>>>    			return ret;
>>> @@ -2576,14 +3045,22 @@ vrb_enqueue_ldpc_enc_tb(struct
>> rte_bbdev_queue_data *q_data,
>>>    	int descs_used;
>>>
>>>    	for (i = 0; i < num; ++i) {
>>> -		cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]->ldpc_enc);
>>> -		/* Check if there are available space for further processing. */
>>> -		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
>>> -			acc_enqueue_ring_full(q_data);
>>> -			break;
>>> +		if (q->d->device_variant == VRB1_VARIANT) {
>>> +			cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]-
>>> ldpc_enc);
>>> +			/* Check if there are available space for further
>> processing. */
>>> +			if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
>>> +				acc_enqueue_ring_full(q_data);
>>> +				break;
>>> +			}
>>> +			descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q,
>> ops[i],
>>> +					enqueued_descs, cbs_in_tb);
>>> +		} else {
>>> +			if (unlikely(avail < 1)) {
>>> +				acc_enqueue_ring_full(q_data);
>>> +				break;
>>> +			}
>>> +			descs_used = vrb2_enqueue_ldpc_enc_one_op_tb(q,
>> ops[i], enqueued_descs);
>>>    		}
>>> -
>>> -		descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q, ops[i],
>> enqueued_descs, cbs_in_tb);
>>>    		if (descs_used < 0) {
>>>    			acc_enqueue_invalid(q_data);
>>>    			break;
>>> @@ -2865,6 +3342,52 @@ vrb_dequeue_enc_one_op_cb(struct acc_queue
>> *q, struct rte_bbdev_enc_op **ref_op,
>>>    	return desc->req.numCBs;
>>>    }
>>>
>>> +/* Dequeue one LDPC encode operations from VRB2 device in TB mode. */
>>> +static inline int
>>> +vrb2_dequeue_ldpc_enc_one_op_tb(struct acc_queue *q, struct
>> rte_bbdev_enc_op **ref_op,
>>> +		uint16_t *dequeued_ops, uint32_t *aq_dequeued,
>>> +		uint16_t *dequeued_descs)
>>> +{
>>> +	union acc_dma_desc *desc, atom_desc;
>>> +	union acc_dma_rsp_desc rsp;
>>> +	struct rte_bbdev_enc_op *op;
>>> +	int desc_idx = ((q->sw_ring_tail + *dequeued_descs) & q-
>>> sw_ring_wrap_mask);
>>> +
>>> +	desc = q->ring_addr + desc_idx;
>>> +	atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
>> __ATOMIC_RELAXED);
>>> +
>>> +	/* Check fdone bit. */
>>> +	if (!(atom_desc.rsp.val & ACC_FDONE))
>>> +		return -1;
>>> +
>>> +	rsp.val = atom_desc.rsp.val;
>>> +	rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val);
>>> +
>>> +	/* Dequeue. */
>>> +	op = desc->req.op_addr;
>>> +
>>> +	/* Clearing status, it will be set based on response. */
>>> +	op->status = 0;
>>> +	op->status |= rsp.input_err << RTE_BBDEV_DATA_ERROR;
>>> +	op->status |= rsp.dma_err << RTE_BBDEV_DRV_ERROR;
>>> +	op->status |= rsp.fcw_err << RTE_BBDEV_DRV_ERROR;
>>> +	op->status |= rsp.engine_hung << RTE_BBDEV_ENGINE_ERROR;
>>> +
>>> +	if (desc->req.last_desc_in_batch) {
>>> +		(*aq_dequeued)++;
>>> +		desc->req.last_desc_in_batch = 0;
>>> +	}
>>> +	desc->rsp.val = ACC_DMA_DESC_TYPE;
>>> +	desc->rsp.add_info_0 = 0; /* Reserved bits. */
>>> +	desc->rsp.add_info_1 = 0; /* Reserved bits. */
>>> +
>>> +	/* One op was successfully dequeued */
>>> +	ref_op[0] = op;
>>> +	(*dequeued_descs)++;
>>> +	(*dequeued_ops)++;
>>> +	return 1;
>>> +}
>>> +
>>>    /* Dequeue one LDPC encode operations from device in TB mode.
>>>     * That operation may cover multiple descriptors.
>>>     */
>>> @@ -3189,9 +3712,14 @@ vrb_dequeue_ldpc_enc(struct
>> rte_bbdev_queue_data *q_data,
>>>
>>>    	for (i = 0; i < avail; i++) {
>>>    		if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
>>> -			ret = vrb_dequeue_enc_one_op_tb(q,
>> &ops[dequeued_ops],
>>> -					&dequeued_ops, &aq_dequeued,
>>> -					&dequeued_descs, num);
>>> +			if (q->d->device_variant == VRB1_VARIANT)
>>> +				ret = vrb_dequeue_enc_one_op_tb(q,
>> &ops[dequeued_ops],
>>> +						&dequeued_ops,
>> &aq_dequeued,
>>> +						&dequeued_descs, num);
>>> +			else
>>> +				ret = vrb2_dequeue_ldpc_enc_one_op_tb(q,
>> &ops[dequeued_ops],
>>> +						&dequeued_ops,
>> &aq_dequeued,
>>> +						&dequeued_descs);
>>>    		else
>>>    			ret = vrb_dequeue_enc_one_op_cb(q,
>> &ops[dequeued_ops],
>>>    					&dequeued_ops, &aq_dequeued,
>>> @@ -3536,6 +4064,7 @@ vrb_bbdev_init(struct rte_bbdev *dev, struct
>> rte_pci_driver *drv)
>>>    	} else {
>>>    		d->device_variant = VRB2_VARIANT;
>>>    		d->queue_offset = vrb2_queue_offset;
>>> +		d->fcw_ld_fill = vrb2_fcw_ld_fill;
>>>    		d->num_qgroups = VRB2_NUM_QGRPS;
>>>    		d->num_aqs = VRB2_NUM_AQS;
>>>    		if (d->pf_device)
>>
>>
>> It looks like most (60%+) of the code in this patch could be removed if
>> duplication was avoided.
>>
>> Thanks,
>> Maxime
>
  

Patch

diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
index 48e779ce77..93add82947 100644
--- a/drivers/baseband/acc/rte_vrb_pmd.c
+++ b/drivers/baseband/acc/rte_vrb_pmd.c
@@ -1235,6 +1235,94 @@  vrb_dev_info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
 	};
 
 	static const struct rte_bbdev_op_cap vrb2_bbdev_capabilities[] = {
+		{
+			.type = RTE_BBDEV_OP_TURBO_DEC,
+			.cap.turbo_dec = {
+				.capability_flags =
+					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
+					RTE_BBDEV_TURBO_CRC_TYPE_24B |
+					RTE_BBDEV_TURBO_DEC_CRC_24B_DROP |
+					RTE_BBDEV_TURBO_EQUALIZER |
+					RTE_BBDEV_TURBO_SOFT_OUT_SATURATE |
+					RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
+					RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH |
+					RTE_BBDEV_TURBO_SOFT_OUTPUT |
+					RTE_BBDEV_TURBO_EARLY_TERMINATION |
+					RTE_BBDEV_TURBO_DEC_INTERRUPTS |
+					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
+					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT |
+					RTE_BBDEV_TURBO_MAP_DEC |
+					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
+					RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
+				.max_llr_modulus = INT8_MAX,
+				.num_buffers_src =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_hard_out =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_soft_out =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+			}
+		},
+		{
+			.type = RTE_BBDEV_OP_TURBO_ENC,
+			.cap.turbo_enc = {
+				.capability_flags =
+					RTE_BBDEV_TURBO_CRC_24B_ATTACH |
+					RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
+					RTE_BBDEV_TURBO_RATE_MATCH |
+					RTE_BBDEV_TURBO_ENC_INTERRUPTS |
+					RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
+				.num_buffers_src =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_dst =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+			}
+		},
+		{
+			.type   = RTE_BBDEV_OP_LDPC_ENC,
+			.cap.ldpc_enc = {
+				.capability_flags =
+					RTE_BBDEV_LDPC_RATE_MATCH |
+					RTE_BBDEV_LDPC_CRC_24B_ATTACH |
+					RTE_BBDEV_LDPC_INTERLEAVER_BYPASS |
+					RTE_BBDEV_LDPC_ENC_INTERRUPTS |
+					RTE_BBDEV_LDPC_ENC_SCATTER_GATHER |
+					RTE_BBDEV_LDPC_ENC_CONCATENATION,
+				.num_buffers_src =
+						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
+				.num_buffers_dst =
+						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
+			}
+		},
+		{
+			.type   = RTE_BBDEV_OP_LDPC_DEC,
+			.cap.ldpc_dec = {
+			.capability_flags =
+				RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
+				RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
+				RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK |
+				RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK |
+				RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
+				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
+				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE |
+				RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS |
+				RTE_BBDEV_LDPC_DEC_SCATTER_GATHER |
+				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION |
+				RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION |
+				RTE_BBDEV_LDPC_LLR_COMPRESSION |
+				RTE_BBDEV_LDPC_SOFT_OUT_ENABLE |
+				RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS |
+				RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS |
+				RTE_BBDEV_LDPC_DEC_INTERRUPTS,
+			.llr_size = 8,
+			.llr_decimals = 2,
+			.num_buffers_src =
+					RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
+			.num_buffers_hard_out =
+					RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
+			.num_buffers_soft_out = 0,
+			}
+		},
 		RTE_BBDEV_END_OF_CAPABILITIES_LIST()
 	};
 
@@ -1774,6 +1862,141 @@  vrb1_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
 	return 0;
 }
 
+/* Fill in a frame control word for LDPC decoding. */
+static inline void
+vrb2_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
+		union acc_harq_layout_data *harq_layout)
+{
+	uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, parity_offset;
+	uint32_t harq_index;
+	uint32_t l;
+
+	fcw->qm = op->ldpc_dec.q_m;
+	fcw->nfiller = op->ldpc_dec.n_filler;
+	fcw->BG = (op->ldpc_dec.basegraph - 1);
+	fcw->Zc = op->ldpc_dec.z_c;
+	fcw->ncb = op->ldpc_dec.n_cb;
+	fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_dec.basegraph,
+			op->ldpc_dec.rv_index);
+	if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK)
+		fcw->rm_e = op->ldpc_dec.cb_params.e;
+	else
+		fcw->rm_e = (op->ldpc_dec.tb_params.r <
+				op->ldpc_dec.tb_params.cab) ?
+						op->ldpc_dec.tb_params.ea :
+						op->ldpc_dec.tb_params.eb;
+
+	if (unlikely(check_bit(op->ldpc_dec.op_flags,
+			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) &&
+			(op->ldpc_dec.harq_combined_input.length == 0))) {
+		rte_bbdev_log(WARNING, "Null HARQ input size provided");
+		/* Disable HARQ input in that case to carry forward. */
+		op->ldpc_dec.op_flags ^= RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
+	}
+	if (unlikely(fcw->rm_e == 0)) {
+		rte_bbdev_log(WARNING, "Null E input provided");
+		fcw->rm_e = 2;
+	}
+
+	fcw->hcin_en = check_bit(op->ldpc_dec.op_flags,
+			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
+	fcw->hcout_en = check_bit(op->ldpc_dec.op_flags,
+			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
+	fcw->crc_select = check_bit(op->ldpc_dec.op_flags,
+			RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK);
+	fcw->so_en = check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_SOFT_OUT_ENABLE);
+	fcw->so_bypass_intlv = check_bit(op->ldpc_dec.op_flags,
+			RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS);
+	fcw->so_bypass_rm = check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS);
+	fcw->bypass_dec = 0;
+	fcw->bypass_intlv = check_bit(op->ldpc_dec.op_flags,
+			RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS);
+	if (op->ldpc_dec.q_m == 1) {
+		fcw->bypass_intlv = 1;
+		fcw->qm = 2;
+	}
+	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION)) {
+		fcw->hcin_decomp_mode = 1;
+		fcw->hcout_comp_mode = 1;
+	} else if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION)) {
+		fcw->hcin_decomp_mode = 4;
+		fcw->hcout_comp_mode = 4;
+	} else {
+		fcw->hcin_decomp_mode = 0;
+		fcw->hcout_comp_mode = 0;
+	}
+
+	fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags,
+			RTE_BBDEV_LDPC_LLR_COMPRESSION);
+	harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset);
+	if (fcw->hcin_en > 0) {
+		harq_in_length = op->ldpc_dec.harq_combined_input.length;
+		if (fcw->hcin_decomp_mode == 1)
+			harq_in_length = harq_in_length * 8 / 6;
+		else if (fcw->hcin_decomp_mode == 4)
+			harq_in_length = harq_in_length * 2;
+		harq_in_length = RTE_MIN(harq_in_length, op->ldpc_dec.n_cb
+				- op->ldpc_dec.n_filler);
+		harq_in_length = RTE_ALIGN_CEIL(harq_in_length, 64);
+		fcw->hcin_size0 = harq_in_length;
+		fcw->hcin_offset = 0;
+		fcw->hcin_size1 = 0;
+	} else {
+		fcw->hcin_size0 = 0;
+		fcw->hcin_offset = 0;
+		fcw->hcin_size1 = 0;
+	}
+
+	fcw->itmax = op->ldpc_dec.iter_max;
+	fcw->so_it = op->ldpc_dec.iter_max;
+	fcw->itstop = check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
+	fcw->cnu_algo = ACC_ALGO_MSA;
+	fcw->synd_precoder = fcw->itstop;
+
+	fcw->minsum_offset = 1;
+	fcw->dec_llrclip   = 2;
+
+	/*
+	 * These are all implicitly set
+	 * fcw->synd_post = 0;
+	 * fcw->dec_convllr = 0;
+	 * fcw->hcout_convllr = 0;
+	 * fcw->hcout_size1 = 0;
+	 * fcw->hcout_offset = 0;
+	 * fcw->negstop_th = 0;
+	 * fcw->negstop_it = 0;
+	 * fcw->negstop_en = 0;
+	 * fcw->gain_i = 1;
+	 * fcw->gain_h = 1;
+	 */
+	if (fcw->hcout_en > 0) {
+		parity_offset = (op->ldpc_dec.basegraph == 1 ? 20 : 8)
+			* op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
+		k0_p = (fcw->k0 > parity_offset) ?
+				fcw->k0 - op->ldpc_dec.n_filler : fcw->k0;
+		ncb_p = fcw->ncb - op->ldpc_dec.n_filler;
+		l = k0_p + fcw->rm_e;
+		harq_out_length = (uint16_t) fcw->hcin_size0;
+		harq_out_length = RTE_MIN(RTE_MAX(harq_out_length, l), ncb_p);
+		harq_out_length = RTE_ALIGN_CEIL(harq_out_length, 64);
+		fcw->hcout_size0 = harq_out_length;
+		fcw->hcout_size1 = 0;
+		fcw->hcout_offset = 0;
+		harq_layout[harq_index].offset = fcw->hcout_offset;
+		harq_layout[harq_index].size0 = fcw->hcout_size0;
+	} else {
+		fcw->hcout_size0 = 0;
+		fcw->hcout_size1 = 0;
+		fcw->hcout_offset = 0;
+	}
+
+	fcw->tb_crc_select = 0;
+	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK))
+		fcw->tb_crc_select = 2;
+	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK))
+		fcw->tb_crc_select = 1;
+}
+
 static inline void
 vrb_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
 		struct acc_dma_req_desc *desc,
@@ -1817,6 +2040,139 @@  vrb_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
 	desc->op_addr = op;
 }
 
+static inline int
+vrb2_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
+		struct acc_dma_req_desc *desc,
+		struct rte_mbuf **input, struct rte_mbuf *h_output,
+		uint32_t *in_offset, uint32_t *h_out_offset,
+		uint32_t *h_out_length, uint32_t *mbuf_total_left,
+		uint32_t *seg_total_left, struct acc_fcw_ld *fcw)
+{
+	struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
+	int next_triplet = 1; /* FCW already done. */
+	uint32_t input_length;
+	uint16_t output_length, crc24_overlap = 0;
+	uint16_t sys_cols, K, h_p_size, h_np_size;
+
+	acc_header_init(desc);
+
+	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
+		crc24_overlap = 24;
+
+	/* Compute some LDPC BG lengths. */
+	input_length = fcw->rm_e;
+	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_LLR_COMPRESSION))
+		input_length = (input_length * 3 + 3) / 4;
+	sys_cols = (dec->basegraph == 1) ? 22 : 10;
+	K = sys_cols * dec->z_c;
+	output_length = K - dec->n_filler - crc24_overlap;
+
+	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < input_length))) {
+		rte_bbdev_log(ERR,
+				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
+				*mbuf_total_left, input_length);
+		return -1;
+	}
+
+	next_triplet = acc_dma_fill_blk_type_in(desc, input,
+			in_offset, input_length,
+			seg_total_left, next_triplet,
+			check_bit(op->ldpc_dec.op_flags,
+			RTE_BBDEV_LDPC_DEC_SCATTER_GATHER));
+
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+
+	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
+		if (op->ldpc_dec.harq_combined_input.data == 0) {
+			rte_bbdev_log(ERR, "HARQ input is not defined");
+			return -1;
+		}
+		h_p_size = fcw->hcin_size0 + fcw->hcin_size1;
+		if (fcw->hcin_decomp_mode == 1)
+			h_p_size = (h_p_size * 3 + 3) / 4;
+		else if (fcw->hcin_decomp_mode == 4)
+			h_p_size = h_p_size / 2;
+		if (op->ldpc_dec.harq_combined_input.data == 0) {
+			rte_bbdev_log(ERR, "HARQ input is not defined");
+			return -1;
+		}
+		acc_dma_fill_blk_type(
+				desc,
+				op->ldpc_dec.harq_combined_input.data,
+				op->ldpc_dec.harq_combined_input.offset,
+				h_p_size,
+				next_triplet,
+				ACC_DMA_BLKID_IN_HARQ);
+		next_triplet++;
+	}
+
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->m2dlen = next_triplet;
+	*mbuf_total_left -= input_length;
+
+	next_triplet = acc_dma_fill_blk_type(desc, h_output,
+			*h_out_offset, output_length >> 3, next_triplet,
+			ACC_DMA_BLKID_OUT_HARD);
+
+	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)) {
+		if (op->ldpc_dec.soft_output.data == 0) {
+			rte_bbdev_log(ERR, "Soft output is not defined");
+			return -1;
+		}
+		dec->soft_output.length = fcw->rm_e;
+		acc_dma_fill_blk_type(desc, dec->soft_output.data, dec->soft_output.offset,
+				fcw->rm_e, next_triplet, ACC_DMA_BLKID_OUT_SOFT);
+		next_triplet++;
+	}
+
+	if (check_bit(op->ldpc_dec.op_flags,
+				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
+		if (op->ldpc_dec.harq_combined_output.data == 0) {
+			rte_bbdev_log(ERR, "HARQ output is not defined");
+			return -1;
+		}
+
+		/* Pruned size of the HARQ */
+		h_p_size = fcw->hcout_size0 + fcw->hcout_size1;
+		/* Non-Pruned size of the HARQ */
+		h_np_size = fcw->hcout_offset > 0 ?
+				fcw->hcout_offset + fcw->hcout_size1 :
+				h_p_size;
+		if (fcw->hcin_decomp_mode == 1) {
+			h_np_size = (h_np_size * 3 + 3) / 4;
+			h_p_size = (h_p_size * 3 + 3) / 4;
+		} else if (fcw->hcin_decomp_mode == 4) {
+			h_np_size = h_np_size / 2;
+			h_p_size = h_p_size / 2;
+		}
+		dec->harq_combined_output.length = h_np_size;
+		acc_dma_fill_blk_type(
+				desc,
+				dec->harq_combined_output.data,
+				dec->harq_combined_output.offset,
+				h_p_size,
+				next_triplet,
+				ACC_DMA_BLKID_OUT_HARQ);
+
+		next_triplet++;
+	}
+
+	*h_out_length = output_length >> 3;
+	dec->hard_output.length += *h_out_length;
+	*h_out_offset += *h_out_length;
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->d2mlen = next_triplet - desc->m2dlen;
+
+	desc->op_addr = op;
+
+	return 0;
+}
+
 /* Enqueue one encode operations for device in CB mode. */
 static inline int
 enqueue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
@@ -1877,6 +2233,7 @@  enqueue_ldpc_enc_n_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op **ops,
 	/** This could be done at polling. */
 	acc_header_init(&desc->req);
 	desc->req.numCBs = num;
+	desc->req.dltb = 0;
 
 	in_length_in_bytes = ops[0]->ldpc_enc.input.data->data_len;
 	out_length = (enc->cb_params.e + 7) >> 3;
@@ -2102,6 +2459,105 @@  vrb1_enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op
 	return return_descs;
 }
 
+/* Fill in a frame control word for LDPC encoding. */
+static inline void
+vrb2_fcw_letb_fill(const struct rte_bbdev_enc_op *op, struct acc_fcw_le *fcw)
+{
+	fcw->qm = op->ldpc_enc.q_m;
+	fcw->nfiller = op->ldpc_enc.n_filler;
+	fcw->BG = (op->ldpc_enc.basegraph - 1);
+	fcw->Zc = op->ldpc_enc.z_c;
+	fcw->ncb = op->ldpc_enc.n_cb;
+	fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_enc.basegraph,
+			op->ldpc_enc.rv_index);
+	fcw->rm_e = op->ldpc_enc.tb_params.ea;
+	fcw->rm_e_b = op->ldpc_enc.tb_params.eb;
+	fcw->crc_select = check_bit(op->ldpc_enc.op_flags,
+			RTE_BBDEV_LDPC_CRC_24B_ATTACH);
+	fcw->bypass_intlv = 0;
+	if (op->ldpc_enc.tb_params.c > 1) {
+		fcw->mcb_count = 0;
+		fcw->C = op->ldpc_enc.tb_params.c;
+		fcw->Cab = op->ldpc_enc.tb_params.cab;
+	} else {
+		fcw->mcb_count = 1;
+		fcw->C = 0;
+	}
+}
+
+/* Enqueue one encode operations for device in TB mode.
+ * returns the number of descs used.
+ */
+static inline int
+vrb2_enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
+		uint16_t enq_descs)
+{
+	union acc_dma_desc *desc = NULL;
+	uint32_t in_offset, out_offset, out_length, seg_total_left;
+	struct rte_mbuf *input, *output_head, *output;
+
+	uint16_t desc_idx = ((q->sw_ring_head + enq_descs) & q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	vrb2_fcw_letb_fill(op, &desc->req.fcw_le);
+	struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
+	int next_triplet = 1; /* FCW already done */
+	uint32_t in_length_in_bytes;
+	uint16_t K, in_length_in_bits;
+
+	input = enc->input.data;
+	output_head = output = enc->output.data;
+	in_offset = enc->input.offset;
+	out_offset = enc->output.offset;
+	seg_total_left = rte_pktmbuf_data_len(enc->input.data) - in_offset;
+
+	acc_header_init(&desc->req);
+	K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
+	in_length_in_bits = K - enc->n_filler;
+	if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) ||
+			(enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH))
+		in_length_in_bits -= 24;
+	in_length_in_bytes = (in_length_in_bits >> 3) * enc->tb_params.c;
+
+	next_triplet = acc_dma_fill_blk_type_in(&desc->req, &input, &in_offset,
+			in_length_in_bytes, &seg_total_left, next_triplet,
+			check_bit(enc->op_flags, RTE_BBDEV_LDPC_ENC_SCATTER_GATHER));
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+	desc->req.data_ptrs[next_triplet - 1].last = 1;
+	desc->req.m2dlen = next_triplet;
+
+	/* Set output length */
+	/* Integer round up division by 8 */
+	out_length = (enc->tb_params.ea * enc->tb_params.cab +
+			enc->tb_params.eb * (enc->tb_params.c - enc->tb_params.cab)  + 7) >> 3;
+
+	next_triplet = acc_dma_fill_blk_type(&desc->req, output, out_offset,
+			out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC);
+	enc->output.length = out_length;
+	out_offset += out_length;
+	desc->req.data_ptrs[next_triplet - 1].last = 1;
+	desc->req.data_ptrs[next_triplet - 1].dma_ext = 0;
+	desc->req.d2mlen = next_triplet - desc->req.m2dlen;
+	desc->req.numCBs = enc->tb_params.c;
+	if (desc->req.numCBs > 1)
+		desc->req.dltb = 1;
+	desc->req.op_addr = op;
+
+	if (out_length < ACC_MAX_E_MBUF)
+		mbuf_append(output_head, output, out_length);
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	rte_memdump(stderr, "FCW", &desc->req.fcw_le, sizeof(desc->req.fcw_le));
+	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+#endif
+	/* One CB (one op) was successfully prepared to enqueue */
+	return 1;
+}
+
 /** Enqueue one decode operations for device in CB mode. */
 static inline int
 enqueue_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
@@ -2215,10 +2671,16 @@  vrb_enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
 		else
 			seg_total_left = fcw->rm_e;
 
-		ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input, h_output,
-				&in_offset, &h_out_offset,
-				&h_out_length, &mbuf_total_left,
-				&seg_total_left, fcw);
+		if (q->d->device_variant == VRB1_VARIANT)
+			ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input, h_output,
+					&in_offset, &h_out_offset,
+					&h_out_length, &mbuf_total_left,
+					&seg_total_left, fcw);
+		else
+			ret = vrb2_dma_desc_ld_fill(op, &desc->req, &input, h_output,
+					&in_offset, &h_out_offset,
+					&h_out_length, &mbuf_total_left,
+					&seg_total_left, fcw);
 		if (unlikely(ret < 0))
 			return ret;
 	}
@@ -2308,11 +2770,18 @@  vrb_enqueue_ldpc_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
 		rte_memcpy(&desc->req.fcw_ld, &desc_first->req.fcw_ld, ACC_FCW_LD_BLEN);
 		desc->req.fcw_ld.tb_trailer_size = (c - r - 1) * trail_len;
 
-		ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
-				h_output, &in_offset, &h_out_offset,
-				&h_out_length,
-				&mbuf_total_left, &seg_total_left,
-				&desc->req.fcw_ld);
+		if (q->d->device_variant == VRB1_VARIANT)
+			ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
+					h_output, &in_offset, &h_out_offset,
+					&h_out_length,
+					&mbuf_total_left, &seg_total_left,
+					&desc->req.fcw_ld);
+		else
+			ret = vrb2_dma_desc_ld_fill(op, &desc->req, &input,
+					h_output, &in_offset, &h_out_offset,
+					&h_out_length,
+					&mbuf_total_left, &seg_total_left,
+					&desc->req.fcw_ld);
 
 		if (unlikely(ret < 0))
 			return ret;
@@ -2576,14 +3045,22 @@  vrb_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data,
 	int descs_used;
 
 	for (i = 0; i < num; ++i) {
-		cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]->ldpc_enc);
-		/* Check if there are available space for further processing. */
-		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
-			acc_enqueue_ring_full(q_data);
-			break;
+		if (q->d->device_variant == VRB1_VARIANT) {
+			cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]->ldpc_enc);
+			/* Check if there are available space for further processing. */
+			if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
+				acc_enqueue_ring_full(q_data);
+				break;
+			}
+			descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q, ops[i],
+					enqueued_descs, cbs_in_tb);
+		} else {
+			if (unlikely(avail < 1)) {
+				acc_enqueue_ring_full(q_data);
+				break;
+			}
+			descs_used = vrb2_enqueue_ldpc_enc_one_op_tb(q, ops[i], enqueued_descs);
 		}
-
-		descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q, ops[i], enqueued_descs, cbs_in_tb);
 		if (descs_used < 0) {
 			acc_enqueue_invalid(q_data);
 			break;
@@ -2865,6 +3342,52 @@  vrb_dequeue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op **ref_op,
 	return desc->req.numCBs;
 }
 
+/* Dequeue one LDPC encode operations from VRB2 device in TB mode. */
+static inline int
+vrb2_dequeue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op **ref_op,
+		uint16_t *dequeued_ops, uint32_t *aq_dequeued,
+		uint16_t *dequeued_descs)
+{
+	union acc_dma_desc *desc, atom_desc;
+	union acc_dma_rsp_desc rsp;
+	struct rte_bbdev_enc_op *op;
+	int desc_idx = ((q->sw_ring_tail + *dequeued_descs) & q->sw_ring_wrap_mask);
+
+	desc = q->ring_addr + desc_idx;
+	atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc, __ATOMIC_RELAXED);
+
+	/* Check fdone bit. */
+	if (!(atom_desc.rsp.val & ACC_FDONE))
+		return -1;
+
+	rsp.val = atom_desc.rsp.val;
+	rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val);
+
+	/* Dequeue. */
+	op = desc->req.op_addr;
+
+	/* Clearing status, it will be set based on response. */
+	op->status = 0;
+	op->status |= rsp.input_err << RTE_BBDEV_DATA_ERROR;
+	op->status |= rsp.dma_err << RTE_BBDEV_DRV_ERROR;
+	op->status |= rsp.fcw_err << RTE_BBDEV_DRV_ERROR;
+	op->status |= rsp.engine_hung << RTE_BBDEV_ENGINE_ERROR;
+
+	if (desc->req.last_desc_in_batch) {
+		(*aq_dequeued)++;
+		desc->req.last_desc_in_batch = 0;
+	}
+	desc->rsp.val = ACC_DMA_DESC_TYPE;
+	desc->rsp.add_info_0 = 0; /* Reserved bits. */
+	desc->rsp.add_info_1 = 0; /* Reserved bits. */
+
+	/* One op was successfully dequeued */
+	ref_op[0] = op;
+	(*dequeued_descs)++;
+	(*dequeued_ops)++;
+	return 1;
+}
+
 /* Dequeue one LDPC encode operations from device in TB mode.
  * That operation may cover multiple descriptors.
  */
@@ -3189,9 +3712,14 @@  vrb_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
 
 	for (i = 0; i < avail; i++) {
 		if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
-			ret = vrb_dequeue_enc_one_op_tb(q, &ops[dequeued_ops],
-					&dequeued_ops, &aq_dequeued,
-					&dequeued_descs, num);
+			if (q->d->device_variant == VRB1_VARIANT)
+				ret = vrb_dequeue_enc_one_op_tb(q, &ops[dequeued_ops],
+						&dequeued_ops, &aq_dequeued,
+						&dequeued_descs, num);
+			else
+				ret = vrb2_dequeue_ldpc_enc_one_op_tb(q, &ops[dequeued_ops],
+						&dequeued_ops, &aq_dequeued,
+						&dequeued_descs);
 		else
 			ret = vrb_dequeue_enc_one_op_cb(q, &ops[dequeued_ops],
 					&dequeued_ops, &aq_dequeued,
@@ -3536,6 +4064,7 @@  vrb_bbdev_init(struct rte_bbdev *dev, struct rte_pci_driver *drv)
 	} else {
 		d->device_variant = VRB2_VARIANT;
 		d->queue_offset = vrb2_queue_offset;
+		d->fcw_ld_fill = vrb2_fcw_ld_fill;
 		d->num_qgroups = VRB2_NUM_QGRPS;
 		d->num_aqs = VRB2_NUM_AQS;
 		if (d->pf_device)