@@ -122,6 +122,7 @@ struct mlx5_rxq_data {
volatile struct mlx5_cqe (*cqes)[];
struct mlx5_cqe title_cqe; /* Title CQE for CQE compression. */
struct rte_mbuf *(*elts)[];
+ struct rte_mbuf title_pkt; /* Title packet for CQE compression. */
struct mlx5_mprq_buf *(*mprq_bufs)[];
struct rte_mempool *mp;
struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
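The new title_pkt field complements the existing title_cqe: with the enhanced
CQE compression layout the title packet is no longer delivered as the first
element of the session, so the Rx queue must cache a pre-built mbuf for the
decompressor to clone from. A minimal sketch of how the two sources are
selected, using the names introduced by this patch:

        /* Sketch only: where the decompressor takes its template mbuf from. */
        struct rte_mbuf *t_pkt = rxq->cqe_comp_layout ?
                                 &rxq->title_pkt : /* enhanced: cached in rxq */
                                 elts[0];          /* legacy: first element */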
@@ -290,13 +290,14 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
const uint16_t q_mask = q_n - 1;
const uint16_t e_n = 1 << rxq->elts_n;
const uint16_t e_mask = e_n - 1;
- volatile struct mlx5_cqe *cq;
+ volatile struct mlx5_cqe *cq, *next;
struct rte_mbuf **elts;
uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
uint16_t nocmp_n = 0;
uint16_t rcvd_pkt = 0;
unsigned int cq_idx = rxq->cq_ci & q_mask;
unsigned int elts_idx;
+ int ret;
MLX5_ASSERT(rxq->sges_n == 0);
MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
@@ -342,6 +343,15 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
rxq->cq_ci += nocmp_n;
rxq->rq_pi += nocmp_n;
rcvd_pkt += nocmp_n;
+ /* Copy title packet for future compressed sessions. */
+ if (rxq->cqe_comp_layout) {
+ next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
+ ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
+ if (ret != MLX5_CQE_STATUS_SW_OWN ||
+ MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED)
+ rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
+ sizeof(struct rte_mbuf));
+ }
/* Decompress the last CQE if compressed. */
if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
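The title packet is snapshotted only when the CQE at the new consumer index is
not a plain SW-owned completion, i.e. whenever a compressed session may start
there and will need the last regular packet as its template.
check_cqe_iteration() is the enhanced-layout ownership test; a rough scalar
model of it, with the helper name cqe_owned_by_sw() being hypothetical:

        /* Rough model of check_cqe_iteration() (see mlx5_rx.h): a CQE is
         * SW-owned when its validity iteration count matches the expected
         * count for this pass over the ring. rxq->cqe_n is log2 of the ring
         * size, so the expected count increments on every wraparound.
         */
        static inline int
        cqe_owned_by_sw(volatile struct mlx5_cqe *cqe, uint16_t log_cqe_n,
                        uint32_t ci)
        {
                return cqe->validity_iteration_count ==
                       (uint8_t)(ci >> log_cqe_n);
        }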
@@ -431,7 +441,7 @@ rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
const uint32_t elts_n = wqe_n * strd_n;
const uint32_t elts_mask = elts_n - 1;
- volatile struct mlx5_cqe *cq;
+ volatile struct mlx5_cqe *cq, *next;
struct rte_mbuf **elts;
uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
uint16_t nocmp_n = 0;
@@ -439,6 +449,7 @@ rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
uint16_t cp_pkt = 0;
unsigned int cq_idx = rxq->cq_ci & q_mask;
unsigned int elts_idx;
+ int ret;
MLX5_ASSERT(rxq->sges_n == 0);
cq = &(*rxq->cqes)[cq_idx];
@@ -482,6 +493,15 @@ rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
MLX5_ASSERT(nocmp_n <= pkts_n);
cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
rcvd_pkt += cp_pkt;
+ /* Copy title packet for future compressed sessions. */
+ if (rxq->cqe_comp_layout) {
+ next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
+ ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
+ if (ret != MLX5_CQE_STATUS_SW_OWN ||
+ MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED)
+ rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
+ sizeof(struct rte_mbuf));
+ }
/* Decompress the last CQE if compressed. */
if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
@@ -76,8 +76,10 @@ static inline uint16_t
rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
struct rte_mbuf **elts)
{
- volatile struct mlx5_mini_cqe8 *mcq = (void *)&(cq + 1)->pkt_info;
- struct rte_mbuf *t_pkt = elts[0]; /* Title packet is pre-built. */
+ volatile struct mlx5_mini_cqe8 *mcq =
+ (void *)&(cq + !rxq->cqe_comp_layout)->pkt_info;
+ /* Title packet is pre-built. */
+ struct rte_mbuf *t_pkt = rxq->cqe_comp_layout ? &rxq->title_pkt : elts[0];
const __vector unsigned char zero = (__vector unsigned char){0};
/* Mask to shuffle from extracted mini CQE to mbuf. */
const __vector unsigned char shuf_mask1 = (__vector unsigned char){
@@ -93,8 +95,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
-1, -1, /* skip vlan_tci */
11, 10, 9, 8}; /* bswap32, rss */
/* Restore the compressed count. Must be 16 bits. */
- const uint16_t mcqe_n = t_pkt->data_len +
- (rxq->crc_present * RTE_ETHER_CRC_LEN);
+ uint16_t mcqe_n = (rxq->cqe_comp_layout) ?
+ (MLX5_CQE_NUM_MINIS(cq->op_own) + 1) :
+ t_pkt->data_len + (rxq->crc_present * RTE_ETHER_CRC_LEN);
+ uint16_t pkts_n = mcqe_n;
const __vector unsigned char rearm =
(__vector unsigned char)vec_vsx_ld(0,
(signed int const *)&t_pkt->rearm_data);
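mcqe_n also loses its const qualifier here because the enhanced layout reloads
it per miniCQE array (see the cycle loop below). Session sizing differs between
the layouts: legacy compression carries the count in the pre-built title
packet's data_len (re-adding the CRC length that was stripped when that mbuf
was built), while the enhanced layout encodes the size of each array directly
in op_own. A scalar sketch, with session_size() being a hypothetical helper:

        static inline uint16_t
        session_size(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
                     const struct rte_mbuf *t_pkt)
        {
                if (rxq->cqe_comp_layout) /* per miniCQE array */
                        return MLX5_CQE_NUM_MINIS(cq->op_own) + 1;
                /* legacy: per session, carried by the title packet */
                return t_pkt->data_len +
                       rxq->crc_present * RTE_ETHER_CRC_LEN;
        }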
@@ -132,6 +136,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
* D. store rx_descriptor_fields1.
* E. store flow tag (rte_flow mark).
*/
+cycle:
+ if (rxq->cqe_comp_layout)
+ rte_prefetch0((void *)(cq + mcqe_n));
for (pos = 0; pos < mcqe_n; ) {
__vector unsigned char mcqe1, mcqe2;
__vector unsigned char rxdf1, rxdf2;
@@ -154,9 +161,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
const __vector unsigned long shmax = {64, 64};
#endif
- for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
- if (likely(pos + i < mcqe_n))
- rte_prefetch0((void *)(cq + pos + i));
+ if (!rxq->cqe_comp_layout)
+ for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
+ if (likely(pos + i < mcqe_n))
+ rte_prefetch0((void *)(cq + pos + i));
/* A.1 load mCQEs into a 128bit register. */
mcqe1 = (__vector unsigned char)vec_vsx_ld(0,
(signed int const *)&mcq[pos % 8]);
@@ -488,25 +496,43 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
pos += MLX5_VPMD_DESCS_PER_LOOP;
/* Move to next CQE and invalidate consumed CQEs. */
- if (!(pos & 0x7) && pos < mcqe_n) {
- if (pos + 8 < mcqe_n)
- rte_prefetch0((void *)(cq + pos + 8));
- mcq = (void *)&(cq + pos)->pkt_info;
- for (i = 0; i < 8; ++i)
- cq[inv++].op_own = MLX5_CQE_INVALIDATE;
+ if (!rxq->cqe_comp_layout) {
+ if (!(pos & 0x7) && pos < mcqe_n) {
+ if (pos + 8 < mcqe_n)
+ rte_prefetch0((void *)(cq + pos + 8));
+ mcq = (void *)&(cq + pos)->pkt_info;
+ for (i = 0; i < 8; ++i)
+ cq[inv++].op_own = MLX5_CQE_INVALIDATE;
+ }
}
}
- /* Invalidate the rest of CQEs. */
- for (; inv < mcqe_n; ++inv)
- cq[inv].op_own = MLX5_CQE_INVALIDATE;
+ if (rxq->cqe_comp_layout) {
+ int ret;
+ /* Keep unzipping if the next CQE carries a miniCQE array. */
+ cq = &cq[mcqe_n];
+ ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci + pkts_n);
+ if (ret == MLX5_CQE_STATUS_SW_OWN &&
+ MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED) {
+ pos = 0;
+ elts = &elts[mcqe_n];
+ mcq = (void *)cq;
+ mcqe_n = MLX5_CQE_NUM_MINIS(cq->op_own) + 1;
+ pkts_n += mcqe_n;
+ goto cycle;
+ }
+ } else {
+ /* Invalidate the rest of CQEs. */
+ for (; inv < pkts_n; ++inv)
+ cq[inv].op_own = MLX5_CQE_INVALIDATE;
+ }
#ifdef MLX5_PMD_SOFT_COUNTERS
- rxq->stats.ipackets += mcqe_n;
+ rxq->stats.ipackets += pkts_n;
rxq->stats.ibytes += rcvd_byte;
#endif
- return mcqe_n;
+ return pkts_n;
}
/**
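Under the enhanced layout each compressed CQE embeds its own miniCQE array
(hence the (cq + !rxq->cqe_comp_layout) base above), consecutive arrays are
drained in one call via the cycle label, and the per-iteration CQE prefetches
are replaced by a single prefetch of the CQE following the current array. Note
that this path never writes MLX5_CQE_INVALIDATE back either: the validity
iteration count makes stale CQEs self-identifying. The control flow reduces to
the following shape, with unzip_mcqe_array() standing in for the SIMD body:

        uint16_t pkts_n = 0;

        do {
                uint16_t mcqe_n = MLX5_CQE_NUM_MINIS(cq->op_own) + 1;

                /* Hypothetical: the vectorized loop over one array. */
                unzip_mcqe_array(rxq, cq, elts, mcqe_n);
                elts += mcqe_n;
                cq += mcqe_n; /* no separate title CQE to skip */
                pkts_n += mcqe_n;
        } while (check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci + pkts_n) ==
                 MLX5_CQE_STATUS_SW_OWN &&
                 MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED);
        return pkts_n; /* ipackets is bumped by the same total */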
@@ -787,9 +813,13 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
uint64_t n = 0;
uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
uint16_t nocmp_n = 0;
- unsigned int ownership = !!(rxq->cq_ci & (q_mask + 1));
+ const uint8_t vic = rxq->cq_ci >> rxq->cqe_n;
+ unsigned int own = !(rxq->cq_ci & (q_mask + 1));
const __vector unsigned char zero = (__vector unsigned char){0};
const __vector unsigned char ones = vec_splat_u8(-1);
+ const __vector unsigned char vic_check =
+ (__vector unsigned char)(__vector unsigned long){
+ 0x00ff000000ff0000LL, 0x00ff000000ff0000LL};
const __vector unsigned char owner_check =
(__vector unsigned char)(__vector unsigned long){
0x0100000001000000LL, 0x0100000001000000LL};
@@ -837,7 +867,16 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
(__vector unsigned short){0, 0, 0, 0, 0xffff, 0xffff, 0, 0};
const __vector unsigned short cqe_sel_mask2 =
(__vector unsigned short){0, 0, 0xffff, 0, 0, 0, 0, 0};
-
+ const __vector unsigned char validity = (__vector unsigned char){
+ 0, 0, vic, 0,
+ 0, 0, vic, 0,
+ 0, 0, vic, 0,
+ 0, 0, vic, 0};
+ const __vector unsigned char ownership = (__vector unsigned char){
+ 0, 0, 0, own,
+ 0, 0, 0, own,
+ 0, 0, 0, own,
+ 0, 0, 0, own};
/*
* A. load first Qword (8bytes) in one loop.
* B. copy 4 mbuf pointers from elts ring to returning pkts.
@@ -848,7 +887,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
* uint8_t pkt_info;
* uint8_t flow_tag[3];
* uint16_t byte_cnt;
- * uint8_t rsvd4;
+ * uint8_t validity_iteration_count;
* uint8_t op_own;
* uint16_t hdr_type_etc;
* uint16_t vlan_info;
@@ -1082,17 +1121,25 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
*(__vector unsigned char *)
&pkts[pos]->pkt_len = pkt_mb0;
- /* E.2 flip owner bit to mark CQEs from last round. */
- owner_mask = (__vector unsigned char)
- vec_and((__vector unsigned long)op_own,
- (__vector unsigned long)owner_check);
- if (ownership)
+ /* E.2 mask out CQEs belonging to HW. */
+ if (rxq->cqe_comp_layout) {
+ owner_mask = (__vector unsigned char)
+ vec_and((__vector unsigned long)op_own,
+ (__vector unsigned long)vic_check);
+ owner_mask = (__vector unsigned char)
+ vec_cmpeq((__vector unsigned int)owner_mask,
+ (__vector unsigned int)validity);
owner_mask = (__vector unsigned char)
vec_xor((__vector unsigned long)owner_mask,
+ (__vector unsigned long)ones);
+ } else {
+ owner_mask = (__vector unsigned char)
+ vec_and((__vector unsigned long)op_own,
(__vector unsigned long)owner_check);
- owner_mask = (__vector unsigned char)
- vec_cmpeq((__vector unsigned int)owner_mask,
- (__vector unsigned int)owner_check);
+ owner_mask = (__vector unsigned char)
+ vec_cmpeq((__vector unsigned int)owner_mask,
+ (__vector unsigned int)ownership);
+ }
owner_mask = (__vector unsigned char)
vec_packs((__vector unsigned int)owner_mask,
(__vector unsigned int)zero);
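E.2 now has two variants. A legacy CQE exposes a single owner bit that must
equal the wrap parity of cq_ci; an enhanced-layout CQE instead carries an 8-bit
validity iteration count, and the compare result is inverted because equality
means the CQE belongs to software. A scalar rendering of both branches, with
cqe_is_hw_owned() being a hypothetical helper:

        static inline int
        cqe_is_hw_owned(struct mlx5_rxq_data *rxq,
                        volatile struct mlx5_cqe *cqe, uint32_t q_mask)
        {
                if (rxq->cqe_comp_layout) {
                        const uint8_t vic = rxq->cq_ci >> rxq->cqe_n;

                        return cqe->validity_iteration_count != vic;
                }
                /* legacy: SW owns when the owner bit equals the wrap parity */
                return (cqe->op_own & MLX5_CQE_OWNER_MASK) ==
                       !(rxq->cq_ci & (q_mask + 1));
        }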
@@ -1174,7 +1221,8 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
(__vector unsigned long)mask);
/* D.3 check error in opcode. */
- adj = (comp_idx != MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n);
+ adj = (!rxq->cqe_comp_layout &&
+ comp_idx != MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n);
mask = (__vector unsigned char)(__vector unsigned long){
(adj * sizeof(uint16_t) * 8), 0};
lshift = vec_splat((__vector unsigned long)mask, 0);
@@ -71,8 +71,10 @@ static inline uint16_t
rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
struct rte_mbuf **elts)
{
- volatile struct mlx5_mini_cqe8 *mcq = (void *)&(cq + 1)->pkt_info;
- struct rte_mbuf *t_pkt = elts[0]; /* Title packet is pre-built. */
+ volatile struct mlx5_mini_cqe8 *mcq =
+ (void *)&(cq + !rxq->cqe_comp_layout)->pkt_info;
+ /* Title packet is pre-built. */
+ struct rte_mbuf *t_pkt = rxq->cqe_comp_layout ? &rxq->title_pkt : elts[0];
unsigned int pos;
unsigned int i;
unsigned int inv = 0;
@@ -92,8 +94,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
11, 10, 9, 8 /* hash.rss, bswap32 */
};
/* Restore the compressed count. Must be 16 bits. */
- const uint16_t mcqe_n = t_pkt->data_len +
- (rxq->crc_present * RTE_ETHER_CRC_LEN);
+ uint16_t mcqe_n = (rxq->cqe_comp_layout) ?
+ (MLX5_CQE_NUM_MINIS(cq->op_own) + 1) :
+ t_pkt->data_len + (rxq->crc_present * RTE_ETHER_CRC_LEN);
+ uint16_t pkts_n = mcqe_n;
const uint64x2_t rearm =
vld1q_u64((void *)&t_pkt->rearm_data);
const uint32x4_t rxdf_mask = {
@@ -131,6 +135,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
* D. store rx_descriptor_fields1.
* E. store flow tag (rte_flow mark).
*/
+cycle:
+ if (rxq->cqe_comp_layout)
+ rte_prefetch0((void *)(cq + mcqe_n));
for (pos = 0; pos < mcqe_n; ) {
uint8_t *p = (void *)&mcq[pos % 8];
uint8_t *e0 = (void *)&elts[pos]->rearm_data;
@@ -145,9 +152,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
sizeof(uint16_t) * 8) : 0);
#endif
- for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
- if (likely(pos + i < mcqe_n))
- rte_prefetch0((void *)(cq + pos + i));
+ if (!rxq->cqe_comp_layout)
+ for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
+ if (likely(pos + i < mcqe_n))
+ rte_prefetch0((void *)(cq + pos + i));
__asm__ volatile (
/* A.1 load mCQEs into a 128bit register. */
"ld1 {v16.16b - v17.16b}, [%[mcq]] \n\t"
@@ -354,22 +362,40 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
}
pos += MLX5_VPMD_DESCS_PER_LOOP;
/* Move to next CQE and invalidate consumed CQEs. */
- if (!(pos & 0x7) && pos < mcqe_n) {
- if (pos + 8 < mcqe_n)
- rte_prefetch0((void *)(cq + pos + 8));
- mcq = (void *)&(cq + pos)->pkt_info;
- for (i = 0; i < 8; ++i)
- cq[inv++].op_own = MLX5_CQE_INVALIDATE;
+ if (!rxq->cqe_comp_layout) {
+ if (!(pos & 0x7) && pos < mcqe_n) {
+ if (pos + 8 < mcqe_n)
+ rte_prefetch0((void *)(cq + pos + 8));
+ mcq = (void *)&(cq + pos)->pkt_info;
+ for (i = 0; i < 8; ++i)
+ cq[inv++].op_own = MLX5_CQE_INVALIDATE;
+ }
+ }
+ }
+ if (rxq->cqe_comp_layout) {
+ int ret;
+ /* Keep unzipping if the next CQE carries a miniCQE array. */
+ cq = &cq[mcqe_n];
+ ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci + pkts_n);
+ if (ret == MLX5_CQE_STATUS_SW_OWN &&
+ MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED) {
+ pos = 0;
+ elts = &elts[mcqe_n];
+ mcq = (void *)cq;
+ mcqe_n = MLX5_CQE_NUM_MINIS(cq->op_own) + 1;
+ pkts_n += mcqe_n;
+ goto cycle;
}
+ } else {
+ /* Invalidate the rest of CQEs. */
+ for (; inv < pkts_n; ++inv)
+ cq[inv].op_own = MLX5_CQE_INVALIDATE;
}
- /* Invalidate the rest of CQEs. */
- for (; inv < mcqe_n; ++inv)
- cq[inv].op_own = MLX5_CQE_INVALIDATE;
#ifdef MLX5_PMD_SOFT_COUNTERS
- rxq->stats.ipackets += mcqe_n;
+ rxq->stats.ipackets += pkts_n;
rxq->stats.ibytes += rcvd_byte;
#endif
- return mcqe_n;
+ return pkts_n;
}
/**
@@ -528,7 +554,9 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
uint64_t n = 0;
uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
uint16_t nocmp_n = 0;
+ const uint16x4_t validity = vdup_n_u16((rxq->cq_ci >> rxq->cqe_n) << 8);
const uint16x4_t ownership = vdup_n_u16(!(rxq->cq_ci & (q_mask + 1)));
+ const uint16x4_t vic_check = vcreate_u16(0xff00ff00ff00ff00);
const uint16x4_t owner_check = vcreate_u16(0x0001000100010001);
const uint16x4_t opcode_check = vcreate_u16(0x00f000f000f000f0);
const uint16x4_t format_check = vcreate_u16(0x000c000c000c000c);
@@ -547,7 +575,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
const uint8x16_t cqe_shuf_m = {
28, 29, /* hdr_type_etc */
0, /* pkt_info */
- -1, /* null */
+ 62, /* validity_iteration_count */
47, 46, /* byte_cnt, bswap16 */
31, 30, /* vlan_info, bswap16 */
15, 14, 13, 12, /* rx_hash_res, bswap32 */
@@ -564,10 +592,10 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
};
/* Mask to generate 16B owner vector. */
const uint8x8_t owner_shuf_m = {
- 63, -1, /* 4th CQE */
- 47, -1, /* 3rd CQE */
- 31, -1, /* 2nd CQE */
- 15, -1 /* 1st CQE */
+ 63, 51, /* 4th CQE */
+ 47, 35, /* 3rd CQE */
+ 31, 19, /* 2nd CQE */
+ 15, 3 /* 1st CQE */
};
/* Mask to generate a vector having packet_type/ol_flags. */
const uint8x16_t ptype_shuf_m = {
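owner_shuf_m previously zero-filled every odd byte (-1); it now gathers the
validity_iteration_count byte of each CQE next to its op_own byte, so after the
table lookup every 16-bit lane of the owner vector carries both fields and one
compare serves either layout:

        /* Per-lane layout after the owner_shuf_m gather (4th CQE in lane 0
         * through 1st CQE in lane 3):
         *
         *     lane = (validity_iteration_count << 8) | op_own
         *
         * vic_check (0xff00 per lane) isolates the VIC byte for the enhanced
         * layout; owner_check (0x0001) still isolates the legacy owner bit.
         */
        const uint16x4_t vic_check = vcreate_u16(0xff00ff00ff00ff00);
        const uint16x4_t owner_check = vcreate_u16(0x0001000100010001);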
@@ -600,7 +628,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
* struct {
* uint16_t hdr_type_etc;
* uint8_t pkt_info;
- * uint8_t rsvd;
+ * uint8_t validity_iteration_count;
* uint16_t byte_cnt;
* uint16_t vlan_info;
* uint32_t rx_hash_res;
@@ -748,9 +776,15 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
"v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23",
"v24", "v25");
- /* D.2 flip owner bit to mark CQEs from last round. */
- owner_mask = vand_u16(op_own, owner_check);
- owner_mask = vceq_u16(owner_mask, ownership);
+ /* D.2 mask out CQEs belonging to HW. */
+ if (rxq->cqe_comp_layout) {
+ owner_mask = vand_u16(op_own, vic_check);
+ owner_mask = vceq_u16(owner_mask, validity);
+ owner_mask = vmvn_u16(owner_mask);
+ } else {
+ owner_mask = vand_u16(op_own, owner_check);
+ owner_mask = vceq_u16(owner_mask, ownership);
+ }
/* D.3 get mask for invalidated CQEs. */
opcode = vand_u16(op_own, opcode_check);
invalid_mask = vceq_u16(opcode_check, opcode);
@@ -780,7 +814,8 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
-1UL >> (n * sizeof(uint16_t) * 8) : 0);
invalid_mask = vorr_u16(invalid_mask, mask);
/* D.3 check error in opcode. */
- adj = (comp_idx != MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n);
+ adj = (!rxq->cqe_comp_layout &&
+ comp_idx != MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n);
mask = vcreate_u16(adj ?
-1UL >> ((n + 1) * sizeof(uint16_t) * 8) : -1UL);
mini_mask = vand_u16(invalid_mask, mask);
@@ -73,8 +73,9 @@ static inline uint16_t
rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
struct rte_mbuf **elts)
{
- volatile struct mlx5_mini_cqe8 *mcq = (void *)(cq + 1);
- struct rte_mbuf *t_pkt = elts[0]; /* Title packet is pre-built. */
+ volatile struct mlx5_mini_cqe8 *mcq = (void *)(cq + !rxq->cqe_comp_layout);
+ /* Title packet is pre-built. */
+ struct rte_mbuf *t_pkt = rxq->cqe_comp_layout ? &rxq->title_pkt : elts[0];
unsigned int pos;
unsigned int i;
unsigned int inv = 0;
@@ -92,8 +93,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
-1, -1, 14, 15, /* pkt_len, bswap16 */
-1, -1, -1, -1 /* skip packet_type */);
/* Restore the compressed count. Must be 16 bits. */
- const uint16_t mcqe_n = t_pkt->data_len +
- (rxq->crc_present * RTE_ETHER_CRC_LEN);
+ uint16_t mcqe_n = (rxq->cqe_comp_layout) ?
+ (MLX5_CQE_NUM_MINIS(cq->op_own) + 1) :
+ t_pkt->data_len + (rxq->crc_present * RTE_ETHER_CRC_LEN);
+ uint16_t pkts_n = mcqe_n;
const __m128i rearm =
_mm_loadu_si128((__m128i *)&t_pkt->rearm_data);
const __m128i rxdf =
@@ -124,6 +127,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
* D. store rx_descriptor_fields1.
* E. store flow tag (rte_flow mark).
*/
+cycle:
+ if (rxq->cqe_comp_layout)
+ rte_prefetch0((void *)(cq + mcqe_n));
for (pos = 0; pos < mcqe_n; ) {
__m128i mcqe1, mcqe2;
__m128i rxdf1, rxdf2;
@@ -131,9 +137,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
__m128i byte_cnt, invalid_mask;
#endif
- for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
- if (likely(pos + i < mcqe_n))
- rte_prefetch0((void *)(cq + pos + i));
+ if (!rxq->cqe_comp_layout)
+ for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
+ if (likely(pos + i < mcqe_n))
+ rte_prefetch0((void *)(cq + pos + i));
/* A.1 load mCQEs into a 128bit register. */
mcqe1 = _mm_loadu_si128((__m128i *)&mcq[pos % 8]);
mcqe2 = _mm_loadu_si128((__m128i *)&mcq[pos % 8 + 2]);
@@ -344,22 +351,40 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
}
pos += MLX5_VPMD_DESCS_PER_LOOP;
/* Move to next CQE and invalidate consumed CQEs. */
- if (!(pos & 0x7) && pos < mcqe_n) {
- if (pos + 8 < mcqe_n)
- rte_prefetch0((void *)(cq + pos + 8));
- mcq = (void *)(cq + pos);
- for (i = 0; i < 8; ++i)
- cq[inv++].op_own = MLX5_CQE_INVALIDATE;
+ if (!rxq->cqe_comp_layout) {
+ if (!(pos & 0x7) && pos < mcqe_n) {
+ if (pos + 8 < mcqe_n)
+ rte_prefetch0((void *)(cq + pos + 8));
+ mcq = (void *)(cq + pos);
+ for (i = 0; i < 8; ++i)
+ cq[inv++].op_own = MLX5_CQE_INVALIDATE;
+ }
+ }
+ }
+ if (rxq->cqe_comp_layout) {
+ int ret;
+ /* Keep unzipping if the next CQE carries a miniCQE array. */
+ cq = &cq[mcqe_n];
+ ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci + pkts_n);
+ if (ret == MLX5_CQE_STATUS_SW_OWN &&
+ MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED) {
+ pos = 0;
+ elts = &elts[mcqe_n];
+ mcq = (void *)cq;
+ mcqe_n = MLX5_CQE_NUM_MINIS(cq->op_own) + 1;
+ pkts_n += mcqe_n;
+ goto cycle;
}
+ } else {
+ /* Invalidate the rest of CQEs. */
+ for (; inv < pkts_n; ++inv)
+ cq[inv].op_own = MLX5_CQE_INVALIDATE;
}
- /* Invalidate the rest of CQEs. */
- for (; inv < mcqe_n; ++inv)
- cq[inv].op_own = MLX5_CQE_INVALIDATE;
#ifdef MLX5_PMD_SOFT_COUNTERS
- rxq->stats.ipackets += mcqe_n;
+ rxq->stats.ipackets += pkts_n;
rxq->stats.ibytes += rcvd_byte;
#endif
- return mcqe_n;
+ return pkts_n;
}
/**
@@ -527,7 +552,9 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
uint64_t n = 0;
uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
uint16_t nocmp_n = 0;
- unsigned int ownership = !!(rxq->cq_ci & (q_mask + 1));
+ const uint8_t vic = rxq->cq_ci >> rxq->cqe_n;
+ const uint8_t own = !(rxq->cq_ci & (q_mask + 1));
+ const __m128i vic_check = _mm_set1_epi64x(0x00ff000000ff0000LL);
const __m128i owner_check = _mm_set1_epi64x(0x0100000001000000LL);
const __m128i opcode_check = _mm_set1_epi64x(0xf0000000f0000000LL);
const __m128i format_check = _mm_set1_epi64x(0x0c0000000c000000LL);
@@ -541,6 +568,16 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
12, 13, 8, 9,
4, 5, 0, 1);
#endif
+ const __m128i validity =
+ _mm_set_epi8(0, vic, 0, 0,
+ 0, vic, 0, 0,
+ 0, vic, 0, 0,
+ 0, vic, 0, 0);
+ const __m128i ownership =
+ _mm_set_epi8(own, 0, 0, 0,
+ own, 0, 0, 0,
+ own, 0, 0, 0,
+ own, 0, 0, 0);
/* Mask to shuffle from extracted CQE to mbuf. */
const __m128i shuf_mask =
_mm_set_epi8(-1, 3, 2, 1, /* fdir.hi */
@@ -573,7 +610,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
* uint8_t pkt_info;
* uint8_t flow_tag[3];
* uint16_t byte_cnt;
- * uint8_t rsvd4;
+ * uint8_t validity_iteration_count;
* uint8_t op_own;
* uint16_t hdr_type_etc;
* uint16_t vlan_info;
@@ -689,11 +726,15 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
/* D.1 fill in mbuf - rx_descriptor_fields1. */
_mm_storeu_si128((void *)&pkts[pos + 1]->pkt_len, pkt_mb1);
_mm_storeu_si128((void *)&pkts[pos]->pkt_len, pkt_mb0);
- /* E.2 flip owner bit to mark CQEs from last round. */
- owner_mask = _mm_and_si128(op_own, owner_check);
- if (ownership)
- owner_mask = _mm_xor_si128(owner_mask, owner_check);
- owner_mask = _mm_cmpeq_epi32(owner_mask, owner_check);
+ /* E.2 mask out CQEs belonging to HW. */
+ if (rxq->cqe_comp_layout) {
+ owner_mask = _mm_and_si128(op_own, vic_check);
+ owner_mask = _mm_cmpeq_epi32(owner_mask, validity);
+ owner_mask = _mm_xor_si128(owner_mask, ones);
+ } else {
+ owner_mask = _mm_and_si128(op_own, owner_check);
+ owner_mask = _mm_cmpeq_epi32(owner_mask, ownership);
+ }
owner_mask = _mm_packs_epi32(owner_mask, zero);
/* E.3 get mask for invalidated CQEs. */
opcode = _mm_and_si128(op_own, opcode_check);
@@ -729,7 +770,8 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
mask = _mm_sll_epi64(ones, mask);
invalid_mask = _mm_or_si128(invalid_mask, mask);
/* D.3 check error in opcode. */
- adj = (comp_idx != MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n);
+ adj = (!rxq->cqe_comp_layout &&
+ comp_idx != MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n);
mask = _mm_set_epi64x(0, adj * sizeof(uint16_t) * 8);
mini_mask = _mm_sll_epi64(invalid_mask, mask);
opcode = _mm_cmpeq_epi32(resp_err_check, opcode);
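Finally, the D.3 adjustment is gated on the layout in all three
implementations. Under legacy compression the CQE that ends the uncompressed
run (comp_idx == n) is the compressed one, and the invalid-CQE mask is shifted
by one lane so that slot is still screened for a responder error. Under the
enhanced layout that CQE opens a miniCQE array which rxq_cq_decompress_v()
walks and validates itself, so the adjustment is suppressed:

        /* Scalar form of the gate. */
        adj = !rxq->cqe_comp_layout &&            /* legacy compression only */
              comp_idx != MLX5_VPMD_DESCS_PER_LOOP && /* a compressed CQE... */
              comp_idx == n;                      /* ...found this iteration */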