@@ -61,7 +61,7 @@
#define MLX4_OPCODE_SEND 0x0a
#define MLX4_EN_BIT_WQE_OWN 0x80000000
-#define SIZE_TO_TXBBS(size) (RTE_ALIGN((size), (TXBB_SIZE)) / (TXBB_SIZE))
+#define SIZE_TO_TXBBS(size) (RTE_ALIGN((size), (TXBB_SIZE)) / (TXBB_SIZE))
/**
* Update the HW with the new CQ consumer value.
@@ -148,6 +148,7 @@
/**
* Fills the ctrl segment of a WQE with info needed for transmitting the packet.
+ * Owner field is filled later.
*
* @param seg
* Pointer to the control structure in the WQE.
@@ -161,8 +162,8 @@
 *   Immediate data/invalidation key.
*/
static inline void
-mlx4_set_ctrl_seg(struct mlx4_wqe_ctrl_seg *seg, uint32_t owner,
- uint8_t fence_size, uint32_t srcrb_flags, uint32_t imm)
+mlx4_set_ctrl_seg(struct mlx4_wqe_ctrl_seg *seg, uint8_t fence_size,
+ uint32_t srcrb_flags, uint32_t imm)
{
seg->fence_size = fence_size;
seg->srcrb_flags = rte_cpu_to_be_32(srcrb_flags);
@@ -173,13 +174,6 @@
* For the IBV_WR_SEND_WITH_INV, it should be htobe32(imm).
*/
seg->imm = imm;
- /*
- * Make sure descriptor is fully written before
- * setting ownership bit (because HW can start
- * executing as soon as we do).
- */
- rte_wmb();
- seg->owner_opcode = rte_cpu_to_be_32(owner);
}
/**
@@ -241,7 +235,7 @@
* The number of data-segments the WQE contains.
*
* @return
- * WQE size in bytes.
+ * The calculated WQE size in bytes.
*/
static inline int
mlx4_wqe_calc_real_size(unsigned int count)
@@ -309,6 +309,101 @@
}
/**
+ * Copy a WQE written in the bounce buffer back to the SQ.
+ * Routine is used when a WQE wraps around the SQ and therefore needs
+ * special attention. Note that the WQE is written backward to the SQ.
+ *
+ * @param txq
+ *   Pointer to mlx4 Tx queue structure.
+ * @param index
+ *   First SQ TXBB index for this WQE.
+ * @param desc_size
+ *   TXBB-aligned size of the WQE.
+ *
+ * @return
+ * A pointer to the control segment of this WQE in the SQ.
+ */
+static struct mlx4_wqe_ctrl_seg
+*mlx4_bounce_to_desc(struct txq *txq,
+ uint32_t index,
+ unsigned int desc_size)
+{
+ struct mlx4_sq *sq = &txq->msq;
+ uint32_t copy = (sq->txbb_cnt - index) * TXBB_SIZE;
+ int i;
+
+ for (i = desc_size - copy - 4; i >= 0; i -= 4) {
+ if ((i & (TXBB_SIZE - 1)) == 0)
+ rte_wmb();
+ *((uint32_t *)(sq->buf + i)) =
+ *((uint32_t *)(txq->bounce_buf + copy + i));
+ }
+ for (i = copy - 4; i >= 4; i -= 4) {
+ if ((i & (TXBB_SIZE - 1)) == 0)
+ rte_wmb();
+ *((uint32_t *)(sq->buf + index * TXBB_SIZE + i)) =
+ *((uint32_t *)(txq->bounce_buf + i));
+ }
+ /* Return real descriptor location */
+ return (struct mlx4_wqe_ctrl_seg *)(sq->buf + index * TXBB_SIZE);
+}
+
+/**
+ * Handle address translation of scattered buffers for mlx4_tx_burst().
+ *
+ * Walks the mbuf segment chain and fills one SGE per segment with its
+ * address, length and the memory region key of its mempool. Segment
+ * data is prefetched when the device is a VF.
+ *
+ * @param txq
+ *   TX queue structure.
+ * @param[in] buf
+ *   Buffer to process.
+ * @param[out] sges
+ *   Array filled with SGEs on success; must have room for segs entries.
+ * @param segs
+ *   Number of segments in buf.
+ *
+ * @return
+ *   0 on success, -1 on failure when no memory region is associated
+ *   with the mempool of a segment (sges contents are then partially
+ *   filled and must not be used).
+ */
+static inline int
+mlx4_tx_sg_virt_to_lkey(struct txq *txq, struct rte_mbuf *buf,
+ struct ibv_sge *sges, unsigned int segs)
+{
+ unsigned int j;
+
+ /* Register segments as SGEs. */
+ for (j = 0; (j != segs); ++j) {
+ struct ibv_sge *sge = &sges[j];
+ uint32_t lkey;
+
+ /* Retrieve Memory Region key for this memory pool. */
+ lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(buf));
+ if (unlikely(lkey == (uint32_t)-1)) {
+ /* MR does not exist. */
+ DEBUG("%p: unable to get MP <-> MR association",
+ (void *)txq);
+ goto stop;
+ }
+ /* Update SGE. */
+ sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
+ if (txq->priv->vf)
+ rte_prefetch0((volatile void *)
+ (uintptr_t)sge->addr);
+ sge->length = buf->data_len;
+ sge->lkey = lkey;
+ buf = buf->next;
+ }
+ return 0;
+stop:
+ return -1;
+}
+
+
+/**
* Posts a single work requests to a send queue.
*
* @param txq
@@ -323,36 +418,53 @@
*/
static int
mlx4_post_send(struct txq *txq,
+ struct rte_mbuf *pkt,
struct ibv_send_wr *wr,
struct ibv_send_wr **bad_wr)
{
struct mlx4_wqe_ctrl_seg *ctrl;
struct mlx4_wqe_data_seg *dseg;
struct mlx4_sq *sq = &txq->msq;
+ struct ibv_sge sge[wr->num_sge];
uint32_t srcrb_flags;
uint8_t fence_size;
uint32_t head_idx = sq->head & sq->txbb_cnt_mask;
uint32_t owner_opcode;
- int wqe_real_size, nr_txbbs;
+ int wqe_real_size, wqe_size, nr_txbbs, i;
+ bool bounce = FALSE;
- /* for now we support pkts with one buf only */
- if (wr->num_sge != 1)
+ if (unlikely(mlx4_tx_sg_virt_to_lkey(txq, pkt, sge, wr->num_sge)))
goto err;
+ wr->sg_list = sge;
/* Calc the needed wqe size for this packet */
wqe_real_size = mlx4_wqe_calc_real_size(wr->num_sge);
if (unlikely(!wqe_real_size))
goto err;
+ wqe_size = RTE_ALIGN(wqe_real_size, TXBB_SIZE);
nr_txbbs = SIZE_TO_TXBBS(wqe_real_size);
/* Are we too big to handle ? */
if (unlikely(mlx4_wq_overflow(sq, nr_txbbs)))
goto err;
- /* Get ctrl and single-data wqe entries */
- ctrl = mlx4_get_send_wqe(sq, head_idx);
+ /* Get ctrl entry */
+ if (likely(head_idx + nr_txbbs <= sq->txbb_cnt)) {
+ ctrl = mlx4_get_send_wqe(sq, head_idx);
+ } else {
+ /* handle the case of wqe wraps around the SQ by working with
+ * a side-buf and when done copying it back to the SQ
+ */
+ ctrl = (struct mlx4_wqe_ctrl_seg *)txq->bounce_buf;
+ bounce = TRUE;
+ }
+ /* Get data-seg entry */
dseg = (struct mlx4_wqe_data_seg *)(((char *)ctrl) +
sizeof(struct mlx4_wqe_ctrl_seg));
- mlx4_set_data_seg(dseg, wr->sg_list);
- /* For raw eth, the SOLICIT flag is used
- * to indicate that no icrc should be calculated
+	/* Fill-in data segments from last to first */
+ for (i = wr->num_sge - 1; i >= 0; --i)
+ mlx4_set_data_seg(dseg + i, wr->sg_list + i);
+ /* Handle control info
+ *
+ * For raw eth, the SOLICIT flag is used to indicate that
+ * no icrc should be calculated
*/
srcrb_flags = MLX4_WQE_CTRL_SOLICIT |
((wr->send_flags & IBV_SEND_SIGNALED) ?
@@ -361,7 +473,19 @@
MLX4_WQE_CTRL_FENCE : 0) | ((wqe_real_size / 16) & 0x3f);
owner_opcode = MLX4_OPCODE_SEND |
((sq->head & sq->txbb_cnt) ? MLX4_EN_BIT_WQE_OWN : 0);
- mlx4_set_ctrl_seg(ctrl, owner_opcode, fence_size, srcrb_flags, 0);
+	/* Fill in control segment info, except for the ownership bit */
+ mlx4_set_ctrl_seg(ctrl, fence_size, srcrb_flags, 0);
+ /* If we used a bounce buffer then copy wqe back into sq */
+ if (unlikely(bounce))
+ ctrl = mlx4_bounce_to_desc(txq, head_idx, wqe_size);
+ /*
+ * Make sure descriptor is fully written before
+ * setting ownership bit (because HW can start
+ * executing as soon as we do).
+ */
+ rte_wmb();
+ ctrl->owner_opcode = rte_cpu_to_be_32(owner_opcode);
+
sq->head += nr_txbbs;
rte_wmb();
return 0;
@@ -439,62 +563,31 @@
/* Request Tx completion. */
if (unlikely(--elts_comp_cd == 0)) {
elts_comp_cd = txq->elts_comp_cd_init;
- ++elts_comp;
send_flags |= IBV_SEND_SIGNALED;
}
- if (likely(segs == 1)) {
- struct ibv_sge *sge = &elt->sge;
- uintptr_t addr;
- uint32_t length;
- uint32_t lkey;
-
- /* Retrieve buffer information. */
- addr = rte_pktmbuf_mtod(buf, uintptr_t);
- length = buf->data_len;
- /* Retrieve memory region key for this memory pool. */
- lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(buf));
- if (unlikely(lkey == (uint32_t)-1)) {
- /* MR does not exist. */
- DEBUG("%p: unable to get MP <-> MR"
- " association", (void *)txq);
- /* Clean up Tx element. */
- elt->buf = NULL;
- goto stop;
- }
- if (buf->pkt_len <= txq->max_inline)
- send_flags |= IBV_SEND_INLINE;
- /* Update element. */
- elt->buf = buf;
- if (txq->priv->vf)
- rte_prefetch0((volatile void *)
- (uintptr_t)addr);
+ if (buf->pkt_len <= txq->max_inline)
+ send_flags |= IBV_SEND_INLINE;
+ /* Update element. */
+ elt->buf = buf;
+ if (txq->priv->vf)
RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
- sge->addr = addr;
- sge->length = length;
- sge->lkey = lkey;
- sent_size += length;
- /* Set up WR. */
- wr->sg_list = sge;
- wr->num_sge = segs;
- wr->opcode = IBV_WR_SEND;
- wr->send_flags = send_flags;
- wr->next = NULL;
- /* post the pkt for sending */
- err = mlx4_post_send(txq, wr, &wr_bad);
- if (unlikely(err)) {
- if (unlikely(wr_bad->send_flags &
- IBV_SEND_SIGNALED)) {
- elts_comp_cd = 1;
- --elts_comp;
- }
- elt->buf = NULL;
- goto stop;
- }
- sent_size += length;
- } else {
- err = -1;
+ /* Set up WR. */
+ wr->sg_list = NULL; /* handled in post_send */
+ wr->num_sge = segs;
+ wr->opcode = IBV_WR_SEND;
+ wr->send_flags = send_flags;
+ wr->next = NULL;
+ /* post the pkt for sending */
+ err = mlx4_post_send(txq, buf, wr, &wr_bad);
+ if (unlikely(err)) {
+ if (unlikely(wr_bad->send_flags &
+ IBV_SEND_SIGNALED))
+ elts_comp_cd = 1;
+ elt->buf = NULL;
goto stop;
}
+ ++elts_comp;
+ sent_size += buf->pkt_len;
elts_head = elts_head_next;
/* Increment sent bytes counter. */
txq->stats.obytes += sent_size;
@@ -139,13 +139,14 @@ struct txq {
struct txq_elt (*elts)[]; /**< Tx elements. */
unsigned int elts_head; /**< Current index in (*elts)[]. */
unsigned int elts_tail; /**< First element awaiting completion. */
- unsigned int elts_comp; /**< Number of completion requests. */
+ unsigned int elts_comp; /**< Number of pkts waiting for completion. */
unsigned int elts_comp_cd; /**< Countdown for next completion. */
unsigned int elts_comp_cd_init; /**< Initial value for countdown. */
struct mlx4_txq_stats stats; /**< Tx queue counters. */
unsigned int socket; /**< CPU socket ID for allocations. */
struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */
struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+	char *bounce_buf; /**< Side memory used when a WQE wraps around the SQ. */
};
/* mlx4_rxq.c */
@@ -83,8 +83,14 @@
rte_calloc_socket("TXQ", 1, sizeof(*elts), 0, txq->socket);
int ret = 0;
- if (elts == NULL) {
- ERROR("%p: can't allocate packets array", (void *)txq);
+ /* Allocate Bounce-buf memory */
+ txq->bounce_buf = (char *)rte_zmalloc_socket("TXQ",
+ MAX_WQE_SIZE,
+ RTE_CACHE_LINE_MIN_SIZE,
+ txq->socket);
+
+ if ((elts == NULL) || (txq->bounce_buf == NULL)) {
+ ERROR("%p: can't allocate TXQ memory", (void *)txq);
ret = ENOMEM;
goto error;
}
@@ -110,6 +116,8 @@
assert(ret == 0);
return 0;
error:
+ if (txq->bounce_buf != NULL)
+ rte_free(txq->bounce_buf);
if (elts != NULL)
rte_free(elts);
DEBUG("%p: failed, freed everything", (void *)txq);