@@ -23,6 +23,8 @@ sources = files(
'qdma_common.c',
'qdma_devops.c',
'qdma_ethdev.c',
+ 'qdma_user.c',
+ 'qdma_rxtx.c',
'qdma_access/eqdma_soft_access/eqdma_soft_access.c',
'qdma_access/eqdma_soft_access/eqdma_soft_reg_dump.c',
'qdma_access/qdma_s80_hard_access/qdma_s80_hard_access.c',
@@ -16,7 +16,9 @@
#include <rte_memzone.h>
#include <linux/pci.h>
+#include "qdma_user.h"
#include "qdma_resource_mgmt.h"
+#include "qdma_access_common.h"
#include "rte_pmd_qdma.h"
#include "qdma_log.h"
@@ -31,13 +33,27 @@
#define QDMA_MAX_BURST_SIZE (128)
#define QDMA_MIN_RXBUFF_SIZE (256)
+/* Descriptor rings must be aligned to 4KB boundaries - the only supported alignment */
+#define QDMA_ALIGN (4096)
+
#define DEFAULT_TIMER_CNT_TRIG_MODE_TIMER (5)
#define DEFAULT_TIMER_CNT_TRIG_MODE_COUNT_TIMER (30)
+#define MIN_RX_PIDX_UPDATE_THRESHOLD (1)
+#define MIN_TX_PIDX_UPDATE_THRESHOLD (1)
+#define DEFAULT_MM_CMPT_CNT_THRESHOLD (2)
+
#define WB_TIMEOUT (100000)
#define RESET_TIMEOUT (60000)
#define SHUTDOWN_TIMEOUT (60000)
+#define QDMA_MAX_BUFLEN (2048 * 10)
+
+#ifdef spin_lock_init
+#undef spin_lock_init
+#endif
+#define spin_lock_init(sl) rte_spinlock_init(sl)
+
/* Completion Context config */
#define CMPT_DEFAULT_COLOR_BIT (1)
#define CMPT_CNTXT_DESC_SIZE_8B (0)
@@ -90,6 +106,7 @@ struct qdma_pkt_stats {
struct qdma_cmpt_queue {
struct qdma_ul_cmpt_ring *cmpt_ring;
struct wb_status *wb_status;
+ struct qdma_q_cmpt_cidx_reg_info cmpt_cidx_info;
struct rte_eth_dev *dev;
uint16_t cmpt_desc_len;
@@ -127,7 +144,8 @@ struct qdma_rx_queue {
uint16_t nb_rx_cmpt_desc;
uint32_t queue_id; /* RX queue index. */
uint64_t mbuf_initializer; /* value to init mbufs */
-
+ struct qdma_q_pidx_reg_info q_pidx_info;
+ struct qdma_q_cmpt_cidx_reg_info cmpt_cidx_info;
uint16_t port_id; /* Device port identifier. */
uint8_t status:1;
uint8_t err:1;
@@ -138,7 +156,8 @@ struct qdma_rx_queue {
uint8_t en_bypass:1;
uint8_t en_bypass_prefetch:1;
uint8_t dis_overflow_check:1;
-
+ union qdma_ul_st_cmpt_ring cmpt_data[QDMA_MAX_BURST_SIZE];
+ enum rte_pmd_qdma_bypass_desc_len bypass_desc_sz:7;
uint8_t func_id; /* RX queue index. */
uint32_t ep_addr;
@@ -152,6 +171,19 @@ struct qdma_rx_queue {
const struct rte_memzone *rx_mz;
/* C2H stream mode, completion descriptor result */
const struct rte_memzone *rx_cmpt_mz;
+
+#ifdef QDMA_LATENCY_OPTIMIZED
+ /* pend_pkt_moving_avg: average rate of packets received */
+ unsigned int pend_pkt_moving_avg;
+ /* pend_pkt_avg_thr_hi: higher average threshold */
+ unsigned int pend_pkt_avg_thr_hi;
+ /* pend_pkt_avg_thr_lo: lower average threshold */
+ unsigned int pend_pkt_avg_thr_lo;
+ /* sorted_c2h_cntr_idx: sorted c2h counter index */
+ unsigned char sorted_c2h_cntr_idx;
+ /* c2h_cntr_monitor_cnt: c2h counter stagnant monitor count */
+ unsigned char c2h_cntr_monitor_cnt;
+#endif /* QDMA_LATENCY_OPTIMIZED */
};
/**
@@ -197,6 +229,8 @@ struct queue_info {
uint8_t immediate_data_state:1;
uint8_t dis_cmpt_ovf_chk:1;
uint8_t en_prefetch:1;
+ enum rte_pmd_qdma_bypass_desc_len rx_bypass_desc_sz:7;
+ enum rte_pmd_qdma_bypass_desc_len tx_bypass_desc_sz:7;
uint8_t timer_count;
int8_t trigger_mode;
};
@@ -244,6 +278,13 @@ struct qdma_pci_dev {
struct queue_info *q_info;
uint8_t init_q_range;
+ uint32_t g_ring_sz[QDMA_NUM_RING_SIZES];
+ uint32_t g_c2h_cnt_th[QDMA_NUM_C2H_COUNTERS];
+ uint32_t g_c2h_buf_sz[QDMA_NUM_C2H_BUFFER_SIZES];
+ uint32_t g_c2h_timer_cnt[QDMA_NUM_C2H_TIMERS];
+#ifdef QDMA_LATENCY_OPTIMIZED
+ uint32_t sorted_idx_c2h_cnt_th[QDMA_NUM_C2H_COUNTERS];
+#endif /* QDMA_LATENCY_OPTIMIZED */
void **cmpt_queues;
/* Pointer to QDMA access layer function pointers */
struct qdma_hw_access *hw_access;
@@ -256,10 +297,39 @@ struct qdma_pci_dev {
};
void qdma_dev_ops_init(struct rte_eth_dev *dev);
+int qdma_pf_csr_read(struct rte_eth_dev *dev);
+
+uint8_t qmda_get_desc_sz_idx(enum rte_pmd_qdma_bypass_desc_len);
+
+int qdma_init_rx_queue(struct qdma_rx_queue *rxq);
+void qdma_reset_rx_queue(struct qdma_rx_queue *rxq);
+
+void qdma_clr_rx_queue_ctxts(struct rte_eth_dev *dev, uint32_t qid,
+ uint32_t mode);
+void qdma_inv_rx_queue_ctxts(struct rte_eth_dev *dev, uint32_t qid,
+ uint32_t mode);
int qdma_identify_bars(struct rte_eth_dev *dev);
int qdma_get_hw_version(struct rte_eth_dev *dev);
+int index_of_array(uint32_t *arr, uint32_t n, uint32_t element);
+
int qdma_check_kvargs(struct rte_devargs *devargs,
struct qdma_pci_dev *qdma_dev);
+
+static inline const
+struct rte_memzone *qdma_zone_reserve(struct rte_eth_dev *dev,
+ const char *ring_name,
+ uint32_t queue_id,
+ uint32_t ring_size,
+ int socket_id)
+{
+ char z_name[RTE_MEMZONE_NAMESIZE];
+ snprintf(z_name, sizeof(z_name), "%s%s%d_%u",
+ dev->device->driver->name, ring_name,
+ dev->data->port_id, queue_id);
+ return rte_memzone_reserve_aligned(z_name, (uint64_t)ring_size,
+ socket_id, 0, QDMA_ALIGN);
+}
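+
+/*
+ * Usage sketch (matching the Rx queue setup later in this change): ring
+ * memzones are reserved with a name of the form
+ * "<driver-name><ring-name><port>_<queue>", e.g.
+ *
+ *   rxq->rx_mz = qdma_zone_reserve(dev, "RxHwRn", rx_queue_id, sz, socket_id);
+ *
+ * QDMA_ALIGN keeps every descriptor ring on the 4KB boundary the hardware
+ * requires.
+ */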
+
void qdma_check_errors(void *arg);
#endif /* ifndef __QDMA_H__ */
@@ -15,6 +15,163 @@
#include <fcntl.h>
#include <unistd.h>
+void qdma_reset_rx_queue(struct qdma_rx_queue *rxq)
+{
+ uint32_t i;
+ uint32_t sz;
+
+ rxq->rx_tail = 0;
+ rxq->q_pidx_info.pidx = 0;
+
+	/* Zero out the HW ring memory: the CMPT and C2H rings in ST mode,
+	 * the descriptor ring in MM mode
+	 */
+ if (rxq->st_mode) { /** if ST-mode **/
+ sz = rxq->cmpt_desc_len;
+ for (i = 0; i < (sz * rxq->nb_rx_cmpt_desc); i++)
+ ((volatile char *)rxq->cmpt_ring)[i] = 0;
+
+ sz = sizeof(struct qdma_ul_st_c2h_desc);
+ for (i = 0; i < (sz * rxq->nb_rx_desc); i++)
+ ((volatile char *)rxq->rx_ring)[i] = 0;
+
+ } else {
+ sz = sizeof(struct qdma_ul_mm_desc);
+ for (i = 0; i < (sz * rxq->nb_rx_desc); i++)
+ ((volatile char *)rxq->rx_ring)[i] = 0;
+ }
+
+ /* Initialize SW ring entries */
+ for (i = 0; i < rxq->nb_rx_desc; i++)
+ rxq->sw_ring[i] = NULL;
+}
+
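+/*
+ * Invalidates the Rx queue contexts in hardware: the SW and HW descriptor
+ * contexts always, plus the prefetch, completion and credit contexts when
+ * the queue is in streaming (ST) mode.
+ */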
+void qdma_inv_rx_queue_ctxts(struct rte_eth_dev *dev,
+ uint32_t qid, uint32_t mode)
+{
+ struct qdma_pci_dev *qdma_dev = dev->data->dev_private;
+ struct qdma_descq_sw_ctxt q_sw_ctxt;
+ struct qdma_descq_prefetch_ctxt q_prefetch_ctxt;
+ struct qdma_descq_cmpt_ctxt q_cmpt_ctxt;
+ struct qdma_descq_hw_ctxt q_hw_ctxt;
+ struct qdma_descq_credit_ctxt q_credit_ctxt;
+ struct qdma_hw_access *hw_access = qdma_dev->hw_access;
+
+ hw_access->qdma_sw_ctx_conf(dev, 1, qid, &q_sw_ctxt,
+ QDMA_HW_ACCESS_INVALIDATE);
+ hw_access->qdma_hw_ctx_conf(dev, 1, qid, &q_hw_ctxt,
+ QDMA_HW_ACCESS_INVALIDATE);
+ if (mode) { /* ST-mode */
+ hw_access->qdma_pfetch_ctx_conf(dev, qid,
+ &q_prefetch_ctxt, QDMA_HW_ACCESS_INVALIDATE);
+ hw_access->qdma_cmpt_ctx_conf(dev, qid,
+ &q_cmpt_ctxt, QDMA_HW_ACCESS_INVALIDATE);
+ hw_access->qdma_credit_ctx_conf(dev, 1, qid,
+ &q_credit_ctxt, QDMA_HW_ACCESS_INVALIDATE);
+ }
+}
+
+/**
+ * Clears the Rx queue contexts.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param qid
+ *   Hardware queue id whose contexts are to be cleared.
+ * @param mode
+ *   Queue mode; non-zero for streaming (ST) mode.
+ *
+ * @return
+ * Nothing.
+ */
+void qdma_clr_rx_queue_ctxts(struct rte_eth_dev *dev,
+ uint32_t qid, uint32_t mode)
+{
+ struct qdma_pci_dev *qdma_dev = dev->data->dev_private;
+ struct qdma_descq_prefetch_ctxt q_prefetch_ctxt;
+ struct qdma_descq_cmpt_ctxt q_cmpt_ctxt;
+ struct qdma_descq_hw_ctxt q_hw_ctxt;
+ struct qdma_descq_credit_ctxt q_credit_ctxt;
+ struct qdma_descq_sw_ctxt q_sw_ctxt;
+ struct qdma_hw_access *hw_access = qdma_dev->hw_access;
+
+ hw_access->qdma_sw_ctx_conf(dev, 1, qid, &q_sw_ctxt,
+ QDMA_HW_ACCESS_CLEAR);
+ hw_access->qdma_hw_ctx_conf(dev, 1, qid, &q_hw_ctxt,
+ QDMA_HW_ACCESS_CLEAR);
+ if (mode) { /* ST-mode */
+ hw_access->qdma_pfetch_ctx_conf(dev, qid,
+ &q_prefetch_ctxt, QDMA_HW_ACCESS_CLEAR);
+ hw_access->qdma_cmpt_ctx_conf(dev, qid,
+ &q_cmpt_ctxt, QDMA_HW_ACCESS_CLEAR);
+ hw_access->qdma_credit_ctx_conf(dev, 1, qid,
+ &q_credit_ctxt, QDMA_HW_ACCESS_CLEAR);
+ }
+}
+
+int qdma_init_rx_queue(struct qdma_rx_queue *rxq)
+{
+ struct rte_mbuf *mb;
+ void *obj = NULL;
+ uint64_t phys_addr;
+ uint16_t i;
+ struct qdma_ul_st_c2h_desc *rx_ring_st = NULL;
+
+ /* allocate new buffers for the Rx descriptor ring */
+ if (rxq->st_mode) { /* ST-mode */
+ rx_ring_st = (struct qdma_ul_st_c2h_desc *)rxq->rx_ring;
+#ifdef DUMP_MEMPOOL_USAGE_STATS
+ PMD_DRV_LOG(INFO, "%s(): %d: queue id %d, mbuf_avail_count =%d,"
+ "mbuf_in_use_count = %d",
+ __func__, __LINE__, rxq->queue_id,
+ rte_mempool_avail_count(rxq->mb_pool),
+ rte_mempool_in_use_count(rxq->mb_pool));
+#endif /* DUMP_MEMPOOL_USAGE_STATS */
+ for (i = 0; i < (rxq->nb_rx_desc - 2); i++) {
+ if (rte_mempool_get(rxq->mb_pool, &obj) != 0) {
+ PMD_DRV_LOG(ERR, "qdma-start-rx-queue(): "
+ "rte_mempool_get: failed");
+ goto fail;
+ }
+
+ if (obj != NULL) {
+ mb = obj;
+ } else {
+ PMD_DRV_LOG(ERR, "%s(): %d: qid %d, rte_mempool_get failed",
+ __func__, __LINE__, rxq->queue_id);
+ goto fail;
+ }
+
+ phys_addr = (uint64_t)mb->buf_iova +
+ RTE_PKTMBUF_HEADROOM;
+
+ mb->data_off = RTE_PKTMBUF_HEADROOM;
+ rxq->sw_ring[i] = mb;
+ rx_ring_st[i].dst_addr = phys_addr;
+ }
+#ifdef DUMP_MEMPOOL_USAGE_STATS
+ PMD_DRV_LOG(INFO, "%s(): %d: qid %d, mbuf_avail_count = %d,"
+ "mbuf_in_use_count = %d",
+ __func__, __LINE__, rxq->queue_id,
+ rte_mempool_avail_count(rxq->mb_pool),
+ rte_mempool_in_use_count(rxq->mb_pool));
+#endif /* DUMP_MEMPOOL_USAGE_STATS */
+ }
+
+ /* initialize tail */
+ rxq->rx_tail = 0;
+
+ return 0;
+fail:
+ return -ENOMEM;
+}
+
+/* Utility function to find index of an element in an array */
+int index_of_array(uint32_t *arr, uint32_t n, uint32_t element)
+{
+ int index = 0;
+
+ for (index = 0; (uint32_t)index < n; index++) {
+ if (*(arr + index) == element)
+ return index;
+ }
+ return -1;
+}
+
static int pfetch_check_handler(__rte_unused const char *key,
const char *value, void *opaque)
{
@@ -26,6 +26,92 @@
#include "qdma_platform.h"
#include "qdma_devops.h"
+#ifdef QDMA_LATENCY_OPTIMIZED
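+/*
+ * Builds sorted_idx_c2h_cnt_th[] by repeatedly selecting the smallest
+ * remaining C2H counter threshold, so the array holds indexes into
+ * g_c2h_cnt_th[] in ascending order of threshold value. The adaptive Rx
+ * counter logic walks this sorted view when stepping between neighbouring
+ * threshold values.
+ */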
+static void qdma_sort_c2h_cntr_th_values(struct qdma_pci_dev *qdma_dev)
+{
+ uint8_t i, idx = 0, j = 0;
+ uint8_t c2h_cntr_val = qdma_dev->g_c2h_cnt_th[0];
+ uint8_t least_max = 0;
+ int ref_idx = -1;
+
+get_next_idx:
+ for (i = 0; i < QDMA_NUM_C2H_COUNTERS; i++) {
+ if (ref_idx >= 0 && ref_idx == i)
+ continue;
+ if (qdma_dev->g_c2h_cnt_th[i] < least_max)
+ continue;
+ c2h_cntr_val = qdma_dev->g_c2h_cnt_th[i];
+ idx = i;
+ break;
+ }
+ for (; i < QDMA_NUM_C2H_COUNTERS; i++) {
+ if (ref_idx >= 0 && ref_idx == i)
+ continue;
+ if (qdma_dev->g_c2h_cnt_th[i] < least_max)
+ continue;
+ if (c2h_cntr_val >= qdma_dev->g_c2h_cnt_th[i]) {
+ c2h_cntr_val = qdma_dev->g_c2h_cnt_th[i];
+ idx = i;
+ }
+ }
+ qdma_dev->sorted_idx_c2h_cnt_th[j] = idx;
+ ref_idx = idx;
+ j++;
+ idx = j;
+ least_max = c2h_cntr_val;
+ if (j < QDMA_NUM_C2H_COUNTERS)
+ goto get_next_idx;
+}
+#endif /* QDMA_LATENCY_OPTIMIZED */
+
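+/*
+ * Reads the global CSR tables (ring sizes, C2H timer counts, C2H counter
+ * thresholds and C2H buffer sizes, depending on device capabilities) into
+ * the per-device caches used when configuring queues.
+ */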
+int qdma_pf_csr_read(struct rte_eth_dev *dev)
+{
+ int ret = 0;
+ struct qdma_pci_dev *qdma_dev = dev->data->dev_private;
+ struct qdma_hw_access *hw_access = qdma_dev->hw_access;
+
+ ret = hw_access->qdma_global_csr_conf(dev, 0,
+ QDMA_NUM_RING_SIZES, qdma_dev->g_ring_sz,
+ QDMA_CSR_RING_SZ, QDMA_HW_ACCESS_READ);
+ if (ret != QDMA_SUCCESS)
+ PMD_DRV_LOG(ERR, "qdma_global_csr_conf for ring size "
+ "returned %d", ret);
+ if (qdma_dev->dev_cap.st_en || qdma_dev->dev_cap.mm_cmpt_en) {
+ ret = hw_access->qdma_global_csr_conf(dev, 0,
+ QDMA_NUM_C2H_TIMERS, qdma_dev->g_c2h_timer_cnt,
+ QDMA_CSR_TIMER_CNT, QDMA_HW_ACCESS_READ);
+ if (ret != QDMA_SUCCESS)
+ PMD_DRV_LOG(ERR, "qdma_global_csr_conf for timer count "
+ "returned %d", ret);
+
+ ret = hw_access->qdma_global_csr_conf(dev, 0,
+ QDMA_NUM_C2H_COUNTERS, qdma_dev->g_c2h_cnt_th,
+ QDMA_CSR_CNT_TH, QDMA_HW_ACCESS_READ);
+ if (ret != QDMA_SUCCESS)
+ PMD_DRV_LOG(ERR, "qdma_global_csr_conf for counter threshold "
+ "returned %d", ret);
+#ifdef QDMA_LATENCY_OPTIMIZED
+ qdma_sort_c2h_cntr_th_values(qdma_dev);
+#endif /* QDMA_LATENCY_OPTIMIZED */
+ }
+
+ if (qdma_dev->dev_cap.st_en) {
+ ret = hw_access->qdma_global_csr_conf(dev, 0,
+ QDMA_NUM_C2H_BUFFER_SIZES,
+ qdma_dev->g_c2h_buf_sz,
+ QDMA_CSR_BUF_SZ,
+ QDMA_HW_ACCESS_READ);
+ if (ret != QDMA_SUCCESS)
+ PMD_DRV_LOG(ERR, "qdma_global_csr_conf for buffer sizes "
+ "returned %d", ret);
+ }
+
+ if (ret < 0)
+ return qdma_dev->hw_access->qdma_get_error_code(ret);
+
+ return ret;
+}
+
static int qdma_pf_fmap_prog(struct rte_eth_dev *dev)
{
struct qdma_pci_dev *qdma_dev = dev->data->dev_private;
@@ -45,6 +131,47 @@ static int qdma_pf_fmap_prog(struct rte_eth_dev *dev)
return ret;
}
+uint8_t qmda_get_desc_sz_idx(enum rte_pmd_qdma_bypass_desc_len size)
+{
+ uint8_t ret;
+ switch (size) {
+ case RTE_PMD_QDMA_BYPASS_DESC_LEN_8B:
+ ret = 0;
+ break;
+ case RTE_PMD_QDMA_BYPASS_DESC_LEN_16B:
+ ret = 1;
+ break;
+ case RTE_PMD_QDMA_BYPASS_DESC_LEN_32B:
+ ret = 2;
+ break;
+ case RTE_PMD_QDMA_BYPASS_DESC_LEN_64B:
+ ret = 3;
+ break;
+ default:
+ /* Suppress compiler warnings */
+ ret = 0;
+ }
+ return ret;
+}
+
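+/*
+ * Builds a template mbuf for this Rx queue and caches its 64-bit rearm_data
+ * word in rxq->mbuf_initializer, so the datapath can re-initialize received
+ * mbufs with a single store instead of setting each field individually.
+ */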
+static inline int
+qdma_rxq_default_mbuf_init(struct qdma_rx_queue *rxq)
+{
+ uintptr_t p;
+ struct rte_mbuf mb = { .buf_addr = 0 };
+
+ mb.nb_segs = 1;
+ mb.data_off = RTE_PKTMBUF_HEADROOM;
+ mb.port = rxq->port_id;
+ rte_mbuf_refcnt_set(&mb, 1);
+
+ /* prevent compiler reordering */
+ rte_compiler_barrier();
+ p = (uintptr_t)&mb.rearm_data;
+ rxq->mbuf_initializer = *(uint64_t *)p;
+ return 0;
+}
+
/**
* DPDK callback to configure a RX queue.
*
@@ -72,14 +199,355 @@ int qdma_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mb_pool)
{
- (void)dev;
- (void)rx_queue_id;
- (void)nb_rx_desc;
- (void)socket_id;
- (void)rx_conf;
- (void)mb_pool;
+ struct qdma_pci_dev *qdma_dev = dev->data->dev_private;
+ struct qdma_rx_queue *rxq = NULL;
+ struct qdma_ul_mm_desc *rx_ring_mm;
+ uint32_t sz;
+ uint8_t *rx_ring_bypass;
+ int err = 0;
+
+ PMD_DRV_LOG(INFO, "Configuring Rx queue id:%d\n", rx_queue_id);
+
+ if (nb_rx_desc == 0) {
+ PMD_DRV_LOG(ERR, "Invalid descriptor ring size %d\n",
+ nb_rx_desc);
+ return -EINVAL;
+ }
+
+ if (!qdma_dev->dev_configured) {
+ PMD_DRV_LOG(ERR,
+ "Device for Rx queue id %d is not configured yet\n",
+ rx_queue_id);
+ return -EINVAL;
+ }
+
+ if (!qdma_dev->is_vf) {
+ err = qdma_dev_increment_active_queue
+ (qdma_dev->dma_device_index,
+ qdma_dev->func_id,
+ QDMA_DEV_Q_TYPE_C2H);
+ if (err != QDMA_SUCCESS)
+ return -EINVAL;
+
+ if (qdma_dev->q_info[rx_queue_id].queue_mode ==
+ RTE_PMD_QDMA_STREAMING_MODE) {
+ err = qdma_dev_increment_active_queue
+ (qdma_dev->dma_device_index,
+ qdma_dev->func_id,
+ QDMA_DEV_Q_TYPE_CMPT);
+ if (err != QDMA_SUCCESS) {
+ qdma_dev_decrement_active_queue
+ (qdma_dev->dma_device_index,
+ qdma_dev->func_id,
+ QDMA_DEV_Q_TYPE_C2H);
+ return -EINVAL;
+ }
+ }
+ }
+ if (!qdma_dev->init_q_range) {
+ if (!qdma_dev->is_vf) {
+ err = qdma_pf_csr_read(dev);
+ if (err < 0)
+ goto rx_setup_err;
+ }
+ qdma_dev->init_q_range = 1;
+ }
+
+ /* allocate rx queue data structure */
+ rxq = rte_zmalloc_socket("QDMA_RxQ", sizeof(struct qdma_rx_queue),
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (!rxq) {
+ PMD_DRV_LOG(ERR, "Unable to allocate structure rxq of "
+ "size %d\n",
+ (int)(sizeof(struct qdma_rx_queue)));
+ err = -ENOMEM;
+ goto rx_setup_err;
+ }
+
+ rxq->queue_id = rx_queue_id;
+ rxq->port_id = dev->data->port_id;
+ rxq->func_id = qdma_dev->func_id;
+ rxq->mb_pool = mb_pool;
+ rxq->dev = dev;
+ rxq->st_mode = qdma_dev->q_info[rx_queue_id].queue_mode;
+ rxq->nb_rx_desc = (nb_rx_desc + 1);
+	/* For IP releases up to 2018.2, double the completion ring size
+	 * to avoid running out of completion entries while the descriptor
+	 * ring still has free entries
+	 */
+ rxq->nb_rx_cmpt_desc = ((nb_rx_desc * 2) + 1);
+ rxq->en_prefetch = qdma_dev->q_info[rx_queue_id].en_prefetch;
+ rxq->cmpt_desc_len = qdma_dev->q_info[rx_queue_id].cmpt_desc_sz;
+ if (rxq->cmpt_desc_len == RTE_PMD_QDMA_CMPT_DESC_LEN_64B &&
+ !qdma_dev->dev_cap.cmpt_desc_64b) {
+ PMD_DRV_LOG(ERR, "PF-%d(DEVFN) 64B completion entry size is "
+ "not supported in this design\n", qdma_dev->func_id);
+ return -ENOTSUP;
+ }
+ rxq->triggermode = qdma_dev->q_info[rx_queue_id].trigger_mode;
+ rxq->rx_deferred_start = rx_conf->rx_deferred_start;
+ rxq->dump_immediate_data =
+ qdma_dev->q_info[rx_queue_id].immediate_data_state;
+ rxq->dis_overflow_check =
+ qdma_dev->q_info[rx_queue_id].dis_cmpt_ovf_chk;
+
+ if (qdma_dev->q_info[rx_queue_id].rx_bypass_mode ==
+ RTE_PMD_QDMA_RX_BYPASS_CACHE ||
+ qdma_dev->q_info[rx_queue_id].rx_bypass_mode ==
+ RTE_PMD_QDMA_RX_BYPASS_SIMPLE)
+ rxq->en_bypass = 1;
+ if (qdma_dev->q_info[rx_queue_id].rx_bypass_mode ==
+ RTE_PMD_QDMA_RX_BYPASS_SIMPLE)
+ rxq->en_bypass_prefetch = 1;
+
+ if (qdma_dev->ip_type == EQDMA_SOFT_IP &&
+ qdma_dev->vivado_rel >= QDMA_VIVADO_2020_2) {
+ if (qdma_dev->dev_cap.desc_eng_mode ==
+ QDMA_DESC_ENG_BYPASS_ONLY) {
+ PMD_DRV_LOG(ERR,
+ "Bypass only mode design "
+ "is not supported\n");
+ return -ENOTSUP;
+ }
+
+ if (rxq->en_bypass &&
+ qdma_dev->dev_cap.desc_eng_mode ==
+ QDMA_DESC_ENG_INTERNAL_ONLY) {
+ PMD_DRV_LOG(ERR,
+ "Rx qid %d config in bypass "
+ "mode not supported on "
+ "internal only mode design\n",
+ rx_queue_id);
+ return -ENOTSUP;
+ }
+ }
+
+ if (rxq->en_bypass) {
+ rxq->bypass_desc_sz =
+ qdma_dev->q_info[rx_queue_id].rx_bypass_desc_sz;
+ if (rxq->bypass_desc_sz == RTE_PMD_QDMA_BYPASS_DESC_LEN_64B &&
+ !qdma_dev->dev_cap.sw_desc_64b) {
+ PMD_DRV_LOG(ERR, "PF-%d(DEVFN) C2H bypass descriptor "
+ "size of 64B is not supported in this design:\n",
+ qdma_dev->func_id);
+ return -ENOTSUP;
+ }
+ }
+	/* Look up the CSR indexes for the ring size, completion ring size,
+	 * buffer size, threshold and timer values.
+	 * A missing ring size match fails the setup; missing threshold and
+	 * timer matches fall back to fixed indexes 7 and 1 respectively.
+	 */
+
+ /* Find C2H queue ring size index */
+ rxq->ringszidx = index_of_array(qdma_dev->g_ring_sz,
+ QDMA_NUM_RING_SIZES, rxq->nb_rx_desc);
+ if (rxq->ringszidx < 0) {
+ PMD_DRV_LOG(ERR, "Expected Ring size %d not found\n",
+ rxq->nb_rx_desc);
+ err = -EINVAL;
+ goto rx_setup_err;
+ }
+
+ /* Find completion ring size index */
+ rxq->cmpt_ringszidx = index_of_array(qdma_dev->g_ring_sz,
+ QDMA_NUM_RING_SIZES,
+ rxq->nb_rx_cmpt_desc);
+ if (rxq->cmpt_ringszidx < 0) {
+ PMD_DRV_LOG(ERR, "Expected completion ring size %d not found\n",
+ rxq->nb_rx_cmpt_desc);
+ err = -EINVAL;
+ goto rx_setup_err;
+ }
+
+ /* Find Threshold index */
+ rxq->threshidx = index_of_array(qdma_dev->g_c2h_cnt_th,
+ QDMA_NUM_C2H_COUNTERS,
+ rx_conf->rx_thresh.wthresh);
+ if (rxq->threshidx < 0) {
+ PMD_DRV_LOG(WARNING, "Expected Threshold %d not found,"
+ " using the value %d at index 7\n",
+ rx_conf->rx_thresh.wthresh,
+ qdma_dev->g_c2h_cnt_th[7]);
+ rxq->threshidx = 7;
+ }
+
+#ifdef QDMA_LATENCY_OPTIMIZED
+ uint8_t next_idx;
+
+ /* Initialize sorted_c2h_cntr_idx */
+ rxq->sorted_c2h_cntr_idx = index_of_array
+ (qdma_dev->sorted_idx_c2h_cnt_th,
+ QDMA_NUM_C2H_COUNTERS,
+ qdma_dev->g_c2h_cnt_th[rxq->threshidx]);
+
+ /* Initialize pend_pkt_moving_avg */
+ rxq->pend_pkt_moving_avg = qdma_dev->g_c2h_cnt_th[rxq->threshidx];
+
+ /* Initialize pend_pkt_avg_thr_hi */
+ if (rxq->sorted_c2h_cntr_idx < (QDMA_NUM_C2H_COUNTERS - 1))
+ next_idx = qdma_dev->sorted_idx_c2h_cnt_th
+ [rxq->sorted_c2h_cntr_idx + 1];
+ else
+ next_idx = qdma_dev->sorted_idx_c2h_cnt_th
+ [rxq->sorted_c2h_cntr_idx];
+
+ rxq->pend_pkt_avg_thr_hi = qdma_dev->g_c2h_cnt_th[next_idx];
+
+ /* Initialize pend_pkt_avg_thr_lo */
+ if (rxq->sorted_c2h_cntr_idx > 0)
+ next_idx = qdma_dev->sorted_idx_c2h_cnt_th
+ [rxq->sorted_c2h_cntr_idx - 1];
+ else
+ next_idx = qdma_dev->sorted_idx_c2h_cnt_th
+ [rxq->sorted_c2h_cntr_idx];
+
+ rxq->pend_pkt_avg_thr_lo = qdma_dev->g_c2h_cnt_th[next_idx];
+#endif /* QDMA_LATENCY_OPTIMIZED */
+
+ /* Find Timer index */
+ rxq->timeridx = index_of_array(qdma_dev->g_c2h_timer_cnt,
+ QDMA_NUM_C2H_TIMERS,
+ qdma_dev->q_info[rx_queue_id].timer_count);
+ if (rxq->timeridx < 0) {
+ PMD_DRV_LOG(WARNING, "Expected timer %d not found, "
+ "using the value %d at index 1\n",
+ qdma_dev->q_info[rx_queue_id].timer_count,
+ qdma_dev->g_c2h_timer_cnt[1]);
+ rxq->timeridx = 1;
+ }
+
+ rxq->rx_buff_size = (uint16_t)
+ (rte_pktmbuf_data_room_size(rxq->mb_pool) -
+ RTE_PKTMBUF_HEADROOM);
+ /* Allocate memory for Rx descriptor ring */
+ if (rxq->st_mode) {
+ if (!qdma_dev->dev_cap.st_en) {
+ PMD_DRV_LOG(ERR, "Streaming mode not enabled "
+ "in the hardware\n");
+ err = -EINVAL;
+ goto rx_setup_err;
+ }
+ /* Find Buffer size index */
+ rxq->buffszidx = index_of_array(qdma_dev->g_c2h_buf_sz,
+ QDMA_NUM_C2H_BUFFER_SIZES,
+ rxq->rx_buff_size);
+ if (rxq->buffszidx < 0) {
+ PMD_DRV_LOG(ERR, "Expected buffer size %d not found\n",
+ rxq->rx_buff_size);
+ err = -EINVAL;
+ goto rx_setup_err;
+ }
+
+ if (rxq->en_bypass &&
+ rxq->bypass_desc_sz != 0)
+ sz = (rxq->nb_rx_desc) * (rxq->bypass_desc_sz);
+ else
+ sz = (rxq->nb_rx_desc) *
+ sizeof(struct qdma_ul_st_c2h_desc);
+
+ rxq->rx_mz = qdma_zone_reserve(dev, "RxHwRn", rx_queue_id,
+ sz, socket_id);
+ if (!rxq->rx_mz) {
+ PMD_DRV_LOG(ERR, "Unable to allocate rxq->rx_mz "
+ "of size %d\n", sz);
+ err = -ENOMEM;
+ goto rx_setup_err;
+ }
+ rxq->rx_ring = rxq->rx_mz->addr;
+ memset(rxq->rx_ring, 0, sz);
+
+ /* Allocate memory for Rx completion(CMPT) descriptor ring */
+ sz = (rxq->nb_rx_cmpt_desc) * rxq->cmpt_desc_len;
+ rxq->rx_cmpt_mz = qdma_zone_reserve(dev, "RxHwCmptRn",
+ rx_queue_id, sz, socket_id);
+ if (!rxq->rx_cmpt_mz) {
+ PMD_DRV_LOG(ERR, "Unable to allocate rxq->rx_cmpt_mz "
+ "of size %d\n", sz);
+ err = -ENOMEM;
+ goto rx_setup_err;
+ }
+ rxq->cmpt_ring =
+ (union qdma_ul_st_cmpt_ring *)rxq->rx_cmpt_mz->addr;
+
+ /* Write-back status structure */
+ rxq->wb_status = (struct wb_status *)((uint64_t)rxq->cmpt_ring +
+ (((uint64_t)rxq->nb_rx_cmpt_desc - 1) *
+ rxq->cmpt_desc_len));
+ memset(rxq->cmpt_ring, 0, sz);
+ } else {
+ if (!qdma_dev->dev_cap.mm_en) {
+ PMD_DRV_LOG(ERR, "Memory mapped mode not enabled "
+ "in the hardware\n");
+ err = -EINVAL;
+ goto rx_setup_err;
+ }
+
+ if (rxq->en_bypass &&
+ rxq->bypass_desc_sz != 0)
+ sz = (rxq->nb_rx_desc) * (rxq->bypass_desc_sz);
+ else
+ sz = (rxq->nb_rx_desc) * sizeof(struct qdma_ul_mm_desc);
+ rxq->rx_mz = qdma_zone_reserve(dev, "RxHwRn",
+ rx_queue_id, sz, socket_id);
+ if (!rxq->rx_mz) {
+ PMD_DRV_LOG(ERR, "Unable to allocate rxq->rx_mz "
+ "of size %d\n", sz);
+ err = -ENOMEM;
+ goto rx_setup_err;
+ }
+ rxq->rx_ring = rxq->rx_mz->addr;
+ rx_ring_mm = (struct qdma_ul_mm_desc *)rxq->rx_mz->addr;
+ memset(rxq->rx_ring, 0, sz);
+
+ rx_ring_bypass = (uint8_t *)rxq->rx_mz->addr;
+ if (rxq->en_bypass &&
+ rxq->bypass_desc_sz != 0)
+ rxq->wb_status = (struct wb_status *)&
+ (rx_ring_bypass[(rxq->nb_rx_desc - 1) *
+ (rxq->bypass_desc_sz)]);
+ else
+ rxq->wb_status = (struct wb_status *)&
+ (rx_ring_mm[rxq->nb_rx_desc - 1]);
+ }
+
+ /* allocate memory for RX software ring */
+ sz = (rxq->nb_rx_desc) * sizeof(struct rte_mbuf *);
+ rxq->sw_ring = rte_zmalloc_socket("RxSwRn", sz,
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (!rxq->sw_ring) {
+ PMD_DRV_LOG(ERR, "Unable to allocate rxq->sw_ring of size %d\n",
+ sz);
+ err = -ENOMEM;
+ goto rx_setup_err;
+ }
+
+ qdma_rxq_default_mbuf_init(rxq);
+
+ dev->data->rx_queues[rx_queue_id] = rxq;
return 0;
+
+rx_setup_err:
+ if (!qdma_dev->is_vf) {
+ qdma_dev_decrement_active_queue(qdma_dev->dma_device_index,
+ qdma_dev->func_id,
+ QDMA_DEV_Q_TYPE_C2H);
+
+ if (qdma_dev->q_info[rx_queue_id].queue_mode ==
+ RTE_PMD_QDMA_STREAMING_MODE)
+ qdma_dev_decrement_active_queue
+ (qdma_dev->dma_device_index,
+ qdma_dev->func_id,
+ QDMA_DEV_Q_TYPE_CMPT);
+ }
+ if (rxq) {
+ if (rxq->rx_mz)
+ rte_memzone_free(rxq->rx_mz);
+ if (rxq->sw_ring)
+ rte_free(rxq->sw_ring);
+ rte_free(rxq);
+ }
+ return err;
}
/**
@@ -524,16 +992,193 @@ int qdma_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t qid)
int qdma_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t qid)
{
- (void)dev;
- (void)qid;
+ struct qdma_pci_dev *qdma_dev = dev->data->dev_private;
+ struct qdma_rx_queue *rxq;
+ uint32_t queue_base = qdma_dev->queue_base;
+ uint8_t cmpt_desc_fmt;
+ int err, bypass_desc_sz_idx;
+ struct qdma_descq_sw_ctxt q_sw_ctxt;
+ struct qdma_descq_cmpt_ctxt q_cmpt_ctxt;
+ struct qdma_descq_prefetch_ctxt q_prefetch_ctxt;
+ struct qdma_hw_access *hw_access = qdma_dev->hw_access;
+
+ rxq = (struct qdma_rx_queue *)dev->data->rx_queues[qid];
+
+ memset(&q_sw_ctxt, 0, sizeof(struct qdma_descq_sw_ctxt));
+
+ qdma_reset_rx_queue(rxq);
+ qdma_clr_rx_queue_ctxts(dev, (qid + queue_base), rxq->st_mode);
+
+ bypass_desc_sz_idx = qmda_get_desc_sz_idx(rxq->bypass_desc_sz);
+
+ switch (rxq->cmpt_desc_len) {
+ case RTE_PMD_QDMA_CMPT_DESC_LEN_8B:
+ cmpt_desc_fmt = CMPT_CNTXT_DESC_SIZE_8B;
+ break;
+ case RTE_PMD_QDMA_CMPT_DESC_LEN_16B:
+ cmpt_desc_fmt = CMPT_CNTXT_DESC_SIZE_16B;
+ break;
+ case RTE_PMD_QDMA_CMPT_DESC_LEN_32B:
+ cmpt_desc_fmt = CMPT_CNTXT_DESC_SIZE_32B;
+ break;
+ case RTE_PMD_QDMA_CMPT_DESC_LEN_64B:
+ cmpt_desc_fmt = CMPT_CNTXT_DESC_SIZE_64B;
+ break;
+ default:
+ cmpt_desc_fmt = CMPT_CNTXT_DESC_SIZE_8B;
+ break;
+ }
+
+ err = qdma_init_rx_queue(rxq);
+ if (err != 0)
+ return err;
+
+ if (rxq->st_mode) {
+ memset(&q_cmpt_ctxt, 0, sizeof(struct qdma_descq_cmpt_ctxt));
+ memset(&q_prefetch_ctxt, 0,
+ sizeof(struct qdma_descq_prefetch_ctxt));
+
+ q_prefetch_ctxt.bypass = (rxq->en_bypass_prefetch) ? 1 : 0;
+ q_prefetch_ctxt.bufsz_idx = rxq->buffszidx;
+ q_prefetch_ctxt.pfch_en = (rxq->en_prefetch) ? 1 : 0;
+ q_prefetch_ctxt.valid = 1;
+
+#ifdef QDMA_LATENCY_OPTIMIZED
+ q_cmpt_ctxt.full_upd = 1;
+#endif /* QDMA_LATENCY_OPTIMIZED */
+ q_cmpt_ctxt.en_stat_desc = 1;
+ q_cmpt_ctxt.trig_mode = rxq->triggermode;
+ q_cmpt_ctxt.fnc_id = rxq->func_id;
+ q_cmpt_ctxt.counter_idx = rxq->threshidx;
+ q_cmpt_ctxt.timer_idx = rxq->timeridx;
+ q_cmpt_ctxt.color = CMPT_DEFAULT_COLOR_BIT;
+ q_cmpt_ctxt.ringsz_idx = rxq->cmpt_ringszidx;
+ q_cmpt_ctxt.bs_addr = (uint64_t)rxq->rx_cmpt_mz->iova;
+ q_cmpt_ctxt.desc_sz = cmpt_desc_fmt;
+ q_cmpt_ctxt.valid = 1;
+ if (qdma_dev->dev_cap.cmpt_ovf_chk_dis)
+ q_cmpt_ctxt.ovf_chk_dis = rxq->dis_overflow_check;
+
+
+ q_sw_ctxt.desc_sz = SW_DESC_CNTXT_C2H_STREAM_DMA;
+ q_sw_ctxt.frcd_en = 1;
+ } else {
+ q_sw_ctxt.desc_sz = SW_DESC_CNTXT_MEMORY_MAP_DMA;
+ q_sw_ctxt.is_mm = 1;
+ q_sw_ctxt.wbi_chk = 1;
+ q_sw_ctxt.wbi_intvl_en = 1;
+ }
+ q_sw_ctxt.fnc_id = rxq->func_id;
+ q_sw_ctxt.qen = 1;
+ q_sw_ctxt.rngsz_idx = rxq->ringszidx;
+ q_sw_ctxt.bypass = rxq->en_bypass;
+ q_sw_ctxt.wbk_en = 1;
+ q_sw_ctxt.ring_bs_addr = (uint64_t)rxq->rx_mz->iova;
+
+ if (rxq->en_bypass &&
+ rxq->bypass_desc_sz != 0)
+ q_sw_ctxt.desc_sz = bypass_desc_sz_idx;
+
+ /* Set SW Context */
+ err = hw_access->qdma_sw_ctx_conf(dev, 1, (qid + queue_base),
+ &q_sw_ctxt, QDMA_HW_ACCESS_WRITE);
+ if (err < 0)
+ return qdma_dev->hw_access->qdma_get_error_code(err);
+
+ if (rxq->st_mode) {
+ /* Set Prefetch Context */
+ err = hw_access->qdma_pfetch_ctx_conf(dev, (qid + queue_base),
+ &q_prefetch_ctxt, QDMA_HW_ACCESS_WRITE);
+ if (err < 0)
+ return qdma_dev->hw_access->qdma_get_error_code(err);
+
+ /* Set Completion Context */
+ err = hw_access->qdma_cmpt_ctx_conf(dev, (qid + queue_base),
+ &q_cmpt_ctxt, QDMA_HW_ACCESS_WRITE);
+ if (err < 0)
+ return qdma_dev->hw_access->qdma_get_error_code(err);
+
+ rte_wmb();
+		/* Enable the status descriptor and load the trigger mode,
+		 * threshold index and timer index provided by the user
+		 */
+
+ rxq->cmpt_cidx_info.counter_idx = rxq->threshidx;
+ rxq->cmpt_cidx_info.timer_idx = rxq->timeridx;
+ rxq->cmpt_cidx_info.trig_mode = rxq->triggermode;
+ rxq->cmpt_cidx_info.wrb_en = 1;
+ rxq->cmpt_cidx_info.wrb_cidx = 0;
+ hw_access->qdma_queue_cmpt_cidx_update(dev, qdma_dev->is_vf,
+ qid, &rxq->cmpt_cidx_info);
+
+ rxq->q_pidx_info.pidx = (rxq->nb_rx_desc - 2);
+ hw_access->qdma_queue_pidx_update(dev, qdma_dev->is_vf, qid,
+ 1, &rxq->q_pidx_info);
+ }
+
+ dev->data->rx_queue_state[qid] = RTE_ETH_QUEUE_STATE_STARTED;
+ rxq->status = RTE_ETH_QUEUE_STATE_STARTED;
return 0;
}
int qdma_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t qid)
{
- (void)dev;
- (void)qid;
+ struct qdma_pci_dev *qdma_dev = dev->data->dev_private;
+ struct qdma_rx_queue *rxq;
+ uint32_t queue_base = qdma_dev->queue_base;
+ int i = 0;
+ int cnt = 0;
+
+ rxq = (struct qdma_rx_queue *)dev->data->rx_queues[qid];
+
+ rxq->status = RTE_ETH_QUEUE_STATE_STOPPED;
+
+	/* Wait for the queue to drain all pending packets. */
+ if (rxq->st_mode) { /** ST-mode **/
+		/* For EQDMA, the C2H marker takes care of draining the pipeline */
+ if (!(qdma_dev->ip_type == EQDMA_SOFT_IP)) {
+ while (rxq->wb_status->pidx !=
+ rxq->cmpt_cidx_info.wrb_cidx) {
+ usleep(10);
+ if (cnt++ > 10000)
+ break;
+ }
+ }
+ } else { /* MM mode */
+ while (rxq->wb_status->cidx != rxq->q_pidx_info.pidx) {
+ usleep(10);
+ if (cnt++ > 10000)
+ break;
+ }
+ }
+
+ qdma_inv_rx_queue_ctxts(dev, (qid + queue_base), rxq->st_mode);
+
+ if (rxq->st_mode) { /* ST-mode */
+#ifdef DUMP_MEMPOOL_USAGE_STATS
+ PMD_DRV_LOG(INFO, "%s(): %d: queue id %d,"
+ "mbuf_avail_count = %d, mbuf_in_use_count = %d",
+ __func__, __LINE__, rxq->queue_id,
+ rte_mempool_avail_count(rxq->mb_pool),
+ rte_mempool_in_use_count(rxq->mb_pool));
+#endif /* DUMP_MEMPOOL_USAGE_STATS */
+ for (i = 0; i < rxq->nb_rx_desc - 1; i++) {
+ rte_pktmbuf_free(rxq->sw_ring[i]);
+ rxq->sw_ring[i] = NULL;
+ }
+#ifdef DUMP_MEMPOOL_USAGE_STATS
+ PMD_DRV_LOG(INFO, "%s(): %d: queue id %d,"
+ "mbuf_avail_count = %d, mbuf_in_use_count = %d",
+ __func__, __LINE__, rxq->queue_id,
+ rte_mempool_avail_count(rxq->mb_pool),
+ rte_mempool_in_use_count(rxq->mb_pool));
+#endif /* DUMP_MEMPOOL_USAGE_STATS */
+ }
+
+ qdma_reset_rx_queue(rxq);
+
+ dev->data->rx_queue_state[qid] = RTE_ETH_QUEUE_STATE_STOPPED;
return 0;
}
@@ -650,9 +1295,22 @@ void
qdma_dev_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
struct rte_eth_rxq_info *qinfo)
{
- (void)dev;
- (void)rx_queue_id;
- (void)qinfo;
+ struct qdma_pci_dev *dma_priv;
+ struct qdma_rx_queue *rxq = NULL;
+
+ if (!qinfo)
+ return;
+
+ dma_priv = (struct qdma_pci_dev *)dev->data->dev_private;
+
+ rxq = dev->data->rx_queues[rx_queue_id];
+ memset(qinfo, 0, sizeof(struct rte_eth_rxq_info));
+ qinfo->mp = rxq->mb_pool;
+ qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
+ qinfo->conf.rx_drop_en = 1;
+ qinfo->conf.rx_thresh.wthresh = dma_priv->g_c2h_cnt_th[rxq->threshidx];
+ qinfo->scattered_rx = 1;
+ qinfo->nb_desc = rxq->nb_rx_desc - 1;
}
/**
new file mode 100644
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2022 Xilinx, Inc. All rights reserved.
+ * Copyright(c) 2022 VVDN Technologies Private Limited. All rights reserved.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_cycles.h>
+#include "qdma.h"
+#include "qdma_access_common.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include "qdma_rxtx.h"
+#include "qdma_devops.h"
+
+#if defined RTE_ARCH_X86_64
+#include <immintrin.h>
+#include <emmintrin.h>
+#define RTE_QDMA_DESCS_PER_LOOP (2)
+#endif /* RTE_ARCH_X86_64 */
+
+/******** User logic dependent functions start **********/
+#ifdef QDMA_RX_VEC_X86_64
+/* Vector implementation to get packet length from two completion entries */
+static void qdma_ul_get_cmpt_pkt_len_v(void *ul_cmpt_entry, __m128i *data)
+{
+ union qdma_ul_st_cmpt_ring *cmpt_entry1, *cmpt_entry2;
+ __m128i pkt_len_shift = _mm_set_epi64x(0, 4);
+
+ cmpt_entry1 = (union qdma_ul_st_cmpt_ring *)(ul_cmpt_entry);
+ cmpt_entry2 = cmpt_entry1 + 1;
+
+ /* Read desc statuses backwards to avoid race condition */
+ /* Load a pkt desc */
+ data[1] = _mm_set_epi64x(0, cmpt_entry2->data);
+ /* Find packet length, currently driver needs
+ * only packet length from completion info
+ */
+ data[1] = _mm_srl_epi32(data[1], pkt_len_shift);
+
+ /* Load a pkt desc */
+ data[0] = _mm_set_epi64x(0, cmpt_entry1->data);
+ /* Find packet length, currently driver needs
+ * only packet length from completion info
+ */
+ data[0] = _mm_srl_epi32(data[0], pkt_len_shift);
+}
+#endif /* QDMA_RX_VEC_X86_64 */
+
+/******** User logic dependent functions end **********/
+uint16_t qdma_get_rx_queue_id(void *queue_hndl)
+{
+ struct qdma_rx_queue *rxq = (struct qdma_rx_queue *)queue_hndl;
+
+ return rxq->queue_id;
+}
+
+void qdma_get_device_info(void *queue_hndl,
+ enum qdma_device_type *device_type,
+ enum qdma_ip_type *ip_type)
+{
+ struct qdma_rx_queue *rxq = (struct qdma_rx_queue *)queue_hndl;
+ struct qdma_pci_dev *qdma_dev = rxq->dev->data->dev_private;
+
+ *device_type = (enum qdma_device_type)qdma_dev->device_type;
+ *ip_type = (enum qdma_ip_type)qdma_dev->ip_type;
+}
+
+uint32_t get_mm_c2h_ep_addr(void *queue_hndl)
+{
+ struct qdma_rx_queue *rxq = (struct qdma_rx_queue *)queue_hndl;
+
+ return rxq->ep_addr;
+}
+
+uint32_t get_mm_buff_size(void *queue_hndl)
+{
+ struct qdma_rx_queue *rxq = (struct qdma_rx_queue *)queue_hndl;
+
+ return rxq->rx_buff_size;
+}
+
+#ifdef QDMA_LATENCY_OPTIMIZED
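+/*
+ * Re-seeds the pending-packet moving average from the currently selected
+ * counter threshold and derives the high/low watermarks from the next
+ * higher and next lower entries of the sorted threshold table.
+ */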
+static void adjust_c2h_cntr_avgs(struct qdma_rx_queue *rxq)
+{
+ int i;
+ struct qdma_pci_dev *qdma_dev = rxq->dev->data->dev_private;
+
+ rxq->pend_pkt_moving_avg =
+ qdma_dev->g_c2h_cnt_th[rxq->cmpt_cidx_info.counter_idx];
+
+ if (rxq->sorted_c2h_cntr_idx == (QDMA_GLOBAL_CSR_ARRAY_SZ - 1))
+ i = qdma_dev->sorted_idx_c2h_cnt_th[rxq->sorted_c2h_cntr_idx];
+ else
+ i = qdma_dev->sorted_idx_c2h_cnt_th
+ [rxq->sorted_c2h_cntr_idx + 1];
+
+ rxq->pend_pkt_avg_thr_hi = qdma_dev->g_c2h_cnt_th[i];
+
+ if (rxq->sorted_c2h_cntr_idx > 0)
+ i = qdma_dev->sorted_idx_c2h_cnt_th
+ [rxq->sorted_c2h_cntr_idx - 1];
+ else
+ i = qdma_dev->sorted_idx_c2h_cnt_th[rxq->sorted_c2h_cntr_idx];
+
+ rxq->pend_pkt_avg_thr_lo = qdma_dev->g_c2h_cnt_th[i];
+
+ PMD_DRV_LOG(DEBUG, "q%u: c2h_cntr_idx = %u %u %u",
+ rxq->queue_id,
+ rxq->cmpt_cidx_info.counter_idx,
+ rxq->pend_pkt_avg_thr_lo,
+ rxq->pend_pkt_avg_thr_hi);
+}
+
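+/*
+ * Called when the moving average of pending packets crosses the high
+ * watermark; steps up to the next higher C2H counter threshold provided
+ * the new value stays within half of the C2H ring size.
+ */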
+static void incr_c2h_cntr_th(struct qdma_rx_queue *rxq)
+{
+ struct qdma_pci_dev *qdma_dev = rxq->dev->data->dev_private;
+ unsigned char i, c2h_cntr_idx;
+ unsigned char c2h_cntr_val_new;
+ unsigned char c2h_cntr_val_curr;
+
+ if (rxq->sorted_c2h_cntr_idx ==
+ (QDMA_NUM_C2H_COUNTERS - 1))
+ return;
+
+ rxq->c2h_cntr_monitor_cnt = 0;
+ i = rxq->sorted_c2h_cntr_idx;
+ c2h_cntr_idx = qdma_dev->sorted_idx_c2h_cnt_th[i];
+ c2h_cntr_val_curr = qdma_dev->g_c2h_cnt_th[c2h_cntr_idx];
+ i++;
+ c2h_cntr_idx = qdma_dev->sorted_idx_c2h_cnt_th[i];
+ c2h_cntr_val_new = qdma_dev->g_c2h_cnt_th[c2h_cntr_idx];
+
+ /* Choose the closest counter value */
+ if (c2h_cntr_val_new >= rxq->pend_pkt_moving_avg &&
+ (c2h_cntr_val_new - rxq->pend_pkt_moving_avg) >=
+ (rxq->pend_pkt_moving_avg - c2h_cntr_val_curr))
+ return;
+
+	/* Keep the C2H counter value within half of the C2H ring size */
+ if (c2h_cntr_val_new < (qdma_dev->g_ring_sz[rxq->ringszidx] >> 1)) {
+ rxq->cmpt_cidx_info.counter_idx = c2h_cntr_idx;
+ rxq->sorted_c2h_cntr_idx = i;
+ adjust_c2h_cntr_avgs(rxq);
+ }
+}
+
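+/*
+ * Called when the moving average of pending packets falls below the low
+ * watermark, or when the counter has been stagnant for too long; steps
+ * down to the next lower C2H counter threshold if it is closer to the
+ * average.
+ */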
+static void decr_c2h_cntr_th(struct qdma_rx_queue *rxq)
+{
+ struct qdma_pci_dev *qdma_dev = rxq->dev->data->dev_private;
+ unsigned char i, c2h_cntr_idx;
+ unsigned char c2h_cntr_val_new;
+ unsigned char c2h_cntr_val_curr;
+
+ if (!rxq->sorted_c2h_cntr_idx)
+ return;
+ rxq->c2h_cntr_monitor_cnt = 0;
+ i = rxq->sorted_c2h_cntr_idx;
+ c2h_cntr_idx = qdma_dev->sorted_idx_c2h_cnt_th[i];
+ c2h_cntr_val_curr = qdma_dev->g_c2h_cnt_th[c2h_cntr_idx];
+ i--;
+ c2h_cntr_idx = qdma_dev->sorted_idx_c2h_cnt_th[i];
+
+ c2h_cntr_val_new = qdma_dev->g_c2h_cnt_th[c2h_cntr_idx];
+
+ /* Choose the closest counter value */
+ if (c2h_cntr_val_new <= rxq->pend_pkt_moving_avg &&
+ (rxq->pend_pkt_moving_avg - c2h_cntr_val_new) >=
+ (c2h_cntr_val_curr - rxq->pend_pkt_moving_avg))
+ return;
+
+ rxq->cmpt_cidx_info.counter_idx = c2h_cntr_idx;
+
+ rxq->sorted_c2h_cntr_idx = i;
+ adjust_c2h_cntr_avgs(rxq);
+}
+
+#define MAX_C2H_CNTR_STAGNANT_CNT 16
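+/*
+ * Adaptive counter update: folds the latest pending-packet count into a
+ * moving average and raises or lowers the C2H counter threshold when the
+ * average crosses the high or low watermark. If the average stays between
+ * the watermarks for MAX_C2H_CNTR_STAGNANT_CNT calls, the threshold is
+ * lowered once to probe whether it is adding latency.
+ */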
+static void adapt_update_counter(struct qdma_rx_queue *rxq,
+ uint16_t nb_pkts_avail)
+{
+	/* Fold the newly available packet count into the moving average */
+ rxq->pend_pkt_moving_avg += nb_pkts_avail;
+ rxq->pend_pkt_moving_avg >>= 1;
+
+ /* if avg > hi_th, increase the counter
+ * if avg < lo_th, decrease the counter
+ */
+ if (rxq->pend_pkt_avg_thr_hi <= rxq->pend_pkt_moving_avg) {
+ incr_c2h_cntr_th(rxq);
+ } else if (rxq->pend_pkt_avg_thr_lo >=
+ rxq->pend_pkt_moving_avg) {
+ decr_c2h_cntr_th(rxq);
+ } else {
+ rxq->c2h_cntr_monitor_cnt++;
+ if (rxq->c2h_cntr_monitor_cnt == MAX_C2H_CNTR_STAGNANT_CNT) {
+			/* Step the counter value down to check whether the
+			 * higher counter threshold is actually adding
+			 * latency
+			 */
+ decr_c2h_cntr_th(rxq);
+ rxq->c2h_cntr_monitor_cnt = 0;
+ } else {
+ return;
+ }
+ }
+}
+#endif /* QDMA_LATENCY_OPTIMIZED */
new file mode 100644
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2022 Xilinx, Inc. All rights reserved.
+ */
+
+#ifndef QDMA_DPDK_RXTX_H_
+#define QDMA_DPDK_RXTX_H_
+
+#include "qdma_access_export.h"
+
+/* Supporting functions for user logic pluggability */
+uint16_t qdma_get_rx_queue_id(void *queue_hndl);
+void qdma_get_device_info(void *queue_hndl,
+ enum qdma_device_type *device_type,
+ enum qdma_ip_type *ip_type);
+struct qdma_ul_st_h2c_desc *get_st_h2c_desc(void *queue_hndl);
+struct qdma_ul_mm_desc *get_mm_h2c_desc(void *queue_hndl);
+uint32_t get_mm_c2h_ep_addr(void *queue_hndl);
+uint32_t get_mm_buff_size(void *queue_hndl);
+
+#endif /* QDMA_DPDK_RXTX_H_ */
new file mode 100644
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2022 Xilinx, Inc. All rights reserved.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include "qdma_user.h"
+#include "qdma_access_common.h"
+#include "qdma_log.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+
+/**
+ * Extract the fields of given completion entry in the completion ring.
+ *
+ * @param ul_cmpt_entry
+ * Pointer to completion entry to be extracted.
+ * @param cmpt_info
+ *   Pointer to the variable into which the completion entry details are extracted.
+ *
+ * @return
+ * 0 on success and -1 on failure.
+ */
+int qdma_ul_extract_st_cmpt_info(void *ul_cmpt_entry, void *cmpt_info)
+{
+ union qdma_ul_st_cmpt_ring *cmpt_data, *cmpt_desc;
+
+ cmpt_desc = (union qdma_ul_st_cmpt_ring *)(ul_cmpt_entry);
+ cmpt_data = (union qdma_ul_st_cmpt_ring *)(cmpt_info);
+
+ if (unlikely(cmpt_desc->err || cmpt_desc->data_frmt))
+ return -1;
+
+ cmpt_data->data = cmpt_desc->data;
+ if (unlikely(!cmpt_desc->desc_used))
+ cmpt_data->length = 0;
+
+ return 0;
+}
+
+/**
+ * Extract the packet length from the given completion entry.
+ *
+ * @param ul_cmpt_entry
+ * Pointer to completion entry to be extracted.
+ *
+ * @return
+ * Packet length
+ */
+uint16_t qdma_ul_get_cmpt_pkt_len(void *ul_cmpt_entry)
+{
+ return ((union qdma_ul_st_cmpt_ring *)ul_cmpt_entry)->length;
+}
+
+/**
+ * Processes the immediate data for the given completion ring entry
+ * and stores it in a file.
+ *
+ * @param qhndl
+ *   Pointer to RX queue handle.
+ * @param cmpt_entry
+ *   Pointer to completion entry to be processed.
+ * @param cmpt_desc_len
+ *   Completion descriptor length.
+ *
+ * @return
+ *   0 on success, negative value on failure.
+ */
+int qdma_ul_process_immediate_data_st(void *qhndl, void *cmpt_entry,
+ uint16_t cmpt_desc_len)
+{
+ int ofd;
+ char fln[50];
+#ifndef TEST_64B_DESC_BYPASS
+ uint16_t i = 0;
+ enum qdma_device_type dev_type;
+ enum qdma_ip_type ip_type;
+#else
+ int ret = 0;
+#endif
+ uint16_t queue_id = 0;
+
+ queue_id = qdma_get_rx_queue_id(qhndl);
+ snprintf(fln, sizeof(fln), "q_%d_%s", queue_id,
+ "immmediate_data.txt");
+ ofd = open(fln, O_RDWR | O_CREAT | O_APPEND |
+ O_SYNC, 0666);
+ if (ofd < 0) {
+ PMD_DRV_LOG(INFO, "recv on qhndl[%d] CMPT, "
+ "unable to create outfile "
+			"to dump immediate data",
+ queue_id);
+ return ofd;
+ }
+#ifdef TEST_64B_DESC_BYPASS
+ ret = write(ofd, cmpt_entry, cmpt_desc_len);
+ if (ret < cmpt_desc_len)
+ PMD_DRV_LOG(DEBUG, "recv on rxq[%d] CMPT, "
+ "immediate data len: %d, "
+ "written to outfile :%d bytes",
+ queue_id, cmpt_desc_len,
+ ret);
+#else
+ qdma_get_device_info(qhndl, &dev_type, &ip_type);
+
+ if (ip_type == QDMA_VERSAL_HARD_IP) {
+		/* ignoring the first 20 bits of the length field */
+ dprintf(ofd, "%02x",
+ (*((uint8_t *)cmpt_entry + 2) & 0xF0));
+ for (i = 3; i < (cmpt_desc_len) ; i++)
+ dprintf(ofd, "%02x",
+ *((uint8_t *)cmpt_entry + i));
+ } else {
+ dprintf(ofd, "%02x",
+ (*((uint8_t *)cmpt_entry) & 0xF0));
+ for (i = 1; i < (cmpt_desc_len) ; i++)
+ dprintf(ofd, "%02x",
+ *((uint8_t *)cmpt_entry + i));
+ }
+#endif
+
+ close(ofd);
+ return 0;
+}
+
+/**
+ * Updates the MM C2H descriptor.
+ *
+ * @param qhndl
+ *   Pointer to RX queue handle.
+ * @param mb
+ *   Pointer to memory buffer.
+ * @param desc
+ *   Pointer to descriptor entry.
+ *
+ * @return
+ *   0 on success.
+ */
+int qdma_ul_update_mm_c2h_desc(void *qhndl, struct rte_mbuf *mb, void *desc)
+{
+ struct qdma_ul_mm_desc *desc_info = (struct qdma_ul_mm_desc *)desc;
+
+ desc_info->src_addr = get_mm_c2h_ep_addr(qhndl);
+ /* make it so the data pointer starts there too... */
+ mb->data_off = RTE_PKTMBUF_HEADROOM;
+ /* low 32-bits of phys addr must be 4KB aligned... */
+ desc_info->dst_addr = (uint64_t)mb->buf_iova + RTE_PKTMBUF_HEADROOM;
+ desc_info->dv = 1;
+ desc_info->eop = 1;
+ desc_info->sop = 1;
+ desc_info->len = (int)get_mm_buff_size(qhndl);
+
+ return 0;
+}
+
+/**
+ * Processes the completion data from the given completion entry.
+ *
+ * @param cmpt_entry
+ * Pointer to completion entry to be processed.
+ * @param cmpt_desc_len
+ * Completion descriptor length.
+ * @param cmpt_buff
+ *   Pointer to the data buffer into which the data will be extracted.
+ *
+ * @return
+ *   0 on success and -1 on failure.
+ */
+int qdma_ul_process_immediate_data(void *cmpt_entry, uint16_t cmpt_desc_len,
+ char *cmpt_buff)
+{
+ uint16_t i = 0;
+ char *cmpt_buff_ptr;
+ struct qdma_ul_cmpt_ring *cmpt_desc =
+ (struct qdma_ul_cmpt_ring *)(cmpt_entry);
+
+ if (unlikely(cmpt_desc->err || cmpt_desc->data_frmt))
+ return -1;
+
+ cmpt_buff_ptr = (char *)cmpt_buff;
+ *(cmpt_buff_ptr) = (*((uint8_t *)cmpt_desc) & 0xF0);
+ for (i = 1; i < (cmpt_desc_len); i++)
+ *(cmpt_buff_ptr + i) = (*((uint8_t *)cmpt_desc + i));
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2022 Xilinx, Inc. All rights reserved.
+ */
+
+/**
+ * @file
+ * @brief This file contains example design/user logic controlled
+ * data structures and functions.
+ * The driver is specific to an example design; if the example design
+ * changes any user-controlled parameters, this file needs to be modified
+ * accordingly.
+ * Structures for the completion entry and descriptor bypass can be added here.
+ */
+
+#ifndef __QDMA_USER_H__
+#define __QDMA_USER_H__
+
+#include "qdma_rxtx.h"
+ /**
+ * C2H Completion entry structure
+ * This structure is specific for the example design.
+ * Processing of this ring happens in qdma_rxtx.c.
+ */
+union qdma_ul_st_cmpt_ring {
+ volatile uint64_t data;
+ struct {
+ /* For 2018.2 IP, this field determines the
+ * Standard or User format of completion entry
+ */
+ volatile uint32_t data_frmt:1;
+
+ /* This field inverts every time PIDX wraps
+ * the completion ring
+ */
+ volatile uint32_t color:1;
+
+ /* Indicates that C2H engine encountered
+ * a descriptor error
+ */
+ volatile uint32_t err:1;
+
+ /* Indicates that the completion packet
+ * consumes descriptor in C2H ring
+ */
+ volatile uint32_t desc_used:1;
+
+ /* Indicates length of the data packet */
+ volatile uint32_t length:16;
+
+ /* Reserved field */
+ volatile uint32_t user_rsv:4;
+
+ /* User logic defined data of
+ * length based on CMPT entry
+ * length
+ */
+ volatile uint8_t user_def[];
+ };
+};
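+
+/*
+ * For reference, assuming the usual little-endian bitfield layout, the
+ * fields above occupy the low bits of 'data' roughly as:
+ *
+ *   bit  0       data_frmt
+ *   bit  1       color
+ *   bit  2       err
+ *   bit  3       desc_used
+ *   bits 4..19   length
+ *   bits 20..23  user_rsv
+ *
+ * which is why the vector path in qdma_rxtx.c shifts the raw completion
+ * word right by 4 to recover the packet length.
+ */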
+
+
+ /**
+ * Completion entry structure
+ * This structure is specific for the example design.
+ * Currently this structure is used for the processing
+ * of the MM completion ring in rte_pmd_qdma.c.
+ */
+struct __rte_packed qdma_ul_cmpt_ring
+{
+ volatile uint32_t data_frmt:1; /* For 2018.2 IP, this field
+ * determines the Standard or User
+ * format of completion entry
+ */
+ volatile uint32_t color:1; /* This field inverts every time
+ * PIDX wraps the completion ring
+ */
+ volatile uint32_t err:1; /* Indicates that C2H engine
+ * encountered a descriptor
+ * error
+ */
+ volatile uint32_t rsv:1; /* Reserved */
+ volatile uint8_t user_def[]; /* User logic defined data of
+ * length based on CMPT entry
+ * length
+ */
+};
+
+/** ST C2H Descriptor **/
+struct __rte_packed qdma_ul_st_c2h_desc
+{
+ uint64_t dst_addr;
+};
+
+#define S_H2C_DESC_F_SOP 1
+#define S_H2C_DESC_F_EOP 2
+
+/* pld_len and flags members are part of custom descriptor format needed
+ * by example design for ST loopback and desc bypass
+ */
+
+/** ST H2C Descriptor **/
+struct __rte_packed qdma_ul_st_h2c_desc
+{
+ volatile uint16_t cdh_flags;
+ volatile uint16_t pld_len;
+ volatile uint16_t len;
+ volatile uint16_t flags;
+ volatile uint64_t src_addr;
+};
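+
+/*
+ * Illustration only (the ST H2C update routine itself is outside this
+ * change): a single-segment packet would be expected to mark both ends of
+ * the descriptor chain, e.g.
+ *
+ *   desc->len   = mb->data_len;
+ *   desc->flags = S_H2C_DESC_F_SOP | S_H2C_DESC_F_EOP;
+ */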
+
+/** MM Descriptor **/
+struct __rte_packed qdma_ul_mm_desc
+{
+ volatile uint64_t src_addr;
+ volatile uint64_t len:28;
+ volatile uint64_t dv:1;
+ volatile uint64_t sop:1;
+ volatile uint64_t eop:1;
+ volatile uint64_t rsvd:33;
+ volatile uint64_t dst_addr;
+ volatile uint64_t rsvd2;
+};
+
+/**
+ * Extract the fields of given completion entry in the completion ring.
+ *
+ * @param ul_cmpt_entry
+ * Pointer to completion entry to be extracted.
+ * @param cmpt_info
+ *   Pointer to the structure into which the completion entry details are filled.
+ *
+ * @return
+ * 0 on success and -ve on error.
+ */
+int qdma_ul_extract_st_cmpt_info(void *ul_cmpt_entry, void *cmpt_info);
+
+/**
+ * Extract the packet length from the given completion entry.
+ *
+ * @param ul_cmpt_entry
+ * Pointer to completion entry to be extracted.
+ *
+ * @return
+ * Packet length
+ */
+uint16_t qdma_ul_get_cmpt_pkt_len(void *ul_cmpt_entry);
+
+/**
+ * Processes the immediate data for the given completion ring entry
+ * and stores the immediate data in a file.
+ *
+ * @param qhndl
+ * Pointer to RX queue handle.
+ * @param cmpt_entry
+ * Pointer to completion entry to be processed.
+ * @param cmpt_desc_len
+ * Completion descriptor length.
+ *
+ * @return
+ *   0 on success, negative value on failure.
+ */
+int qdma_ul_process_immediate_data_st(void *qhndl, void *cmpt_entry,
+ uint16_t cmpt_desc_len);
+
+/**
+ * Updates the ST H2C descriptor
+ *
+ * @param qhndl
+ * Pointer to TX queue handle.
+ * @param q_offloads
+ * Offloads supported for the queue.
+ * @param mb
+ * Pointer to memory buffer.
+ *
+ * @return
+ *   0 on success, negative value on failure.
+ */
+int qdma_ul_update_st_h2c_desc(void *qhndl, uint64_t q_offloads,
+ struct rte_mbuf *mb);
+
+/**
+ * Updates the MM c2h descriptor.
+ *
+ * @param qhndl
+ * Pointer to RX queue handle.
+ * @param mb
+ * Pointer to memory buffer.
+ * @param desc
+ * Pointer to descriptor entry.
+ *
+ * @return
+ *   0 on success.
+ */
+int qdma_ul_update_mm_c2h_desc(void *qhndl, struct rte_mbuf *mb, void *desc);
+
+/**
+ * Updates the MM H2C descriptor.
+ *
+ * @param qhndl
+ * Pointer to TX queue handle.
+ * @param mb
+ * Pointer to memory buffer.
+ *
+ * @return
+ *   0 on success, negative value on failure.
+ */
+int qdma_ul_update_mm_h2c_desc(void *qhndl, struct rte_mbuf *mb);
+
+/**
+ * Processes the completion data from the given completion entry.
+ *
+ * @param cmpt_entry
+ * Pointer to completion entry to be processed.
+ * @param cmpt_desc_len
+ * Completion descriptor length.
+ * @param cmpt_buff
+ *   Pointer to the data buffer into which the data will be extracted.
+ *
+ * @return
+ *   0 on success and -1 on failure.
+ */
+int qdma_ul_process_immediate_data(void *cmpt_entry, uint16_t cmpt_desc_len,
+ char *cmpt_buff);
+
+#endif /* ifndef __QDMA_USER_H__ */