From patchwork Fri Sep 6 15:44:33 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fiona Trahe X-Patchwork-Id: 58886 X-Patchwork-Delegate: gakhil@marvell.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id A6FAE1F3D5; Fri, 6 Sep 2019 17:44:55 +0200 (CEST) Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id ACAA31F3BD for ; Fri, 6 Sep 2019 17:44:51 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 06 Sep 2019 08:44:50 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.64,473,1559545200"; d="scan'208";a="190852636" Received: from sivswdev09.ir.intel.com (HELO localhost.localdomain) ([10.237.217.48]) by FMSMGA003.fm.intel.com with ESMTP; 06 Sep 2019 08:44:49 -0700 From: Fiona Trahe To: dev@dpdk.org Cc: akhil.goyal@nxp.com, arkadiuszx.kusztal@intel.com, fiona.trahe@intel.com Date: Fri, 6 Sep 2019 16:44:33 +0100 Message-Id: <1567784675-1401-2-git-send-email-fiona.trahe@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: <1567784675-1401-1-git-send-email-fiona.trahe@intel.com> References: <1567784675-1401-1-git-send-email-fiona.trahe@intel.com> Subject: [dpdk-dev] [PATCH 1/3] common/qat: remove tail write coalescing feature X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The feature Coalescing Tail Writes on Enqueue is removed as it is not thread-safe and a dual-thread feature will be added shortly. Signed-off-by: Fiona Trahe --- drivers/common/qat/qat_qp.c | 16 +++------------- drivers/common/qat/qat_qp.h | 6 ------ 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c index 03f11f8..01ddce0 100644 --- a/drivers/common/qat/qat_qp.c +++ b/drivers/common/qat/qat_qp.c @@ -538,7 +538,6 @@ static inline uint32_t adf_modulo(uint32_t data, uint32_t modulo_mask) txq_write_tail(struct qat_qp *qp, struct qat_queue *q) { WRITE_CSR_RING_TAIL(qp->mmap_bar_addr, q->hw_bundle_number, q->hw_queue_number, q->tail); - q->nb_pending_requests = 0; q->csr_tail = q->tail; } @@ -622,25 +621,20 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q) kick_tail: queue->tail = tail; tmp_qp->stats.enqueued_count += nb_ops_sent; - queue->nb_pending_requests += nb_ops_sent; - if (tmp_qp->inflights16 < QAT_CSR_TAIL_FORCE_WRITE_THRESH || - queue->nb_pending_requests > QAT_CSR_TAIL_WRITE_THRESH) { - txq_write_tail(tmp_qp, queue); - } + txq_write_tail(tmp_qp, queue); return nb_ops_sent; } uint16_t qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops) { - struct qat_queue *rx_queue, *tx_queue; + struct qat_queue *rx_queue; struct qat_qp *tmp_qp = (struct qat_qp *)qp; uint32_t head; uint32_t resp_counter = 0; uint8_t *resp_msg; rx_queue = &(tmp_qp->rx_q); - tx_queue = &(tmp_qp->tx_q); head = rx_queue->head; resp_msg = (uint8_t *)rx_queue->base_addr + rx_queue->head; @@ -677,11 +671,7 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q) QAT_CSR_HEAD_WRITE_THRESH) rxq_free_desc(tmp_qp, rx_queue); } - /* also check if tail needs to be advanced */ - if (tmp_qp->inflights16 <= QAT_CSR_TAIL_FORCE_WRITE_THRESH && - tx_queue->tail != tx_queue->csr_tail) { - txq_write_tail(tmp_qp, tx_queue); - } + return resp_counter; } diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h index 980c2ba..9212ca4 100644 --- a/drivers/common/qat/qat_qp.h +++ b/drivers/common/qat/qat_qp.h @@ -11,10 +11,6 @@ #define QAT_CSR_HEAD_WRITE_THRESH 32U /* number of requests to accumulate before writing head CSR */ -#define QAT_CSR_TAIL_WRITE_THRESH 32U -/* number of requests to accumulate before writing tail CSR */ -#define QAT_CSR_TAIL_FORCE_WRITE_THRESH 256U -/* number of inflights below which no tail write coalescing should occur */ typedef int (*build_request_t)(void *op, uint8_t *req, void *op_cookie, @@ -64,8 +60,6 @@ struct qat_queue { uint32_t csr_tail; /* last written tail value */ uint16_t nb_processed_responses; /* number of responses processed since last CSR head write */ - uint16_t nb_pending_requests; - /* number of requests pending since last CSR tail write */ }; struct qat_qp { From patchwork Fri Sep 6 15:44:34 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fiona Trahe X-Patchwork-Id: 58887 X-Patchwork-Delegate: gakhil@marvell.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 667121F42A; Fri, 6 Sep 2019 17:44:58 +0200 (CEST) Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id D8B811F3C3 for ; Fri, 6 Sep 2019 17:44:54 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 06 Sep 2019 08:44:54 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.64,473,1559545200"; d="scan'208";a="190852657" Received: from sivswdev09.ir.intel.com (HELO localhost.localdomain) ([10.237.217.48]) by FMSMGA003.fm.intel.com with ESMTP; 06 Sep 2019 08:44:53 -0700 From: Fiona Trahe To: dev@dpdk.org Cc: akhil.goyal@nxp.com, arkadiuszx.kusztal@intel.com, fiona.trahe@intel.com Date: Fri, 6 Sep 2019 16:44:34 +0100 Message-Id: <1567784675-1401-3-git-send-email-fiona.trahe@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: <1567784675-1401-1-git-send-email-fiona.trahe@intel.com> References: <1567784675-1401-1-git-send-email-fiona.trahe@intel.com> Subject: [dpdk-dev] [PATCH 2/3] common/qat: move max inflights param into qp X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The max_inflights parameter is moved from qat_queue to qat_qp as it's a more appropriate location. Signed-off-by: Fiona Trahe --- drivers/common/qat/qat_qp.c | 23 ++++++++++++----------- drivers/common/qat/qat_qp.h | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c index 01ddce0..8e4c74a 100644 --- a/drivers/common/qat/qat_qp.c +++ b/drivers/common/qat/qat_qp.c @@ -239,6 +239,15 @@ int qat_qp_setup(struct qat_pci_device *qat_dev, goto create_err; } + qp->max_inflights = ADF_MAX_INFLIGHTS(qp->tx_q.queue_size, + ADF_BYTES_TO_MSG_SIZE(qp->tx_q.msg_size)); + + if (qp->max_inflights < 2) { + QAT_LOG(ERR, "Invalid num inflights"); + qat_queue_delete(&(qp->tx_q)); + goto create_err; + } + if (qat_queue_create(qat_dev, &(qp->rx_q), qat_qp_conf, ADF_RING_DIR_RX) != 0) { QAT_LOG(ERR, "Rx queue create failed " @@ -416,15 +425,7 @@ static void qat_queue_delete(struct qat_queue *queue) goto queue_create_err; } - queue->max_inflights = ADF_MAX_INFLIGHTS(queue->queue_size, - ADF_BYTES_TO_MSG_SIZE(desc_size)); queue->modulo_mask = (1 << ADF_RING_SIZE_MODULO(queue->queue_size)) - 1; - - if (queue->max_inflights < 2) { - QAT_LOG(ERR, "Invalid num inflights"); - ret = -EINVAL; - goto queue_create_err; - } queue->head = 0; queue->tail = 0; queue->msg_size = desc_size; @@ -443,11 +444,11 @@ static void qat_queue_delete(struct qat_queue *queue) queue->hw_queue_number, queue_base); QAT_LOG(DEBUG, "RING: Name:%s, size in CSR: %u, in bytes %u," - " nb msgs %u, msg_size %u, max_inflights %u modulo mask %u", + " nb msgs %u, msg_size %u, modulo mask %u", queue->memz_name, queue->queue_size, queue_size_bytes, qp_conf->nb_descriptors, desc_size, - queue->max_inflights, queue->modulo_mask); + queue->modulo_mask); return 0; @@ -590,7 +591,7 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q) /* Find how many can actually fit on the ring */ tmp_qp->inflights16 += nb_ops; - overflow = tmp_qp->inflights16 - queue->max_inflights; + overflow = tmp_qp->inflights16 - tmp_qp->max_inflights; if (overflow > 0) { tmp_qp->inflights16 -= overflow; nb_ops_possible = nb_ops - overflow; diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h index 9212ca4..5066f06 100644 --- a/drivers/common/qat/qat_qp.h +++ b/drivers/common/qat/qat_qp.h @@ -51,7 +51,6 @@ struct qat_queue { uint32_t tail; /* Shadow copy of the tail */ uint32_t modulo_mask; uint32_t msg_size; - uint16_t max_inflights; uint32_t queue_size; uint8_t hw_bundle_number; uint8_t hw_queue_number; @@ -76,6 +75,7 @@ struct qat_qp { enum qat_service_type service_type; struct qat_pci_device *qat_dev; /**< qat device this qp is on */ + uint16_t max_inflights; } __rte_cache_aligned; extern const struct qat_qp_hw_data qat_gen1_qps[][ADF_MAX_QPS_ON_ANY_SERVICE]; From patchwork Fri Sep 6 15:44:35 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fiona Trahe X-Patchwork-Id: 58888 X-Patchwork-Delegate: gakhil@marvell.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 2159E1F443; Fri, 6 Sep 2019 17:45:06 +0200 (CEST) Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by dpdk.org (Postfix) with ESMTP id E3AEA1F43A for ; Fri, 6 Sep 2019 17:45:04 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga103.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 06 Sep 2019 08:45:04 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.64,473,1559545200"; d="scan'208";a="190852738" Received: from sivswdev09.ir.intel.com (HELO localhost.localdomain) ([10.237.217.48]) by FMSMGA003.fm.intel.com with ESMTP; 06 Sep 2019 08:45:02 -0700 From: Fiona Trahe To: dev@dpdk.org Cc: akhil.goyal@nxp.com, arkadiuszx.kusztal@intel.com, fiona.trahe@intel.com Date: Fri, 6 Sep 2019 16:44:35 +0100 Message-Id: <1567784675-1401-4-git-send-email-fiona.trahe@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: <1567784675-1401-1-git-send-email-fiona.trahe@intel.com> References: <1567784675-1401-1-git-send-email-fiona.trahe@intel.com> Subject: [dpdk-dev] [PATCH 3/3] common/qat: add dual thread support X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Remove the limitation whereby enqueue and dequeue must be done in same thread. The inflight calculation is reworked to be thread-safe for 2 threads - note this is not general multi-thread support, i.e all enqueues to a qp must still be done in one thread and all dequeues must be done in one thread, but enqueues and dequeues may be in separate threads. Documentation updated. Signed-off-by: Fiona Trahe --- doc/guides/compressdevs/qat_comp.rst | 5 +++- doc/guides/cryptodevs/qat.rst | 11 +++++++- drivers/common/qat/qat_qp.c | 40 ++++++++++++++++++++------------- drivers/common/qat/qat_qp.h | 3 +- 4 files changed, 39 insertions(+), 20 deletions(-) diff --git a/doc/guides/compressdevs/qat_comp.rst b/doc/guides/compressdevs/qat_comp.rst index 6f583a4..d06b08a 100644 --- a/doc/guides/compressdevs/qat_comp.rst +++ b/doc/guides/compressdevs/qat_comp.rst @@ -33,7 +33,10 @@ Limitations ----------- * Compressdev level 0, no compression, is not supported. -* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported). +* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single + queue-pair all enqueues to the TX queue must be done from one thread and all dequeues + from the RX queue must be done from one thread, but enqueues and dequeues may be done + in different threads.) * No BSD support as BSD QAT kernel driver not available. * When using Deflate dynamic huffman encoding for compression, the input size (op.src.length) must be < CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE from the config file, diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst index e905f6d..7c81b9b 100644 --- a/doc/guides/cryptodevs/qat.rst +++ b/doc/guides/cryptodevs/qat.rst @@ -81,7 +81,11 @@ Limitations * No BSD support as BSD QAT kernel driver not available. * ZUC EEA3/EIA3 is not supported by dh895xcc devices * Maximum additional authenticated data (AAD) for GCM is 240 bytes long and must be passed to the device in a buffer rounded up to the nearest block-size multiple (x16) and padded with zeros. -* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported). +* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single + queue-pair all enqueues to the TX queue must be done from one thread and all dequeues + from the RX queue must be done from one thread, but enqueues and dequeues may be done + in different threads.) + Extra notes on KASUMI F9 ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -122,7 +126,10 @@ Limitations ~~~~~~~~~~~ * Big integers longer than 4096 bits are not supported. -* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported). +* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single + queue-pair all enqueues to the TX queue must be done from one thread and all dequeues + from the RX queue must be done from one thread, but enqueues and dequeues may be done + in different threads.) .. _building_qat: diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c index 8e4c74a..30cdc61 100644 --- a/drivers/common/qat/qat_qp.c +++ b/drivers/common/qat/qat_qp.c @@ -230,7 +230,7 @@ int qat_qp_setup(struct qat_pci_device *qat_dev, } qp->mmap_bar_addr = pci_dev->mem_resource[0].addr; - qp->inflights16 = 0; + qp->enqueued = qp->dequeued = 0; if (qat_queue_create(qat_dev, &(qp->tx_q), qat_qp_conf, ADF_RING_DIR_TX) != 0) { @@ -321,7 +321,7 @@ int qat_qp_release(struct qat_qp **qp_addr) qp->qat_dev->qat_dev_id); /* Don't free memory if there are still responses to be processed */ - if (qp->inflights16 == 0) { + if ((qp->enqueued - qp->dequeued) == 0) { qat_queue_delete(&(qp->tx_q)); qat_queue_delete(&(qp->rx_q)); } else { @@ -579,7 +579,6 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q) uint16_t nb_ops_possible = nb_ops; register uint8_t *base_addr; register uint32_t tail; - int overflow; if (unlikely(nb_ops == 0)) return 0; @@ -590,13 +589,25 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q) tail = queue->tail; /* Find how many can actually fit on the ring */ - tmp_qp->inflights16 += nb_ops; - overflow = tmp_qp->inflights16 - tmp_qp->max_inflights; - if (overflow > 0) { - tmp_qp->inflights16 -= overflow; - nb_ops_possible = nb_ops - overflow; - if (nb_ops_possible == 0) - return 0; + { + /* dequeued can only be written by one thread, but it may not + * be this thread. As it's 4-byte aligned it will be read + * atomically here by any Intel CPU. + * enqueued can wrap before dequeued, but cannot + * lap it as var size of enq/deq (uint32_t) > var size of + * max_inflights (uint16_t). In reality inflights is never + * even as big as max uint16_t, as it's <= ADF_MAX_DESC. + * On wrapping, the calculation still returns the correct + * positive value as all three vars are unsigned. + */ + uint32_t inflights = + tmp_qp->enqueued - tmp_qp->dequeued; + + if ((inflights + nb_ops) > tmp_qp->max_inflights) { + nb_ops_possible = tmp_qp->max_inflights - inflights; + if (nb_ops_possible == 0) + return 0; + } } while (nb_ops_sent != nb_ops_possible) { @@ -605,11 +616,7 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q) tmp_qp->qat_dev_gen); if (ret != 0) { tmp_qp->stats.enqueue_err_count++; - /* - * This message cannot be enqueued, - * decrease number of ops that wasn't sent - */ - tmp_qp->inflights16 -= nb_ops_possible - nb_ops_sent; + /* This message cannot be enqueued */ if (nb_ops_sent == 0) return 0; goto kick_tail; @@ -621,6 +628,7 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q) } kick_tail: queue->tail = tail; + tmp_qp->enqueued += nb_ops_sent; tmp_qp->stats.enqueued_count += nb_ops_sent; txq_write_tail(tmp_qp, queue); return nb_ops_sent; @@ -664,9 +672,9 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q) } if (resp_counter > 0) { rx_queue->head = head; + tmp_qp->dequeued += resp_counter; tmp_qp->stats.dequeued_count += resp_counter; rx_queue->nb_processed_responses += resp_counter; - tmp_qp->inflights16 -= resp_counter; if (rx_queue->nb_processed_responses > QAT_CSR_HEAD_WRITE_THRESH) diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h index 5066f06..8b9ab79 100644 --- a/drivers/common/qat/qat_qp.h +++ b/drivers/common/qat/qat_qp.h @@ -63,7 +63,6 @@ struct qat_queue { struct qat_qp { void *mmap_bar_addr; - uint16_t inflights16; struct qat_queue tx_q; struct qat_queue rx_q; struct qat_common_stats stats; @@ -75,6 +74,8 @@ struct qat_qp { enum qat_service_type service_type; struct qat_pci_device *qat_dev; /**< qat device this qp is on */ + uint32_t enqueued; + uint32_t dequeued __rte_aligned(4); uint16_t max_inflights; } __rte_cache_aligned;