[dpdk-dev,1/2] crypto/qat: rework request builder for performance
Commit Message
The QAT PMD adjusts the buffer start address and offsets passed
to the device so that the DMAs in and out of the device are
64-byte aligned.
This gives more consistent throughput, which previously varied
depending on how the application set up the mbuf.
The message builder code had to be considerably refactored
to do this efficiently.
Signed-off-by: Fiona Trahe <fiona.trahe@intel.com>
---
drivers/crypto/qat/qat_adf/icp_qat_hw.h | 5 +
drivers/crypto/qat/qat_adf/qat_algs.h | 1 +
drivers/crypto/qat/qat_adf/qat_algs_build_desc.c | 2 +
drivers/crypto/qat/qat_crypto.c | 239 ++++++++++++++++-------
4 files changed, 176 insertions(+), 71 deletions(-)
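For illustration, here is a standalone sketch of the alignment trick the
commit message describes. It models the physical addresses as plain
integers instead of calling rte_pktmbuf_mtophys()/rte_pktmbuf_mtophys_offset(),
so it builds without DPDK; the names mirror the patch and all values are
made up.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define QAT_64_BYTE_ALIGN_MASK (~(uint64_t)0x3f)

int main(void)
{
	/* Hypothetical mbuf: physical address of the data start, the
	 * headroom in front of it, and the smaller of the cipher and
	 * auth offsets into the data (min_ofs in the patch). */
	uint64_t data_paddr = 0x7f0000094ULL; /* rte_pktmbuf_mtophys() */
	uint32_t headroom = 128;              /* rte_pktmbuf_headroom() */
	uint32_t min_ofs = 20;

	/* Start the DMA at the nearest 64-byte boundary at or below
	 * the first byte the device actually needs. */
	uint64_t buf_start = (data_paddr + min_ofs) & QAT_64_BYTE_ALIGN_MASK;

	/* Rounding down must not walk off the front of the mbuf; if it
	 * would, revert to the unaligned data start and take the
	 * performance hit, as the patch does. */
	if (data_paddr - headroom > buf_start)
		buf_start = data_paddr;

	/* Offsets handed to the device become relative to buf_start. */
	uint32_t cipher_offset = (uint32_t)(data_paddr + min_ofs - buf_start);

	printf("buf_start=0x%" PRIx64 ", cipher_offset=%u\n",
	       buf_start, cipher_offset);
	return 0;
}

With these example inputs it prints buf_start=0x7f0000080, cipher_offset=40:
the device still reads the same first byte, but the transfer now begins on
a 64-byte boundary.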
Comments
Acked-by: Deepak Kumar Jain <deepak.k.jain@intel.com>
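As a reader's guide to the reworked builder below: once the aligned
buf_start is chosen, the common src/dst DMA length must cover whichever
of the cipher or auth regions ends last, and when the digest is appended
in the mbuf (no separate digest.phys_addr) the digest length extends the
destination for generate or the source for verify. A simplified sketch in
plain C (hypothetical names and values, no DPDK headers):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Offsets are relative to the aligned buf_start. */
struct region { uint32_t off, len; };

static void dma_lengths(struct region cipher, struct region auth,
			uint32_t digest_len, int generate,
			uint64_t *src_len, uint64_t *dst_len)
{
	uint64_t cipher_end = (uint64_t)cipher.off + cipher.len;
	uint64_t auth_end = (uint64_t)auth.off + auth.len;

	/* Cover whichever region ends last. */
	*src_len = *dst_len = cipher_end > auth_end ? cipher_end : auth_end;

	/* An appended digest sits just past the data: the device writes
	 * it on generate and reads it on verify. */
	if (generate)
		*dst_len += digest_len;
	else
		*src_len += digest_len;
}

int main(void)
{
	struct region cipher = { .off = 16, .len = 64 };
	struct region auth = { .off = 0, .len = 96 };
	uint64_t src, dst;

	dma_lengths(cipher, auth, 16, 1 /* generate */, &src, &dst);
	printf("src_length=%" PRIu64 ", dst_length=%" PRIu64 "\n", src, dst);
	return 0;
}

This prints src_length=96, dst_length=112.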
diff --git a/drivers/crypto/qat/qat_adf/icp_qat_hw.h b/drivers/crypto/qat/qat_adf/icp_qat_hw.h
@@ -237,6 +237,11 @@ enum icp_qat_hw_cipher_dir {
ICP_QAT_HW_CIPHER_DECRYPT = 1,
};
+enum icp_qat_hw_auth_op {
+ ICP_QAT_HW_AUTH_VERIFY = 0,
+ ICP_QAT_HW_AUTH_GENERATE = 1,
+};
+
enum icp_qat_hw_cipher_convert {
ICP_QAT_HW_CIPHER_NO_CONVERT = 0,
ICP_QAT_HW_CIPHER_KEY_CONVERT = 1,
diff --git a/drivers/crypto/qat/qat_adf/qat_algs.h b/drivers/crypto/qat/qat_adf/qat_algs.h
@@ -98,6 +98,7 @@ struct qat_session {
enum icp_qat_hw_cipher_dir qat_dir;
enum icp_qat_hw_cipher_mode qat_mode;
enum icp_qat_hw_auth_algo qat_hash_alg;
+ enum icp_qat_hw_auth_op auth_op;
struct qat_alg_cd cd;
uint8_t *cd_cur_ptr;
phys_addr_t cd_paddr;
diff --git a/drivers/crypto/qat/qat_adf/qat_algs_build_desc.c b/drivers/crypto/qat/qat_adf/qat_algs_build_desc.c
@@ -625,11 +625,13 @@ int qat_alg_aead_session_create_content_desc_auth(struct qat_session *cdesc,
ICP_QAT_FW_LA_NO_RET_AUTH_RES);
ICP_QAT_FW_LA_CMP_AUTH_SET(header->serv_specif_flags,
ICP_QAT_FW_LA_CMP_AUTH_RES);
+ cdesc->auth_op = ICP_QAT_HW_AUTH_VERIFY;
} else {
ICP_QAT_FW_LA_RET_AUTH_SET(header->serv_specif_flags,
ICP_QAT_FW_LA_RET_AUTH_RES);
ICP_QAT_FW_LA_CMP_AUTH_SET(header->serv_specif_flags,
ICP_QAT_FW_LA_NO_CMP_AUTH_RES);
+ cdesc->auth_op = ICP_QAT_HW_AUTH_GENERATE;
}
/*
diff --git a/drivers/crypto/qat/qat_crypto.c b/drivers/crypto/qat/qat_crypto.c
@@ -951,6 +951,13 @@ qat_write_hw_desc_entry(struct rte_crypto_op *op, uint8_t *out_msg)
struct icp_qat_fw_la_cipher_req_params *cipher_param;
struct icp_qat_fw_la_auth_req_params *auth_param;
register struct icp_qat_fw_la_bulk_req *qat_req;
+ uint8_t do_auth = 0, do_cipher = 0;
+ uint32_t cipher_len = 0, cipher_ofs = 0;
+ uint32_t auth_len = 0, auth_ofs = 0;
+ uint32_t min_ofs = 0;
+ uint32_t digest_appended = 1;
+ uint64_t buf_start = 0;
+
#ifdef RTE_LIBRTE_PMD_QAT_DEBUG_TX
if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) {
@@ -975,88 +982,173 @@ qat_write_hw_desc_entry(struct rte_crypto_op *op, uint8_t *out_msg)
qat_req = (struct icp_qat_fw_la_bulk_req *)out_msg;
rte_mov128((uint8_t *)qat_req, (const uint8_t *)&(ctx->fw_req));
qat_req->comn_mid.opaque_data = (uint64_t)(uintptr_t)op;
+ cipher_param = (void *)&qat_req->serv_specif_rqpars;
+ auth_param = (void *)((uint8_t *)cipher_param + sizeof(*cipher_param));
- qat_req->comn_mid.dst_length =
- qat_req->comn_mid.src_length =
- rte_pktmbuf_data_len(op->sym->m_src);
+ if (ctx->qat_cmd == ICP_QAT_FW_LA_CMD_HASH_CIPHER ||
+ ctx->qat_cmd == ICP_QAT_FW_LA_CMD_CIPHER_HASH) {
+ do_auth = 1;
+ do_cipher = 1;
+ } else if (ctx->qat_cmd == ICP_QAT_FW_LA_CMD_AUTH) {
+ do_auth = 1;
+ do_cipher = 0;
+ } else if (ctx->qat_cmd == ICP_QAT_FW_LA_CMD_CIPHER) {
+ do_auth = 0;
+ do_cipher = 1;
+ }
- qat_req->comn_mid.dest_data_addr =
- qat_req->comn_mid.src_data_addr =
- rte_pktmbuf_mtophys(op->sym->m_src);
+ if (do_cipher) {
- if (unlikely(op->sym->m_dst != NULL)) {
- qat_req->comn_mid.dest_data_addr =
- rte_pktmbuf_mtophys(op->sym->m_dst);
- qat_req->comn_mid.dst_length =
- rte_pktmbuf_data_len(op->sym->m_dst);
+ if (ctx->qat_cipher_alg ==
+ ICP_QAT_HW_CIPHER_ALGO_SNOW_3G_UEA2 ||
+ ctx->qat_cipher_alg == ICP_QAT_HW_CIPHER_ALGO_KASUMI) {
+
+ if (unlikely(
+ (op->sym->cipher.data.length % BYTE_LENGTH != 0)
+ || (op->sym->cipher.data.offset
+ % BYTE_LENGTH != 0))) {
+ PMD_DRV_LOG(ERR,
+ "SNOW3G/KASUMI in QAT PMD only supports byte aligned values");
+ op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;
+ return -EINVAL;
+ }
+ cipher_len = op->sym->cipher.data.length >> 3;
+ cipher_ofs = op->sym->cipher.data.offset >> 3;
+
+ } else {
+ cipher_len = op->sym->cipher.data.length;
+ cipher_ofs = op->sym->cipher.data.offset;
+ }
+
+ /* copy IV into request if it fits */
+ if (op->sym->cipher.iv.length && (op->sym->cipher.iv.length <=
+ sizeof(cipher_param->u.cipher_IV_array))) {
+ rte_memcpy(cipher_param->u.cipher_IV_array,
+ op->sym->cipher.iv.data,
+ op->sym->cipher.iv.length);
+ } else {
+ ICP_QAT_FW_LA_CIPH_IV_FLD_FLAG_SET(
+ qat_req->comn_hdr.serv_specif_flags,
+ ICP_QAT_FW_CIPH_IV_64BIT_PTR);
+ cipher_param->u.s.cipher_IV_ptr =
+ op->sym->cipher.iv.phys_addr;
+ }
+ min_ofs = cipher_ofs;
}
- cipher_param = (void *)&qat_req->serv_specif_rqpars;
- auth_param = (void *)((uint8_t *)cipher_param + sizeof(*cipher_param));
+ if (do_auth) {
- cipher_param->cipher_length = op->sym->cipher.data.length;
- cipher_param->cipher_offset = op->sym->cipher.data.offset;
- if (ctx->qat_cipher_alg == ICP_QAT_HW_CIPHER_ALGO_SNOW_3G_UEA2 ||
- ctx->qat_cipher_alg == ICP_QAT_HW_CIPHER_ALGO_KASUMI) {
- if (unlikely((cipher_param->cipher_length % BYTE_LENGTH != 0) ||
- (cipher_param->cipher_offset
- % BYTE_LENGTH != 0))) {
- PMD_DRV_LOG(ERR, " For SNOW 3G/KASUMI, QAT PMD only "
- "supports byte aligned values");
- op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;
- return -EINVAL;
+ if (ctx->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_SNOW_3G_UIA2 ||
+ ctx->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_KASUMI_F9) {
+ if (unlikely((op->sym->auth.data.offset % BYTE_LENGTH != 0)
+ || (op->sym->auth.data.length % BYTE_LENGTH != 0))) {
+ PMD_DRV_LOG(ERR,
+ "For SNOW3G/KASUMI, QAT PMD only supports byte aligned values");
+ op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;
+ return -EINVAL;
+ }
+ auth_ofs = op->sym->auth.data.offset >> 3;
+ auth_len = op->sym->auth.data.length >> 3;
+
+ if (ctx->qat_hash_alg ==
+ ICP_QAT_HW_AUTH_ALGO_KASUMI_F9) {
+ if (do_cipher) {
+ auth_len = auth_len + auth_ofs + 1 -
+ ICP_QAT_HW_KASUMI_BLK_SZ;
+ auth_ofs = ICP_QAT_HW_KASUMI_BLK_SZ;
+ } else {
+ auth_len = auth_len + auth_ofs + 1;
+ auth_ofs = 0;
+ }
+ }
+
+ } else {
+ auth_ofs = op->sym->auth.data.offset;
+ auth_len = op->sym->auth.data.length;
+ }
+ min_ofs = auth_ofs;
+
+ if (op->sym->auth.digest.phys_addr) {
+ ICP_QAT_FW_LA_DIGEST_IN_BUFFER_SET(
+ qat_req->comn_hdr.serv_specif_flags,
+ ICP_QAT_FW_LA_NO_DIGEST_IN_BUFFER);
+ auth_param->auth_res_addr =
+ op->sym->auth.digest.phys_addr;
+ digest_appended = 0;
}
- cipher_param->cipher_length >>= 3;
- cipher_param->cipher_offset >>= 3;
+
+ auth_param->u1.aad_adr = op->sym->auth.aad.phys_addr;
+
}
- if (op->sym->cipher.iv.length && (op->sym->cipher.iv.length <=
- sizeof(cipher_param->u.cipher_IV_array))) {
- rte_memcpy(cipher_param->u.cipher_IV_array,
- op->sym->cipher.iv.data,
- op->sym->cipher.iv.length);
+ /* adjust for chain case */
+ if (do_cipher && do_auth)
+ min_ofs = cipher_ofs < auth_ofs ? cipher_ofs : auth_ofs;
+
+
+ /* Start DMA at nearest aligned address below min_ofs */
+ #define QAT_64_BYTE_ALIGN_MASK (~0x3f)
+ buf_start = rte_pktmbuf_mtophys_offset(op->sym->m_src, min_ofs) &
+ QAT_64_BYTE_ALIGN_MASK;
+
+ if (unlikely((rte_pktmbuf_mtophys(op->sym->m_src)
+ - rte_pktmbuf_headroom(op->sym->m_src)) > buf_start)) {
+ /* alignment has pushed addr ahead of start of mbuf
+ * so revert and take the performance hit
+ */
+ buf_start = rte_pktmbuf_mtophys(op->sym->m_src);
+ }
+
+ qat_req->comn_mid.dest_data_addr =
+ qat_req->comn_mid.src_data_addr = buf_start;
+
+ if (do_cipher) {
+ cipher_param->cipher_offset =
+ (uint32_t)rte_pktmbuf_mtophys_offset(
+ op->sym->m_src, cipher_ofs) - buf_start;
+ cipher_param->cipher_length = cipher_len;
} else {
- ICP_QAT_FW_LA_CIPH_IV_FLD_FLAG_SET(
- qat_req->comn_hdr.serv_specif_flags,
- ICP_QAT_FW_CIPH_IV_64BIT_PTR);
- cipher_param->u.s.cipher_IV_ptr = op->sym->cipher.iv.phys_addr;
+ cipher_param->cipher_offset = 0;
+ cipher_param->cipher_length = 0;
}
- if (op->sym->auth.digest.phys_addr) {
- ICP_QAT_FW_LA_DIGEST_IN_BUFFER_SET(
- qat_req->comn_hdr.serv_specif_flags,
- ICP_QAT_FW_LA_NO_DIGEST_IN_BUFFER);
- auth_param->auth_res_addr = op->sym->auth.digest.phys_addr;
+ if (do_auth) {
+ auth_param->auth_off = (uint32_t)rte_pktmbuf_mtophys_offset(
+ op->sym->m_src, auth_ofs) - buf_start;
+ auth_param->auth_len = auth_len;
+ } else {
+ auth_param->auth_off = 0;
+ auth_param->auth_len = 0;
}
- auth_param->auth_off = op->sym->auth.data.offset;
- auth_param->auth_len = op->sym->auth.data.length;
- if (ctx->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_SNOW_3G_UIA2) {
- if (unlikely((auth_param->auth_off % BYTE_LENGTH != 0) ||
- (auth_param->auth_len % BYTE_LENGTH != 0))) {
- PMD_DRV_LOG(ERR, " For SNOW 3G, QAT PMD only "
- "supports byte aligned values");
- op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;
- return -EINVAL;
- }
- auth_param->auth_off >>= 3;
- auth_param->auth_len >>= 3;
+ qat_req->comn_mid.dst_length =
+ qat_req->comn_mid.src_length =
+ (cipher_param->cipher_offset + cipher_param->cipher_length)
+ > (auth_param->auth_off + auth_param->auth_len) ?
+ (cipher_param->cipher_offset + cipher_param->cipher_length)
+ : (auth_param->auth_off + auth_param->auth_len);
+
+ if (do_auth && digest_appended) {
+ if (ctx->auth_op == ICP_QAT_HW_AUTH_GENERATE)
+ qat_req->comn_mid.dst_length
+ += op->sym->auth.digest.length;
+ else
+ qat_req->comn_mid.src_length
+ += op->sym->auth.digest.length;
}
- if ((ctx->qat_cmd == ICP_QAT_FW_LA_CMD_HASH_CIPHER ||
- ctx->qat_cmd == ICP_QAT_FW_LA_CMD_CIPHER_HASH) &&
- ctx->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_KASUMI_F9) {
- auth_param->auth_len = (auth_param->auth_len >> 3)
- + (auth_param->auth_off >> 3)
- + (BYTE_LENGTH >> 3)
- - 8;
- auth_param->auth_off = 8;
- } else if (ctx->qat_cmd == ICP_QAT_FW_LA_CMD_AUTH
- && ctx->qat_hash_alg ==
- ICP_QAT_HW_AUTH_ALGO_KASUMI_F9) {
- auth_param->auth_len = (auth_param->auth_len >> 3)
- + (auth_param->auth_off >> 3)
- + (BYTE_LENGTH >> 3);
- auth_param->auth_off = 0;
+
+ /* out-of-place operation (OOP) */
+ if (unlikely(op->sym->m_dst != NULL)) {
+
+ if (do_auth)
+ qat_req->comn_mid.dest_data_addr =
+ rte_pktmbuf_mtophys_offset(op->sym->m_dst,
+ auth_ofs)
+ - auth_param->auth_off;
+ else
+ qat_req->comn_mid.dest_data_addr =
+ rte_pktmbuf_mtophys_offset(op->sym->m_dst,
+ cipher_ofs)
+ - cipher_param->cipher_offset;
}
- auth_param->u1.aad_adr = op->sym->auth.aad.phys_addr;
if (ctx->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_GALOIS_128 ||
ctx->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_GALOIS_64) {
@@ -1075,15 +1167,20 @@ qat_write_hw_desc_entry(struct rte_crypto_op *op, uint8_t *out_msg)
*/
qat_req->comn_mid.dest_data_addr =
qat_req->comn_mid.src_data_addr =
- op->sym->auth.aad.phys_addr;
+ op->sym->auth.aad.phys_addr;
+ qat_req->comn_mid.dst_length =
+ qat_req->comn_mid.src_length =
+ rte_pktmbuf_data_len(op->sym->m_src);
+ cipher_param->cipher_length = 0;
+ cipher_param->cipher_offset = 0;
auth_param->u1.aad_adr = 0;
auth_param->auth_len = op->sym->auth.aad.length;
+ auth_param->auth_off = op->sym->auth.data.offset;
auth_param->u2.aad_sz = 0;
-
}
-
}
+
#ifdef RTE_LIBRTE_PMD_QAT_DEBUG_TX
rte_hexdump(stdout, "qat_req:", qat_req,
sizeof(struct icp_qat_fw_la_bulk_req));