From 6a702fa5339597f2f2bb466043fbb20f3e55e0ad Mon Sep 17 00:00:00 2001 From: Vic Wu Date: Mon, 7 Dec 2020 15:58:42 +0800 Subject: [PATCH 001/154] crypto: mediatek - remove obsolete driver The crypto mediatek driver has been replaced by the inside-secure driver now. Remove this driver to avoid having duplicate drivers. Signed-off-by: Vic Wu Acked-by: Ryder Lee Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 15 - drivers/crypto/Makefile | 1 - drivers/crypto/mediatek/Makefile | 3 - drivers/crypto/mediatek/mtk-aes.c | 1271 ---------------------- drivers/crypto/mediatek/mtk-platform.c | 586 ---------- drivers/crypto/mediatek/mtk-platform.h | 231 ---- drivers/crypto/mediatek/mtk-regs.h | 190 ---- drivers/crypto/mediatek/mtk-sha.c | 1353 ------------------------ 8 files changed, 3650 deletions(-) delete mode 100644 drivers/crypto/mediatek/Makefile delete mode 100644 drivers/crypto/mediatek/mtk-aes.c delete mode 100644 drivers/crypto/mediatek/mtk-platform.c delete mode 100644 drivers/crypto/mediatek/mtk-platform.h delete mode 100644 drivers/crypto/mediatek/mtk-regs.h delete mode 100644 drivers/crypto/mediatek/mtk-sha.c diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index bbd51703e738..857b7956feca 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -772,21 +772,6 @@ config CRYPTO_DEV_ZYNQMP_AES accelerator. Select this if you want to use the ZynqMP module for AES algorithms. -config CRYPTO_DEV_MEDIATEK - tristate "MediaTek's EIP97 Cryptographic Engine driver" - depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST - select CRYPTO_LIB_AES - select CRYPTO_AEAD - select CRYPTO_SKCIPHER - select CRYPTO_SHA1 - select CRYPTO_SHA256 - select CRYPTO_SHA512 - select CRYPTO_HMAC - help - This driver allows you to utilize the hardware crypto accelerator - EIP97 which can be found on the MT7623 MT2701, MT8521p, etc .... - Select this if you want to use it for AES/SHA1/SHA2 algorithms. - source "drivers/crypto/chelsio/Kconfig" source "drivers/crypto/virtio/Kconfig" diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index fff9a70348e1..367630e7e888 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o obj-$(CONFIG_CRYPTO_DEV_MARVELL) += marvell/ -obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mediatek/ obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o n2_crypto-y := n2_core.o n2_asm.o diff --git a/drivers/crypto/mediatek/Makefile b/drivers/crypto/mediatek/Makefile deleted file mode 100644 index 196a4653974e..000000000000 --- a/drivers/crypto/mediatek/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mtk-crypto.o -mtk-crypto-objs:= mtk-platform.o mtk-aes.o mtk-sha.o diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c deleted file mode 100644 index 7323066724c3..000000000000 --- a/drivers/crypto/mediatek/mtk-aes.c +++ /dev/null @@ -1,1271 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Cryptographic API. - * - * Driver for EIP97 AES acceleration. - * - * Copyright (c) 2016 Ryder Lee - * - * Some ideas are from atmel-aes.c drivers. - */ - -#include -#include -#include -#include "mtk-platform.h" - -#define AES_QUEUE_SIZE 512 -#define AES_BUF_ORDER 2 -#define AES_BUF_SIZE ((PAGE_SIZE << AES_BUF_ORDER) \ - & ~(AES_BLOCK_SIZE - 1)) -#define AES_MAX_STATE_BUF_SIZE SIZE_IN_WORDS(AES_KEYSIZE_256 + \ - AES_BLOCK_SIZE * 2) -#define AES_MAX_CT_SIZE 6 - -#define AES_CT_CTRL_HDR cpu_to_le32(0x00220000) - -/* AES-CBC/ECB/CTR/OFB/CFB command token */ -#define AES_CMD0 cpu_to_le32(0x05000000) -#define AES_CMD1 cpu_to_le32(0x2d060000) -#define AES_CMD2 cpu_to_le32(0xe4a63806) -/* AES-GCM command token */ -#define AES_GCM_CMD0 cpu_to_le32(0x0b000000) -#define AES_GCM_CMD1 cpu_to_le32(0xa0800000) -#define AES_GCM_CMD2 cpu_to_le32(0x25000010) -#define AES_GCM_CMD3 cpu_to_le32(0x0f020000) -#define AES_GCM_CMD4 cpu_to_le32(0x21e60000) -#define AES_GCM_CMD5 cpu_to_le32(0x40e60000) -#define AES_GCM_CMD6 cpu_to_le32(0xd0070000) - -/* AES transform information word 0 fields */ -#define AES_TFM_BASIC_OUT cpu_to_le32(0x4 << 0) -#define AES_TFM_BASIC_IN cpu_to_le32(0x5 << 0) -#define AES_TFM_GCM_OUT cpu_to_le32(0x6 << 0) -#define AES_TFM_GCM_IN cpu_to_le32(0xf << 0) -#define AES_TFM_SIZE(x) cpu_to_le32((x) << 8) -#define AES_TFM_128BITS cpu_to_le32(0xb << 16) -#define AES_TFM_192BITS cpu_to_le32(0xd << 16) -#define AES_TFM_256BITS cpu_to_le32(0xf << 16) -#define AES_TFM_GHASH_DIGEST cpu_to_le32(0x2 << 21) -#define AES_TFM_GHASH cpu_to_le32(0x4 << 23) -/* AES transform information word 1 fields */ -#define AES_TFM_ECB cpu_to_le32(0x0 << 0) -#define AES_TFM_CBC cpu_to_le32(0x1 << 0) -#define AES_TFM_OFB cpu_to_le32(0x4 << 0) -#define AES_TFM_CFB128 cpu_to_le32(0x5 << 0) -#define AES_TFM_CTR_INIT cpu_to_le32(0x2 << 0) /* init counter to 1 */ -#define AES_TFM_CTR_LOAD cpu_to_le32(0x6 << 0) /* load/reuse counter */ -#define AES_TFM_3IV cpu_to_le32(0x7 << 5) /* using IV 0-2 */ -#define AES_TFM_FULL_IV cpu_to_le32(0xf << 5) /* using IV 0-3 */ -#define AES_TFM_IV_CTR_MODE cpu_to_le32(0x1 << 10) -#define AES_TFM_ENC_HASH cpu_to_le32(0x1 << 17) - -/* AES flags */ -#define AES_FLAGS_CIPHER_MSK GENMASK(4, 0) -#define AES_FLAGS_ECB BIT(0) -#define AES_FLAGS_CBC BIT(1) -#define AES_FLAGS_CTR BIT(2) -#define AES_FLAGS_OFB BIT(3) -#define AES_FLAGS_CFB128 BIT(4) -#define AES_FLAGS_GCM BIT(5) -#define AES_FLAGS_ENCRYPT BIT(6) -#define AES_FLAGS_BUSY BIT(7) - -#define AES_AUTH_TAG_ERR cpu_to_le32(BIT(26)) - -/** - * mtk_aes_info - hardware information of AES - * @cmd: command token, hardware instruction - * @tfm: transform state of cipher algorithm. - * @state: contains keys and initial vectors. - * - * Memory layout of GCM buffer: - * /-----------\ - * | AES KEY | 128/196/256 bits - * |-----------| - * | HASH KEY | a string 128 zero bits encrypted using the block cipher - * |-----------| - * | IVs | 4 * 4 bytes - * \-----------/ - * - * The engine requires all these info to do: - * - Commands decoding and control of the engine's data path. - * - Coordinating hardware data fetch and store operations. - * - Result token construction and output. - */ -struct mtk_aes_info { - __le32 cmd[AES_MAX_CT_SIZE]; - __le32 tfm[2]; - __le32 state[AES_MAX_STATE_BUF_SIZE]; -}; - -struct mtk_aes_reqctx { - u64 mode; -}; - -struct mtk_aes_base_ctx { - struct mtk_cryp *cryp; - u32 keylen; - __le32 key[12]; - __le32 keymode; - - mtk_aes_fn start; - - struct mtk_aes_info info; - dma_addr_t ct_dma; - dma_addr_t tfm_dma; - - __le32 ct_hdr; - u32 ct_size; -}; - -struct mtk_aes_ctx { - struct mtk_aes_base_ctx base; -}; - -struct mtk_aes_ctr_ctx { - struct mtk_aes_base_ctx base; - - __be32 iv[AES_BLOCK_SIZE / sizeof(u32)]; - size_t offset; - struct scatterlist src[2]; - struct scatterlist dst[2]; -}; - -struct mtk_aes_gcm_ctx { - struct mtk_aes_base_ctx base; - - u32 authsize; - size_t textlen; -}; - -struct mtk_aes_drv { - struct list_head dev_list; - /* Device list lock */ - spinlock_t lock; -}; - -static struct mtk_aes_drv mtk_aes = { - .dev_list = LIST_HEAD_INIT(mtk_aes.dev_list), - .lock = __SPIN_LOCK_UNLOCKED(mtk_aes.lock), -}; - -static inline u32 mtk_aes_read(struct mtk_cryp *cryp, u32 offset) -{ - return readl_relaxed(cryp->base + offset); -} - -static inline void mtk_aes_write(struct mtk_cryp *cryp, - u32 offset, u32 value) -{ - writel_relaxed(value, cryp->base + offset); -} - -static struct mtk_cryp *mtk_aes_find_dev(struct mtk_aes_base_ctx *ctx) -{ - struct mtk_cryp *cryp = NULL; - struct mtk_cryp *tmp; - - spin_lock_bh(&mtk_aes.lock); - if (!ctx->cryp) { - list_for_each_entry(tmp, &mtk_aes.dev_list, aes_list) { - cryp = tmp; - break; - } - ctx->cryp = cryp; - } else { - cryp = ctx->cryp; - } - spin_unlock_bh(&mtk_aes.lock); - - return cryp; -} - -static inline size_t mtk_aes_padlen(size_t len) -{ - len &= AES_BLOCK_SIZE - 1; - return len ? AES_BLOCK_SIZE - len : 0; -} - -static bool mtk_aes_check_aligned(struct scatterlist *sg, size_t len, - struct mtk_aes_dma *dma) -{ - int nents; - - if (!IS_ALIGNED(len, AES_BLOCK_SIZE)) - return false; - - for (nents = 0; sg; sg = sg_next(sg), ++nents) { - if (!IS_ALIGNED(sg->offset, sizeof(u32))) - return false; - - if (len <= sg->length) { - if (!IS_ALIGNED(len, AES_BLOCK_SIZE)) - return false; - - dma->nents = nents + 1; - dma->remainder = sg->length - len; - sg->length = len; - return true; - } - - if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE)) - return false; - - len -= sg->length; - } - - return false; -} - -static inline void mtk_aes_set_mode(struct mtk_aes_rec *aes, - const struct mtk_aes_reqctx *rctx) -{ - /* Clear all but persistent flags and set request flags. */ - aes->flags = (aes->flags & AES_FLAGS_BUSY) | rctx->mode; -} - -static inline void mtk_aes_restore_sg(const struct mtk_aes_dma *dma) -{ - struct scatterlist *sg = dma->sg; - int nents = dma->nents; - - if (!dma->remainder) - return; - - while (--nents > 0 && sg) - sg = sg_next(sg); - - if (!sg) - return; - - sg->length += dma->remainder; -} - -static inline int mtk_aes_complete(struct mtk_cryp *cryp, - struct mtk_aes_rec *aes, - int err) -{ - aes->flags &= ~AES_FLAGS_BUSY; - aes->areq->complete(aes->areq, err); - /* Handle new request */ - tasklet_schedule(&aes->queue_task); - return err; -} - -/* - * Write descriptors for processing. This will configure the engine, load - * the transform information and then start the packet processing. - */ -static int mtk_aes_xmit(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) -{ - struct mtk_ring *ring = cryp->ring[aes->id]; - struct mtk_desc *cmd = NULL, *res = NULL; - struct scatterlist *ssg = aes->src.sg, *dsg = aes->dst.sg; - u32 slen = aes->src.sg_len, dlen = aes->dst.sg_len; - int nents; - - /* Write command descriptors */ - for (nents = 0; nents < slen; ++nents, ssg = sg_next(ssg)) { - cmd = ring->cmd_next; - cmd->hdr = MTK_DESC_BUF_LEN(ssg->length); - cmd->buf = cpu_to_le32(sg_dma_address(ssg)); - - if (nents == 0) { - cmd->hdr |= MTK_DESC_FIRST | - MTK_DESC_CT_LEN(aes->ctx->ct_size); - cmd->ct = cpu_to_le32(aes->ctx->ct_dma); - cmd->ct_hdr = aes->ctx->ct_hdr; - cmd->tfm = cpu_to_le32(aes->ctx->tfm_dma); - } - - /* Shift ring buffer and check boundary */ - if (++ring->cmd_next == ring->cmd_base + MTK_DESC_NUM) - ring->cmd_next = ring->cmd_base; - } - cmd->hdr |= MTK_DESC_LAST; - - /* Prepare result descriptors */ - for (nents = 0; nents < dlen; ++nents, dsg = sg_next(dsg)) { - res = ring->res_next; - res->hdr = MTK_DESC_BUF_LEN(dsg->length); - res->buf = cpu_to_le32(sg_dma_address(dsg)); - - if (nents == 0) - res->hdr |= MTK_DESC_FIRST; - - /* Shift ring buffer and check boundary */ - if (++ring->res_next == ring->res_base + MTK_DESC_NUM) - ring->res_next = ring->res_base; - } - res->hdr |= MTK_DESC_LAST; - - /* Pointer to current result descriptor */ - ring->res_prev = res; - - /* Prepare enough space for authenticated tag */ - if (aes->flags & AES_FLAGS_GCM) - le32_add_cpu(&res->hdr, AES_BLOCK_SIZE); - - /* - * Make sure that all changes to the DMA ring are done before we - * start engine. - */ - wmb(); - /* Start DMA transfer */ - mtk_aes_write(cryp, RDR_PREP_COUNT(aes->id), MTK_DESC_CNT(dlen)); - mtk_aes_write(cryp, CDR_PREP_COUNT(aes->id), MTK_DESC_CNT(slen)); - - return -EINPROGRESS; -} - -static void mtk_aes_unmap(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) -{ - struct mtk_aes_base_ctx *ctx = aes->ctx; - - dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->info), - DMA_TO_DEVICE); - - if (aes->src.sg == aes->dst.sg) { - dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, - DMA_BIDIRECTIONAL); - - if (aes->src.sg != &aes->aligned_sg) - mtk_aes_restore_sg(&aes->src); - } else { - dma_unmap_sg(cryp->dev, aes->dst.sg, aes->dst.nents, - DMA_FROM_DEVICE); - - if (aes->dst.sg != &aes->aligned_sg) - mtk_aes_restore_sg(&aes->dst); - - dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, - DMA_TO_DEVICE); - - if (aes->src.sg != &aes->aligned_sg) - mtk_aes_restore_sg(&aes->src); - } - - if (aes->dst.sg == &aes->aligned_sg) - sg_copy_from_buffer(aes->real_dst, sg_nents(aes->real_dst), - aes->buf, aes->total); -} - -static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) -{ - struct mtk_aes_base_ctx *ctx = aes->ctx; - struct mtk_aes_info *info = &ctx->info; - - ctx->ct_dma = dma_map_single(cryp->dev, info, sizeof(*info), - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma))) - goto exit; - - ctx->tfm_dma = ctx->ct_dma + sizeof(info->cmd); - - if (aes->src.sg == aes->dst.sg) { - aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg, - aes->src.nents, - DMA_BIDIRECTIONAL); - aes->dst.sg_len = aes->src.sg_len; - if (unlikely(!aes->src.sg_len)) - goto sg_map_err; - } else { - aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg, - aes->src.nents, DMA_TO_DEVICE); - if (unlikely(!aes->src.sg_len)) - goto sg_map_err; - - aes->dst.sg_len = dma_map_sg(cryp->dev, aes->dst.sg, - aes->dst.nents, DMA_FROM_DEVICE); - if (unlikely(!aes->dst.sg_len)) { - dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, - DMA_TO_DEVICE); - goto sg_map_err; - } - } - - return mtk_aes_xmit(cryp, aes); - -sg_map_err: - dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(*info), DMA_TO_DEVICE); -exit: - return mtk_aes_complete(cryp, aes, -EINVAL); -} - -/* Initialize transform information of CBC/ECB/CTR/OFB/CFB mode */ -static void mtk_aes_info_init(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, - size_t len) -{ - struct skcipher_request *req = skcipher_request_cast(aes->areq); - struct mtk_aes_base_ctx *ctx = aes->ctx; - struct mtk_aes_info *info = &ctx->info; - u32 cnt = 0; - - ctx->ct_hdr = AES_CT_CTRL_HDR | cpu_to_le32(len); - info->cmd[cnt++] = AES_CMD0 | cpu_to_le32(len); - info->cmd[cnt++] = AES_CMD1; - - info->tfm[0] = AES_TFM_SIZE(ctx->keylen) | ctx->keymode; - if (aes->flags & AES_FLAGS_ENCRYPT) - info->tfm[0] |= AES_TFM_BASIC_OUT; - else - info->tfm[0] |= AES_TFM_BASIC_IN; - - switch (aes->flags & AES_FLAGS_CIPHER_MSK) { - case AES_FLAGS_CBC: - info->tfm[1] = AES_TFM_CBC; - break; - case AES_FLAGS_ECB: - info->tfm[1] = AES_TFM_ECB; - goto ecb; - case AES_FLAGS_CTR: - info->tfm[1] = AES_TFM_CTR_LOAD; - goto ctr; - case AES_FLAGS_OFB: - info->tfm[1] = AES_TFM_OFB; - break; - case AES_FLAGS_CFB128: - info->tfm[1] = AES_TFM_CFB128; - break; - default: - /* Should not happen... */ - return; - } - - memcpy(info->state + ctx->keylen, req->iv, AES_BLOCK_SIZE); -ctr: - le32_add_cpu(&info->tfm[0], - le32_to_cpu(AES_TFM_SIZE(SIZE_IN_WORDS(AES_BLOCK_SIZE)))); - info->tfm[1] |= AES_TFM_FULL_IV; - info->cmd[cnt++] = AES_CMD2; -ecb: - ctx->ct_size = cnt; -} - -static int mtk_aes_dma(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, - struct scatterlist *src, struct scatterlist *dst, - size_t len) -{ - size_t padlen = 0; - bool src_aligned, dst_aligned; - - aes->total = len; - aes->src.sg = src; - aes->dst.sg = dst; - aes->real_dst = dst; - - src_aligned = mtk_aes_check_aligned(src, len, &aes->src); - if (src == dst) - dst_aligned = src_aligned; - else - dst_aligned = mtk_aes_check_aligned(dst, len, &aes->dst); - - if (!src_aligned || !dst_aligned) { - padlen = mtk_aes_padlen(len); - - if (len + padlen > AES_BUF_SIZE) - return mtk_aes_complete(cryp, aes, -ENOMEM); - - if (!src_aligned) { - sg_copy_to_buffer(src, sg_nents(src), aes->buf, len); - aes->src.sg = &aes->aligned_sg; - aes->src.nents = 1; - aes->src.remainder = 0; - } - - if (!dst_aligned) { - aes->dst.sg = &aes->aligned_sg; - aes->dst.nents = 1; - aes->dst.remainder = 0; - } - - sg_init_table(&aes->aligned_sg, 1); - sg_set_buf(&aes->aligned_sg, aes->buf, len + padlen); - } - - mtk_aes_info_init(cryp, aes, len + padlen); - - return mtk_aes_map(cryp, aes); -} - -static int mtk_aes_handle_queue(struct mtk_cryp *cryp, u8 id, - struct crypto_async_request *new_areq) -{ - struct mtk_aes_rec *aes = cryp->aes[id]; - struct crypto_async_request *areq, *backlog; - struct mtk_aes_base_ctx *ctx; - unsigned long flags; - int ret = 0; - - spin_lock_irqsave(&aes->lock, flags); - if (new_areq) - ret = crypto_enqueue_request(&aes->queue, new_areq); - if (aes->flags & AES_FLAGS_BUSY) { - spin_unlock_irqrestore(&aes->lock, flags); - return ret; - } - backlog = crypto_get_backlog(&aes->queue); - areq = crypto_dequeue_request(&aes->queue); - if (areq) - aes->flags |= AES_FLAGS_BUSY; - spin_unlock_irqrestore(&aes->lock, flags); - - if (!areq) - return ret; - - if (backlog) - backlog->complete(backlog, -EINPROGRESS); - - ctx = crypto_tfm_ctx(areq->tfm); - /* Write key into state buffer */ - memcpy(ctx->info.state, ctx->key, sizeof(ctx->key)); - - aes->areq = areq; - aes->ctx = ctx; - - return ctx->start(cryp, aes); -} - -static int mtk_aes_transfer_complete(struct mtk_cryp *cryp, - struct mtk_aes_rec *aes) -{ - return mtk_aes_complete(cryp, aes, 0); -} - -static int mtk_aes_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) -{ - struct skcipher_request *req = skcipher_request_cast(aes->areq); - struct mtk_aes_reqctx *rctx = skcipher_request_ctx(req); - - mtk_aes_set_mode(aes, rctx); - aes->resume = mtk_aes_transfer_complete; - - return mtk_aes_dma(cryp, aes, req->src, req->dst, req->cryptlen); -} - -static inline struct mtk_aes_ctr_ctx * -mtk_aes_ctr_ctx_cast(struct mtk_aes_base_ctx *ctx) -{ - return container_of(ctx, struct mtk_aes_ctr_ctx, base); -} - -static int mtk_aes_ctr_transfer(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) -{ - struct mtk_aes_base_ctx *ctx = aes->ctx; - struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(ctx); - struct skcipher_request *req = skcipher_request_cast(aes->areq); - struct scatterlist *src, *dst; - u32 start, end, ctr, blocks; - size_t datalen; - bool fragmented = false; - - /* Check for transfer completion. */ - cctx->offset += aes->total; - if (cctx->offset >= req->cryptlen) - return mtk_aes_transfer_complete(cryp, aes); - - /* Compute data length. */ - datalen = req->cryptlen - cctx->offset; - blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE); - ctr = be32_to_cpu(cctx->iv[3]); - - /* Check 32bit counter overflow. */ - start = ctr; - end = start + blocks - 1; - if (end < start) { - ctr = 0xffffffff; - datalen = AES_BLOCK_SIZE * -start; - fragmented = true; - } - - /* Jump to offset. */ - src = scatterwalk_ffwd(cctx->src, req->src, cctx->offset); - dst = ((req->src == req->dst) ? src : - scatterwalk_ffwd(cctx->dst, req->dst, cctx->offset)); - - /* Write IVs into transform state buffer. */ - memcpy(ctx->info.state + ctx->keylen, cctx->iv, AES_BLOCK_SIZE); - - if (unlikely(fragmented)) { - /* - * Increment the counter manually to cope with the hardware - * counter overflow. - */ - cctx->iv[3] = cpu_to_be32(ctr); - crypto_inc((u8 *)cctx->iv, AES_BLOCK_SIZE); - } - - return mtk_aes_dma(cryp, aes, src, dst, datalen); -} - -static int mtk_aes_ctr_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) -{ - struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(aes->ctx); - struct skcipher_request *req = skcipher_request_cast(aes->areq); - struct mtk_aes_reqctx *rctx = skcipher_request_ctx(req); - - mtk_aes_set_mode(aes, rctx); - - memcpy(cctx->iv, req->iv, AES_BLOCK_SIZE); - cctx->offset = 0; - aes->total = 0; - aes->resume = mtk_aes_ctr_transfer; - - return mtk_aes_ctr_transfer(cryp, aes); -} - -/* Check and set the AES key to transform state buffer */ -static int mtk_aes_setkey(struct crypto_skcipher *tfm, - const u8 *key, u32 keylen) -{ - struct mtk_aes_base_ctx *ctx = crypto_skcipher_ctx(tfm); - - switch (keylen) { - case AES_KEYSIZE_128: - ctx->keymode = AES_TFM_128BITS; - break; - case AES_KEYSIZE_192: - ctx->keymode = AES_TFM_192BITS; - break; - case AES_KEYSIZE_256: - ctx->keymode = AES_TFM_256BITS; - break; - - default: - return -EINVAL; - } - - ctx->keylen = SIZE_IN_WORDS(keylen); - memcpy(ctx->key, key, keylen); - - return 0; -} - -static int mtk_aes_crypt(struct skcipher_request *req, u64 mode) -{ - struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct mtk_aes_base_ctx *ctx = crypto_skcipher_ctx(skcipher); - struct mtk_aes_reqctx *rctx; - struct mtk_cryp *cryp; - - cryp = mtk_aes_find_dev(ctx); - if (!cryp) - return -ENODEV; - - rctx = skcipher_request_ctx(req); - rctx->mode = mode; - - return mtk_aes_handle_queue(cryp, !(mode & AES_FLAGS_ENCRYPT), - &req->base); -} - -static int mtk_aes_ecb_encrypt(struct skcipher_request *req) -{ - return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_ECB); -} - -static int mtk_aes_ecb_decrypt(struct skcipher_request *req) -{ - return mtk_aes_crypt(req, AES_FLAGS_ECB); -} - -static int mtk_aes_cbc_encrypt(struct skcipher_request *req) -{ - return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CBC); -} - -static int mtk_aes_cbc_decrypt(struct skcipher_request *req) -{ - return mtk_aes_crypt(req, AES_FLAGS_CBC); -} - -static int mtk_aes_ctr_encrypt(struct skcipher_request *req) -{ - return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CTR); -} - -static int mtk_aes_ctr_decrypt(struct skcipher_request *req) -{ - return mtk_aes_crypt(req, AES_FLAGS_CTR); -} - -static int mtk_aes_ofb_encrypt(struct skcipher_request *req) -{ - return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_OFB); -} - -static int mtk_aes_ofb_decrypt(struct skcipher_request *req) -{ - return mtk_aes_crypt(req, AES_FLAGS_OFB); -} - -static int mtk_aes_cfb_encrypt(struct skcipher_request *req) -{ - return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CFB128); -} - -static int mtk_aes_cfb_decrypt(struct skcipher_request *req) -{ - return mtk_aes_crypt(req, AES_FLAGS_CFB128); -} - -static int mtk_aes_init_tfm(struct crypto_skcipher *tfm) -{ - struct mtk_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - - crypto_skcipher_set_reqsize(tfm, sizeof(struct mtk_aes_reqctx)); - ctx->base.start = mtk_aes_start; - return 0; -} - -static int mtk_aes_ctr_init_tfm(struct crypto_skcipher *tfm) -{ - struct mtk_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - - crypto_skcipher_set_reqsize(tfm, sizeof(struct mtk_aes_reqctx)); - ctx->base.start = mtk_aes_ctr_start; - return 0; -} - -static struct skcipher_alg aes_algs[] = { -{ - .base.cra_name = "cbc(aes)", - .base.cra_driver_name = "cbc-aes-mtk", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct mtk_aes_ctx), - .base.cra_alignmask = 0xf, - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = mtk_aes_setkey, - .encrypt = mtk_aes_cbc_encrypt, - .decrypt = mtk_aes_cbc_decrypt, - .ivsize = AES_BLOCK_SIZE, - .init = mtk_aes_init_tfm, -}, -{ - .base.cra_name = "ecb(aes)", - .base.cra_driver_name = "ecb-aes-mtk", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct mtk_aes_ctx), - .base.cra_alignmask = 0xf, - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = mtk_aes_setkey, - .encrypt = mtk_aes_ecb_encrypt, - .decrypt = mtk_aes_ecb_decrypt, - .init = mtk_aes_init_tfm, -}, -{ - .base.cra_name = "ctr(aes)", - .base.cra_driver_name = "ctr-aes-mtk", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct mtk_aes_ctx), - .base.cra_alignmask = 0xf, - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = mtk_aes_setkey, - .encrypt = mtk_aes_ctr_encrypt, - .decrypt = mtk_aes_ctr_decrypt, - .init = mtk_aes_ctr_init_tfm, -}, -{ - .base.cra_name = "ofb(aes)", - .base.cra_driver_name = "ofb-aes-mtk", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct mtk_aes_ctx), - .base.cra_alignmask = 0xf, - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = mtk_aes_setkey, - .encrypt = mtk_aes_ofb_encrypt, - .decrypt = mtk_aes_ofb_decrypt, -}, -{ - .base.cra_name = "cfb(aes)", - .base.cra_driver_name = "cfb-aes-mtk", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct mtk_aes_ctx), - .base.cra_alignmask = 0xf, - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = mtk_aes_setkey, - .encrypt = mtk_aes_cfb_encrypt, - .decrypt = mtk_aes_cfb_decrypt, -}, -}; - -static inline struct mtk_aes_gcm_ctx * -mtk_aes_gcm_ctx_cast(struct mtk_aes_base_ctx *ctx) -{ - return container_of(ctx, struct mtk_aes_gcm_ctx, base); -} - -/* - * Engine will verify and compare tag automatically, so we just need - * to check returned status which stored in the result descriptor. - */ -static int mtk_aes_gcm_tag_verify(struct mtk_cryp *cryp, - struct mtk_aes_rec *aes) -{ - __le32 status = cryp->ring[aes->id]->res_prev->ct; - - return mtk_aes_complete(cryp, aes, (status & AES_AUTH_TAG_ERR) ? - -EBADMSG : 0); -} - -/* Initialize transform information of GCM mode */ -static void mtk_aes_gcm_info_init(struct mtk_cryp *cryp, - struct mtk_aes_rec *aes, - size_t len) -{ - struct aead_request *req = aead_request_cast(aes->areq); - struct mtk_aes_base_ctx *ctx = aes->ctx; - struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); - struct mtk_aes_info *info = &ctx->info; - u32 ivsize = crypto_aead_ivsize(crypto_aead_reqtfm(req)); - u32 cnt = 0; - - ctx->ct_hdr = AES_CT_CTRL_HDR | cpu_to_le32(len); - - info->cmd[cnt++] = AES_GCM_CMD0 | cpu_to_le32(req->assoclen); - info->cmd[cnt++] = AES_GCM_CMD1 | cpu_to_le32(req->assoclen); - info->cmd[cnt++] = AES_GCM_CMD2; - info->cmd[cnt++] = AES_GCM_CMD3 | cpu_to_le32(gctx->textlen); - - if (aes->flags & AES_FLAGS_ENCRYPT) { - info->cmd[cnt++] = AES_GCM_CMD4 | cpu_to_le32(gctx->authsize); - info->tfm[0] = AES_TFM_GCM_OUT; - } else { - info->cmd[cnt++] = AES_GCM_CMD5 | cpu_to_le32(gctx->authsize); - info->cmd[cnt++] = AES_GCM_CMD6 | cpu_to_le32(gctx->authsize); - info->tfm[0] = AES_TFM_GCM_IN; - } - ctx->ct_size = cnt; - - info->tfm[0] |= AES_TFM_GHASH_DIGEST | AES_TFM_GHASH | AES_TFM_SIZE( - ctx->keylen + SIZE_IN_WORDS(AES_BLOCK_SIZE + ivsize)) | - ctx->keymode; - info->tfm[1] = AES_TFM_CTR_INIT | AES_TFM_IV_CTR_MODE | AES_TFM_3IV | - AES_TFM_ENC_HASH; - - memcpy(info->state + ctx->keylen + SIZE_IN_WORDS(AES_BLOCK_SIZE), - req->iv, ivsize); -} - -static int mtk_aes_gcm_dma(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, - struct scatterlist *src, struct scatterlist *dst, - size_t len) -{ - bool src_aligned, dst_aligned; - - aes->src.sg = src; - aes->dst.sg = dst; - aes->real_dst = dst; - - src_aligned = mtk_aes_check_aligned(src, len, &aes->src); - if (src == dst) - dst_aligned = src_aligned; - else - dst_aligned = mtk_aes_check_aligned(dst, len, &aes->dst); - - if (!src_aligned || !dst_aligned) { - if (aes->total > AES_BUF_SIZE) - return mtk_aes_complete(cryp, aes, -ENOMEM); - - if (!src_aligned) { - sg_copy_to_buffer(src, sg_nents(src), aes->buf, len); - aes->src.sg = &aes->aligned_sg; - aes->src.nents = 1; - aes->src.remainder = 0; - } - - if (!dst_aligned) { - aes->dst.sg = &aes->aligned_sg; - aes->dst.nents = 1; - aes->dst.remainder = 0; - } - - sg_init_table(&aes->aligned_sg, 1); - sg_set_buf(&aes->aligned_sg, aes->buf, aes->total); - } - - mtk_aes_gcm_info_init(cryp, aes, len); - - return mtk_aes_map(cryp, aes); -} - -/* Todo: GMAC */ -static int mtk_aes_gcm_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) -{ - struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(aes->ctx); - struct aead_request *req = aead_request_cast(aes->areq); - struct mtk_aes_reqctx *rctx = aead_request_ctx(req); - u32 len = req->assoclen + req->cryptlen; - - mtk_aes_set_mode(aes, rctx); - - if (aes->flags & AES_FLAGS_ENCRYPT) { - u32 tag[4]; - - aes->resume = mtk_aes_transfer_complete; - /* Compute total process length. */ - aes->total = len + gctx->authsize; - /* Hardware will append authenticated tag to output buffer */ - scatterwalk_map_and_copy(tag, req->dst, len, gctx->authsize, 1); - } else { - aes->resume = mtk_aes_gcm_tag_verify; - aes->total = len; - } - - return mtk_aes_gcm_dma(cryp, aes, req->src, req->dst, len); -} - -static int mtk_aes_gcm_crypt(struct aead_request *req, u64 mode) -{ - struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); - struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); - struct mtk_aes_reqctx *rctx = aead_request_ctx(req); - struct mtk_cryp *cryp; - bool enc = !!(mode & AES_FLAGS_ENCRYPT); - - cryp = mtk_aes_find_dev(ctx); - if (!cryp) - return -ENODEV; - - /* Compute text length. */ - gctx->textlen = req->cryptlen - (enc ? 0 : gctx->authsize); - - /* Empty messages are not supported yet */ - if (!gctx->textlen && !req->assoclen) - return -EINVAL; - - rctx->mode = AES_FLAGS_GCM | mode; - - return mtk_aes_handle_queue(cryp, enc, &req->base); -} - -/* - * Because of the hardware limitation, we need to pre-calculate key(H) - * for the GHASH operation. The result of the encryption operation - * need to be stored in the transform state buffer. - */ -static int mtk_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key, - u32 keylen) -{ - struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(aead); - union { - u32 x32[SIZE_IN_WORDS(AES_BLOCK_SIZE)]; - u8 x8[AES_BLOCK_SIZE]; - } hash = {}; - struct crypto_aes_ctx aes_ctx; - int err; - int i; - - switch (keylen) { - case AES_KEYSIZE_128: - ctx->keymode = AES_TFM_128BITS; - break; - case AES_KEYSIZE_192: - ctx->keymode = AES_TFM_192BITS; - break; - case AES_KEYSIZE_256: - ctx->keymode = AES_TFM_256BITS; - break; - - default: - return -EINVAL; - } - - ctx->keylen = SIZE_IN_WORDS(keylen); - - err = aes_expandkey(&aes_ctx, key, keylen); - if (err) - return err; - - aes_encrypt(&aes_ctx, hash.x8, hash.x8); - memzero_explicit(&aes_ctx, sizeof(aes_ctx)); - - memcpy(ctx->key, key, keylen); - - /* Why do we need to do this? */ - for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++) - hash.x32[i] = swab32(hash.x32[i]); - - memcpy(ctx->key + ctx->keylen, &hash, AES_BLOCK_SIZE); - - return 0; -} - -static int mtk_aes_gcm_setauthsize(struct crypto_aead *aead, - u32 authsize) -{ - struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(aead); - struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); - - /* Same as crypto_gcm_authsize() from crypto/gcm.c */ - switch (authsize) { - case 8: - case 12: - case 16: - break; - default: - return -EINVAL; - } - - gctx->authsize = authsize; - return 0; -} - -static int mtk_aes_gcm_encrypt(struct aead_request *req) -{ - return mtk_aes_gcm_crypt(req, AES_FLAGS_ENCRYPT); -} - -static int mtk_aes_gcm_decrypt(struct aead_request *req) -{ - return mtk_aes_gcm_crypt(req, 0); -} - -static int mtk_aes_gcm_init(struct crypto_aead *aead) -{ - struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead); - - crypto_aead_set_reqsize(aead, sizeof(struct mtk_aes_reqctx)); - ctx->base.start = mtk_aes_gcm_start; - return 0; -} - -static struct aead_alg aes_gcm_alg = { - .setkey = mtk_aes_gcm_setkey, - .setauthsize = mtk_aes_gcm_setauthsize, - .encrypt = mtk_aes_gcm_encrypt, - .decrypt = mtk_aes_gcm_decrypt, - .init = mtk_aes_gcm_init, - .ivsize = GCM_AES_IV_SIZE, - .maxauthsize = AES_BLOCK_SIZE, - - .base = { - .cra_name = "gcm(aes)", - .cra_driver_name = "gcm-aes-mtk", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct mtk_aes_gcm_ctx), - .cra_alignmask = 0xf, - .cra_module = THIS_MODULE, - }, -}; - -static void mtk_aes_queue_task(unsigned long data) -{ - struct mtk_aes_rec *aes = (struct mtk_aes_rec *)data; - - mtk_aes_handle_queue(aes->cryp, aes->id, NULL); -} - -static void mtk_aes_done_task(unsigned long data) -{ - struct mtk_aes_rec *aes = (struct mtk_aes_rec *)data; - struct mtk_cryp *cryp = aes->cryp; - - mtk_aes_unmap(cryp, aes); - aes->resume(cryp, aes); -} - -static irqreturn_t mtk_aes_irq(int irq, void *dev_id) -{ - struct mtk_aes_rec *aes = (struct mtk_aes_rec *)dev_id; - struct mtk_cryp *cryp = aes->cryp; - u32 val = mtk_aes_read(cryp, RDR_STAT(aes->id)); - - mtk_aes_write(cryp, RDR_STAT(aes->id), val); - - if (likely(AES_FLAGS_BUSY & aes->flags)) { - mtk_aes_write(cryp, RDR_PROC_COUNT(aes->id), MTK_CNT_RST); - mtk_aes_write(cryp, RDR_THRESH(aes->id), - MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); - - tasklet_schedule(&aes->done_task); - } else { - dev_warn(cryp->dev, "AES interrupt when no active requests.\n"); - } - return IRQ_HANDLED; -} - -/* - * The purpose of creating encryption and decryption records is - * to process outbound/inbound data in parallel, it can improve - * performance in most use cases, such as IPSec VPN, especially - * under heavy network traffic. - */ -static int mtk_aes_record_init(struct mtk_cryp *cryp) -{ - struct mtk_aes_rec **aes = cryp->aes; - int i, err = -ENOMEM; - - for (i = 0; i < MTK_REC_NUM; i++) { - aes[i] = kzalloc(sizeof(**aes), GFP_KERNEL); - if (!aes[i]) - goto err_cleanup; - - aes[i]->buf = (void *)__get_free_pages(GFP_KERNEL, - AES_BUF_ORDER); - if (!aes[i]->buf) - goto err_cleanup; - - aes[i]->cryp = cryp; - - spin_lock_init(&aes[i]->lock); - crypto_init_queue(&aes[i]->queue, AES_QUEUE_SIZE); - - tasklet_init(&aes[i]->queue_task, mtk_aes_queue_task, - (unsigned long)aes[i]); - tasklet_init(&aes[i]->done_task, mtk_aes_done_task, - (unsigned long)aes[i]); - } - - /* Link to ring0 and ring1 respectively */ - aes[0]->id = MTK_RING0; - aes[1]->id = MTK_RING1; - - return 0; - -err_cleanup: - for (; i--; ) { - free_page((unsigned long)aes[i]->buf); - kfree(aes[i]); - } - - return err; -} - -static void mtk_aes_record_free(struct mtk_cryp *cryp) -{ - int i; - - for (i = 0; i < MTK_REC_NUM; i++) { - tasklet_kill(&cryp->aes[i]->done_task); - tasklet_kill(&cryp->aes[i]->queue_task); - - free_page((unsigned long)cryp->aes[i]->buf); - kfree(cryp->aes[i]); - } -} - -static void mtk_aes_unregister_algs(void) -{ - int i; - - crypto_unregister_aead(&aes_gcm_alg); - - for (i = 0; i < ARRAY_SIZE(aes_algs); i++) - crypto_unregister_skcipher(&aes_algs[i]); -} - -static int mtk_aes_register_algs(void) -{ - int err, i; - - for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { - err = crypto_register_skcipher(&aes_algs[i]); - if (err) - goto err_aes_algs; - } - - err = crypto_register_aead(&aes_gcm_alg); - if (err) - goto err_aes_algs; - - return 0; - -err_aes_algs: - for (; i--; ) - crypto_unregister_skcipher(&aes_algs[i]); - - return err; -} - -int mtk_cipher_alg_register(struct mtk_cryp *cryp) -{ - int ret; - - INIT_LIST_HEAD(&cryp->aes_list); - - /* Initialize two cipher records */ - ret = mtk_aes_record_init(cryp); - if (ret) - goto err_record; - - ret = devm_request_irq(cryp->dev, cryp->irq[MTK_RING0], mtk_aes_irq, - 0, "mtk-aes", cryp->aes[0]); - if (ret) { - dev_err(cryp->dev, "unable to request AES irq.\n"); - goto err_res; - } - - ret = devm_request_irq(cryp->dev, cryp->irq[MTK_RING1], mtk_aes_irq, - 0, "mtk-aes", cryp->aes[1]); - if (ret) { - dev_err(cryp->dev, "unable to request AES irq.\n"); - goto err_res; - } - - /* Enable ring0 and ring1 interrupt */ - mtk_aes_write(cryp, AIC_ENABLE_SET(MTK_RING0), MTK_IRQ_RDR0); - mtk_aes_write(cryp, AIC_ENABLE_SET(MTK_RING1), MTK_IRQ_RDR1); - - spin_lock(&mtk_aes.lock); - list_add_tail(&cryp->aes_list, &mtk_aes.dev_list); - spin_unlock(&mtk_aes.lock); - - ret = mtk_aes_register_algs(); - if (ret) - goto err_algs; - - return 0; - -err_algs: - spin_lock(&mtk_aes.lock); - list_del(&cryp->aes_list); - spin_unlock(&mtk_aes.lock); -err_res: - mtk_aes_record_free(cryp); -err_record: - - dev_err(cryp->dev, "mtk-aes initialization failed.\n"); - return ret; -} - -void mtk_cipher_alg_release(struct mtk_cryp *cryp) -{ - spin_lock(&mtk_aes.lock); - list_del(&cryp->aes_list); - spin_unlock(&mtk_aes.lock); - - mtk_aes_unregister_algs(); - mtk_aes_record_free(cryp); -} diff --git a/drivers/crypto/mediatek/mtk-platform.c b/drivers/crypto/mediatek/mtk-platform.c deleted file mode 100644 index 9d878620e5c9..000000000000 --- a/drivers/crypto/mediatek/mtk-platform.c +++ /dev/null @@ -1,586 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Driver for EIP97 cryptographic accelerator. - * - * Copyright (c) 2016 Ryder Lee - */ - -#include -#include -#include -#include -#include -#include -#include -#include "mtk-platform.h" - -#define MTK_BURST_SIZE_MSK GENMASK(7, 4) -#define MTK_BURST_SIZE(x) ((x) << 4) -#define MTK_DESC_SIZE(x) ((x) << 0) -#define MTK_DESC_OFFSET(x) ((x) << 16) -#define MTK_DESC_FETCH_SIZE(x) ((x) << 0) -#define MTK_DESC_FETCH_THRESH(x) ((x) << 16) -#define MTK_DESC_OVL_IRQ_EN BIT(25) -#define MTK_DESC_ATP_PRESENT BIT(30) - -#define MTK_DFSE_IDLE GENMASK(3, 0) -#define MTK_DFSE_THR_CTRL_EN BIT(30) -#define MTK_DFSE_THR_CTRL_RESET BIT(31) -#define MTK_DFSE_RING_ID(x) (((x) >> 12) & GENMASK(3, 0)) -#define MTK_DFSE_MIN_DATA(x) ((x) << 0) -#define MTK_DFSE_MAX_DATA(x) ((x) << 8) -#define MTK_DFE_MIN_CTRL(x) ((x) << 16) -#define MTK_DFE_MAX_CTRL(x) ((x) << 24) - -#define MTK_IN_BUF_MIN_THRESH(x) ((x) << 8) -#define MTK_IN_BUF_MAX_THRESH(x) ((x) << 12) -#define MTK_OUT_BUF_MIN_THRESH(x) ((x) << 0) -#define MTK_OUT_BUF_MAX_THRESH(x) ((x) << 4) -#define MTK_IN_TBUF_SIZE(x) (((x) >> 4) & GENMASK(3, 0)) -#define MTK_IN_DBUF_SIZE(x) (((x) >> 8) & GENMASK(3, 0)) -#define MTK_OUT_DBUF_SIZE(x) (((x) >> 16) & GENMASK(3, 0)) -#define MTK_CMD_FIFO_SIZE(x) (((x) >> 8) & GENMASK(3, 0)) -#define MTK_RES_FIFO_SIZE(x) (((x) >> 12) & GENMASK(3, 0)) - -#define MTK_PE_TK_LOC_AVL BIT(2) -#define MTK_PE_PROC_HELD BIT(14) -#define MTK_PE_TK_TIMEOUT_EN BIT(22) -#define MTK_PE_INPUT_DMA_ERR BIT(0) -#define MTK_PE_OUTPUT_DMA_ERR BIT(1) -#define MTK_PE_PKT_PORC_ERR BIT(2) -#define MTK_PE_PKT_TIMEOUT BIT(3) -#define MTK_PE_FATAL_ERR BIT(14) -#define MTK_PE_INPUT_DMA_ERR_EN BIT(16) -#define MTK_PE_OUTPUT_DMA_ERR_EN BIT(17) -#define MTK_PE_PKT_PORC_ERR_EN BIT(18) -#define MTK_PE_PKT_TIMEOUT_EN BIT(19) -#define MTK_PE_FATAL_ERR_EN BIT(30) -#define MTK_PE_INT_OUT_EN BIT(31) - -#define MTK_HIA_SIGNATURE ((u16)0x35ca) -#define MTK_HIA_DATA_WIDTH(x) (((x) >> 25) & GENMASK(1, 0)) -#define MTK_HIA_DMA_LENGTH(x) (((x) >> 20) & GENMASK(4, 0)) -#define MTK_CDR_STAT_CLR GENMASK(4, 0) -#define MTK_RDR_STAT_CLR GENMASK(7, 0) - -#define MTK_AIC_INT_MSK GENMASK(5, 0) -#define MTK_AIC_VER_MSK (GENMASK(15, 0) | GENMASK(27, 20)) -#define MTK_AIC_VER11 0x011036c9 -#define MTK_AIC_VER12 0x012036c9 -#define MTK_AIC_G_CLR GENMASK(30, 20) - -/** - * EIP97 is an integrated security subsystem to accelerate cryptographic - * functions and protocols to offload the host processor. - * Some important hardware modules are briefly introduced below: - * - * Host Interface Adapter(HIA) - the main interface between the host - * system and the hardware subsystem. It is responsible for attaching - * processing engine to the specific host bus interface and provides a - * standardized software view for off loading tasks to the engine. - * - * Command Descriptor Ring Manager(CDR Manager) - keeps track of how many - * CD the host has prepared in the CDR. It monitors the fill level of its - * CD-FIFO and if there's sufficient space for the next block of descriptors, - * then it fires off a DMA request to fetch a block of CDs. - * - * Data fetch engine(DFE) - It is responsible for parsing the CD and - * setting up the required control and packet data DMA transfers from - * system memory to the processing engine. - * - * Result Descriptor Ring Manager(RDR Manager) - same as CDR Manager, - * but target is result descriptors, Moreover, it also handles the RD - * updates under control of the DSE. For each packet data segment - * processed, the DSE triggers the RDR Manager to write the updated RD. - * If triggered to update, the RDR Manager sets up a DMA operation to - * copy the RD from the DSE to the correct location in the RDR. - * - * Data Store Engine(DSE) - It is responsible for parsing the prepared RD - * and setting up the required control and packet data DMA transfers from - * the processing engine to system memory. - * - * Advanced Interrupt Controllers(AICs) - receive interrupt request signals - * from various sources and combine them into one interrupt output. - * The AICs are used by: - * - One for the HIA global and processing engine interrupts. - * - The others for the descriptor ring interrupts. - */ - -/* Cryptographic engine capabilities */ -struct mtk_sys_cap { - /* host interface adapter */ - u32 hia_ver; - u32 hia_opt; - /* packet engine */ - u32 pkt_eng_opt; - /* global hardware */ - u32 hw_opt; -}; - -static void mtk_desc_ring_link(struct mtk_cryp *cryp, u32 mask) -{ - /* Assign rings to DFE/DSE thread and enable it */ - writel(MTK_DFSE_THR_CTRL_EN | mask, cryp->base + DFE_THR_CTRL); - writel(MTK_DFSE_THR_CTRL_EN | mask, cryp->base + DSE_THR_CTRL); -} - -static void mtk_dfe_dse_buf_setup(struct mtk_cryp *cryp, - struct mtk_sys_cap *cap) -{ - u32 width = MTK_HIA_DATA_WIDTH(cap->hia_opt) + 2; - u32 len = MTK_HIA_DMA_LENGTH(cap->hia_opt) - 1; - u32 ipbuf = min((u32)MTK_IN_DBUF_SIZE(cap->hw_opt) + width, len); - u32 opbuf = min((u32)MTK_OUT_DBUF_SIZE(cap->hw_opt) + width, len); - u32 itbuf = min((u32)MTK_IN_TBUF_SIZE(cap->hw_opt) + width, len); - - writel(MTK_DFSE_MIN_DATA(ipbuf - 1) | - MTK_DFSE_MAX_DATA(ipbuf) | - MTK_DFE_MIN_CTRL(itbuf - 1) | - MTK_DFE_MAX_CTRL(itbuf), - cryp->base + DFE_CFG); - - writel(MTK_DFSE_MIN_DATA(opbuf - 1) | - MTK_DFSE_MAX_DATA(opbuf), - cryp->base + DSE_CFG); - - writel(MTK_IN_BUF_MIN_THRESH(ipbuf - 1) | - MTK_IN_BUF_MAX_THRESH(ipbuf), - cryp->base + PE_IN_DBUF_THRESH); - - writel(MTK_IN_BUF_MIN_THRESH(itbuf - 1) | - MTK_IN_BUF_MAX_THRESH(itbuf), - cryp->base + PE_IN_TBUF_THRESH); - - writel(MTK_OUT_BUF_MIN_THRESH(opbuf - 1) | - MTK_OUT_BUF_MAX_THRESH(opbuf), - cryp->base + PE_OUT_DBUF_THRESH); - - writel(0, cryp->base + PE_OUT_TBUF_THRESH); - writel(0, cryp->base + PE_OUT_BUF_CTRL); -} - -static int mtk_dfe_dse_state_check(struct mtk_cryp *cryp) -{ - int ret = -EINVAL; - u32 val; - - /* Check for completion of all DMA transfers */ - val = readl(cryp->base + DFE_THR_STAT); - if (MTK_DFSE_RING_ID(val) == MTK_DFSE_IDLE) { - val = readl(cryp->base + DSE_THR_STAT); - if (MTK_DFSE_RING_ID(val) == MTK_DFSE_IDLE) - ret = 0; - } - - if (!ret) { - /* Take DFE/DSE thread out of reset */ - writel(0, cryp->base + DFE_THR_CTRL); - writel(0, cryp->base + DSE_THR_CTRL); - } else { - return -EBUSY; - } - - return 0; -} - -static int mtk_dfe_dse_reset(struct mtk_cryp *cryp) -{ - /* Reset DSE/DFE and correct system priorities for all rings. */ - writel(MTK_DFSE_THR_CTRL_RESET, cryp->base + DFE_THR_CTRL); - writel(0, cryp->base + DFE_PRIO_0); - writel(0, cryp->base + DFE_PRIO_1); - writel(0, cryp->base + DFE_PRIO_2); - writel(0, cryp->base + DFE_PRIO_3); - - writel(MTK_DFSE_THR_CTRL_RESET, cryp->base + DSE_THR_CTRL); - writel(0, cryp->base + DSE_PRIO_0); - writel(0, cryp->base + DSE_PRIO_1); - writel(0, cryp->base + DSE_PRIO_2); - writel(0, cryp->base + DSE_PRIO_3); - - return mtk_dfe_dse_state_check(cryp); -} - -static void mtk_cmd_desc_ring_setup(struct mtk_cryp *cryp, - int i, struct mtk_sys_cap *cap) -{ - /* Full descriptor that fits FIFO minus one */ - u32 count = - ((1 << MTK_CMD_FIFO_SIZE(cap->hia_opt)) / MTK_DESC_SZ) - 1; - - /* Temporarily disable external triggering */ - writel(0, cryp->base + CDR_CFG(i)); - - /* Clear CDR count */ - writel(MTK_CNT_RST, cryp->base + CDR_PREP_COUNT(i)); - writel(MTK_CNT_RST, cryp->base + CDR_PROC_COUNT(i)); - - writel(0, cryp->base + CDR_PREP_PNTR(i)); - writel(0, cryp->base + CDR_PROC_PNTR(i)); - writel(0, cryp->base + CDR_DMA_CFG(i)); - - /* Configure CDR host address space */ - writel(0, cryp->base + CDR_BASE_ADDR_HI(i)); - writel(cryp->ring[i]->cmd_dma, cryp->base + CDR_BASE_ADDR_LO(i)); - - writel(MTK_DESC_RING_SZ, cryp->base + CDR_RING_SIZE(i)); - - /* Clear and disable all CDR interrupts */ - writel(MTK_CDR_STAT_CLR, cryp->base + CDR_STAT(i)); - - /* - * Set command descriptor offset and enable additional - * token present in descriptor. - */ - writel(MTK_DESC_SIZE(MTK_DESC_SZ) | - MTK_DESC_OFFSET(MTK_DESC_OFF) | - MTK_DESC_ATP_PRESENT, - cryp->base + CDR_DESC_SIZE(i)); - - writel(MTK_DESC_FETCH_SIZE(count * MTK_DESC_OFF) | - MTK_DESC_FETCH_THRESH(count * MTK_DESC_SZ), - cryp->base + CDR_CFG(i)); -} - -static void mtk_res_desc_ring_setup(struct mtk_cryp *cryp, - int i, struct mtk_sys_cap *cap) -{ - u32 rndup = 2; - u32 count = ((1 << MTK_RES_FIFO_SIZE(cap->hia_opt)) / rndup) - 1; - - /* Temporarily disable external triggering */ - writel(0, cryp->base + RDR_CFG(i)); - - /* Clear RDR count */ - writel(MTK_CNT_RST, cryp->base + RDR_PREP_COUNT(i)); - writel(MTK_CNT_RST, cryp->base + RDR_PROC_COUNT(i)); - - writel(0, cryp->base + RDR_PREP_PNTR(i)); - writel(0, cryp->base + RDR_PROC_PNTR(i)); - writel(0, cryp->base + RDR_DMA_CFG(i)); - - /* Configure RDR host address space */ - writel(0, cryp->base + RDR_BASE_ADDR_HI(i)); - writel(cryp->ring[i]->res_dma, cryp->base + RDR_BASE_ADDR_LO(i)); - - writel(MTK_DESC_RING_SZ, cryp->base + RDR_RING_SIZE(i)); - writel(MTK_RDR_STAT_CLR, cryp->base + RDR_STAT(i)); - - /* - * RDR manager generates update interrupts on a per-completed-packet, - * and the rd_proc_thresh_irq interrupt is fired when proc_pkt_count - * for the RDR exceeds the number of packets. - */ - writel(MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE, - cryp->base + RDR_THRESH(i)); - - /* - * Configure a threshold and time-out value for the processed - * result descriptors (or complete packets) that are written to - * the RDR. - */ - writel(MTK_DESC_SIZE(MTK_DESC_SZ) | MTK_DESC_OFFSET(MTK_DESC_OFF), - cryp->base + RDR_DESC_SIZE(i)); - - /* - * Configure HIA fetch size and fetch threshold that are used to - * fetch blocks of multiple descriptors. - */ - writel(MTK_DESC_FETCH_SIZE(count * MTK_DESC_OFF) | - MTK_DESC_FETCH_THRESH(count * rndup) | - MTK_DESC_OVL_IRQ_EN, - cryp->base + RDR_CFG(i)); -} - -static int mtk_packet_engine_setup(struct mtk_cryp *cryp) -{ - struct mtk_sys_cap cap; - int i, err; - u32 val; - - cap.hia_ver = readl(cryp->base + HIA_VERSION); - cap.hia_opt = readl(cryp->base + HIA_OPTIONS); - cap.hw_opt = readl(cryp->base + EIP97_OPTIONS); - - if (!(((u16)cap.hia_ver) == MTK_HIA_SIGNATURE)) - return -EINVAL; - - /* Configure endianness conversion method for master (DMA) interface */ - writel(0, cryp->base + EIP97_MST_CTRL); - - /* Set HIA burst size */ - val = readl(cryp->base + HIA_MST_CTRL); - val &= ~MTK_BURST_SIZE_MSK; - val |= MTK_BURST_SIZE(5); - writel(val, cryp->base + HIA_MST_CTRL); - - err = mtk_dfe_dse_reset(cryp); - if (err) { - dev_err(cryp->dev, "Failed to reset DFE and DSE.\n"); - return err; - } - - mtk_dfe_dse_buf_setup(cryp, &cap); - - /* Enable the 4 rings for the packet engines. */ - mtk_desc_ring_link(cryp, 0xf); - - for (i = 0; i < MTK_RING_MAX; i++) { - mtk_cmd_desc_ring_setup(cryp, i, &cap); - mtk_res_desc_ring_setup(cryp, i, &cap); - } - - writel(MTK_PE_TK_LOC_AVL | MTK_PE_PROC_HELD | MTK_PE_TK_TIMEOUT_EN, - cryp->base + PE_TOKEN_CTRL_STAT); - - /* Clear all pending interrupts */ - writel(MTK_AIC_G_CLR, cryp->base + AIC_G_ACK); - writel(MTK_PE_INPUT_DMA_ERR | MTK_PE_OUTPUT_DMA_ERR | - MTK_PE_PKT_PORC_ERR | MTK_PE_PKT_TIMEOUT | - MTK_PE_FATAL_ERR | MTK_PE_INPUT_DMA_ERR_EN | - MTK_PE_OUTPUT_DMA_ERR_EN | MTK_PE_PKT_PORC_ERR_EN | - MTK_PE_PKT_TIMEOUT_EN | MTK_PE_FATAL_ERR_EN | - MTK_PE_INT_OUT_EN, - cryp->base + PE_INTERRUPT_CTRL_STAT); - - return 0; -} - -static int mtk_aic_cap_check(struct mtk_cryp *cryp, int hw) -{ - u32 val; - - if (hw == MTK_RING_MAX) - val = readl(cryp->base + AIC_G_VERSION); - else - val = readl(cryp->base + AIC_VERSION(hw)); - - val &= MTK_AIC_VER_MSK; - if (val != MTK_AIC_VER11 && val != MTK_AIC_VER12) - return -ENXIO; - - if (hw == MTK_RING_MAX) - val = readl(cryp->base + AIC_G_OPTIONS); - else - val = readl(cryp->base + AIC_OPTIONS(hw)); - - val &= MTK_AIC_INT_MSK; - if (!val || val > 32) - return -ENXIO; - - return 0; -} - -static int mtk_aic_init(struct mtk_cryp *cryp, int hw) -{ - int err; - - err = mtk_aic_cap_check(cryp, hw); - if (err) - return err; - - /* Disable all interrupts and set initial configuration */ - if (hw == MTK_RING_MAX) { - writel(0, cryp->base + AIC_G_ENABLE_CTRL); - writel(0, cryp->base + AIC_G_POL_CTRL); - writel(0, cryp->base + AIC_G_TYPE_CTRL); - writel(0, cryp->base + AIC_G_ENABLE_SET); - } else { - writel(0, cryp->base + AIC_ENABLE_CTRL(hw)); - writel(0, cryp->base + AIC_POL_CTRL(hw)); - writel(0, cryp->base + AIC_TYPE_CTRL(hw)); - writel(0, cryp->base + AIC_ENABLE_SET(hw)); - } - - return 0; -} - -static int mtk_accelerator_init(struct mtk_cryp *cryp) -{ - int i, err; - - /* Initialize advanced interrupt controller(AIC) */ - for (i = 0; i < MTK_IRQ_NUM; i++) { - err = mtk_aic_init(cryp, i); - if (err) { - dev_err(cryp->dev, "Failed to initialize AIC.\n"); - return err; - } - } - - /* Initialize packet engine */ - err = mtk_packet_engine_setup(cryp); - if (err) { - dev_err(cryp->dev, "Failed to configure packet engine.\n"); - return err; - } - - return 0; -} - -static void mtk_desc_dma_free(struct mtk_cryp *cryp) -{ - int i; - - for (i = 0; i < MTK_RING_MAX; i++) { - dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, - cryp->ring[i]->res_base, - cryp->ring[i]->res_dma); - dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, - cryp->ring[i]->cmd_base, - cryp->ring[i]->cmd_dma); - kfree(cryp->ring[i]); - } -} - -static int mtk_desc_ring_alloc(struct mtk_cryp *cryp) -{ - struct mtk_ring **ring = cryp->ring; - int i; - - for (i = 0; i < MTK_RING_MAX; i++) { - ring[i] = kzalloc(sizeof(**ring), GFP_KERNEL); - if (!ring[i]) - goto err_cleanup; - - ring[i]->cmd_base = dma_alloc_coherent(cryp->dev, - MTK_DESC_RING_SZ, - &ring[i]->cmd_dma, - GFP_KERNEL); - if (!ring[i]->cmd_base) - goto err_cleanup; - - ring[i]->res_base = dma_alloc_coherent(cryp->dev, - MTK_DESC_RING_SZ, - &ring[i]->res_dma, - GFP_KERNEL); - if (!ring[i]->res_base) - goto err_cleanup; - - ring[i]->cmd_next = ring[i]->cmd_base; - ring[i]->res_next = ring[i]->res_base; - } - return 0; - -err_cleanup: - do { - dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, - ring[i]->res_base, ring[i]->res_dma); - dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, - ring[i]->cmd_base, ring[i]->cmd_dma); - kfree(ring[i]); - } while (i--); - return -ENOMEM; -} - -static int mtk_crypto_probe(struct platform_device *pdev) -{ - struct mtk_cryp *cryp; - int i, err; - - cryp = devm_kzalloc(&pdev->dev, sizeof(*cryp), GFP_KERNEL); - if (!cryp) - return -ENOMEM; - - cryp->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(cryp->base)) - return PTR_ERR(cryp->base); - - for (i = 0; i < MTK_IRQ_NUM; i++) { - cryp->irq[i] = platform_get_irq(pdev, i); - if (cryp->irq[i] < 0) - return cryp->irq[i]; - } - - cryp->clk_cryp = devm_clk_get(&pdev->dev, "cryp"); - if (IS_ERR(cryp->clk_cryp)) - return -EPROBE_DEFER; - - cryp->dev = &pdev->dev; - pm_runtime_enable(cryp->dev); - pm_runtime_get_sync(cryp->dev); - - err = clk_prepare_enable(cryp->clk_cryp); - if (err) - goto err_clk_cryp; - - /* Allocate four command/result descriptor rings */ - err = mtk_desc_ring_alloc(cryp); - if (err) { - dev_err(cryp->dev, "Unable to allocate descriptor rings.\n"); - goto err_resource; - } - - /* Initialize hardware modules */ - err = mtk_accelerator_init(cryp); - if (err) { - dev_err(cryp->dev, "Failed to initialize cryptographic engine.\n"); - goto err_engine; - } - - err = mtk_cipher_alg_register(cryp); - if (err) { - dev_err(cryp->dev, "Unable to register cipher algorithm.\n"); - goto err_cipher; - } - - err = mtk_hash_alg_register(cryp); - if (err) { - dev_err(cryp->dev, "Unable to register hash algorithm.\n"); - goto err_hash; - } - - platform_set_drvdata(pdev, cryp); - return 0; - -err_hash: - mtk_cipher_alg_release(cryp); -err_cipher: - mtk_dfe_dse_reset(cryp); -err_engine: - mtk_desc_dma_free(cryp); -err_resource: - clk_disable_unprepare(cryp->clk_cryp); -err_clk_cryp: - pm_runtime_put_sync(cryp->dev); - pm_runtime_disable(cryp->dev); - - return err; -} - -static int mtk_crypto_remove(struct platform_device *pdev) -{ - struct mtk_cryp *cryp = platform_get_drvdata(pdev); - - mtk_hash_alg_release(cryp); - mtk_cipher_alg_release(cryp); - mtk_desc_dma_free(cryp); - - clk_disable_unprepare(cryp->clk_cryp); - - pm_runtime_put_sync(cryp->dev); - pm_runtime_disable(cryp->dev); - platform_set_drvdata(pdev, NULL); - - return 0; -} - -static const struct of_device_id of_crypto_id[] = { - { .compatible = "mediatek,eip97-crypto" }, - {}, -}; -MODULE_DEVICE_TABLE(of, of_crypto_id); - -static struct platform_driver mtk_crypto_driver = { - .probe = mtk_crypto_probe, - .remove = mtk_crypto_remove, - .driver = { - .name = "mtk-crypto", - .of_match_table = of_crypto_id, - }, -}; -module_platform_driver(mtk_crypto_driver); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ryder Lee "); -MODULE_DESCRIPTION("Cryptographic accelerator driver for EIP97"); diff --git a/drivers/crypto/mediatek/mtk-platform.h b/drivers/crypto/mediatek/mtk-platform.h deleted file mode 100644 index 47920c51abac..000000000000 --- a/drivers/crypto/mediatek/mtk-platform.h +++ /dev/null @@ -1,231 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Driver for EIP97 cryptographic accelerator. - * - * Copyright (c) 2016 Ryder Lee - */ - -#ifndef __MTK_PLATFORM_H_ -#define __MTK_PLATFORM_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "mtk-regs.h" - -#define MTK_RDR_PROC_THRESH BIT(0) -#define MTK_RDR_PROC_MODE BIT(23) -#define MTK_CNT_RST BIT(31) -#define MTK_IRQ_RDR0 BIT(1) -#define MTK_IRQ_RDR1 BIT(3) -#define MTK_IRQ_RDR2 BIT(5) -#define MTK_IRQ_RDR3 BIT(7) - -#define SIZE_IN_WORDS(x) ((x) >> 2) - -/** - * Ring 0/1 are used by AES encrypt and decrypt. - * Ring 2/3 are used by SHA. - */ -enum { - MTK_RING0, - MTK_RING1, - MTK_RING2, - MTK_RING3, - MTK_RING_MAX -}; - -#define MTK_REC_NUM (MTK_RING_MAX / 2) -#define MTK_IRQ_NUM 5 - -/** - * struct mtk_desc - DMA descriptor - * @hdr: the descriptor control header - * @buf: DMA address of input buffer segment - * @ct: DMA address of command token that control operation flow - * @ct_hdr: the command token control header - * @tag: the user-defined field - * @tfm: DMA address of transform state - * @bound: align descriptors offset boundary - * - * Structure passed to the crypto engine to describe where source - * data needs to be fetched and how it needs to be processed. - */ -struct mtk_desc { - __le32 hdr; - __le32 buf; - __le32 ct; - __le32 ct_hdr; - __le32 tag; - __le32 tfm; - __le32 bound[2]; -}; - -#define MTK_DESC_NUM 512 -#define MTK_DESC_OFF SIZE_IN_WORDS(sizeof(struct mtk_desc)) -#define MTK_DESC_SZ (MTK_DESC_OFF - 2) -#define MTK_DESC_RING_SZ ((sizeof(struct mtk_desc) * MTK_DESC_NUM)) -#define MTK_DESC_CNT(x) ((MTK_DESC_OFF * (x)) << 2) -#define MTK_DESC_LAST cpu_to_le32(BIT(22)) -#define MTK_DESC_FIRST cpu_to_le32(BIT(23)) -#define MTK_DESC_BUF_LEN(x) cpu_to_le32(x) -#define MTK_DESC_CT_LEN(x) cpu_to_le32((x) << 24) - -/** - * struct mtk_ring - Descriptor ring - * @cmd_base: pointer to command descriptor ring base - * @cmd_next: pointer to the next command descriptor - * @cmd_dma: DMA address of command descriptor ring - * @res_base: pointer to result descriptor ring base - * @res_next: pointer to the next result descriptor - * @res_prev: pointer to the previous result descriptor - * @res_dma: DMA address of result descriptor ring - * - * A descriptor ring is a circular buffer that is used to manage - * one or more descriptors. There are two type of descriptor rings; - * the command descriptor ring and result descriptor ring. - */ -struct mtk_ring { - struct mtk_desc *cmd_base; - struct mtk_desc *cmd_next; - dma_addr_t cmd_dma; - struct mtk_desc *res_base; - struct mtk_desc *res_next; - struct mtk_desc *res_prev; - dma_addr_t res_dma; -}; - -/** - * struct mtk_aes_dma - Structure that holds sg list info - * @sg: pointer to scatter-gather list - * @nents: number of entries in the sg list - * @remainder: remainder of sg list - * @sg_len: number of entries in the sg mapped list - */ -struct mtk_aes_dma { - struct scatterlist *sg; - int nents; - u32 remainder; - u32 sg_len; -}; - -struct mtk_aes_base_ctx; -struct mtk_aes_rec; -struct mtk_cryp; - -typedef int (*mtk_aes_fn)(struct mtk_cryp *cryp, struct mtk_aes_rec *aes); - -/** - * struct mtk_aes_rec - AES operation record - * @cryp: pointer to Cryptographic device - * @queue: crypto request queue - * @areq: pointer to async request - * @done_task: the tasklet is use in AES interrupt - * @queue_task: the tasklet is used to dequeue request - * @ctx: pointer to current context - * @src: the structure that holds source sg list info - * @dst: the structure that holds destination sg list info - * @aligned_sg: the scatter list is use to alignment - * @real_dst: pointer to the destination sg list - * @resume: pointer to resume function - * @total: request buffer length - * @buf: pointer to page buffer - * @id: the current use of ring - * @flags: it's describing AES operation state - * @lock: the async queue lock - * - * Structure used to record AES execution state. - */ -struct mtk_aes_rec { - struct mtk_cryp *cryp; - struct crypto_queue queue; - struct crypto_async_request *areq; - struct tasklet_struct done_task; - struct tasklet_struct queue_task; - struct mtk_aes_base_ctx *ctx; - struct mtk_aes_dma src; - struct mtk_aes_dma dst; - - struct scatterlist aligned_sg; - struct scatterlist *real_dst; - - mtk_aes_fn resume; - - size_t total; - void *buf; - - u8 id; - unsigned long flags; - /* queue lock */ - spinlock_t lock; -}; - -/** - * struct mtk_sha_rec - SHA operation record - * @cryp: pointer to Cryptographic device - * @queue: crypto request queue - * @req: pointer to ahash request - * @done_task: the tasklet is use in SHA interrupt - * @queue_task: the tasklet is used to dequeue request - * @id: the current use of ring - * @flags: it's describing SHA operation state - * @lock: the async queue lock - * - * Structure used to record SHA execution state. - */ -struct mtk_sha_rec { - struct mtk_cryp *cryp; - struct crypto_queue queue; - struct ahash_request *req; - struct tasklet_struct done_task; - struct tasklet_struct queue_task; - - u8 id; - unsigned long flags; - /* queue lock */ - spinlock_t lock; -}; - -/** - * struct mtk_cryp - Cryptographic device - * @base: pointer to mapped register I/O base - * @dev: pointer to device - * @clk_cryp: pointer to crypto clock - * @irq: global system and rings IRQ - * @ring: pointer to descriptor rings - * @aes: pointer to operation record of AES - * @sha: pointer to operation record of SHA - * @aes_list: device list of AES - * @sha_list: device list of SHA - * @rec: it's used to select SHA record for tfm - * - * Structure storing cryptographic device information. - */ -struct mtk_cryp { - void __iomem *base; - struct device *dev; - struct clk *clk_cryp; - int irq[MTK_IRQ_NUM]; - - struct mtk_ring *ring[MTK_RING_MAX]; - struct mtk_aes_rec *aes[MTK_REC_NUM]; - struct mtk_sha_rec *sha[MTK_REC_NUM]; - - struct list_head aes_list; - struct list_head sha_list; - - bool rec; -}; - -int mtk_cipher_alg_register(struct mtk_cryp *cryp); -void mtk_cipher_alg_release(struct mtk_cryp *cryp); -int mtk_hash_alg_register(struct mtk_cryp *cryp); -void mtk_hash_alg_release(struct mtk_cryp *cryp); - -#endif /* __MTK_PLATFORM_H_ */ diff --git a/drivers/crypto/mediatek/mtk-regs.h b/drivers/crypto/mediatek/mtk-regs.h deleted file mode 100644 index d3defda7a750..000000000000 --- a/drivers/crypto/mediatek/mtk-regs.h +++ /dev/null @@ -1,190 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Support for MediaTek cryptographic accelerator. - * - * Copyright (c) 2016 MediaTek Inc. - * Author: Ryder Lee - */ - -#ifndef __MTK_REGS_H__ -#define __MTK_REGS_H__ - -/* HIA, Command Descriptor Ring Manager */ -#define CDR_BASE_ADDR_LO(x) (0x0 + ((x) << 12)) -#define CDR_BASE_ADDR_HI(x) (0x4 + ((x) << 12)) -#define CDR_DATA_BASE_ADDR_LO(x) (0x8 + ((x) << 12)) -#define CDR_DATA_BASE_ADDR_HI(x) (0xC + ((x) << 12)) -#define CDR_ACD_BASE_ADDR_LO(x) (0x10 + ((x) << 12)) -#define CDR_ACD_BASE_ADDR_HI(x) (0x14 + ((x) << 12)) -#define CDR_RING_SIZE(x) (0x18 + ((x) << 12)) -#define CDR_DESC_SIZE(x) (0x1C + ((x) << 12)) -#define CDR_CFG(x) (0x20 + ((x) << 12)) -#define CDR_DMA_CFG(x) (0x24 + ((x) << 12)) -#define CDR_THRESH(x) (0x28 + ((x) << 12)) -#define CDR_PREP_COUNT(x) (0x2C + ((x) << 12)) -#define CDR_PROC_COUNT(x) (0x30 + ((x) << 12)) -#define CDR_PREP_PNTR(x) (0x34 + ((x) << 12)) -#define CDR_PROC_PNTR(x) (0x38 + ((x) << 12)) -#define CDR_STAT(x) (0x3C + ((x) << 12)) - -/* HIA, Result Descriptor Ring Manager */ -#define RDR_BASE_ADDR_LO(x) (0x800 + ((x) << 12)) -#define RDR_BASE_ADDR_HI(x) (0x804 + ((x) << 12)) -#define RDR_DATA_BASE_ADDR_LO(x) (0x808 + ((x) << 12)) -#define RDR_DATA_BASE_ADDR_HI(x) (0x80C + ((x) << 12)) -#define RDR_ACD_BASE_ADDR_LO(x) (0x810 + ((x) << 12)) -#define RDR_ACD_BASE_ADDR_HI(x) (0x814 + ((x) << 12)) -#define RDR_RING_SIZE(x) (0x818 + ((x) << 12)) -#define RDR_DESC_SIZE(x) (0x81C + ((x) << 12)) -#define RDR_CFG(x) (0x820 + ((x) << 12)) -#define RDR_DMA_CFG(x) (0x824 + ((x) << 12)) -#define RDR_THRESH(x) (0x828 + ((x) << 12)) -#define RDR_PREP_COUNT(x) (0x82C + ((x) << 12)) -#define RDR_PROC_COUNT(x) (0x830 + ((x) << 12)) -#define RDR_PREP_PNTR(x) (0x834 + ((x) << 12)) -#define RDR_PROC_PNTR(x) (0x838 + ((x) << 12)) -#define RDR_STAT(x) (0x83C + ((x) << 12)) - -/* HIA, Ring AIC */ -#define AIC_POL_CTRL(x) (0xE000 - ((x) << 12)) -#define AIC_TYPE_CTRL(x) (0xE004 - ((x) << 12)) -#define AIC_ENABLE_CTRL(x) (0xE008 - ((x) << 12)) -#define AIC_RAW_STAL(x) (0xE00C - ((x) << 12)) -#define AIC_ENABLE_SET(x) (0xE00C - ((x) << 12)) -#define AIC_ENABLED_STAT(x) (0xE010 - ((x) << 12)) -#define AIC_ACK(x) (0xE010 - ((x) << 12)) -#define AIC_ENABLE_CLR(x) (0xE014 - ((x) << 12)) -#define AIC_OPTIONS(x) (0xE018 - ((x) << 12)) -#define AIC_VERSION(x) (0xE01C - ((x) << 12)) - -/* HIA, Global AIC */ -#define AIC_G_POL_CTRL 0xF800 -#define AIC_G_TYPE_CTRL 0xF804 -#define AIC_G_ENABLE_CTRL 0xF808 -#define AIC_G_RAW_STAT 0xF80C -#define AIC_G_ENABLE_SET 0xF80C -#define AIC_G_ENABLED_STAT 0xF810 -#define AIC_G_ACK 0xF810 -#define AIC_G_ENABLE_CLR 0xF814 -#define AIC_G_OPTIONS 0xF818 -#define AIC_G_VERSION 0xF81C - -/* HIA, Data Fetch Engine */ -#define DFE_CFG 0xF000 -#define DFE_PRIO_0 0xF010 -#define DFE_PRIO_1 0xF014 -#define DFE_PRIO_2 0xF018 -#define DFE_PRIO_3 0xF01C - -/* HIA, Data Fetch Engine access monitoring for CDR */ -#define DFE_RING_REGION_LO(x) (0xF080 + ((x) << 3)) -#define DFE_RING_REGION_HI(x) (0xF084 + ((x) << 3)) - -/* HIA, Data Fetch Engine thread control and status for thread */ -#define DFE_THR_CTRL 0xF200 -#define DFE_THR_STAT 0xF204 -#define DFE_THR_DESC_CTRL 0xF208 -#define DFE_THR_DESC_DPTR_LO 0xF210 -#define DFE_THR_DESC_DPTR_HI 0xF214 -#define DFE_THR_DESC_ACDPTR_LO 0xF218 -#define DFE_THR_DESC_ACDPTR_HI 0xF21C - -/* HIA, Data Store Engine */ -#define DSE_CFG 0xF400 -#define DSE_PRIO_0 0xF410 -#define DSE_PRIO_1 0xF414 -#define DSE_PRIO_2 0xF418 -#define DSE_PRIO_3 0xF41C - -/* HIA, Data Store Engine access monitoring for RDR */ -#define DSE_RING_REGION_LO(x) (0xF480 + ((x) << 3)) -#define DSE_RING_REGION_HI(x) (0xF484 + ((x) << 3)) - -/* HIA, Data Store Engine thread control and status for thread */ -#define DSE_THR_CTRL 0xF600 -#define DSE_THR_STAT 0xF604 -#define DSE_THR_DESC_CTRL 0xF608 -#define DSE_THR_DESC_DPTR_LO 0xF610 -#define DSE_THR_DESC_DPTR_HI 0xF614 -#define DSE_THR_DESC_S_DPTR_LO 0xF618 -#define DSE_THR_DESC_S_DPTR_HI 0xF61C -#define DSE_THR_ERROR_STAT 0xF620 - -/* HIA Global */ -#define HIA_MST_CTRL 0xFFF4 -#define HIA_OPTIONS 0xFFF8 -#define HIA_VERSION 0xFFFC - -/* Processing Engine Input Side, Processing Engine */ -#define PE_IN_DBUF_THRESH 0x10000 -#define PE_IN_TBUF_THRESH 0x10100 - -/* Packet Engine Configuration / Status Registers */ -#define PE_TOKEN_CTRL_STAT 0x11000 -#define PE_FUNCTION_EN 0x11004 -#define PE_CONTEXT_CTRL 0x11008 -#define PE_INTERRUPT_CTRL_STAT 0x11010 -#define PE_CONTEXT_STAT 0x1100C -#define PE_OUT_TRANS_CTRL_STAT 0x11018 -#define PE_OUT_BUF_CTRL 0x1101C - -/* Packet Engine PRNG Registers */ -#define PE_PRNG_STAT 0x11040 -#define PE_PRNG_CTRL 0x11044 -#define PE_PRNG_SEED_L 0x11048 -#define PE_PRNG_SEED_H 0x1104C -#define PE_PRNG_KEY_0_L 0x11050 -#define PE_PRNG_KEY_0_H 0x11054 -#define PE_PRNG_KEY_1_L 0x11058 -#define PE_PRNG_KEY_1_H 0x1105C -#define PE_PRNG_RES_0 0x11060 -#define PE_PRNG_RES_1 0x11064 -#define PE_PRNG_RES_2 0x11068 -#define PE_PRNG_RES_3 0x1106C -#define PE_PRNG_LFSR_L 0x11070 -#define PE_PRNG_LFSR_H 0x11074 - -/* Packet Engine AIC */ -#define PE_EIP96_AIC_POL_CTRL 0x113C0 -#define PE_EIP96_AIC_TYPE_CTRL 0x113C4 -#define PE_EIP96_AIC_ENABLE_CTRL 0x113C8 -#define PE_EIP96_AIC_RAW_STAT 0x113CC -#define PE_EIP96_AIC_ENABLE_SET 0x113CC -#define PE_EIP96_AIC_ENABLED_STAT 0x113D0 -#define PE_EIP96_AIC_ACK 0x113D0 -#define PE_EIP96_AIC_ENABLE_CLR 0x113D4 -#define PE_EIP96_AIC_OPTIONS 0x113D8 -#define PE_EIP96_AIC_VERSION 0x113DC - -/* Packet Engine Options & Version Registers */ -#define PE_EIP96_OPTIONS 0x113F8 -#define PE_EIP96_VERSION 0x113FC - -/* Processing Engine Output Side */ -#define PE_OUT_DBUF_THRESH 0x11C00 -#define PE_OUT_TBUF_THRESH 0x11D00 - -/* Processing Engine Local AIC */ -#define PE_AIC_POL_CTRL 0x11F00 -#define PE_AIC_TYPE_CTRL 0x11F04 -#define PE_AIC_ENABLE_CTRL 0x11F08 -#define PE_AIC_RAW_STAT 0x11F0C -#define PE_AIC_ENABLE_SET 0x11F0C -#define PE_AIC_ENABLED_STAT 0x11F10 -#define PE_AIC_ENABLE_CLR 0x11F14 -#define PE_AIC_OPTIONS 0x11F18 -#define PE_AIC_VERSION 0x11F1C - -/* Processing Engine General Configuration and Version */ -#define PE_IN_FLIGHT 0x11FF0 -#define PE_OPTIONS 0x11FF8 -#define PE_VERSION 0x11FFC - -/* EIP-97 - Global */ -#define EIP97_CLOCK_STATE 0x1FFE4 -#define EIP97_FORCE_CLOCK_ON 0x1FFE8 -#define EIP97_FORCE_CLOCK_OFF 0x1FFEC -#define EIP97_MST_CTRL 0x1FFF4 -#define EIP97_OPTIONS 0x1FFF8 -#define EIP97_VERSION 0x1FFFC -#endif /* __MTK_REGS_H__ */ diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c deleted file mode 100644 index f55aacdafbef..000000000000 --- a/drivers/crypto/mediatek/mtk-sha.c +++ /dev/null @@ -1,1353 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Cryptographic API. - * - * Driver for EIP97 SHA1/SHA2(HMAC) acceleration. - * - * Copyright (c) 2016 Ryder Lee - * - * Some ideas are from atmel-sha.c and omap-sham.c drivers. - */ - -#include -#include -#include -#include "mtk-platform.h" - -#define SHA_ALIGN_MSK (sizeof(u32) - 1) -#define SHA_QUEUE_SIZE 512 -#define SHA_BUF_SIZE ((u32)PAGE_SIZE) - -#define SHA_OP_UPDATE 1 -#define SHA_OP_FINAL 2 - -#define SHA_DATA_LEN_MSK cpu_to_le32(GENMASK(16, 0)) -#define SHA_MAX_DIGEST_BUF_SIZE 32 - -/* SHA command token */ -#define SHA_CT_SIZE 5 -#define SHA_CT_CTRL_HDR cpu_to_le32(0x02220000) -#define SHA_CMD0 cpu_to_le32(0x03020000) -#define SHA_CMD1 cpu_to_le32(0x21060000) -#define SHA_CMD2 cpu_to_le32(0xe0e63802) - -/* SHA transform information */ -#define SHA_TFM_HASH cpu_to_le32(0x2 << 0) -#define SHA_TFM_SIZE(x) cpu_to_le32((x) << 8) -#define SHA_TFM_START cpu_to_le32(0x1 << 4) -#define SHA_TFM_CONTINUE cpu_to_le32(0x1 << 5) -#define SHA_TFM_HASH_STORE cpu_to_le32(0x1 << 19) -#define SHA_TFM_SHA1 cpu_to_le32(0x2 << 23) -#define SHA_TFM_SHA256 cpu_to_le32(0x3 << 23) -#define SHA_TFM_SHA224 cpu_to_le32(0x4 << 23) -#define SHA_TFM_SHA512 cpu_to_le32(0x5 << 23) -#define SHA_TFM_SHA384 cpu_to_le32(0x6 << 23) -#define SHA_TFM_DIGEST(x) cpu_to_le32(((x) & GENMASK(3, 0)) << 24) - -/* SHA flags */ -#define SHA_FLAGS_BUSY BIT(0) -#define SHA_FLAGS_FINAL BIT(1) -#define SHA_FLAGS_FINUP BIT(2) -#define SHA_FLAGS_SG BIT(3) -#define SHA_FLAGS_ALGO_MSK GENMASK(8, 4) -#define SHA_FLAGS_SHA1 BIT(4) -#define SHA_FLAGS_SHA224 BIT(5) -#define SHA_FLAGS_SHA256 BIT(6) -#define SHA_FLAGS_SHA384 BIT(7) -#define SHA_FLAGS_SHA512 BIT(8) -#define SHA_FLAGS_HMAC BIT(9) -#define SHA_FLAGS_PAD BIT(10) - -/** - * mtk_sha_info - hardware information of AES - * @cmd: command token, hardware instruction - * @tfm: transform state of cipher algorithm. - * @state: contains keys and initial vectors. - * - */ -struct mtk_sha_info { - __le32 ctrl[2]; - __le32 cmd[3]; - __le32 tfm[2]; - __le32 digest[SHA_MAX_DIGEST_BUF_SIZE]; -}; - -struct mtk_sha_reqctx { - struct mtk_sha_info info; - unsigned long flags; - unsigned long op; - - u64 digcnt; - size_t bufcnt; - dma_addr_t dma_addr; - - __le32 ct_hdr; - u32 ct_size; - dma_addr_t ct_dma; - dma_addr_t tfm_dma; - - /* Walk state */ - struct scatterlist *sg; - u32 offset; /* Offset in current sg */ - u32 total; /* Total request */ - size_t ds; - size_t bs; - - u8 *buffer; -}; - -struct mtk_sha_hmac_ctx { - struct crypto_shash *shash; - u8 ipad[SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); - u8 opad[SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); -}; - -struct mtk_sha_ctx { - struct mtk_cryp *cryp; - unsigned long flags; - u8 id; - u8 buf[SHA_BUF_SIZE] __aligned(sizeof(u32)); - - struct mtk_sha_hmac_ctx base[]; -}; - -struct mtk_sha_drv { - struct list_head dev_list; - /* Device list lock */ - spinlock_t lock; -}; - -static struct mtk_sha_drv mtk_sha = { - .dev_list = LIST_HEAD_INIT(mtk_sha.dev_list), - .lock = __SPIN_LOCK_UNLOCKED(mtk_sha.lock), -}; - -static int mtk_sha_handle_queue(struct mtk_cryp *cryp, u8 id, - struct ahash_request *req); - -static inline u32 mtk_sha_read(struct mtk_cryp *cryp, u32 offset) -{ - return readl_relaxed(cryp->base + offset); -} - -static inline void mtk_sha_write(struct mtk_cryp *cryp, - u32 offset, u32 value) -{ - writel_relaxed(value, cryp->base + offset); -} - -static inline void mtk_sha_ring_shift(struct mtk_ring *ring, - struct mtk_desc **cmd_curr, - struct mtk_desc **res_curr, - int *count) -{ - *cmd_curr = ring->cmd_next++; - *res_curr = ring->res_next++; - (*count)++; - - if (ring->cmd_next == ring->cmd_base + MTK_DESC_NUM) { - ring->cmd_next = ring->cmd_base; - ring->res_next = ring->res_base; - } -} - -static struct mtk_cryp *mtk_sha_find_dev(struct mtk_sha_ctx *tctx) -{ - struct mtk_cryp *cryp = NULL; - struct mtk_cryp *tmp; - - spin_lock_bh(&mtk_sha.lock); - if (!tctx->cryp) { - list_for_each_entry(tmp, &mtk_sha.dev_list, sha_list) { - cryp = tmp; - break; - } - tctx->cryp = cryp; - } else { - cryp = tctx->cryp; - } - - /* - * Assign record id to tfm in round-robin fashion, and this - * will help tfm to bind to corresponding descriptor rings. - */ - tctx->id = cryp->rec; - cryp->rec = !cryp->rec; - - spin_unlock_bh(&mtk_sha.lock); - - return cryp; -} - -static int mtk_sha_append_sg(struct mtk_sha_reqctx *ctx) -{ - size_t count; - - while ((ctx->bufcnt < SHA_BUF_SIZE) && ctx->total) { - count = min(ctx->sg->length - ctx->offset, ctx->total); - count = min(count, SHA_BUF_SIZE - ctx->bufcnt); - - if (count <= 0) { - /* - * Check if count <= 0 because the buffer is full or - * because the sg length is 0. In the latest case, - * check if there is another sg in the list, a 0 length - * sg doesn't necessarily mean the end of the sg list. - */ - if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) { - ctx->sg = sg_next(ctx->sg); - continue; - } else { - break; - } - } - - scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg, - ctx->offset, count, 0); - - ctx->bufcnt += count; - ctx->offset += count; - ctx->total -= count; - - if (ctx->offset == ctx->sg->length) { - ctx->sg = sg_next(ctx->sg); - if (ctx->sg) - ctx->offset = 0; - else - ctx->total = 0; - } - } - - return 0; -} - -/* - * The purpose of this padding is to ensure that the padded message is a - * multiple of 512 bits (SHA1/SHA224/SHA256) or 1024 bits (SHA384/SHA512). - * The bit "1" is appended at the end of the message followed by - * "padlen-1" zero bits. Then a 64 bits block (SHA1/SHA224/SHA256) or - * 128 bits block (SHA384/SHA512) equals to the message length in bits - * is appended. - * - * For SHA1/SHA224/SHA256, padlen is calculated as followed: - * - if message length < 56 bytes then padlen = 56 - message length - * - else padlen = 64 + 56 - message length - * - * For SHA384/SHA512, padlen is calculated as followed: - * - if message length < 112 bytes then padlen = 112 - message length - * - else padlen = 128 + 112 - message length - */ -static void mtk_sha_fill_padding(struct mtk_sha_reqctx *ctx, u32 len) -{ - u32 index, padlen; - __be64 bits[2]; - u64 size = ctx->digcnt; - - size += ctx->bufcnt; - size += len; - - bits[1] = cpu_to_be64(size << 3); - bits[0] = cpu_to_be64(size >> 61); - - switch (ctx->flags & SHA_FLAGS_ALGO_MSK) { - case SHA_FLAGS_SHA384: - case SHA_FLAGS_SHA512: - index = ctx->bufcnt & 0x7f; - padlen = (index < 112) ? (112 - index) : ((128 + 112) - index); - *(ctx->buffer + ctx->bufcnt) = 0x80; - memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen - 1); - memcpy(ctx->buffer + ctx->bufcnt + padlen, bits, 16); - ctx->bufcnt += padlen + 16; - ctx->flags |= SHA_FLAGS_PAD; - break; - - default: - index = ctx->bufcnt & 0x3f; - padlen = (index < 56) ? (56 - index) : ((64 + 56) - index); - *(ctx->buffer + ctx->bufcnt) = 0x80; - memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen - 1); - memcpy(ctx->buffer + ctx->bufcnt + padlen, &bits[1], 8); - ctx->bufcnt += padlen + 8; - ctx->flags |= SHA_FLAGS_PAD; - break; - } -} - -/* Initialize basic transform information of SHA */ -static void mtk_sha_info_init(struct mtk_sha_reqctx *ctx) -{ - struct mtk_sha_info *info = &ctx->info; - - ctx->ct_hdr = SHA_CT_CTRL_HDR; - ctx->ct_size = SHA_CT_SIZE; - - info->tfm[0] = SHA_TFM_HASH | SHA_TFM_SIZE(SIZE_IN_WORDS(ctx->ds)); - - switch (ctx->flags & SHA_FLAGS_ALGO_MSK) { - case SHA_FLAGS_SHA1: - info->tfm[0] |= SHA_TFM_SHA1; - break; - case SHA_FLAGS_SHA224: - info->tfm[0] |= SHA_TFM_SHA224; - break; - case SHA_FLAGS_SHA256: - info->tfm[0] |= SHA_TFM_SHA256; - break; - case SHA_FLAGS_SHA384: - info->tfm[0] |= SHA_TFM_SHA384; - break; - case SHA_FLAGS_SHA512: - info->tfm[0] |= SHA_TFM_SHA512; - break; - - default: - /* Should not happen... */ - return; - } - - info->tfm[1] = SHA_TFM_HASH_STORE; - info->ctrl[0] = info->tfm[0] | SHA_TFM_CONTINUE | SHA_TFM_START; - info->ctrl[1] = info->tfm[1]; - - info->cmd[0] = SHA_CMD0; - info->cmd[1] = SHA_CMD1; - info->cmd[2] = SHA_CMD2 | SHA_TFM_DIGEST(SIZE_IN_WORDS(ctx->ds)); -} - -/* - * Update input data length field of transform information and - * map it to DMA region. - */ -static int mtk_sha_info_update(struct mtk_cryp *cryp, - struct mtk_sha_rec *sha, - size_t len1, size_t len2) -{ - struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); - struct mtk_sha_info *info = &ctx->info; - - ctx->ct_hdr &= ~SHA_DATA_LEN_MSK; - ctx->ct_hdr |= cpu_to_le32(len1 + len2); - info->cmd[0] &= ~SHA_DATA_LEN_MSK; - info->cmd[0] |= cpu_to_le32(len1 + len2); - - /* Setting SHA_TFM_START only for the first iteration */ - if (ctx->digcnt) - info->ctrl[0] &= ~SHA_TFM_START; - - ctx->digcnt += len1; - - ctx->ct_dma = dma_map_single(cryp->dev, info, sizeof(*info), - DMA_BIDIRECTIONAL); - if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma))) { - dev_err(cryp->dev, "dma %zu bytes error\n", sizeof(*info)); - return -EINVAL; - } - - ctx->tfm_dma = ctx->ct_dma + sizeof(info->ctrl) + sizeof(info->cmd); - - return 0; -} - -/* - * Because of hardware limitation, we must pre-calculate the inner - * and outer digest that need to be processed firstly by engine, then - * apply the result digest to the input message. These complex hashing - * procedures limits HMAC performance, so we use fallback SW encoding. - */ -static int mtk_sha_finish_hmac(struct ahash_request *req) -{ - struct mtk_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm); - struct mtk_sha_hmac_ctx *bctx = tctx->base; - struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); - - SHASH_DESC_ON_STACK(shash, bctx->shash); - - shash->tfm = bctx->shash; - - return crypto_shash_init(shash) ?: - crypto_shash_update(shash, bctx->opad, ctx->bs) ?: - crypto_shash_finup(shash, req->result, ctx->ds, req->result); -} - -/* Initialize request context */ -static int mtk_sha_init(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct mtk_sha_ctx *tctx = crypto_ahash_ctx(tfm); - struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); - - ctx->flags = 0; - ctx->ds = crypto_ahash_digestsize(tfm); - - switch (ctx->ds) { - case SHA1_DIGEST_SIZE: - ctx->flags |= SHA_FLAGS_SHA1; - ctx->bs = SHA1_BLOCK_SIZE; - break; - case SHA224_DIGEST_SIZE: - ctx->flags |= SHA_FLAGS_SHA224; - ctx->bs = SHA224_BLOCK_SIZE; - break; - case SHA256_DIGEST_SIZE: - ctx->flags |= SHA_FLAGS_SHA256; - ctx->bs = SHA256_BLOCK_SIZE; - break; - case SHA384_DIGEST_SIZE: - ctx->flags |= SHA_FLAGS_SHA384; - ctx->bs = SHA384_BLOCK_SIZE; - break; - case SHA512_DIGEST_SIZE: - ctx->flags |= SHA_FLAGS_SHA512; - ctx->bs = SHA512_BLOCK_SIZE; - break; - default: - return -EINVAL; - } - - ctx->bufcnt = 0; - ctx->digcnt = 0; - ctx->buffer = tctx->buf; - - if (tctx->flags & SHA_FLAGS_HMAC) { - struct mtk_sha_hmac_ctx *bctx = tctx->base; - - memcpy(ctx->buffer, bctx->ipad, ctx->bs); - ctx->bufcnt = ctx->bs; - ctx->flags |= SHA_FLAGS_HMAC; - } - - return 0; -} - -static int mtk_sha_xmit(struct mtk_cryp *cryp, struct mtk_sha_rec *sha, - dma_addr_t addr1, size_t len1, - dma_addr_t addr2, size_t len2) -{ - struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); - struct mtk_ring *ring = cryp->ring[sha->id]; - struct mtk_desc *cmd, *res; - int err, count = 0; - - err = mtk_sha_info_update(cryp, sha, len1, len2); - if (err) - return err; - - /* Fill in the command/result descriptors */ - mtk_sha_ring_shift(ring, &cmd, &res, &count); - - res->hdr = MTK_DESC_FIRST | MTK_DESC_BUF_LEN(len1); - cmd->hdr = MTK_DESC_FIRST | MTK_DESC_BUF_LEN(len1) | - MTK_DESC_CT_LEN(ctx->ct_size); - cmd->buf = cpu_to_le32(addr1); - cmd->ct = cpu_to_le32(ctx->ct_dma); - cmd->ct_hdr = ctx->ct_hdr; - cmd->tfm = cpu_to_le32(ctx->tfm_dma); - - if (len2) { - mtk_sha_ring_shift(ring, &cmd, &res, &count); - - res->hdr = MTK_DESC_BUF_LEN(len2); - cmd->hdr = MTK_DESC_BUF_LEN(len2); - cmd->buf = cpu_to_le32(addr2); - } - - cmd->hdr |= MTK_DESC_LAST; - res->hdr |= MTK_DESC_LAST; - - /* - * Make sure that all changes to the DMA ring are done before we - * start engine. - */ - wmb(); - /* Start DMA transfer */ - mtk_sha_write(cryp, RDR_PREP_COUNT(sha->id), MTK_DESC_CNT(count)); - mtk_sha_write(cryp, CDR_PREP_COUNT(sha->id), MTK_DESC_CNT(count)); - - return -EINPROGRESS; -} - -static int mtk_sha_dma_map(struct mtk_cryp *cryp, - struct mtk_sha_rec *sha, - struct mtk_sha_reqctx *ctx, - size_t count) -{ - ctx->dma_addr = dma_map_single(cryp->dev, ctx->buffer, - SHA_BUF_SIZE, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(cryp->dev, ctx->dma_addr))) { - dev_err(cryp->dev, "dma map error\n"); - return -EINVAL; - } - - ctx->flags &= ~SHA_FLAGS_SG; - - return mtk_sha_xmit(cryp, sha, ctx->dma_addr, count, 0, 0); -} - -static int mtk_sha_update_slow(struct mtk_cryp *cryp, - struct mtk_sha_rec *sha) -{ - struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); - size_t count; - u32 final; - - mtk_sha_append_sg(ctx); - - final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; - - dev_dbg(cryp->dev, "slow: bufcnt: %zu\n", ctx->bufcnt); - - if (final) { - sha->flags |= SHA_FLAGS_FINAL; - mtk_sha_fill_padding(ctx, 0); - } - - if (final || (ctx->bufcnt == SHA_BUF_SIZE && ctx->total)) { - count = ctx->bufcnt; - ctx->bufcnt = 0; - - return mtk_sha_dma_map(cryp, sha, ctx, count); - } - return 0; -} - -static int mtk_sha_update_start(struct mtk_cryp *cryp, - struct mtk_sha_rec *sha) -{ - struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); - u32 len, final, tail; - struct scatterlist *sg; - - if (!ctx->total) - return 0; - - if (ctx->bufcnt || ctx->offset) - return mtk_sha_update_slow(cryp, sha); - - sg = ctx->sg; - - if (!IS_ALIGNED(sg->offset, sizeof(u32))) - return mtk_sha_update_slow(cryp, sha); - - if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, ctx->bs)) - /* size is not ctx->bs aligned */ - return mtk_sha_update_slow(cryp, sha); - - len = min(ctx->total, sg->length); - - if (sg_is_last(sg)) { - if (!(ctx->flags & SHA_FLAGS_FINUP)) { - /* not last sg must be ctx->bs aligned */ - tail = len & (ctx->bs - 1); - len -= tail; - } - } - - ctx->total -= len; - ctx->offset = len; /* offset where to start slow */ - - final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; - - /* Add padding */ - if (final) { - size_t count; - - tail = len & (ctx->bs - 1); - len -= tail; - ctx->total += tail; - ctx->offset = len; /* offset where to start slow */ - - sg = ctx->sg; - mtk_sha_append_sg(ctx); - mtk_sha_fill_padding(ctx, len); - - ctx->dma_addr = dma_map_single(cryp->dev, ctx->buffer, - SHA_BUF_SIZE, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(cryp->dev, ctx->dma_addr))) { - dev_err(cryp->dev, "dma map bytes error\n"); - return -EINVAL; - } - - sha->flags |= SHA_FLAGS_FINAL; - count = ctx->bufcnt; - ctx->bufcnt = 0; - - if (len == 0) { - ctx->flags &= ~SHA_FLAGS_SG; - return mtk_sha_xmit(cryp, sha, ctx->dma_addr, - count, 0, 0); - - } else { - ctx->sg = sg; - if (!dma_map_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE)) { - dev_err(cryp->dev, "dma_map_sg error\n"); - return -EINVAL; - } - - ctx->flags |= SHA_FLAGS_SG; - return mtk_sha_xmit(cryp, sha, sg_dma_address(ctx->sg), - len, ctx->dma_addr, count); - } - } - - if (!dma_map_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE)) { - dev_err(cryp->dev, "dma_map_sg error\n"); - return -EINVAL; - } - - ctx->flags |= SHA_FLAGS_SG; - - return mtk_sha_xmit(cryp, sha, sg_dma_address(ctx->sg), - len, 0, 0); -} - -static int mtk_sha_final_req(struct mtk_cryp *cryp, - struct mtk_sha_rec *sha) -{ - struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); - size_t count; - - mtk_sha_fill_padding(ctx, 0); - - sha->flags |= SHA_FLAGS_FINAL; - count = ctx->bufcnt; - ctx->bufcnt = 0; - - return mtk_sha_dma_map(cryp, sha, ctx, count); -} - -/* Copy ready hash (+ finalize hmac) */ -static int mtk_sha_finish(struct ahash_request *req) -{ - struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); - __le32 *digest = ctx->info.digest; - u32 *result = (u32 *)req->result; - int i; - - /* Get the hash from the digest buffer */ - for (i = 0; i < SIZE_IN_WORDS(ctx->ds); i++) - result[i] = le32_to_cpu(digest[i]); - - if (ctx->flags & SHA_FLAGS_HMAC) - return mtk_sha_finish_hmac(req); - - return 0; -} - -static void mtk_sha_finish_req(struct mtk_cryp *cryp, - struct mtk_sha_rec *sha, - int err) -{ - if (likely(!err && (SHA_FLAGS_FINAL & sha->flags))) - err = mtk_sha_finish(sha->req); - - sha->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL); - - sha->req->base.complete(&sha->req->base, err); - - /* Handle new request */ - tasklet_schedule(&sha->queue_task); -} - -static int mtk_sha_handle_queue(struct mtk_cryp *cryp, u8 id, - struct ahash_request *req) -{ - struct mtk_sha_rec *sha = cryp->sha[id]; - struct crypto_async_request *async_req, *backlog; - struct mtk_sha_reqctx *ctx; - unsigned long flags; - int err = 0, ret = 0; - - spin_lock_irqsave(&sha->lock, flags); - if (req) - ret = ahash_enqueue_request(&sha->queue, req); - - if (SHA_FLAGS_BUSY & sha->flags) { - spin_unlock_irqrestore(&sha->lock, flags); - return ret; - } - - backlog = crypto_get_backlog(&sha->queue); - async_req = crypto_dequeue_request(&sha->queue); - if (async_req) - sha->flags |= SHA_FLAGS_BUSY; - spin_unlock_irqrestore(&sha->lock, flags); - - if (!async_req) - return ret; - - if (backlog) - backlog->complete(backlog, -EINPROGRESS); - - req = ahash_request_cast(async_req); - ctx = ahash_request_ctx(req); - - sha->req = req; - - mtk_sha_info_init(ctx); - - if (ctx->op == SHA_OP_UPDATE) { - err = mtk_sha_update_start(cryp, sha); - if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) - /* No final() after finup() */ - err = mtk_sha_final_req(cryp, sha); - } else if (ctx->op == SHA_OP_FINAL) { - err = mtk_sha_final_req(cryp, sha); - } - - if (unlikely(err != -EINPROGRESS)) - /* Task will not finish it, so do it here */ - mtk_sha_finish_req(cryp, sha, err); - - return ret; -} - -static int mtk_sha_enqueue(struct ahash_request *req, u32 op) -{ - struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); - struct mtk_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm); - - ctx->op = op; - - return mtk_sha_handle_queue(tctx->cryp, tctx->id, req); -} - -static void mtk_sha_unmap(struct mtk_cryp *cryp, struct mtk_sha_rec *sha) -{ - struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); - - dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->info), - DMA_BIDIRECTIONAL); - - if (ctx->flags & SHA_FLAGS_SG) { - dma_unmap_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE); - if (ctx->sg->length == ctx->offset) { - ctx->sg = sg_next(ctx->sg); - if (ctx->sg) - ctx->offset = 0; - } - if (ctx->flags & SHA_FLAGS_PAD) { - dma_unmap_single(cryp->dev, ctx->dma_addr, - SHA_BUF_SIZE, DMA_TO_DEVICE); - } - } else - dma_unmap_single(cryp->dev, ctx->dma_addr, - SHA_BUF_SIZE, DMA_TO_DEVICE); -} - -static void mtk_sha_complete(struct mtk_cryp *cryp, - struct mtk_sha_rec *sha) -{ - int err = 0; - - err = mtk_sha_update_start(cryp, sha); - if (err != -EINPROGRESS) - mtk_sha_finish_req(cryp, sha, err); -} - -static int mtk_sha_update(struct ahash_request *req) -{ - struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); - - ctx->total = req->nbytes; - ctx->sg = req->src; - ctx->offset = 0; - - if ((ctx->bufcnt + ctx->total < SHA_BUF_SIZE) && - !(ctx->flags & SHA_FLAGS_FINUP)) - return mtk_sha_append_sg(ctx); - - return mtk_sha_enqueue(req, SHA_OP_UPDATE); -} - -static int mtk_sha_final(struct ahash_request *req) -{ - struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); - - ctx->flags |= SHA_FLAGS_FINUP; - - if (ctx->flags & SHA_FLAGS_PAD) - return mtk_sha_finish(req); - - return mtk_sha_enqueue(req, SHA_OP_FINAL); -} - -static int mtk_sha_finup(struct ahash_request *req) -{ - struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); - int err1, err2; - - ctx->flags |= SHA_FLAGS_FINUP; - - err1 = mtk_sha_update(req); - if (err1 == -EINPROGRESS || - (err1 == -EBUSY && (ahash_request_flags(req) & - CRYPTO_TFM_REQ_MAY_BACKLOG))) - return err1; - /* - * final() has to be always called to cleanup resources - * even if update() failed - */ - err2 = mtk_sha_final(req); - - return err1 ?: err2; -} - -static int mtk_sha_digest(struct ahash_request *req) -{ - return mtk_sha_init(req) ?: mtk_sha_finup(req); -} - -static int mtk_sha_setkey(struct crypto_ahash *tfm, const u8 *key, - u32 keylen) -{ - struct mtk_sha_ctx *tctx = crypto_ahash_ctx(tfm); - struct mtk_sha_hmac_ctx *bctx = tctx->base; - size_t bs = crypto_shash_blocksize(bctx->shash); - size_t ds = crypto_shash_digestsize(bctx->shash); - int err, i; - - if (keylen > bs) { - err = crypto_shash_tfm_digest(bctx->shash, key, keylen, - bctx->ipad); - if (err) - return err; - keylen = ds; - } else { - memcpy(bctx->ipad, key, keylen); - } - - memset(bctx->ipad + keylen, 0, bs - keylen); - memcpy(bctx->opad, bctx->ipad, bs); - - for (i = 0; i < bs; i++) { - bctx->ipad[i] ^= HMAC_IPAD_VALUE; - bctx->opad[i] ^= HMAC_OPAD_VALUE; - } - - return 0; -} - -static int mtk_sha_export(struct ahash_request *req, void *out) -{ - const struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); - - memcpy(out, ctx, sizeof(*ctx)); - return 0; -} - -static int mtk_sha_import(struct ahash_request *req, const void *in) -{ - struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); - - memcpy(ctx, in, sizeof(*ctx)); - return 0; -} - -static int mtk_sha_cra_init_alg(struct crypto_tfm *tfm, - const char *alg_base) -{ - struct mtk_sha_ctx *tctx = crypto_tfm_ctx(tfm); - struct mtk_cryp *cryp = NULL; - - cryp = mtk_sha_find_dev(tctx); - if (!cryp) - return -ENODEV; - - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct mtk_sha_reqctx)); - - if (alg_base) { - struct mtk_sha_hmac_ctx *bctx = tctx->base; - - tctx->flags |= SHA_FLAGS_HMAC; - bctx->shash = crypto_alloc_shash(alg_base, 0, - CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(bctx->shash)) { - pr_err("base driver %s could not be loaded.\n", - alg_base); - - return PTR_ERR(bctx->shash); - } - } - return 0; -} - -static int mtk_sha_cra_init(struct crypto_tfm *tfm) -{ - return mtk_sha_cra_init_alg(tfm, NULL); -} - -static int mtk_sha_cra_sha1_init(struct crypto_tfm *tfm) -{ - return mtk_sha_cra_init_alg(tfm, "sha1"); -} - -static int mtk_sha_cra_sha224_init(struct crypto_tfm *tfm) -{ - return mtk_sha_cra_init_alg(tfm, "sha224"); -} - -static int mtk_sha_cra_sha256_init(struct crypto_tfm *tfm) -{ - return mtk_sha_cra_init_alg(tfm, "sha256"); -} - -static int mtk_sha_cra_sha384_init(struct crypto_tfm *tfm) -{ - return mtk_sha_cra_init_alg(tfm, "sha384"); -} - -static int mtk_sha_cra_sha512_init(struct crypto_tfm *tfm) -{ - return mtk_sha_cra_init_alg(tfm, "sha512"); -} - -static void mtk_sha_cra_exit(struct crypto_tfm *tfm) -{ - struct mtk_sha_ctx *tctx = crypto_tfm_ctx(tfm); - - if (tctx->flags & SHA_FLAGS_HMAC) { - struct mtk_sha_hmac_ctx *bctx = tctx->base; - - crypto_free_shash(bctx->shash); - } -} - -static struct ahash_alg algs_sha1_sha224_sha256[] = { -{ - .init = mtk_sha_init, - .update = mtk_sha_update, - .final = mtk_sha_final, - .finup = mtk_sha_finup, - .digest = mtk_sha_digest, - .export = mtk_sha_export, - .import = mtk_sha_import, - .halg.digestsize = SHA1_DIGEST_SIZE, - .halg.statesize = sizeof(struct mtk_sha_reqctx), - .halg.base = { - .cra_name = "sha1", - .cra_driver_name = "mtk-sha1", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct mtk_sha_ctx), - .cra_alignmask = SHA_ALIGN_MSK, - .cra_module = THIS_MODULE, - .cra_init = mtk_sha_cra_init, - .cra_exit = mtk_sha_cra_exit, - } -}, -{ - .init = mtk_sha_init, - .update = mtk_sha_update, - .final = mtk_sha_final, - .finup = mtk_sha_finup, - .digest = mtk_sha_digest, - .export = mtk_sha_export, - .import = mtk_sha_import, - .halg.digestsize = SHA224_DIGEST_SIZE, - .halg.statesize = sizeof(struct mtk_sha_reqctx), - .halg.base = { - .cra_name = "sha224", - .cra_driver_name = "mtk-sha224", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct mtk_sha_ctx), - .cra_alignmask = SHA_ALIGN_MSK, - .cra_module = THIS_MODULE, - .cra_init = mtk_sha_cra_init, - .cra_exit = mtk_sha_cra_exit, - } -}, -{ - .init = mtk_sha_init, - .update = mtk_sha_update, - .final = mtk_sha_final, - .finup = mtk_sha_finup, - .digest = mtk_sha_digest, - .export = mtk_sha_export, - .import = mtk_sha_import, - .halg.digestsize = SHA256_DIGEST_SIZE, - .halg.statesize = sizeof(struct mtk_sha_reqctx), - .halg.base = { - .cra_name = "sha256", - .cra_driver_name = "mtk-sha256", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct mtk_sha_ctx), - .cra_alignmask = SHA_ALIGN_MSK, - .cra_module = THIS_MODULE, - .cra_init = mtk_sha_cra_init, - .cra_exit = mtk_sha_cra_exit, - } -}, -{ - .init = mtk_sha_init, - .update = mtk_sha_update, - .final = mtk_sha_final, - .finup = mtk_sha_finup, - .digest = mtk_sha_digest, - .export = mtk_sha_export, - .import = mtk_sha_import, - .setkey = mtk_sha_setkey, - .halg.digestsize = SHA1_DIGEST_SIZE, - .halg.statesize = sizeof(struct mtk_sha_reqctx), - .halg.base = { - .cra_name = "hmac(sha1)", - .cra_driver_name = "mtk-hmac-sha1", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct mtk_sha_ctx) + - sizeof(struct mtk_sha_hmac_ctx), - .cra_alignmask = SHA_ALIGN_MSK, - .cra_module = THIS_MODULE, - .cra_init = mtk_sha_cra_sha1_init, - .cra_exit = mtk_sha_cra_exit, - } -}, -{ - .init = mtk_sha_init, - .update = mtk_sha_update, - .final = mtk_sha_final, - .finup = mtk_sha_finup, - .digest = mtk_sha_digest, - .export = mtk_sha_export, - .import = mtk_sha_import, - .setkey = mtk_sha_setkey, - .halg.digestsize = SHA224_DIGEST_SIZE, - .halg.statesize = sizeof(struct mtk_sha_reqctx), - .halg.base = { - .cra_name = "hmac(sha224)", - .cra_driver_name = "mtk-hmac-sha224", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct mtk_sha_ctx) + - sizeof(struct mtk_sha_hmac_ctx), - .cra_alignmask = SHA_ALIGN_MSK, - .cra_module = THIS_MODULE, - .cra_init = mtk_sha_cra_sha224_init, - .cra_exit = mtk_sha_cra_exit, - } -}, -{ - .init = mtk_sha_init, - .update = mtk_sha_update, - .final = mtk_sha_final, - .finup = mtk_sha_finup, - .digest = mtk_sha_digest, - .export = mtk_sha_export, - .import = mtk_sha_import, - .setkey = mtk_sha_setkey, - .halg.digestsize = SHA256_DIGEST_SIZE, - .halg.statesize = sizeof(struct mtk_sha_reqctx), - .halg.base = { - .cra_name = "hmac(sha256)", - .cra_driver_name = "mtk-hmac-sha256", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct mtk_sha_ctx) + - sizeof(struct mtk_sha_hmac_ctx), - .cra_alignmask = SHA_ALIGN_MSK, - .cra_module = THIS_MODULE, - .cra_init = mtk_sha_cra_sha256_init, - .cra_exit = mtk_sha_cra_exit, - } -}, -}; - -static struct ahash_alg algs_sha384_sha512[] = { -{ - .init = mtk_sha_init, - .update = mtk_sha_update, - .final = mtk_sha_final, - .finup = mtk_sha_finup, - .digest = mtk_sha_digest, - .export = mtk_sha_export, - .import = mtk_sha_import, - .halg.digestsize = SHA384_DIGEST_SIZE, - .halg.statesize = sizeof(struct mtk_sha_reqctx), - .halg.base = { - .cra_name = "sha384", - .cra_driver_name = "mtk-sha384", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = SHA384_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct mtk_sha_ctx), - .cra_alignmask = SHA_ALIGN_MSK, - .cra_module = THIS_MODULE, - .cra_init = mtk_sha_cra_init, - .cra_exit = mtk_sha_cra_exit, - } -}, -{ - .init = mtk_sha_init, - .update = mtk_sha_update, - .final = mtk_sha_final, - .finup = mtk_sha_finup, - .digest = mtk_sha_digest, - .export = mtk_sha_export, - .import = mtk_sha_import, - .halg.digestsize = SHA512_DIGEST_SIZE, - .halg.statesize = sizeof(struct mtk_sha_reqctx), - .halg.base = { - .cra_name = "sha512", - .cra_driver_name = "mtk-sha512", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = SHA512_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct mtk_sha_ctx), - .cra_alignmask = SHA_ALIGN_MSK, - .cra_module = THIS_MODULE, - .cra_init = mtk_sha_cra_init, - .cra_exit = mtk_sha_cra_exit, - } -}, -{ - .init = mtk_sha_init, - .update = mtk_sha_update, - .final = mtk_sha_final, - .finup = mtk_sha_finup, - .digest = mtk_sha_digest, - .export = mtk_sha_export, - .import = mtk_sha_import, - .setkey = mtk_sha_setkey, - .halg.digestsize = SHA384_DIGEST_SIZE, - .halg.statesize = sizeof(struct mtk_sha_reqctx), - .halg.base = { - .cra_name = "hmac(sha384)", - .cra_driver_name = "mtk-hmac-sha384", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = SHA384_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct mtk_sha_ctx) + - sizeof(struct mtk_sha_hmac_ctx), - .cra_alignmask = SHA_ALIGN_MSK, - .cra_module = THIS_MODULE, - .cra_init = mtk_sha_cra_sha384_init, - .cra_exit = mtk_sha_cra_exit, - } -}, -{ - .init = mtk_sha_init, - .update = mtk_sha_update, - .final = mtk_sha_final, - .finup = mtk_sha_finup, - .digest = mtk_sha_digest, - .export = mtk_sha_export, - .import = mtk_sha_import, - .setkey = mtk_sha_setkey, - .halg.digestsize = SHA512_DIGEST_SIZE, - .halg.statesize = sizeof(struct mtk_sha_reqctx), - .halg.base = { - .cra_name = "hmac(sha512)", - .cra_driver_name = "mtk-hmac-sha512", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = SHA512_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct mtk_sha_ctx) + - sizeof(struct mtk_sha_hmac_ctx), - .cra_alignmask = SHA_ALIGN_MSK, - .cra_module = THIS_MODULE, - .cra_init = mtk_sha_cra_sha512_init, - .cra_exit = mtk_sha_cra_exit, - } -}, -}; - -static void mtk_sha_queue_task(unsigned long data) -{ - struct mtk_sha_rec *sha = (struct mtk_sha_rec *)data; - - mtk_sha_handle_queue(sha->cryp, sha->id - MTK_RING2, NULL); -} - -static void mtk_sha_done_task(unsigned long data) -{ - struct mtk_sha_rec *sha = (struct mtk_sha_rec *)data; - struct mtk_cryp *cryp = sha->cryp; - - mtk_sha_unmap(cryp, sha); - mtk_sha_complete(cryp, sha); -} - -static irqreturn_t mtk_sha_irq(int irq, void *dev_id) -{ - struct mtk_sha_rec *sha = (struct mtk_sha_rec *)dev_id; - struct mtk_cryp *cryp = sha->cryp; - u32 val = mtk_sha_read(cryp, RDR_STAT(sha->id)); - - mtk_sha_write(cryp, RDR_STAT(sha->id), val); - - if (likely((SHA_FLAGS_BUSY & sha->flags))) { - mtk_sha_write(cryp, RDR_PROC_COUNT(sha->id), MTK_CNT_RST); - mtk_sha_write(cryp, RDR_THRESH(sha->id), - MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); - - tasklet_schedule(&sha->done_task); - } else { - dev_warn(cryp->dev, "SHA interrupt when no active requests.\n"); - } - return IRQ_HANDLED; -} - -/* - * The purpose of two SHA records is used to get extra performance. - * It is similar to mtk_aes_record_init(). - */ -static int mtk_sha_record_init(struct mtk_cryp *cryp) -{ - struct mtk_sha_rec **sha = cryp->sha; - int i, err = -ENOMEM; - - for (i = 0; i < MTK_REC_NUM; i++) { - sha[i] = kzalloc(sizeof(**sha), GFP_KERNEL); - if (!sha[i]) - goto err_cleanup; - - sha[i]->cryp = cryp; - - spin_lock_init(&sha[i]->lock); - crypto_init_queue(&sha[i]->queue, SHA_QUEUE_SIZE); - - tasklet_init(&sha[i]->queue_task, mtk_sha_queue_task, - (unsigned long)sha[i]); - tasklet_init(&sha[i]->done_task, mtk_sha_done_task, - (unsigned long)sha[i]); - } - - /* Link to ring2 and ring3 respectively */ - sha[0]->id = MTK_RING2; - sha[1]->id = MTK_RING3; - - cryp->rec = 1; - - return 0; - -err_cleanup: - for (; i--; ) - kfree(sha[i]); - return err; -} - -static void mtk_sha_record_free(struct mtk_cryp *cryp) -{ - int i; - - for (i = 0; i < MTK_REC_NUM; i++) { - tasklet_kill(&cryp->sha[i]->done_task); - tasklet_kill(&cryp->sha[i]->queue_task); - - kfree(cryp->sha[i]); - } -} - -static void mtk_sha_unregister_algs(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(algs_sha1_sha224_sha256); i++) - crypto_unregister_ahash(&algs_sha1_sha224_sha256[i]); - - for (i = 0; i < ARRAY_SIZE(algs_sha384_sha512); i++) - crypto_unregister_ahash(&algs_sha384_sha512[i]); -} - -static int mtk_sha_register_algs(void) -{ - int err, i; - - for (i = 0; i < ARRAY_SIZE(algs_sha1_sha224_sha256); i++) { - err = crypto_register_ahash(&algs_sha1_sha224_sha256[i]); - if (err) - goto err_sha_224_256_algs; - } - - for (i = 0; i < ARRAY_SIZE(algs_sha384_sha512); i++) { - err = crypto_register_ahash(&algs_sha384_sha512[i]); - if (err) - goto err_sha_384_512_algs; - } - - return 0; - -err_sha_384_512_algs: - for (; i--; ) - crypto_unregister_ahash(&algs_sha384_sha512[i]); - i = ARRAY_SIZE(algs_sha1_sha224_sha256); -err_sha_224_256_algs: - for (; i--; ) - crypto_unregister_ahash(&algs_sha1_sha224_sha256[i]); - - return err; -} - -int mtk_hash_alg_register(struct mtk_cryp *cryp) -{ - int err; - - INIT_LIST_HEAD(&cryp->sha_list); - - /* Initialize two hash records */ - err = mtk_sha_record_init(cryp); - if (err) - goto err_record; - - err = devm_request_irq(cryp->dev, cryp->irq[MTK_RING2], mtk_sha_irq, - 0, "mtk-sha", cryp->sha[0]); - if (err) { - dev_err(cryp->dev, "unable to request sha irq0.\n"); - goto err_res; - } - - err = devm_request_irq(cryp->dev, cryp->irq[MTK_RING3], mtk_sha_irq, - 0, "mtk-sha", cryp->sha[1]); - if (err) { - dev_err(cryp->dev, "unable to request sha irq1.\n"); - goto err_res; - } - - /* Enable ring2 and ring3 interrupt for hash */ - mtk_sha_write(cryp, AIC_ENABLE_SET(MTK_RING2), MTK_IRQ_RDR2); - mtk_sha_write(cryp, AIC_ENABLE_SET(MTK_RING3), MTK_IRQ_RDR3); - - spin_lock(&mtk_sha.lock); - list_add_tail(&cryp->sha_list, &mtk_sha.dev_list); - spin_unlock(&mtk_sha.lock); - - err = mtk_sha_register_algs(); - if (err) - goto err_algs; - - return 0; - -err_algs: - spin_lock(&mtk_sha.lock); - list_del(&cryp->sha_list); - spin_unlock(&mtk_sha.lock); -err_res: - mtk_sha_record_free(cryp); -err_record: - - dev_err(cryp->dev, "mtk-sha initialization failed.\n"); - return err; -} - -void mtk_hash_alg_release(struct mtk_cryp *cryp) -{ - spin_lock(&mtk_sha.lock); - list_del(&cryp->sha_list); - spin_unlock(&mtk_sha.lock); - - mtk_sha_unregister_algs(); - mtk_sha_record_free(cryp); -} From a417178abc4ae2517231ee67a1291d58929fade1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 7 Dec 2020 17:55:20 +0100 Subject: [PATCH 002/154] MAINTAINERS: crypto: s5p-sss: drop Kamil Konieczny E-mails to Kamil Konieczny to his Samsung address bounce with 550 (User unknown). Kamil no longer takes care about Samsung S5P SSS driver so remove the invalid email address from: - mailmap, - bindings maintainer entries, - maintainers entry for S5P Security Subsystem crypto accelerator. Signed-off-by: Krzysztof Kozlowski Acked-by: Vladimir Zapolskiy Signed-off-by: Herbert Xu --- .mailmap | 1 - Documentation/devicetree/bindings/crypto/samsung-slimsss.yaml | 1 - Documentation/devicetree/bindings/crypto/samsung-sss.yaml | 1 - MAINTAINERS | 1 - 4 files changed, 4 deletions(-) diff --git a/.mailmap b/.mailmap index 632700cee55c..17d50c1f249f 100644 --- a/.mailmap +++ b/.mailmap @@ -174,7 +174,6 @@ Juha Yrjola Juha Yrjola Juha Yrjola Julien Thierry -Kamil Konieczny Kay Sievers Kees Cook Kees Cook diff --git a/Documentation/devicetree/bindings/crypto/samsung-slimsss.yaml b/Documentation/devicetree/bindings/crypto/samsung-slimsss.yaml index 7743eae049ab..676950bb7b37 100644 --- a/Documentation/devicetree/bindings/crypto/samsung-slimsss.yaml +++ b/Documentation/devicetree/bindings/crypto/samsung-slimsss.yaml @@ -8,7 +8,6 @@ title: Samsung Exynos SoC SlimSSS (Slim Security SubSystem) module maintainers: - Krzysztof Kozlowski - - Kamil Konieczny description: |+ The SlimSSS module in Exynos5433 SoC supports the following: diff --git a/Documentation/devicetree/bindings/crypto/samsung-sss.yaml b/Documentation/devicetree/bindings/crypto/samsung-sss.yaml index cf1c47a81d7f..6d62b0e42fc9 100644 --- a/Documentation/devicetree/bindings/crypto/samsung-sss.yaml +++ b/Documentation/devicetree/bindings/crypto/samsung-sss.yaml @@ -8,7 +8,6 @@ title: Samsung Exynos SoC SSS (Security SubSystem) module maintainers: - Krzysztof Kozlowski - - Kamil Konieczny description: |+ The SSS module in S5PV210 SoC supports the following: diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9..aeb3a118842e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15689,7 +15689,6 @@ F: drivers/media/i2c/s5k5baf.c SAMSUNG S5P Security SubSystem (SSS) DRIVER M: Krzysztof Kozlowski M: Vladimir Zapolskiy -M: Kamil Konieczny L: linux-crypto@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained From ddf169a98f01d6fd46295ec0dd4c1d6385be65d4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 8 Dec 2020 00:34:02 +0100 Subject: [PATCH 003/154] crypto: aesni - implement support for cts(cbc(aes)) Follow the same approach as the arm64 driver for implementing a version of AES-NI in CBC mode that supports ciphertext stealing. This results in a ~2x speed increase for relatively short inputs (less than 256 bytes), which is relevant given that AES-CBC with ciphertext stealing is used for filename encryption in the fscrypt layer. For larger inputs, the speedup is still significant (~25% on decryption, ~6% on encryption) Tested-by: Eric Biggers # x86_64 Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 129 +++++++++++++++++++++++++++- arch/x86/crypto/aesni-intel_glue.c | 133 +++++++++++++++++++++++++++++ 2 files changed, 261 insertions(+), 1 deletion(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index d1436c37008b..a2710f76862f 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2577,13 +2577,140 @@ SYM_FUNC_START(aesni_cbc_dec) ret SYM_FUNC_END(aesni_cbc_dec) -#ifdef __x86_64__ +/* + * void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +SYM_FUNC_START(aesni_cts_cbc_enc) + FRAME_BEGIN +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+20)(%esp), KEYP # ctx + movl (FRAME_OFFSET+24)(%esp), OUTP # dst + movl (FRAME_OFFSET+28)(%esp), INP # src + movl (FRAME_OFFSET+32)(%esp), LEN # len + movl (FRAME_OFFSET+36)(%esp), IVP # iv + lea .Lcts_permute_table, T1 +#else + lea .Lcts_permute_table(%rip), T1 +#endif + mov 480(KEYP), KLEN + movups (IVP), STATE + sub $16, LEN + mov T1, IVP + add $32, IVP + add LEN, T1 + sub LEN, IVP + movups (T1), %xmm4 + movups (IVP), %xmm5 + + movups (INP), IN1 + add LEN, INP + movups (INP), IN2 + + pxor IN1, STATE + call _aesni_enc1 + + pshufb %xmm5, IN2 + pxor STATE, IN2 + pshufb %xmm4, STATE + add OUTP, LEN + movups STATE, (LEN) + + movaps IN2, STATE + call _aesni_enc1 + movups STATE, (OUTP) + +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif + FRAME_END + ret +SYM_FUNC_END(aesni_cts_cbc_enc) + +/* + * void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +SYM_FUNC_START(aesni_cts_cbc_dec) + FRAME_BEGIN +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+20)(%esp), KEYP # ctx + movl (FRAME_OFFSET+24)(%esp), OUTP # dst + movl (FRAME_OFFSET+28)(%esp), INP # src + movl (FRAME_OFFSET+32)(%esp), LEN # len + movl (FRAME_OFFSET+36)(%esp), IVP # iv + lea .Lcts_permute_table, T1 +#else + lea .Lcts_permute_table(%rip), T1 +#endif + mov 480(KEYP), KLEN + add $240, KEYP + movups (IVP), IV + sub $16, LEN + mov T1, IVP + add $32, IVP + add LEN, T1 + sub LEN, IVP + movups (T1), %xmm4 + + movups (INP), STATE + add LEN, INP + movups (INP), IN1 + + call _aesni_dec1 + movaps STATE, IN2 + pshufb %xmm4, STATE + pxor IN1, STATE + + add OUTP, LEN + movups STATE, (LEN) + + movups (IVP), %xmm0 + pshufb %xmm0, IN1 + pblendvb IN2, IN1 + movaps IN1, STATE + call _aesni_dec1 + + pxor IV, STATE + movups STATE, (OUTP) + +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif + FRAME_END + ret +SYM_FUNC_END(aesni_cts_cbc_dec) + .pushsection .rodata .align 16 +.Lcts_permute_table: + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 +#ifdef __x86_64__ .Lbswap_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +#endif .popsection +#ifdef __x86_64__ /* * _aesni_inc_init: internal ABI * setup registers used by _aesni_inc diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index ad8a7188a2bf..96bdc1584215 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -93,6 +93,10 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); #define AVX_GEN2_OPTSIZE 640 #define AVX_GEN4_OPTSIZE 4096 @@ -454,6 +458,118 @@ static int cbc_decrypt(struct skcipher_request *req) return err; } +static int cts_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; + struct scatterlist *src = req->src, *dst = req->dst; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct skcipher_walk walk; + int err; + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, skcipher_request_flags(req), + NULL, NULL); + + if (req->cryptlen <= AES_BLOCK_SIZE) { + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + cbc_blocks = 1; + } + + if (cbc_blocks > 0) { + skcipher_request_set_crypt(&subreq, req->src, req->dst, + cbc_blocks * AES_BLOCK_SIZE, + req->iv); + + err = cbc_encrypt(&subreq); + if (err) + return err; + + if (req->cryptlen == AES_BLOCK_SIZE) + return 0; + + dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, + subreq.cryptlen); + } + + /* handle ciphertext stealing */ + skcipher_request_set_crypt(&subreq, src, dst, + req->cryptlen - cbc_blocks * AES_BLOCK_SIZE, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_fpu_begin(); + aesni_cts_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes, walk.iv); + kernel_fpu_end(); + + return skcipher_walk_done(&walk, 0); +} + +static int cts_cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; + struct scatterlist *src = req->src, *dst = req->dst; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct skcipher_walk walk; + int err; + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, skcipher_request_flags(req), + NULL, NULL); + + if (req->cryptlen <= AES_BLOCK_SIZE) { + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + cbc_blocks = 1; + } + + if (cbc_blocks > 0) { + skcipher_request_set_crypt(&subreq, req->src, req->dst, + cbc_blocks * AES_BLOCK_SIZE, + req->iv); + + err = cbc_decrypt(&subreq); + if (err) + return err; + + if (req->cryptlen == AES_BLOCK_SIZE) + return 0; + + dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, + subreq.cryptlen); + } + + /* handle ciphertext stealing */ + skcipher_request_set_crypt(&subreq, src, dst, + req->cryptlen - cbc_blocks * AES_BLOCK_SIZE, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_fpu_begin(); + aesni_cts_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes, walk.iv); + kernel_fpu_end(); + + return skcipher_walk_done(&walk, 0); +} + #ifdef CONFIG_X86_64 static void ctr_crypt_final(struct crypto_aes_ctx *ctx, struct skcipher_walk *walk) @@ -928,6 +1044,23 @@ static struct skcipher_alg aesni_skciphers[] = { .setkey = aesni_skcipher_setkey, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, + }, { + .base = { + .cra_name = "__cts(cbc(aes))", + .cra_driver_name = "__cts-cbc-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .walksize = 2 * AES_BLOCK_SIZE, + .setkey = aesni_skcipher_setkey, + .encrypt = cts_cbc_encrypt, + .decrypt = cts_cbc_decrypt, #ifdef CONFIG_X86_64 }, { .base = { From 303fd3e1c771077e32e96e5788817f025f0067e2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 8 Dec 2020 15:34:41 +0100 Subject: [PATCH 004/154] crypto: tcrypt - avoid signed overflow in byte count The signed long type used for printing the number of bytes processed in tcrypt benchmarks limits the range to -/+ 2 GiB, which is not sufficient to cover the performance of common accelerated ciphers such as AES-NI when benchmarked with sec=1. So switch to u64 instead. While at it, fix up a missing printk->pr_cont conversion in the AEAD benchmark. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index a647bb298fbc..a4a11d2b57bd 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -199,8 +199,8 @@ static int test_mb_aead_jiffies(struct test_mb_aead_data *data, int enc, goto out; } - pr_cont("%d operations in %d seconds (%ld bytes)\n", - bcount * num_mb, secs, (long)bcount * blen * num_mb); + pr_cont("%d operations in %d seconds (%llu bytes)\n", + bcount * num_mb, secs, (u64)bcount * blen * num_mb); out: kfree(rc); @@ -471,8 +471,8 @@ static int test_aead_jiffies(struct aead_request *req, int enc, return ret; } - printk("%d operations in %d seconds (%ld bytes)\n", - bcount, secs, (long)bcount * blen); + pr_cont("%d operations in %d seconds (%llu bytes)\n", + bcount, secs, (u64)bcount * blen); return 0; } @@ -764,8 +764,8 @@ static int test_mb_ahash_jiffies(struct test_mb_ahash_data *data, int blen, goto out; } - pr_cont("%d operations in %d seconds (%ld bytes)\n", - bcount * num_mb, secs, (long)bcount * blen * num_mb); + pr_cont("%d operations in %d seconds (%llu bytes)\n", + bcount * num_mb, secs, (u64)bcount * blen * num_mb); out: kfree(rc); @@ -1201,8 +1201,8 @@ static int test_mb_acipher_jiffies(struct test_mb_skcipher_data *data, int enc, goto out; } - pr_cont("%d operations in %d seconds (%ld bytes)\n", - bcount * num_mb, secs, (long)bcount * blen * num_mb); + pr_cont("%d operations in %d seconds (%llu bytes)\n", + bcount * num_mb, secs, (u64)bcount * blen * num_mb); out: kfree(rc); @@ -1441,8 +1441,8 @@ static int test_acipher_jiffies(struct skcipher_request *req, int enc, return ret; } - pr_cont("%d operations in %d seconds (%ld bytes)\n", - bcount, secs, (long)bcount * blen); + pr_cont("%d operations in %d seconds (%llu bytes)\n", + bcount, secs, (u64)bcount * blen); return 0; } From c4dc99e14c58f257e96c81da16404f8285c3d42f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 9 Dec 2020 18:50:14 -0300 Subject: [PATCH 005/154] crypto: sahara - Remove unused .id_table support Since 5.10-rc1 i.MX is a devicetree-only platform and the existing .id_table support in this driver was only useful for old non-devicetree platforms. Remove the unused .id_table support. Signed-off-by: Fabio Estevam Signed-off-by: Herbert Xu --- drivers/crypto/sahara.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 8b5be29cb4dc..457084b344c1 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -1350,12 +1350,6 @@ static void sahara_unregister_algs(struct sahara_dev *dev) crypto_unregister_ahash(&sha_v4_algs[i]); } -static const struct platform_device_id sahara_platform_ids[] = { - { .name = "sahara-imx27" }, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(platform, sahara_platform_ids); - static const struct of_device_id sahara_dt_ids[] = { { .compatible = "fsl,imx53-sahara" }, { .compatible = "fsl,imx27-sahara" }, @@ -1540,7 +1534,6 @@ static struct platform_driver sahara_driver = { .name = SAHARA_NAME, .of_match_table = sahara_dt_ids, }, - .id_table = sahara_platform_ids, }; module_platform_driver(sahara_driver); From bbfd06c7c85ec6dfae4a77b27495db8b8bcdfc8c Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Fri, 11 Dec 2020 09:42:47 +0800 Subject: [PATCH 006/154] crypto: ccree - remove unused including Remove including that don't need it. Signed-off-by: Tian Tao Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index 5f1d4602eb8f..f49579aa1452 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include From a3b01ffddc210a836eda8aa751cfa911a2817a85 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 11 Dec 2020 13:27:14 +0100 Subject: [PATCH 007/154] chcr_ktls: use AES library for single use cipher Allocating a cipher via the crypto API only to free it again after using it to encrypt a single block is unnecessary in cases where the algorithm is known at compile time. So replace this pattern with a call to the AES library. Cc: Ayush Sawal Cc: Vinay Kumar Yadav Cc: Rohit Maheshwari Signed-off-by: Ard Biesheuvel Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- .../ethernet/chelsio/inline_crypto/Kconfig | 1 + .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 19 +++++++------------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/Kconfig b/drivers/net/ethernet/chelsio/inline_crypto/Kconfig index bc06e83fd3c6..521955e1f894 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/Kconfig +++ b/drivers/net/ethernet/chelsio/inline_crypto/Kconfig @@ -42,6 +42,7 @@ config CHELSIO_TLS_DEVICE depends on CHELSIO_T4 depends on TLS depends on TLS_DEVICE + select CRYPTO_LIB_AES help This flag enables support for kernel tls offload over Chelsio T6 crypto accelerator. CONFIG_CHELSIO_TLS_DEVICE flag can be enabled diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 1b7e8c91b541..46a809f2aeca 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "chcr_ktls.h" static LIST_HEAD(uld_ctx_list); @@ -74,7 +75,7 @@ static int chcr_ktls_save_keys(struct chcr_ktls_info *tx_info, unsigned char ghash_h[TLS_CIPHER_AES_GCM_256_TAG_SIZE]; struct tls12_crypto_info_aes_gcm_128 *info_128_gcm; struct ktls_key_ctx *kctx = &tx_info->key_ctx; - struct crypto_cipher *cipher; + struct crypto_aes_ctx aes_ctx; unsigned char *key, *salt; switch (crypto_info->cipher_type) { @@ -135,18 +136,14 @@ static int chcr_ktls_save_keys(struct chcr_ktls_info *tx_info, /* Calculate the H = CIPH(K, 0 repeated 16 times). * It will go in key context */ - cipher = crypto_alloc_cipher("aes", 0, 0); - if (IS_ERR(cipher)) { - ret = -ENOMEM; - goto out; - } - ret = crypto_cipher_setkey(cipher, key, keylen); + ret = aes_expandkey(&aes_ctx, key, keylen); if (ret) - goto out1; + goto out; memset(ghash_h, 0, ghash_size); - crypto_cipher_encrypt_one(cipher, ghash_h, ghash_h); + aes_encrypt(&aes_ctx, ghash_h, ghash_h); + memzero_explicit(&aes_ctx, sizeof(aes_ctx)); /* fill the Key context */ if (direction == TLS_OFFLOAD_CTX_DIR_TX) { @@ -155,7 +152,7 @@ static int chcr_ktls_save_keys(struct chcr_ktls_info *tx_info, key_ctx_size >> 4); } else { ret = -EINVAL; - goto out1; + goto out; } memcpy(kctx->salt, salt, tx_info->salt_size); @@ -163,8 +160,6 @@ static int chcr_ktls_save_keys(struct chcr_ktls_info *tx_info, memcpy(kctx->key + keylen, ghash_h, ghash_size); tx_info->key_ctx_len = key_ctx_size; -out1: - crypto_free_cipher(cipher); out: return ret; } From 0eb76ba29d16df2951d37c54ca279c4e5630b071 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 11 Dec 2020 13:27:15 +0100 Subject: [PATCH 008/154] crypto: remove cipher routines from public crypto API The cipher routines in the crypto API are mostly intended for templates implementing skcipher modes generically in software, and shouldn't be used outside of the crypto subsystem. So move the prototypes and all related definitions to a new header file under include/crypto/internal. Also, let's use the new module namespace feature to move the symbol exports into a new namespace CRYPTO_INTERNAL. Signed-off-by: Ard Biesheuvel Acked-by: Eric Biggers Signed-off-by: Herbert Xu --- Documentation/crypto/api-skcipher.rst | 4 +- arch/arm/crypto/aes-neonbs-glue.c | 3 + arch/s390/crypto/aes_s390.c | 2 + crypto/adiantum.c | 2 + crypto/ansi_cprng.c | 2 + crypto/cbc.c | 1 + crypto/ccm.c | 2 + crypto/cfb.c | 2 + crypto/cipher.c | 7 +- crypto/cmac.c | 2 + crypto/ctr.c | 2 + crypto/drbg.c | 2 + crypto/ecb.c | 1 + crypto/essiv.c | 2 + crypto/keywrap.c | 2 + crypto/ofb.c | 2 + crypto/pcbc.c | 2 + crypto/skcipher.c | 2 + crypto/testmgr.c | 3 + crypto/vmac.c | 2 + crypto/xcbc.c | 2 + crypto/xts.c | 2 + drivers/crypto/geode-aes.c | 2 + drivers/crypto/inside-secure/safexcel.c | 1 + drivers/crypto/inside-secure/safexcel_hash.c | 1 + drivers/crypto/qat/qat_common/adf_ctl_drv.c | 1 + drivers/crypto/qat/qat_common/qat_algs.c | 1 + drivers/crypto/vmx/aes.c | 1 + drivers/crypto/vmx/vmx.c | 1 + include/crypto/algapi.h | 39 ---- include/crypto/internal/cipher.h | 218 +++++++++++++++++++ include/crypto/internal/skcipher.h | 1 + include/linux/crypto.h | 163 -------------- 33 files changed, 273 insertions(+), 207 deletions(-) create mode 100644 include/crypto/internal/cipher.h diff --git a/Documentation/crypto/api-skcipher.rst b/Documentation/crypto/api-skcipher.rst index 1aaf8985894b..04d6cc5357c8 100644 --- a/Documentation/crypto/api-skcipher.rst +++ b/Documentation/crypto/api-skcipher.rst @@ -28,8 +28,8 @@ Symmetric Key Cipher Request Handle Single Block Cipher API ----------------------- -.. kernel-doc:: include/linux/crypto.h +.. kernel-doc:: include/crypto/internal/cipher.h :doc: Single Block Cipher API -.. kernel-doc:: include/linux/crypto.h +.. kernel-doc:: include/crypto/internal/cipher.h :functions: crypto_alloc_cipher crypto_free_cipher crypto_has_cipher crypto_cipher_blocksize crypto_cipher_setkey crypto_cipher_encrypt_one crypto_cipher_decrypt_one diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index f70af1d0514b..5c6cd3c63cbc 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,8 @@ MODULE_ALIAS_CRYPTO("cbc(aes)-all"); MODULE_ALIAS_CRYPTO("ctr(aes)"); MODULE_ALIAS_CRYPTO("xts(aes)"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); + asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds); asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 73044634d342..54c7536f2482 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -1055,3 +1056,4 @@ MODULE_ALIAS_CRYPTO("aes-all"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/adiantum.c b/crypto/adiantum.c index ce4d5725342c..84450130cb6b 100644 --- a/crypto/adiantum.c +++ b/crypto/adiantum.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -616,3 +617,4 @@ MODULE_DESCRIPTION("Adiantum length-preserving encryption mode"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Eric Biggers "); MODULE_ALIAS_CRYPTO("adiantum"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c index c475c1129ff2..3f512efaba3a 100644 --- a/crypto/ansi_cprng.c +++ b/crypto/ansi_cprng.c @@ -7,6 +7,7 @@ * (C) Neil Horman */ +#include #include #include #include @@ -470,3 +471,4 @@ subsys_initcall(prng_mod_init); module_exit(prng_mod_fini); MODULE_ALIAS_CRYPTO("stdrng"); MODULE_ALIAS_CRYPTO("ansi_cprng"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/cbc.c b/crypto/cbc.c index 0d9509dff891..6c03e96b945f 100644 --- a/crypto/cbc.c +++ b/crypto/cbc.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include diff --git a/crypto/ccm.c b/crypto/ccm.c index 494d70901186..6b815ece51c6 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -954,3 +955,4 @@ MODULE_ALIAS_CRYPTO("ccm_base"); MODULE_ALIAS_CRYPTO("rfc4309"); MODULE_ALIAS_CRYPTO("ccm"); MODULE_ALIAS_CRYPTO("cbcmac"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/cfb.c b/crypto/cfb.c index 4e5219bbcd19..0d664dfb47bc 100644 --- a/crypto/cfb.c +++ b/crypto/cfb.c @@ -20,6 +20,7 @@ */ #include +#include #include #include #include @@ -250,3 +251,4 @@ module_exit(crypto_cfb_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CFB block cipher mode of operation"); MODULE_ALIAS_CRYPTO("cfb"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/cipher.c b/crypto/cipher.c index fd78150deb1c..b47141ed4a9f 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -53,7 +54,7 @@ int crypto_cipher_setkey(struct crypto_cipher *tfm, return cia->cia_setkey(crypto_cipher_tfm(tfm), key, keylen); } -EXPORT_SYMBOL_GPL(crypto_cipher_setkey); +EXPORT_SYMBOL_NS_GPL(crypto_cipher_setkey, CRYPTO_INTERNAL); static inline void cipher_crypt_one(struct crypto_cipher *tfm, u8 *dst, const u8 *src, bool enc) @@ -81,11 +82,11 @@ void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, { cipher_crypt_one(tfm, dst, src, true); } -EXPORT_SYMBOL_GPL(crypto_cipher_encrypt_one); +EXPORT_SYMBOL_NS_GPL(crypto_cipher_encrypt_one, CRYPTO_INTERNAL); void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, u8 *dst, const u8 *src) { cipher_crypt_one(tfm, dst, src, false); } -EXPORT_SYMBOL_GPL(crypto_cipher_decrypt_one); +EXPORT_SYMBOL_NS_GPL(crypto_cipher_decrypt_one, CRYPTO_INTERNAL); diff --git a/crypto/cmac.c b/crypto/cmac.c index df36be1efb81..f4a5d3bfb376 100644 --- a/crypto/cmac.c +++ b/crypto/cmac.c @@ -11,6 +11,7 @@ * Author: Kazunori Miyazawa */ +#include #include #include #include @@ -313,3 +314,4 @@ module_exit(crypto_cmac_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CMAC keyed hash algorithm"); MODULE_ALIAS_CRYPTO("cmac"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/ctr.c b/crypto/ctr.c index c39fcffba27f..23c698b22013 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -358,3 +359,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CTR block cipher mode of operation"); MODULE_ALIAS_CRYPTO("rfc3686"); MODULE_ALIAS_CRYPTO("ctr"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/drbg.c b/crypto/drbg.c index 3132967a1749..1b4587e0ddad 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -98,6 +98,7 @@ */ #include +#include #include /*************************************************************** @@ -2161,3 +2162,4 @@ MODULE_DESCRIPTION("NIST SP800-90A Deterministic Random Bit Generator (DRBG) " CRYPTO_DRBG_HMAC_STRING CRYPTO_DRBG_CTR_STRING); MODULE_ALIAS_CRYPTO("stdrng"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/ecb.c b/crypto/ecb.c index 69a687cbdf21..71fbb0543d64 100644 --- a/crypto/ecb.c +++ b/crypto/ecb.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include diff --git a/crypto/essiv.c b/crypto/essiv.c index d012be23d496..8bcc5bdcb2a9 100644 --- a/crypto/essiv.c +++ b/crypto/essiv.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -643,3 +644,4 @@ module_exit(essiv_module_exit); MODULE_DESCRIPTION("ESSIV skcipher/aead wrapper for block encryption"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("essiv"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/keywrap.c b/crypto/keywrap.c index 0355cce21b1e..3517773bc7f7 100644 --- a/crypto/keywrap.c +++ b/crypto/keywrap.c @@ -85,6 +85,7 @@ #include #include #include +#include #include struct crypto_kw_block { @@ -316,3 +317,4 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Stephan Mueller "); MODULE_DESCRIPTION("Key Wrapping (RFC3394 / NIST SP800-38F)"); MODULE_ALIAS_CRYPTO("kw"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/ofb.c b/crypto/ofb.c index 2ec68e3f2c55..b630fdecceee 100644 --- a/crypto/ofb.c +++ b/crypto/ofb.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -102,3 +103,4 @@ module_exit(crypto_ofb_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("OFB block cipher mode of operation"); MODULE_ALIAS_CRYPTO("ofb"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/pcbc.c b/crypto/pcbc.c index ae921fb74dc9..7030f59e46b6 100644 --- a/crypto/pcbc.c +++ b/crypto/pcbc.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -191,3 +192,4 @@ module_exit(crypto_pcbc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("PCBC block cipher mode of operation"); MODULE_ALIAS_CRYPTO("pcbc"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/skcipher.c b/crypto/skcipher.c index b4dae640de9f..ff16d05644c7 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -986,3 +987,4 @@ EXPORT_SYMBOL_GPL(skcipher_alloc_instance_simple); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Symmetric key cipher type"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 321e38eef51b..a896d77e9611 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -33,10 +33,13 @@ #include #include #include +#include #include #include "internal.h" +MODULE_IMPORT_NS(CRYPTO_INTERNAL); + static bool notests; module_param(notests, bool, 0644); MODULE_PARM_DESC(notests, "disable crypto self-tests"); diff --git a/crypto/vmac.c b/crypto/vmac.c index 9b565d1040d6..4633b2dda1e0 100644 --- a/crypto/vmac.c +++ b/crypto/vmac.c @@ -36,6 +36,7 @@ #include #include #include +#include #include /* @@ -693,3 +694,4 @@ module_exit(vmac_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("VMAC hash algorithm"); MODULE_ALIAS_CRYPTO("vmac64"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/xcbc.c b/crypto/xcbc.c index af3b7eb5d7c7..6074c5c1da49 100644 --- a/crypto/xcbc.c +++ b/crypto/xcbc.c @@ -6,6 +6,7 @@ * Kazunori Miyazawa */ +#include #include #include #include @@ -272,3 +273,4 @@ module_exit(crypto_xcbc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("XCBC keyed hash algorithm"); MODULE_ALIAS_CRYPTO("xcbc"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/crypto/xts.c b/crypto/xts.c index ad45b009774b..6c12f30dbdd6 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -7,6 +7,7 @@ * Based on ecb.c * Copyright (c) 2006 Herbert Xu */ +#include #include #include #include @@ -464,3 +465,4 @@ module_exit(xts_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("XTS block cipher mode"); MODULE_ALIAS_CRYPTO("xts"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c index f4f18bfc2247..4ee010f39912 100644 --- a/drivers/crypto/geode-aes.c +++ b/drivers/crypto/geode-aes.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -434,3 +435,4 @@ module_pci_driver(geode_aes_driver); MODULE_AUTHOR("Advanced Micro Devices, Inc."); MODULE_DESCRIPTION("Geode LX Hardware AES driver"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index 2e1562108a85..30aedfcfee7c 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -1999,3 +1999,4 @@ MODULE_AUTHOR("Ofer Heifetz "); MODULE_AUTHOR("Igal Liberman "); MODULE_DESCRIPTION("Support for SafeXcel cryptographic engines: EIP97 & EIP197"); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 50fb6d90a2e0..bc60b5802256 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c index eb9b3be9d8eb..96b437bfe3de 100644 --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c @@ -464,3 +464,4 @@ MODULE_AUTHOR("Intel"); MODULE_DESCRIPTION("Intel(R) QuickAssist Technology"); MODULE_ALIAS_CRYPTO("intel_qat"); MODULE_VERSION(ADF_DRV_VERSION); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 31c7a206a629..ff78c73c47e3 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c index 2bc5d4e1adf4..d05c02baebcf 100644 --- a/drivers/crypto/vmx/aes.c +++ b/drivers/crypto/vmx/aes.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "aesp8-ppc.h" diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c index 3e0335fb406c..87a194455d6a 100644 --- a/drivers/crypto/vmx/vmx.c +++ b/drivers/crypto/vmx/vmx.c @@ -78,3 +78,4 @@ MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions " "support on Power 8"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0.0"); +MODULE_IMPORT_NS(CRYPTO_INTERNAL); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 18dd7a4aaf7d..86f0748009af 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -189,45 +189,6 @@ static inline void *crypto_instance_ctx(struct crypto_instance *inst) return inst->__ctx; } -struct crypto_cipher_spawn { - struct crypto_spawn base; -}; - -static inline int crypto_grab_cipher(struct crypto_cipher_spawn *spawn, - struct crypto_instance *inst, - const char *name, u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_CIPHER; - mask |= CRYPTO_ALG_TYPE_MASK; - return crypto_grab_spawn(&spawn->base, inst, name, type, mask); -} - -static inline void crypto_drop_cipher(struct crypto_cipher_spawn *spawn) -{ - crypto_drop_spawn(&spawn->base); -} - -static inline struct crypto_alg *crypto_spawn_cipher_alg( - struct crypto_cipher_spawn *spawn) -{ - return spawn->base.alg; -} - -static inline struct crypto_cipher *crypto_spawn_cipher( - struct crypto_cipher_spawn *spawn) -{ - u32 type = CRYPTO_ALG_TYPE_CIPHER; - u32 mask = CRYPTO_ALG_TYPE_MASK; - - return __crypto_cipher_cast(crypto_spawn_tfm(&spawn->base, type, mask)); -} - -static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm) -{ - return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher; -} - static inline struct crypto_async_request *crypto_get_backlog( struct crypto_queue *queue) { diff --git a/include/crypto/internal/cipher.h b/include/crypto/internal/cipher.h new file mode 100644 index 000000000000..a9174ba90250 --- /dev/null +++ b/include/crypto/internal/cipher.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2002 James Morris + * Copyright (c) 2002 David S. Miller (davem@redhat.com) + * Copyright (c) 2005 Herbert Xu + * + * Portions derived from Cryptoapi, by Alexander Kjeldaas + * and Nettle, by Niels Möller. + */ + +#ifndef _CRYPTO_INTERNAL_CIPHER_H +#define _CRYPTO_INTERNAL_CIPHER_H + +#include + +struct crypto_cipher { + struct crypto_tfm base; +}; + +/** + * DOC: Single Block Cipher API + * + * The single block cipher API is used with the ciphers of type + * CRYPTO_ALG_TYPE_CIPHER (listed as type "cipher" in /proc/crypto). + * + * Using the single block cipher API calls, operations with the basic cipher + * primitive can be implemented. These cipher primitives exclude any block + * chaining operations including IV handling. + * + * The purpose of this single block cipher API is to support the implementation + * of templates or other concepts that only need to perform the cipher operation + * on one block at a time. Templates invoke the underlying cipher primitive + * block-wise and process either the input or the output data of these cipher + * operations. + */ + +static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm) +{ + return (struct crypto_cipher *)tfm; +} + +/** + * crypto_alloc_cipher() - allocate single block cipher handle + * @alg_name: is the cra_name / name or cra_driver_name / driver name of the + * single block cipher + * @type: specifies the type of the cipher + * @mask: specifies the mask for the cipher + * + * Allocate a cipher handle for a single block cipher. The returned struct + * crypto_cipher is the cipher handle that is required for any subsequent API + * invocation for that single block cipher. + * + * Return: allocated cipher handle in case of success; IS_ERR() is true in case + * of an error, PTR_ERR() returns the error code. + */ +static inline struct crypto_cipher *crypto_alloc_cipher(const char *alg_name, + u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_CIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return __crypto_cipher_cast(crypto_alloc_base(alg_name, type, mask)); +} + +static inline struct crypto_tfm *crypto_cipher_tfm(struct crypto_cipher *tfm) +{ + return &tfm->base; +} + +/** + * crypto_free_cipher() - zeroize and free the single block cipher handle + * @tfm: cipher handle to be freed + */ +static inline void crypto_free_cipher(struct crypto_cipher *tfm) +{ + crypto_free_tfm(crypto_cipher_tfm(tfm)); +} + +/** + * crypto_has_cipher() - Search for the availability of a single block cipher + * @alg_name: is the cra_name / name or cra_driver_name / driver name of the + * single block cipher + * @type: specifies the type of the cipher + * @mask: specifies the mask for the cipher + * + * Return: true when the single block cipher is known to the kernel crypto API; + * false otherwise + */ +static inline int crypto_has_cipher(const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_CIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return crypto_has_alg(alg_name, type, mask); +} + +/** + * crypto_cipher_blocksize() - obtain block size for cipher + * @tfm: cipher handle + * + * The block size for the single block cipher referenced with the cipher handle + * tfm is returned. The caller may use that information to allocate appropriate + * memory for the data returned by the encryption or decryption operation + * + * Return: block size of cipher + */ +static inline unsigned int crypto_cipher_blocksize(struct crypto_cipher *tfm) +{ + return crypto_tfm_alg_blocksize(crypto_cipher_tfm(tfm)); +} + +static inline unsigned int crypto_cipher_alignmask(struct crypto_cipher *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_cipher_tfm(tfm)); +} + +static inline u32 crypto_cipher_get_flags(struct crypto_cipher *tfm) +{ + return crypto_tfm_get_flags(crypto_cipher_tfm(tfm)); +} + +static inline void crypto_cipher_set_flags(struct crypto_cipher *tfm, + u32 flags) +{ + crypto_tfm_set_flags(crypto_cipher_tfm(tfm), flags); +} + +static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm, + u32 flags) +{ + crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags); +} + +/** + * crypto_cipher_setkey() - set key for cipher + * @tfm: cipher handle + * @key: buffer holding the key + * @keylen: length of the key in bytes + * + * The caller provided key is set for the single block cipher referenced by the + * cipher handle. + * + * Note, the key length determines the cipher type. Many block ciphers implement + * different cipher modes depending on the key size, such as AES-128 vs AES-192 + * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 + * is performed. + * + * Return: 0 if the setting of the key was successful; < 0 if an error occurred + */ +int crypto_cipher_setkey(struct crypto_cipher *tfm, + const u8 *key, unsigned int keylen); + +/** + * crypto_cipher_encrypt_one() - encrypt one block of plaintext + * @tfm: cipher handle + * @dst: points to the buffer that will be filled with the ciphertext + * @src: buffer holding the plaintext to be encrypted + * + * Invoke the encryption operation of one block. The caller must ensure that + * the plaintext and ciphertext buffers are at least one block in size. + */ +void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, + u8 *dst, const u8 *src); + +/** + * crypto_cipher_decrypt_one() - decrypt one block of ciphertext + * @tfm: cipher handle + * @dst: points to the buffer that will be filled with the plaintext + * @src: buffer holding the ciphertext to be decrypted + * + * Invoke the decryption operation of one block. The caller must ensure that + * the plaintext and ciphertext buffers are at least one block in size. + */ +void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, + u8 *dst, const u8 *src); + +struct crypto_cipher_spawn { + struct crypto_spawn base; +}; + +static inline int crypto_grab_cipher(struct crypto_cipher_spawn *spawn, + struct crypto_instance *inst, + const char *name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_CIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + return crypto_grab_spawn(&spawn->base, inst, name, type, mask); +} + +static inline void crypto_drop_cipher(struct crypto_cipher_spawn *spawn) +{ + crypto_drop_spawn(&spawn->base); +} + +static inline struct crypto_alg *crypto_spawn_cipher_alg( + struct crypto_cipher_spawn *spawn) +{ + return spawn->base.alg; +} + +static inline struct crypto_cipher *crypto_spawn_cipher( + struct crypto_cipher_spawn *spawn) +{ + u32 type = CRYPTO_ALG_TYPE_CIPHER; + u32 mask = CRYPTO_ALG_TYPE_MASK; + + return __crypto_cipher_cast(crypto_spawn_tfm(&spawn->base, type, mask)); +} + +static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm) +{ + return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher; +} + +#endif diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index 10226c12c5df..9dd6c0c17eb8 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -9,6 +9,7 @@ #define _CRYPTO_INTERNAL_SKCIPHER_H #include +#include #include #include #include diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ef90e07c9635..9b55cd6b1f1b 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -636,10 +636,6 @@ struct crypto_tfm { void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; }; -struct crypto_cipher { - struct crypto_tfm base; -}; - struct crypto_comp { struct crypto_tfm base; }; @@ -743,165 +739,6 @@ static inline unsigned int crypto_tfm_ctx_alignment(void) return __alignof__(tfm->__crt_ctx); } -/** - * DOC: Single Block Cipher API - * - * The single block cipher API is used with the ciphers of type - * CRYPTO_ALG_TYPE_CIPHER (listed as type "cipher" in /proc/crypto). - * - * Using the single block cipher API calls, operations with the basic cipher - * primitive can be implemented. These cipher primitives exclude any block - * chaining operations including IV handling. - * - * The purpose of this single block cipher API is to support the implementation - * of templates or other concepts that only need to perform the cipher operation - * on one block at a time. Templates invoke the underlying cipher primitive - * block-wise and process either the input or the output data of these cipher - * operations. - */ - -static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm) -{ - return (struct crypto_cipher *)tfm; -} - -/** - * crypto_alloc_cipher() - allocate single block cipher handle - * @alg_name: is the cra_name / name or cra_driver_name / driver name of the - * single block cipher - * @type: specifies the type of the cipher - * @mask: specifies the mask for the cipher - * - * Allocate a cipher handle for a single block cipher. The returned struct - * crypto_cipher is the cipher handle that is required for any subsequent API - * invocation for that single block cipher. - * - * Return: allocated cipher handle in case of success; IS_ERR() is true in case - * of an error, PTR_ERR() returns the error code. - */ -static inline struct crypto_cipher *crypto_alloc_cipher(const char *alg_name, - u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_CIPHER; - mask |= CRYPTO_ALG_TYPE_MASK; - - return __crypto_cipher_cast(crypto_alloc_base(alg_name, type, mask)); -} - -static inline struct crypto_tfm *crypto_cipher_tfm(struct crypto_cipher *tfm) -{ - return &tfm->base; -} - -/** - * crypto_free_cipher() - zeroize and free the single block cipher handle - * @tfm: cipher handle to be freed - */ -static inline void crypto_free_cipher(struct crypto_cipher *tfm) -{ - crypto_free_tfm(crypto_cipher_tfm(tfm)); -} - -/** - * crypto_has_cipher() - Search for the availability of a single block cipher - * @alg_name: is the cra_name / name or cra_driver_name / driver name of the - * single block cipher - * @type: specifies the type of the cipher - * @mask: specifies the mask for the cipher - * - * Return: true when the single block cipher is known to the kernel crypto API; - * false otherwise - */ -static inline int crypto_has_cipher(const char *alg_name, u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_CIPHER; - mask |= CRYPTO_ALG_TYPE_MASK; - - return crypto_has_alg(alg_name, type, mask); -} - -/** - * crypto_cipher_blocksize() - obtain block size for cipher - * @tfm: cipher handle - * - * The block size for the single block cipher referenced with the cipher handle - * tfm is returned. The caller may use that information to allocate appropriate - * memory for the data returned by the encryption or decryption operation - * - * Return: block size of cipher - */ -static inline unsigned int crypto_cipher_blocksize(struct crypto_cipher *tfm) -{ - return crypto_tfm_alg_blocksize(crypto_cipher_tfm(tfm)); -} - -static inline unsigned int crypto_cipher_alignmask(struct crypto_cipher *tfm) -{ - return crypto_tfm_alg_alignmask(crypto_cipher_tfm(tfm)); -} - -static inline u32 crypto_cipher_get_flags(struct crypto_cipher *tfm) -{ - return crypto_tfm_get_flags(crypto_cipher_tfm(tfm)); -} - -static inline void crypto_cipher_set_flags(struct crypto_cipher *tfm, - u32 flags) -{ - crypto_tfm_set_flags(crypto_cipher_tfm(tfm), flags); -} - -static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm, - u32 flags) -{ - crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags); -} - -/** - * crypto_cipher_setkey() - set key for cipher - * @tfm: cipher handle - * @key: buffer holding the key - * @keylen: length of the key in bytes - * - * The caller provided key is set for the single block cipher referenced by the - * cipher handle. - * - * Note, the key length determines the cipher type. Many block ciphers implement - * different cipher modes depending on the key size, such as AES-128 vs AES-192 - * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 - * is performed. - * - * Return: 0 if the setting of the key was successful; < 0 if an error occurred - */ -int crypto_cipher_setkey(struct crypto_cipher *tfm, - const u8 *key, unsigned int keylen); - -/** - * crypto_cipher_encrypt_one() - encrypt one block of plaintext - * @tfm: cipher handle - * @dst: points to the buffer that will be filled with the ciphertext - * @src: buffer holding the plaintext to be encrypted - * - * Invoke the encryption operation of one block. The caller must ensure that - * the plaintext and ciphertext buffers are at least one block in size. - */ -void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, - u8 *dst, const u8 *src); - -/** - * crypto_cipher_decrypt_one() - decrypt one block of ciphertext - * @tfm: cipher handle - * @dst: points to the buffer that will be filled with the plaintext - * @src: buffer holding the ciphertext to be decrypted - * - * Invoke the decryption operation of one block. The caller must ensure that - * the plaintext and ciphertext buffers are at least one block in size. - */ -void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, - u8 *dst, const u8 *src); - static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm) { return (struct crypto_comp *)tfm; From 7334a4be50764500d5cae4d9a655f7755dbedd5d Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Mon, 14 Dec 2020 19:44:40 +0800 Subject: [PATCH 009/154] crypto: inside-secure - fix platform_get_irq.cocci warnings Remove dev_err() messages after platform_get_irq*() failures. drivers/crypto/inside-secure/safexcel.c: line 1161 is redundant because platform_get_irq() already prints an error Generated by: scripts/coccinelle/api/platform_get_irq.cocci Signed-off-by: Tian Tao Acked-by: Antoine Tenart Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index 30aedfcfee7c..6364583b88b2 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -1166,11 +1166,8 @@ static int safexcel_request_ring_irq(void *pdev, int irqid, dev = &plf_pdev->dev; irq = platform_get_irq_byname(plf_pdev, irq_name); - if (irq < 0) { - dev_err(dev, "unable to get IRQ '%s' (err %d)\n", - irq_name, irq); + if (irq < 0) return irq; - } } else { return -ENXIO; } From 583513510a7acd2306787865bcd19ebb2f629d42 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 14 Dec 2020 20:02:25 +0000 Subject: [PATCH 010/154] crypto: sun4i-ss - linearize buffers content must be kept When running the non-optimized cipher function, SS produce partial random output. This is due to linearize buffers being reseted after each loop. For preserving stack, instead of moving them back to start of function, I move them in sun4i_ss_ctx. Fixes: 8d3bcb9900ca ("crypto: sun4i-ss - reduce stack usage") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c | 12 ++++-------- drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h | 2 ++ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c index b72de8939497..19f1aa577ed4 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c @@ -233,8 +233,6 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) while (oleft) { if (ileft) { - char buf[4 * SS_RX_MAX];/* buffer for linearize SG src */ - /* * todo is the number of consecutive 4byte word that we * can read from current SG @@ -256,12 +254,12 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) */ todo = min(rx_cnt * 4 - ob, ileft); todo = min_t(size_t, todo, mi.length - oi); - memcpy(buf + ob, mi.addr + oi, todo); + memcpy(ss->buf + ob, mi.addr + oi, todo); ileft -= todo; oi += todo; ob += todo; if (!(ob % 4)) { - writesl(ss->base + SS_RXFIFO, buf, + writesl(ss->base + SS_RXFIFO, ss->buf, ob / 4); ob = 0; } @@ -295,13 +293,11 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) oo = 0; } } else { - char bufo[4 * SS_TX_MAX]; /* buffer for linearize SG dst */ - /* * read obl bytes in bufo, we read at maximum for * emptying the device */ - readsl(ss->base + SS_TXFIFO, bufo, tx_cnt); + readsl(ss->base + SS_TXFIFO, ss->bufo, tx_cnt); obl = tx_cnt * 4; obo = 0; do { @@ -313,7 +309,7 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) */ todo = min_t(size_t, mo.length - oo, obl - obo); - memcpy(mo.addr + oo, bufo + obo, todo); + memcpy(mo.addr + oo, ss->bufo + obo, todo); oleft -= todo; obo += todo; oo += todo; diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h index 5c291e4a6857..c242fccb2ab6 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h @@ -148,6 +148,8 @@ struct sun4i_ss_ctx { struct reset_control *reset; struct device *dev; struct resource *res; + char buf[4 * SS_RX_MAX];/* buffer for linearize SG src */ + char bufo[4 * SS_TX_MAX]; /* buffer for linearize SG dst */ spinlock_t slock; /* control the use of the device */ #ifdef CONFIG_CRYPTO_DEV_SUN4I_SS_PRNG u32 seed[SS_SEED_LEN / BITS_PER_LONG]; From 7bdcd851fa7eb66e8922aa7f6cba9e2f2427a7cf Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 14 Dec 2020 20:02:26 +0000 Subject: [PATCH 011/154] crypto: sun4i-ss - checking sg length is not sufficient The optimized cipher function need length multiple of 4 bytes. But it get sometimes odd length. This is due to SG data could be stored with an offset. So the fix is to check also if the offset is aligned with 4 bytes. Fixes: 6298e948215f2 ("crypto: sunxi-ss - Add Allwinner Security System crypto accelerator") Cc: Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c index 19f1aa577ed4..f49797588329 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c @@ -186,12 +186,12 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) * we can use the SS optimized function */ while (in_sg && no_chunk == 1) { - if (in_sg->length % 4) + if ((in_sg->length | in_sg->offset) & 3u) no_chunk = 0; in_sg = sg_next(in_sg); } while (out_sg && no_chunk == 1) { - if (out_sg->length % 4) + if ((out_sg->length | out_sg->offset) & 3u) no_chunk = 0; out_sg = sg_next(out_sg); } From b756f1c8fc9d84e3f546d7ffe056c5352f4aab05 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 14 Dec 2020 20:02:27 +0000 Subject: [PATCH 012/154] crypto: sun4i-ss - IV register does not work on A10 and A13 Allwinner A10 and A13 SoC have a version of the SS which produce invalid IV in IVx register. Instead of adding a variant for those, let's convert SS to produce IV directly from data. Fixes: 6298e948215f2 ("crypto: sunxi-ss - Add Allwinner Security System crypto accelerator") Cc: Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../allwinner/sun4i-ss/sun4i-ss-cipher.c | 34 +++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c index f49797588329..c7bf731dad7b 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c @@ -20,6 +20,7 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) unsigned int ivsize = crypto_skcipher_ivsize(tfm); struct sun4i_cipher_req_ctx *ctx = skcipher_request_ctx(areq); u32 mode = ctx->mode; + void *backup_iv = NULL; /* when activating SS, the default FIFO space is SS_RX_DEFAULT(32) */ u32 rx_cnt = SS_RX_DEFAULT; u32 tx_cnt = 0; @@ -42,6 +43,13 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) return -EINVAL; } + if (areq->iv && ivsize > 0 && mode & SS_DECRYPTION) { + backup_iv = kzalloc(ivsize, GFP_KERNEL); + if (!backup_iv) + return -ENOMEM; + scatterwalk_map_and_copy(backup_iv, areq->src, areq->cryptlen - ivsize, ivsize, 0); + } + spin_lock_irqsave(&ss->slock, flags); for (i = 0; i < op->keylen; i += 4) @@ -102,9 +110,12 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) } while (oleft); if (areq->iv) { - for (i = 0; i < 4 && i < ivsize / 4; i++) { - v = readl(ss->base + SS_IV0 + i * 4); - *(u32 *)(areq->iv + i * 4) = v; + if (mode & SS_DECRYPTION) { + memcpy(areq->iv, backup_iv, ivsize); + kfree_sensitive(backup_iv); + } else { + scatterwalk_map_and_copy(areq->iv, areq->dst, areq->cryptlen - ivsize, + ivsize, 0); } } @@ -161,6 +172,7 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) unsigned int ileft = areq->cryptlen; unsigned int oleft = areq->cryptlen; unsigned int todo; + void *backup_iv = NULL; struct sg_mapping_iter mi, mo; unsigned int oi, oo; /* offset for in and out */ unsigned int ob = 0; /* offset in buf */ @@ -202,6 +214,13 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) if (need_fallback) return sun4i_ss_cipher_poll_fallback(areq); + if (areq->iv && ivsize > 0 && mode & SS_DECRYPTION) { + backup_iv = kzalloc(ivsize, GFP_KERNEL); + if (!backup_iv) + return -ENOMEM; + scatterwalk_map_and_copy(backup_iv, areq->src, areq->cryptlen - ivsize, ivsize, 0); + } + spin_lock_irqsave(&ss->slock, flags); for (i = 0; i < op->keylen; i += 4) @@ -322,9 +341,12 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) } } if (areq->iv) { - for (i = 0; i < 4 && i < ivsize / 4; i++) { - v = readl(ss->base + SS_IV0 + i * 4); - *(u32 *)(areq->iv + i * 4) = v; + if (mode & SS_DECRYPTION) { + memcpy(areq->iv, backup_iv, ivsize); + kfree_sensitive(backup_iv); + } else { + scatterwalk_map_and_copy(areq->iv, areq->dst, areq->cryptlen - ivsize, + ivsize, 0); } } From 5ab6177fa02df15cd8a02a1f1fb361d2d5d8b946 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 14 Dec 2020 20:02:28 +0000 Subject: [PATCH 013/154] crypto: sun4i-ss - handle BigEndian for cipher Ciphers produce invalid results on BE. Key and IV need to be written in LE. Fixes: 6298e948215f2 ("crypto: sunxi-ss - Add Allwinner Security System crypto accelerator") Cc: Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c index c7bf731dad7b..e097f4c3e68f 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c @@ -52,13 +52,13 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) spin_lock_irqsave(&ss->slock, flags); - for (i = 0; i < op->keylen; i += 4) - writel(*(op->key + i / 4), ss->base + SS_KEY0 + i); + for (i = 0; i < op->keylen / 4; i++) + writesl(ss->base + SS_KEY0 + i * 4, &op->key[i], 1); if (areq->iv) { for (i = 0; i < 4 && i < ivsize / 4; i++) { v = *(u32 *)(areq->iv + i * 4); - writel(v, ss->base + SS_IV0 + i * 4); + writesl(ss->base + SS_IV0 + i * 4, &v, 1); } } writel(mode, ss->base + SS_CTL); @@ -223,13 +223,13 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) spin_lock_irqsave(&ss->slock, flags); - for (i = 0; i < op->keylen; i += 4) - writel(*(op->key + i / 4), ss->base + SS_KEY0 + i); + for (i = 0; i < op->keylen / 4; i++) + writesl(ss->base + SS_KEY0 + i * 4, &op->key[i], 1); if (areq->iv) { for (i = 0; i < 4 && i < ivsize / 4; i++) { v = *(u32 *)(areq->iv + i * 4); - writel(v, ss->base + SS_IV0 + i * 4); + writesl(ss->base + SS_IV0 + i * 4, &v, 1); } } writel(mode, ss->base + SS_CTL); From 4ec8977b921fd9d512701e009ce8082cb94b5c1c Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 14 Dec 2020 20:02:29 +0000 Subject: [PATCH 014/154] crypto: sun4i-ss - initialize need_fallback The need_fallback is never initialized and seem to be always true at runtime. So all hardware operations are always bypassed. Fixes: 0ae1f46c55f87 ("crypto: sun4i-ss - fallback when length is not multiple of blocksize") Cc: Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c index e097f4c3e68f..5759fa79f293 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c @@ -179,7 +179,7 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) unsigned int obo = 0; /* offset in bufo*/ unsigned int obl = 0; /* length of data in bufo */ unsigned long flags; - bool need_fallback; + bool need_fallback = false; if (!areq->cryptlen) return 0; From 9bc3dd24e7dccd50757db743a3635ad5b0497e6e Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 14 Dec 2020 20:02:30 +0000 Subject: [PATCH 015/154] crypto: sun4i-ss - fix kmap usage With the recent kmap change, some tests which were conditional on CONFIG_DEBUG_HIGHMEM now are enabled by default. This permit to detect a problem in sun4i-ss usage of kmap. sun4i-ss uses two kmap via sg_miter (one for input, one for output), but using two kmap at the same time is hard: "the ordering has to be correct and with sg_miter that's probably hard to get right." (quoting Tlgx) So the easiest solution is to never have two sg_miter/kmap open at the same time. After each use of sg_miter, I store the current index, for being able to resume sg_miter to the right place. Fixes: 6298e948215f ("crypto: sunxi-ss - Add Allwinner Security System crypto accelerator") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../allwinner/sun4i-ss/sun4i-ss-cipher.c | 109 +++++++++++------- 1 file changed, 65 insertions(+), 44 deletions(-) diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c index 5759fa79f293..ffa628c89e21 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c @@ -31,6 +31,8 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) unsigned int ileft = areq->cryptlen; unsigned int oleft = areq->cryptlen; unsigned int todo; + unsigned long pi = 0, po = 0; /* progress for in and out */ + bool miter_err; struct sg_mapping_iter mi, mo; unsigned int oi, oo; /* offset for in and out */ unsigned long flags; @@ -63,39 +65,51 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) } writel(mode, ss->base + SS_CTL); - sg_miter_start(&mi, areq->src, sg_nents(areq->src), - SG_MITER_FROM_SG | SG_MITER_ATOMIC); - sg_miter_start(&mo, areq->dst, sg_nents(areq->dst), - SG_MITER_TO_SG | SG_MITER_ATOMIC); - sg_miter_next(&mi); - sg_miter_next(&mo); - if (!mi.addr || !mo.addr) { - dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n"); - err = -EINVAL; - goto release_ss; - } ileft = areq->cryptlen / 4; oleft = areq->cryptlen / 4; oi = 0; oo = 0; do { - todo = min(rx_cnt, ileft); - todo = min_t(size_t, todo, (mi.length - oi) / 4); - if (todo) { - ileft -= todo; - writesl(ss->base + SS_RXFIFO, mi.addr + oi, todo); - oi += todo * 4; - } - if (oi == mi.length) { - sg_miter_next(&mi); - oi = 0; + if (ileft) { + sg_miter_start(&mi, areq->src, sg_nents(areq->src), + SG_MITER_FROM_SG | SG_MITER_ATOMIC); + if (pi) + sg_miter_skip(&mi, pi); + miter_err = sg_miter_next(&mi); + if (!miter_err || !mi.addr) { + dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n"); + err = -EINVAL; + goto release_ss; + } + todo = min(rx_cnt, ileft); + todo = min_t(size_t, todo, (mi.length - oi) / 4); + if (todo) { + ileft -= todo; + writesl(ss->base + SS_RXFIFO, mi.addr + oi, todo); + oi += todo * 4; + } + if (oi == mi.length) { + pi += mi.length; + oi = 0; + } + sg_miter_stop(&mi); } spaces = readl(ss->base + SS_FCSR); rx_cnt = SS_RXFIFO_SPACES(spaces); tx_cnt = SS_TXFIFO_SPACES(spaces); + sg_miter_start(&mo, areq->dst, sg_nents(areq->dst), + SG_MITER_TO_SG | SG_MITER_ATOMIC); + if (po) + sg_miter_skip(&mo, po); + miter_err = sg_miter_next(&mo); + if (!miter_err || !mo.addr) { + dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n"); + err = -EINVAL; + goto release_ss; + } todo = min(tx_cnt, oleft); todo = min_t(size_t, todo, (mo.length - oo) / 4); if (todo) { @@ -104,9 +118,10 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) oo += todo * 4; } if (oo == mo.length) { - sg_miter_next(&mo); oo = 0; + po += mo.length; } + sg_miter_stop(&mo); } while (oleft); if (areq->iv) { @@ -120,8 +135,6 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) } release_ss: - sg_miter_stop(&mi); - sg_miter_stop(&mo); writel(0, ss->base + SS_CTL); spin_unlock_irqrestore(&ss->slock, flags); return err; @@ -174,6 +187,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) unsigned int todo; void *backup_iv = NULL; struct sg_mapping_iter mi, mo; + unsigned long pi = 0, po = 0; /* progress for in and out */ + bool miter_err; unsigned int oi, oo; /* offset for in and out */ unsigned int ob = 0; /* offset in buf */ unsigned int obo = 0; /* offset in bufo*/ @@ -234,17 +249,6 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) } writel(mode, ss->base + SS_CTL); - sg_miter_start(&mi, areq->src, sg_nents(areq->src), - SG_MITER_FROM_SG | SG_MITER_ATOMIC); - sg_miter_start(&mo, areq->dst, sg_nents(areq->dst), - SG_MITER_TO_SG | SG_MITER_ATOMIC); - sg_miter_next(&mi); - sg_miter_next(&mo); - if (!mi.addr || !mo.addr) { - dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n"); - err = -EINVAL; - goto release_ss; - } ileft = areq->cryptlen; oleft = areq->cryptlen; oi = 0; @@ -252,6 +256,16 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) while (oleft) { if (ileft) { + sg_miter_start(&mi, areq->src, sg_nents(areq->src), + SG_MITER_FROM_SG | SG_MITER_ATOMIC); + if (pi) + sg_miter_skip(&mi, pi); + miter_err = sg_miter_next(&mi); + if (!miter_err || !mi.addr) { + dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n"); + err = -EINVAL; + goto release_ss; + } /* * todo is the number of consecutive 4byte word that we * can read from current SG @@ -284,31 +298,38 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) } } if (oi == mi.length) { - sg_miter_next(&mi); + pi += mi.length; oi = 0; } + sg_miter_stop(&mi); } spaces = readl(ss->base + SS_FCSR); rx_cnt = SS_RXFIFO_SPACES(spaces); tx_cnt = SS_TXFIFO_SPACES(spaces); - dev_dbg(ss->dev, - "%x %u/%zu %u/%u cnt=%u %u/%zu %u/%u cnt=%u %u\n", - mode, - oi, mi.length, ileft, areq->cryptlen, rx_cnt, - oo, mo.length, oleft, areq->cryptlen, tx_cnt, ob); if (!tx_cnt) continue; + sg_miter_start(&mo, areq->dst, sg_nents(areq->dst), + SG_MITER_TO_SG | SG_MITER_ATOMIC); + if (po) + sg_miter_skip(&mo, po); + miter_err = sg_miter_next(&mo); + if (!miter_err || !mo.addr) { + dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n"); + err = -EINVAL; + goto release_ss; + } /* todo in 4bytes word */ todo = min(tx_cnt, oleft / 4); todo = min_t(size_t, todo, (mo.length - oo) / 4); + if (todo) { readsl(ss->base + SS_TXFIFO, mo.addr + oo, todo); oleft -= todo * 4; oo += todo * 4; if (oo == mo.length) { - sg_miter_next(&mo); + po += mo.length; oo = 0; } } else { @@ -333,12 +354,14 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) obo += todo; oo += todo; if (oo == mo.length) { + po += mo.length; sg_miter_next(&mo); oo = 0; } } while (obo < obl); /* bufo must be fully used here */ } + sg_miter_stop(&mo); } if (areq->iv) { if (mode & SS_DECRYPTION) { @@ -351,8 +374,6 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) } release_ss: - sg_miter_stop(&mi); - sg_miter_stop(&mo); writel(0, ss->base + SS_CTL); spin_unlock_irqrestore(&ss->slock, flags); From b1f578b85a13c4228d7862a203b428e774f87653 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 14 Dec 2020 20:02:31 +0000 Subject: [PATCH 016/154] crypto: sun4i-ss - enabled stats via debugfs This patch enable to access usage stats for each algorithm. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/Kconfig | 9 ++++ .../allwinner/sun4i-ss/sun4i-ss-cipher.c | 20 +++++++ .../crypto/allwinner/sun4i-ss/sun4i-ss-core.c | 52 +++++++++++++++++++ .../crypto/allwinner/sun4i-ss/sun4i-ss-hash.c | 6 +++ .../crypto/allwinner/sun4i-ss/sun4i-ss-prng.c | 5 ++ drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h | 6 +++ 6 files changed, 98 insertions(+) diff --git a/drivers/crypto/allwinner/Kconfig b/drivers/crypto/allwinner/Kconfig index 180c8a9db819..856fb2045656 100644 --- a/drivers/crypto/allwinner/Kconfig +++ b/drivers/crypto/allwinner/Kconfig @@ -32,6 +32,15 @@ config CRYPTO_DEV_SUN4I_SS_PRNG Select this option if you want to provide kernel-side support for the Pseudo-Random Number Generator found in the Security System. +config CRYPTO_DEV_SUN4I_SS_DEBUG + bool "Enable sun4i-ss stats" + depends on CRYPTO_DEV_SUN4I_SS + depends on DEBUG_FS + help + Say y to enable sun4i-ss debug stats. + This will create /sys/kernel/debug/sun4i-ss/stats for displaying + the number of requests per algorithm. + config CRYPTO_DEV_SUN8I_CE tristate "Support for Allwinner Crypto Engine cryptographic offloader" select CRYPTO_SKCIPHER diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c index ffa628c89e21..d5275d914d09 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c @@ -36,6 +36,8 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) struct sg_mapping_iter mi, mo; unsigned int oi, oo; /* offset for in and out */ unsigned long flags; + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct sun4i_ss_alg_template *algt; if (!areq->cryptlen) return 0; @@ -52,6 +54,12 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) scatterwalk_map_and_copy(backup_iv, areq->src, areq->cryptlen - ivsize, ivsize, 0); } + if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN4I_SS_DEBUG)) { + algt = container_of(alg, struct sun4i_ss_alg_template, alg.crypto); + algt->stat_opti++; + algt->stat_bytes += areq->cryptlen; + } + spin_lock_irqsave(&ss->slock, flags); for (i = 0; i < op->keylen / 4; i++) @@ -147,6 +155,13 @@ static int noinline_for_stack sun4i_ss_cipher_poll_fallback(struct skcipher_requ struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm); struct sun4i_cipher_req_ctx *ctx = skcipher_request_ctx(areq); int err; + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct sun4i_ss_alg_template *algt; + + if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN4I_SS_DEBUG)) { + algt = container_of(alg, struct sun4i_ss_alg_template, alg.crypto); + algt->stat_fb++; + } skcipher_request_set_tfm(&ctx->fallback_req, op->fallback_tfm); skcipher_request_set_callback(&ctx->fallback_req, areq->base.flags, @@ -236,6 +251,11 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) scatterwalk_map_and_copy(backup_iv, areq->src, areq->cryptlen - ivsize, ivsize, 0); } + if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN4I_SS_DEBUG)) { + algt->stat_req++; + algt->stat_bytes += areq->cryptlen; + } + spin_lock_irqsave(&ss->slock, flags); for (i = 0; i < op->keylen / 4; i++) diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c index a2b67f7f8a81..709905ec4680 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c @@ -10,6 +10,7 @@ */ #include #include +#include #include #include #include @@ -234,6 +235,51 @@ static struct sun4i_ss_alg_template ss_algs[] = { #endif }; +static int sun4i_ss_dbgfs_read(struct seq_file *seq, void *v) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(ss_algs); i++) { + if (!ss_algs[i].ss) + continue; + switch (ss_algs[i].type) { + case CRYPTO_ALG_TYPE_SKCIPHER: + seq_printf(seq, "%s %s reqs=%lu opti=%lu fallback=%lu tsize=%lu\n", + ss_algs[i].alg.crypto.base.cra_driver_name, + ss_algs[i].alg.crypto.base.cra_name, + ss_algs[i].stat_req, ss_algs[i].stat_opti, ss_algs[i].stat_fb, + ss_algs[i].stat_bytes); + break; + case CRYPTO_ALG_TYPE_RNG: + seq_printf(seq, "%s %s reqs=%lu tsize=%lu\n", + ss_algs[i].alg.rng.base.cra_driver_name, + ss_algs[i].alg.rng.base.cra_name, + ss_algs[i].stat_req, ss_algs[i].stat_bytes); + break; + case CRYPTO_ALG_TYPE_AHASH: + seq_printf(seq, "%s %s reqs=%lu\n", + ss_algs[i].alg.hash.halg.base.cra_driver_name, + ss_algs[i].alg.hash.halg.base.cra_name, + ss_algs[i].stat_req); + break; + } + } + return 0; +} + +static int sun4i_ss_dbgfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, sun4i_ss_dbgfs_read, inode->i_private); +} + +static const struct file_operations sun4i_ss_debugfs_fops = { + .owner = THIS_MODULE, + .open = sun4i_ss_dbgfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* * Power management strategy: The device is suspended unless a TFM exists for * one of the algorithms proposed by this driver. @@ -454,6 +500,12 @@ static int sun4i_ss_probe(struct platform_device *pdev) break; } } + + /* Ignore error of debugfs */ + ss->dbgfs_dir = debugfs_create_dir("sun4i-ss", NULL); + ss->dbgfs_stats = debugfs_create_file("stats", 0444, ss->dbgfs_dir, ss, + &sun4i_ss_debugfs_fops); + return 0; error_alg: i--; diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c index 1dff48558f53..c1b4585e9bbc 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c @@ -191,8 +191,10 @@ static int sun4i_hash(struct ahash_request *areq) u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, v, ivmode = 0; struct sun4i_req_ctx *op = ahash_request_ctx(areq); struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg); struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm); struct sun4i_ss_ctx *ss = tfmctx->ss; + struct sun4i_ss_alg_template *algt; struct scatterlist *in_sg = areq->src; struct sg_mapping_iter mi; int in_r, err = 0; @@ -398,6 +400,10 @@ static int sun4i_hash(struct ahash_request *areq) */ hash_final: + if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN4I_SS_DEBUG)) { + algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash); + algt->stat_req++; + } /* write the remaining words of the wait buffer */ if (op->len) { diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-prng.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-prng.c index 729aafdbea84..152841076e3a 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-prng.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-prng.c @@ -32,6 +32,11 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src, if (err < 0) return err; + if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN4I_SS_DEBUG)) { + algt->stat_req++; + algt->stat_bytes += todo; + } + spin_lock_bh(&ss->slock); writel(mode, ss->base + SS_CTL); diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h index c242fccb2ab6..0fee6f4e2d90 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h @@ -154,6 +154,8 @@ struct sun4i_ss_ctx { #ifdef CONFIG_CRYPTO_DEV_SUN4I_SS_PRNG u32 seed[SS_SEED_LEN / BITS_PER_LONG]; #endif + struct dentry *dbgfs_dir; + struct dentry *dbgfs_stats; }; struct sun4i_ss_alg_template { @@ -165,6 +167,10 @@ struct sun4i_ss_alg_template { struct rng_alg rng; } alg; struct sun4i_ss_ctx *ss; + unsigned long stat_req; + unsigned long stat_fb; + unsigned long stat_bytes; + unsigned long stat_opti; }; struct sun4i_tfm_ctx { From 44122cc6eea1bd876800da18a84821e0429c4089 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 14 Dec 2020 20:02:32 +0000 Subject: [PATCH 017/154] crypto: sun4i-ss - add SPDX header and remove blank lines This patchs fixes some remaining style issue. Signed-off-by: Corentin Labbe Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c | 3 --- drivers/crypto/allwinner/sun4i-ss/sun4i-ss-prng.c | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c index d5275d914d09..c2e6f5ed1d79 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c @@ -148,7 +148,6 @@ release_ss: return err; } - static int noinline_for_stack sun4i_ss_cipher_poll_fallback(struct skcipher_request *areq) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); @@ -562,7 +561,6 @@ int sun4i_ss_cipher_init(struct crypto_tfm *tfm) sizeof(struct sun4i_cipher_req_ctx) + crypto_skcipher_reqsize(op->fallback_tfm)); - err = pm_runtime_get_sync(op->ss->dev); if (err < 0) goto error_pm; @@ -649,5 +647,4 @@ int sun4i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key, crypto_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); return crypto_skcipher_setkey(op->fallback_tfm, key, keylen); - } diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-prng.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-prng.c index 152841076e3a..443160a114bb 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-prng.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-prng.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later #include "sun4i-ss.h" int sun4i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed, From 33ff64884c4e5ffcac1c4aa767e38bf4b3f443a0 Mon Sep 17 00:00:00 2001 From: Declan Murphy Date: Wed, 16 Dec 2020 11:46:35 +0000 Subject: [PATCH 018/154] dt-bindings: crypto: Add Keem Bay OCS HCU bindings Add device-tree bindings for the Intel Keem Bay Offload Crypto Subsystem (OCS) Hashing Control Unit (HCU) crypto driver. Signed-off-by: Declan Murphy Signed-off-by: Daniele Alessandrelli Acked-by: Mark Gross Reviewed-by: Rob Herring Signed-off-by: Herbert Xu --- .../crypto/intel,keembay-ocs-hcu.yaml | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 Documentation/devicetree/bindings/crypto/intel,keembay-ocs-hcu.yaml diff --git a/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-hcu.yaml b/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-hcu.yaml new file mode 100644 index 000000000000..acb92706d280 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/intel,keembay-ocs-hcu.yaml @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/intel,keembay-ocs-hcu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Intel Keem Bay OCS HCU Device Tree Bindings + +maintainers: + - Declan Murphy + - Daniele Alessandrelli + +description: + The Intel Keem Bay Offload and Crypto Subsystem (OCS) Hash Control Unit (HCU) + provides hardware-accelerated hashing and HMAC. + +properties: + compatible: + const: intel,keembay-ocs-hcu + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +additionalProperties: false + +examples: + - | + #include + crypto@3000b000 { + compatible = "intel,keembay-ocs-hcu"; + reg = <0x3000b000 0x1000>; + interrupts = ; + clocks = <&scmi_clk 94>; + }; From 472b04444cd39e16ba54987b2e901a79cf175463 Mon Sep 17 00:00:00 2001 From: Declan Murphy Date: Wed, 16 Dec 2020 11:46:36 +0000 Subject: [PATCH 019/154] crypto: keembay - Add Keem Bay OCS HCU driver Add support for the Hashing Control Unit (HCU) included in the Offload Crypto Subsystem (OCS) of the Intel Keem Bay SoC, thus enabling hardware-accelerated hashing on the Keem Bay SoC for the following algorithms: - sha256 - sha384 - sha512 - sm3 The driver is composed of two files: - 'ocs-hcu.c' which interacts with the hardware and abstracts it by providing an API following the usual paradigm used in hashing drivers / libraries (e.g., hash_init(), hash_update(), hash_final(), etc.). NOTE: this API can block and sleep, since completions are used to wait for the HW to complete the hashing. - 'keembay-ocs-hcu-core.c' which exports the functionality provided by 'ocs-hcu.c' as a ahash crypto driver. The crypto engine is used to provide asynchronous behavior. 'keembay-ocs-hcu-core.c' also takes care of the DMA mapping of the input sg list. The driver passes crypto manager self-tests, including the extra tests (CRYPTO_MANAGER_EXTRA_TESTS=y). Signed-off-by: Declan Murphy Co-developed-by: Daniele Alessandrelli Signed-off-by: Daniele Alessandrelli Acked-by: Mark Gross Signed-off-by: Herbert Xu --- drivers/crypto/keembay/Kconfig | 17 + drivers/crypto/keembay/Makefile | 3 + drivers/crypto/keembay/keembay-ocs-hcu-core.c | 830 ++++++++++++++++++ drivers/crypto/keembay/ocs-hcu.c | 684 +++++++++++++++ drivers/crypto/keembay/ocs-hcu.h | 98 +++ 5 files changed, 1632 insertions(+) create mode 100644 drivers/crypto/keembay/keembay-ocs-hcu-core.c create mode 100644 drivers/crypto/keembay/ocs-hcu.c create mode 100644 drivers/crypto/keembay/ocs-hcu.h diff --git a/drivers/crypto/keembay/Kconfig b/drivers/crypto/keembay/Kconfig index f2e17b0c4fa0..e99b5ddf4b94 100644 --- a/drivers/crypto/keembay/Kconfig +++ b/drivers/crypto/keembay/Kconfig @@ -38,3 +38,20 @@ config CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS Provides OCS version of cts(cbc(aes)) and cts(cbc(sm4)). Intel does not recommend use of CTS mode with AES/SM4. + +config CRYPTO_DEV_KEEMBAY_OCS_HCU + tristate "Support for Intel Keem Bay OCS HCU HW acceleration" + select CRYPTO_HASH + select CRYPTO_ENGINE + depends on OF || COMPILE_TEST + help + Support for Intel Keem Bay Offload and Crypto Subsystem (OCS) Hash + Control Unit (HCU) hardware acceleration for use with Crypto API. + + Provides OCS HCU hardware acceleration of sha256, sha384, sha512, and + sm3. + + Say Y or M if you're building for the Intel Keem Bay SoC. If compiled + as a module, the module will be called keembay-ocs-hcu. + + If unsure, say N. diff --git a/drivers/crypto/keembay/Makefile b/drivers/crypto/keembay/Makefile index f21e2c4ab3b3..aea03d4432c4 100644 --- a/drivers/crypto/keembay/Makefile +++ b/drivers/crypto/keembay/Makefile @@ -3,3 +3,6 @@ # obj-$(CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4) += keembay-ocs-aes.o keembay-ocs-aes-objs := keembay-ocs-aes-core.o ocs-aes.o + +obj-$(CONFIG_CRYPTO_DEV_KEEMBAY_OCS_HCU) += keembay-ocs-hcu.o +keembay-ocs-hcu-objs := keembay-ocs-hcu-core.o ocs-hcu.o diff --git a/drivers/crypto/keembay/keembay-ocs-hcu-core.c b/drivers/crypto/keembay/keembay-ocs-hcu-core.c new file mode 100644 index 000000000000..388cf9add757 --- /dev/null +++ b/drivers/crypto/keembay/keembay-ocs-hcu-core.c @@ -0,0 +1,830 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel Keem Bay OCS HCU Crypto Driver. + * + * Copyright (C) 2018-2020 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "ocs-hcu.h" + +#define DRV_NAME "keembay-ocs-hcu" + +/* Flag marking a final request. */ +#define REQ_FINAL BIT(0) + +/** + * struct ocs_hcu_ctx: OCS HCU Transform context. + * @engine_ctx: Crypto Engine context. + * @hcu_dev: The OCS HCU device used by the transformation. + * @is_sm3_tfm: Whether or not this is an SM3 transformation. + */ +struct ocs_hcu_ctx { + struct crypto_engine_ctx engine_ctx; + struct ocs_hcu_dev *hcu_dev; + bool is_sm3_tfm; +}; + +/** + * struct ocs_hcu_rctx - Context for the request. + * @hcu_dev: OCS HCU device to be used to service the request. + * @flags: Flags tracking request status. + * @algo: Algorithm to use for the request. + * @blk_sz: Block size of the transformation / request. + * @dig_sz: Digest size of the transformation / request. + * @dma_list: OCS DMA linked list. + * @hash_ctx: OCS HCU hashing context. + * @buffer: Buffer to store partial block of data. + * @buf_cnt: Number of bytes currently stored in the buffer. + * @buf_dma_addr: The DMA address of @buffer (when mapped). + * @buf_dma_count: The number of bytes in @buffer currently DMA-mapped. + * @sg: Head of the scatterlist entries containing data. + * @sg_data_total: Total data in the SG list at any time. + * @sg_data_offset: Offset into the data of the current individual SG node. + * @sg_dma_nents: Number of sg entries mapped in dma_list. + */ +struct ocs_hcu_rctx { + struct ocs_hcu_dev *hcu_dev; + u32 flags; + enum ocs_hcu_algo algo; + size_t blk_sz; + size_t dig_sz; + struct ocs_hcu_dma_list *dma_list; + struct ocs_hcu_hash_ctx hash_ctx; + u8 buffer[SHA512_BLOCK_SIZE]; + size_t buf_cnt; + dma_addr_t buf_dma_addr; + size_t buf_dma_count; + struct scatterlist *sg; + unsigned int sg_data_total; + unsigned int sg_data_offset; + unsigned int sg_dma_nents; +}; + +/** + * struct ocs_hcu_drv - Driver data + * @dev_list: The list of HCU devices. + * @lock: The lock protecting dev_list. + */ +struct ocs_hcu_drv { + struct list_head dev_list; + spinlock_t lock; /* Protects dev_list. */ +}; + +static struct ocs_hcu_drv ocs_hcu = { + .dev_list = LIST_HEAD_INIT(ocs_hcu.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(ocs_hcu.lock), +}; + +/* + * Return the total amount of data in the request; that is: the data in the + * request buffer + the data in the sg list. + */ +static inline unsigned int kmb_get_total_data(struct ocs_hcu_rctx *rctx) +{ + return rctx->sg_data_total + rctx->buf_cnt; +} + +/* Move remaining content of scatter-gather list to context buffer. */ +static int flush_sg_to_ocs_buffer(struct ocs_hcu_rctx *rctx) +{ + size_t count; + + if (rctx->sg_data_total > (sizeof(rctx->buffer) - rctx->buf_cnt)) { + WARN(1, "%s: sg data does not fit in buffer\n", __func__); + return -EINVAL; + } + + while (rctx->sg_data_total) { + if (!rctx->sg) { + WARN(1, "%s: unexpected NULL sg\n", __func__); + return -EINVAL; + } + /* + * If current sg has been fully processed, skip to the next + * one. + */ + if (rctx->sg_data_offset == rctx->sg->length) { + rctx->sg = sg_next(rctx->sg); + rctx->sg_data_offset = 0; + continue; + } + /* + * Determine the maximum data available to copy from the node. + * Minimum of the length left in the sg node, or the total data + * in the request. + */ + count = min(rctx->sg->length - rctx->sg_data_offset, + rctx->sg_data_total); + /* Copy from scatter-list entry to context buffer. */ + scatterwalk_map_and_copy(&rctx->buffer[rctx->buf_cnt], + rctx->sg, rctx->sg_data_offset, + count, 0); + + rctx->sg_data_offset += count; + rctx->sg_data_total -= count; + rctx->buf_cnt += count; + } + + return 0; +} + +static struct ocs_hcu_dev *kmb_ocs_hcu_find_dev(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ocs_hcu_ctx *tctx = crypto_ahash_ctx(tfm); + + /* If the HCU device for the request was previously set, return it. */ + if (tctx->hcu_dev) + return tctx->hcu_dev; + + /* + * Otherwise, get the first HCU device available (there should be one + * and only one device). + */ + spin_lock_bh(&ocs_hcu.lock); + tctx->hcu_dev = list_first_entry_or_null(&ocs_hcu.dev_list, + struct ocs_hcu_dev, + list); + spin_unlock_bh(&ocs_hcu.lock); + + return tctx->hcu_dev; +} + +/* Free OCS DMA linked list and DMA-able context buffer. */ +static void kmb_ocs_hcu_dma_cleanup(struct ahash_request *req, + struct ocs_hcu_rctx *rctx) +{ + struct ocs_hcu_dev *hcu_dev = rctx->hcu_dev; + struct device *dev = hcu_dev->dev; + + /* Unmap rctx->buffer (if mapped). */ + if (rctx->buf_dma_count) { + dma_unmap_single(dev, rctx->buf_dma_addr, rctx->buf_dma_count, + DMA_TO_DEVICE); + rctx->buf_dma_count = 0; + } + + /* Unmap req->src (if mapped). */ + if (rctx->sg_dma_nents) { + dma_unmap_sg(dev, req->src, rctx->sg_dma_nents, DMA_TO_DEVICE); + rctx->sg_dma_nents = 0; + } + + /* Free dma_list (if allocated). */ + if (rctx->dma_list) { + ocs_hcu_dma_list_free(hcu_dev, rctx->dma_list); + rctx->dma_list = NULL; + } +} + +/* + * Prepare for DMA operation: + * - DMA-map request context buffer (if needed) + * - DMA-map SG list (only the entries to be processed, see note below) + * - Allocate OCS HCU DMA linked list (number of elements = SG entries to + * process + context buffer (if not empty)). + * - Add DMA-mapped request context buffer to OCS HCU DMA list. + * - Add SG entries to DMA list. + * + * Note: if this is a final request, we process all the data in the SG list, + * otherwise we can only process up to the maximum amount of block-aligned data + * (the remainder will be put into the context buffer and processed in the next + * request). + */ +static int kmb_ocs_dma_prepare(struct ahash_request *req) +{ + struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct device *dev = rctx->hcu_dev->dev; + unsigned int remainder = 0; + unsigned int total; + size_t nents; + size_t count; + int rc; + int i; + + /* This function should be called only when there is data to process. */ + total = kmb_get_total_data(rctx); + if (!total) + return -EINVAL; + + /* + * If this is not a final DMA (terminated DMA), the data passed to the + * HCU must be aligned to the block size; compute the remainder data to + * be processed in the next request. + */ + if (!(rctx->flags & REQ_FINAL)) + remainder = total % rctx->blk_sz; + + /* Determine the number of scatter gather list entries to process. */ + nents = sg_nents_for_len(req->src, rctx->sg_data_total - remainder); + + /* If there are entries to process, map them. */ + if (nents) { + rctx->sg_dma_nents = dma_map_sg(dev, req->src, nents, + DMA_TO_DEVICE); + if (!rctx->sg_dma_nents) { + dev_err(dev, "Failed to MAP SG\n"); + rc = -ENOMEM; + goto cleanup; + } + /* + * The value returned by dma_map_sg() can be < nents; so update + * nents accordingly. + */ + nents = rctx->sg_dma_nents; + } + + /* + * If context buffer is not empty, map it and add extra DMA entry for + * it. + */ + if (rctx->buf_cnt) { + rctx->buf_dma_addr = dma_map_single(dev, rctx->buffer, + rctx->buf_cnt, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, rctx->buf_dma_addr)) { + dev_err(dev, "Failed to map request context buffer\n"); + rc = -ENOMEM; + goto cleanup; + } + rctx->buf_dma_count = rctx->buf_cnt; + /* Increase number of dma entries. */ + nents++; + } + + /* Allocate OCS HCU DMA list. */ + rctx->dma_list = ocs_hcu_dma_list_alloc(rctx->hcu_dev, nents); + if (!rctx->dma_list) { + rc = -ENOMEM; + goto cleanup; + } + + /* Add request context buffer (if previously DMA-mapped) */ + if (rctx->buf_dma_count) { + rc = ocs_hcu_dma_list_add_tail(rctx->hcu_dev, rctx->dma_list, + rctx->buf_dma_addr, + rctx->buf_dma_count); + if (rc) + goto cleanup; + } + + /* Add the SG nodes to be processed to the DMA linked list. */ + for_each_sg(req->src, rctx->sg, rctx->sg_dma_nents, i) { + /* + * The number of bytes to add to the list entry is the minimum + * between: + * - The DMA length of the SG entry. + * - The data left to be processed. + */ + count = min(rctx->sg_data_total - remainder, + sg_dma_len(rctx->sg) - rctx->sg_data_offset); + /* + * Do not create a zero length DMA descriptor. Check in case of + * zero length SG node. + */ + if (count == 0) + continue; + /* Add sg to HCU DMA list. */ + rc = ocs_hcu_dma_list_add_tail(rctx->hcu_dev, + rctx->dma_list, + rctx->sg->dma_address, + count); + if (rc) + goto cleanup; + + /* Update amount of data remaining in SG list. */ + rctx->sg_data_total -= count; + + /* + * If remaining data is equal to remainder (note: 'less than' + * case should never happen in practice), we are done: update + * offset and exit the loop. + */ + if (rctx->sg_data_total <= remainder) { + WARN_ON(rctx->sg_data_total < remainder); + rctx->sg_data_offset += count; + break; + } + + /* + * If we get here is because we need to process the next sg in + * the list; set offset within the sg to 0. + */ + rctx->sg_data_offset = 0; + } + + return 0; +cleanup: + dev_err(dev, "Failed to prepare DMA.\n"); + kmb_ocs_hcu_dma_cleanup(req, rctx); + + return rc; +} + +static void kmb_ocs_hcu_secure_cleanup(struct ahash_request *req) +{ + struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + + /* Clear buffer of any data. */ + memzero_explicit(rctx->buffer, sizeof(rctx->buffer)); +} + +static int kmb_ocs_hcu_handle_queue(struct ahash_request *req) +{ + struct ocs_hcu_dev *hcu_dev = kmb_ocs_hcu_find_dev(req); + + if (!hcu_dev) + return -ENOENT; + + return crypto_transfer_hash_request_to_engine(hcu_dev->engine, req); +} + +static int kmb_ocs_hcu_do_one_request(struct crypto_engine *engine, void *areq) +{ + struct ahash_request *req = container_of(areq, struct ahash_request, + base); + struct ocs_hcu_dev *hcu_dev = kmb_ocs_hcu_find_dev(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + int rc; + + if (!hcu_dev) { + rc = -ENOENT; + goto error; + } + + /* Handle update request case. */ + if (!(rctx->flags & REQ_FINAL)) { + /* Update should always have input data. */ + if (!kmb_get_total_data(rctx)) + return -EINVAL; + + /* Map input data into the HCU DMA linked list. */ + rc = kmb_ocs_dma_prepare(req); + if (rc) + goto error; + + /* Do hashing step. */ + rc = ocs_hcu_hash_update(hcu_dev, &rctx->hash_ctx, + rctx->dma_list); + + /* Unmap data and free DMA list regardless of return code. */ + kmb_ocs_hcu_dma_cleanup(req, rctx); + + /* Process previous return code. */ + if (rc) + goto error; + + /* + * Reset request buffer count (data in the buffer was just + * processed). + */ + rctx->buf_cnt = 0; + /* + * Move remaining sg data into the request buffer, so that it + * will be processed during the next request. + * + * NOTE: we have remaining data if kmb_get_total_data() was not + * a multiple of block size. + */ + rc = flush_sg_to_ocs_buffer(rctx); + if (rc) + goto error; + + goto done; + } + + /* If we get here, this is a final request. */ + + /* If there is data to process, use finup. */ + if (kmb_get_total_data(rctx)) { + /* Map input data into the HCU DMA linked list. */ + rc = kmb_ocs_dma_prepare(req); + if (rc) + goto error; + + /* Do hashing step. */ + rc = ocs_hcu_hash_finup(hcu_dev, &rctx->hash_ctx, + rctx->dma_list, + req->result, rctx->dig_sz); + /* Free DMA list regardless of return code. */ + kmb_ocs_hcu_dma_cleanup(req, rctx); + + /* Process previous return code. */ + if (rc) + goto error; + + } else { /* Otherwise (if we have no data), use final. */ + rc = ocs_hcu_hash_final(hcu_dev, &rctx->hash_ctx, req->result, + rctx->dig_sz); + if (rc) + goto error; + } + + /* Perform secure clean-up. */ + kmb_ocs_hcu_secure_cleanup(req); +done: + crypto_finalize_hash_request(hcu_dev->engine, req, 0); + + return 0; + +error: + kmb_ocs_hcu_secure_cleanup(req); + return rc; +} + +static int kmb_ocs_hcu_init(struct ahash_request *req) +{ + struct ocs_hcu_dev *hcu_dev = kmb_ocs_hcu_find_dev(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ocs_hcu_ctx *ctx = crypto_ahash_ctx(tfm); + + if (!hcu_dev) + return -ENOENT; + + /* Initialize entire request context to zero. */ + memset(rctx, 0, sizeof(*rctx)); + + rctx->hcu_dev = hcu_dev; + rctx->dig_sz = crypto_ahash_digestsize(tfm); + + switch (rctx->dig_sz) { + case SHA256_DIGEST_SIZE: + rctx->blk_sz = SHA256_BLOCK_SIZE; + /* + * SHA256 and SM3 have the same digest size: use info from tfm + * context to find out which one we should use. + */ + rctx->algo = ctx->is_sm3_tfm ? OCS_HCU_ALGO_SM3 : + OCS_HCU_ALGO_SHA256; + break; + case SHA384_DIGEST_SIZE: + rctx->blk_sz = SHA384_BLOCK_SIZE; + rctx->algo = OCS_HCU_ALGO_SHA384; + break; + case SHA512_DIGEST_SIZE: + rctx->blk_sz = SHA512_BLOCK_SIZE; + rctx->algo = OCS_HCU_ALGO_SHA512; + break; + default: + return -EINVAL; + } + + /* Initialize intermediate data. */ + ocs_hcu_hash_init(&rctx->hash_ctx, rctx->algo); + + return 0; +} + +static int kmb_ocs_hcu_update(struct ahash_request *req) +{ + struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + + if (!req->nbytes) + return 0; + + rctx->sg_data_total = req->nbytes; + rctx->sg_data_offset = 0; + rctx->sg = req->src; + + /* + * If remaining sg_data fits into ctx buffer, just copy it there; we'll + * process it at the next update() or final(). + */ + if (rctx->sg_data_total <= (sizeof(rctx->buffer) - rctx->buf_cnt)) + return flush_sg_to_ocs_buffer(rctx); + + return kmb_ocs_hcu_handle_queue(req); +} + +static int kmb_ocs_hcu_final(struct ahash_request *req) +{ + struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + + rctx->sg_data_total = 0; + rctx->sg_data_offset = 0; + rctx->sg = NULL; + + rctx->flags |= REQ_FINAL; + + return kmb_ocs_hcu_handle_queue(req); +} + +static int kmb_ocs_hcu_finup(struct ahash_request *req) +{ + struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + + rctx->sg_data_total = req->nbytes; + rctx->sg_data_offset = 0; + rctx->sg = req->src; + + rctx->flags |= REQ_FINAL; + + return kmb_ocs_hcu_handle_queue(req); +} + +static int kmb_ocs_hcu_digest(struct ahash_request *req) +{ + int rc = 0; + struct ocs_hcu_dev *hcu_dev = kmb_ocs_hcu_find_dev(req); + + if (!hcu_dev) + return -ENOENT; + + rc = kmb_ocs_hcu_init(req); + if (rc) + return rc; + + rc = kmb_ocs_hcu_finup(req); + + return rc; +} + +static int kmb_ocs_hcu_export(struct ahash_request *req, void *out) +{ + struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + + /* Intermediate data is always stored and applied per request. */ + memcpy(out, rctx, sizeof(*rctx)); + + return 0; +} + +static int kmb_ocs_hcu_import(struct ahash_request *req, const void *in) +{ + struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + + /* Intermediate data is always stored and applied per request. */ + memcpy(rctx, in, sizeof(*rctx)); + + return 0; +} + +/* Set request size and initialize tfm context. */ +static void __cra_init(struct crypto_tfm *tfm, struct ocs_hcu_ctx *ctx) +{ + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ocs_hcu_rctx)); + + /* Init context to 0. */ + memzero_explicit(ctx, sizeof(*ctx)); + /* Set engine ops. */ + ctx->engine_ctx.op.do_one_request = kmb_ocs_hcu_do_one_request; +} + +static int kmb_ocs_hcu_sha_cra_init(struct crypto_tfm *tfm) +{ + struct ocs_hcu_ctx *ctx = crypto_tfm_ctx(tfm); + + __cra_init(tfm, ctx); + + return 0; +} + +static int kmb_ocs_hcu_sm3_cra_init(struct crypto_tfm *tfm) +{ + struct ocs_hcu_ctx *ctx = crypto_tfm_ctx(tfm); + + __cra_init(tfm, ctx); + + ctx->is_sm3_tfm = true; + + return 0; +} + +static struct ahash_alg ocs_hcu_algs[] = { +{ + .init = kmb_ocs_hcu_init, + .update = kmb_ocs_hcu_update, + .final = kmb_ocs_hcu_final, + .finup = kmb_ocs_hcu_finup, + .digest = kmb_ocs_hcu_digest, + .export = kmb_ocs_hcu_export, + .import = kmb_ocs_hcu_import, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct ocs_hcu_rctx), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-keembay-ocs", + .cra_priority = 255, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ocs_hcu_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = kmb_ocs_hcu_sha_cra_init, + } + } +}, +{ + .init = kmb_ocs_hcu_init, + .update = kmb_ocs_hcu_update, + .final = kmb_ocs_hcu_final, + .finup = kmb_ocs_hcu_finup, + .digest = kmb_ocs_hcu_digest, + .export = kmb_ocs_hcu_export, + .import = kmb_ocs_hcu_import, + .halg = { + .digestsize = SM3_DIGEST_SIZE, + .statesize = sizeof(struct ocs_hcu_rctx), + .base = { + .cra_name = "sm3", + .cra_driver_name = "sm3-keembay-ocs", + .cra_priority = 255, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SM3_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ocs_hcu_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = kmb_ocs_hcu_sm3_cra_init, + } + } +}, +{ + .init = kmb_ocs_hcu_init, + .update = kmb_ocs_hcu_update, + .final = kmb_ocs_hcu_final, + .finup = kmb_ocs_hcu_finup, + .digest = kmb_ocs_hcu_digest, + .export = kmb_ocs_hcu_export, + .import = kmb_ocs_hcu_import, + .halg = { + .digestsize = SHA384_DIGEST_SIZE, + .statesize = sizeof(struct ocs_hcu_rctx), + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-keembay-ocs", + .cra_priority = 255, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ocs_hcu_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = kmb_ocs_hcu_sha_cra_init, + } + } +}, +{ + .init = kmb_ocs_hcu_init, + .update = kmb_ocs_hcu_update, + .final = kmb_ocs_hcu_final, + .finup = kmb_ocs_hcu_finup, + .digest = kmb_ocs_hcu_digest, + .export = kmb_ocs_hcu_export, + .import = kmb_ocs_hcu_import, + .halg = { + .digestsize = SHA512_DIGEST_SIZE, + .statesize = sizeof(struct ocs_hcu_rctx), + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-keembay-ocs", + .cra_priority = 255, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ocs_hcu_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = kmb_ocs_hcu_sha_cra_init, + } + } +}, +}; + +/* Device tree driver match. */ +static const struct of_device_id kmb_ocs_hcu_of_match[] = { + { + .compatible = "intel,keembay-ocs-hcu", + }, + {} +}; + +static int kmb_ocs_hcu_remove(struct platform_device *pdev) +{ + struct ocs_hcu_dev *hcu_dev; + int rc; + + hcu_dev = platform_get_drvdata(pdev); + if (!hcu_dev) + return -ENODEV; + + crypto_unregister_ahashes(ocs_hcu_algs, ARRAY_SIZE(ocs_hcu_algs)); + + rc = crypto_engine_exit(hcu_dev->engine); + + spin_lock_bh(&ocs_hcu.lock); + list_del(&hcu_dev->list); + spin_unlock_bh(&ocs_hcu.lock); + + return rc; +} + +static int kmb_ocs_hcu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ocs_hcu_dev *hcu_dev; + struct resource *hcu_mem; + int rc; + + hcu_dev = devm_kzalloc(dev, sizeof(*hcu_dev), GFP_KERNEL); + if (!hcu_dev) + return -ENOMEM; + + hcu_dev->dev = dev; + + platform_set_drvdata(pdev, hcu_dev); + rc = dma_set_mask_and_coherent(&pdev->dev, OCS_HCU_DMA_BIT_MASK); + if (rc) + return rc; + + /* Get the memory address and remap. */ + hcu_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!hcu_mem) { + dev_err(dev, "Could not retrieve io mem resource.\n"); + return -ENODEV; + } + + hcu_dev->io_base = devm_ioremap_resource(dev, hcu_mem); + if (IS_ERR(hcu_dev->io_base)) { + dev_err(dev, "Could not io-remap mem resource.\n"); + return PTR_ERR(hcu_dev->io_base); + } + + init_completion(&hcu_dev->irq_done); + + /* Get and request IRQ. */ + hcu_dev->irq = platform_get_irq(pdev, 0); + if (hcu_dev->irq < 0) + return hcu_dev->irq; + + rc = devm_request_threaded_irq(&pdev->dev, hcu_dev->irq, + ocs_hcu_irq_handler, NULL, 0, + "keembay-ocs-hcu", hcu_dev); + if (rc < 0) { + dev_err(dev, "Could not request IRQ.\n"); + return rc; + } + + INIT_LIST_HEAD(&hcu_dev->list); + + spin_lock_bh(&ocs_hcu.lock); + list_add_tail(&hcu_dev->list, &ocs_hcu.dev_list); + spin_unlock_bh(&ocs_hcu.lock); + + /* Initialize crypto engine */ + hcu_dev->engine = crypto_engine_alloc_init(dev, 1); + if (!hcu_dev->engine) + goto list_del; + + rc = crypto_engine_start(hcu_dev->engine); + if (rc) { + dev_err(dev, "Could not start engine.\n"); + goto cleanup; + } + + /* Security infrastructure guarantees OCS clock is enabled. */ + + rc = crypto_register_ahashes(ocs_hcu_algs, ARRAY_SIZE(ocs_hcu_algs)); + if (rc) { + dev_err(dev, "Could not register algorithms.\n"); + goto cleanup; + } + + return 0; + +cleanup: + crypto_engine_exit(hcu_dev->engine); +list_del: + spin_lock_bh(&ocs_hcu.lock); + list_del(&hcu_dev->list); + spin_unlock_bh(&ocs_hcu.lock); + + return rc; +} + +/* The OCS driver is a platform device. */ +static struct platform_driver kmb_ocs_hcu_driver = { + .probe = kmb_ocs_hcu_probe, + .remove = kmb_ocs_hcu_remove, + .driver = { + .name = DRV_NAME, + .of_match_table = kmb_ocs_hcu_of_match, + }, +}; + +module_platform_driver(kmb_ocs_hcu_driver); + +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/keembay/ocs-hcu.c b/drivers/crypto/keembay/ocs-hcu.c new file mode 100644 index 000000000000..6a80a31d0b00 --- /dev/null +++ b/drivers/crypto/keembay/ocs-hcu.c @@ -0,0 +1,684 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel Keem Bay OCS HCU Crypto Driver. + * + * Copyright (C) 2018-2020 Intel Corporation + */ + +#include +#include +#include +#include +#include + +#include + +#include "ocs-hcu.h" + +/* Registers. */ +#define OCS_HCU_MODE 0x00 +#define OCS_HCU_CHAIN 0x04 +#define OCS_HCU_OPERATION 0x08 +#define OCS_HCU_KEY_0 0x0C +#define OCS_HCU_ISR 0x50 +#define OCS_HCU_IER 0x54 +#define OCS_HCU_STATUS 0x58 +#define OCS_HCU_MSG_LEN_LO 0x60 +#define OCS_HCU_MSG_LEN_HI 0x64 +#define OCS_HCU_KEY_BYTE_ORDER_CFG 0x80 +#define OCS_HCU_DMA_SRC_ADDR 0x400 +#define OCS_HCU_DMA_SRC_SIZE 0x408 +#define OCS_HCU_DMA_DST_SIZE 0x40C +#define OCS_HCU_DMA_DMA_MODE 0x410 +#define OCS_HCU_DMA_NEXT_SRC_DESCR 0x418 +#define OCS_HCU_DMA_MSI_ISR 0x480 +#define OCS_HCU_DMA_MSI_IER 0x484 +#define OCS_HCU_DMA_MSI_MASK 0x488 + +/* Register bit definitions. */ +#define HCU_MODE_ALGO_SHIFT 16 +#define HCU_MODE_HMAC_SHIFT 22 + +#define HCU_STATUS_BUSY BIT(0) + +#define HCU_BYTE_ORDER_SWAP BIT(0) + +#define HCU_IRQ_HASH_DONE BIT(2) +#define HCU_IRQ_HASH_ERR_MASK (BIT(3) | BIT(1) | BIT(0)) + +#define HCU_DMA_IRQ_SRC_DONE BIT(0) +#define HCU_DMA_IRQ_SAI_ERR BIT(2) +#define HCU_DMA_IRQ_BAD_COMP_ERR BIT(3) +#define HCU_DMA_IRQ_INBUF_RD_ERR BIT(4) +#define HCU_DMA_IRQ_INBUF_WD_ERR BIT(5) +#define HCU_DMA_IRQ_OUTBUF_WR_ERR BIT(6) +#define HCU_DMA_IRQ_OUTBUF_RD_ERR BIT(7) +#define HCU_DMA_IRQ_CRD_ERR BIT(8) +#define HCU_DMA_IRQ_ERR_MASK (HCU_DMA_IRQ_SAI_ERR | \ + HCU_DMA_IRQ_BAD_COMP_ERR | \ + HCU_DMA_IRQ_INBUF_RD_ERR | \ + HCU_DMA_IRQ_INBUF_WD_ERR | \ + HCU_DMA_IRQ_OUTBUF_WR_ERR | \ + HCU_DMA_IRQ_OUTBUF_RD_ERR | \ + HCU_DMA_IRQ_CRD_ERR) + +#define HCU_DMA_SNOOP_MASK (0x7 << 28) +#define HCU_DMA_SRC_LL_EN BIT(25) +#define HCU_DMA_EN BIT(31) + +#define OCS_HCU_ENDIANNESS_VALUE 0x2A + +#define HCU_DMA_MSI_UNMASK BIT(0) +#define HCU_DMA_MSI_DISABLE 0 +#define HCU_IRQ_DISABLE 0 + +#define OCS_HCU_START BIT(0) +#define OCS_HCU_TERMINATE BIT(1) + +#define OCS_LL_DMA_FLAG_TERMINATE BIT(31) + +#define OCS_HCU_HW_KEY_LEN_U32 (OCS_HCU_HW_KEY_LEN / sizeof(u32)) + +#define HCU_DATA_WRITE_ENDIANNESS_OFFSET 26 + +#define OCS_HCU_NUM_CHAINS_SHA256_224_SM3 (SHA256_DIGEST_SIZE / sizeof(u32)) +#define OCS_HCU_NUM_CHAINS_SHA384_512 (SHA512_DIGEST_SIZE / sizeof(u32)) + +/* + * While polling on a busy HCU, wait maximum 200us between one check and the + * other. + */ +#define OCS_HCU_WAIT_BUSY_RETRY_DELAY_US 200 +/* Wait on a busy HCU for maximum 1 second. */ +#define OCS_HCU_WAIT_BUSY_TIMEOUT_US 1000000 + +/** + * struct ocs_hcu_dma_list - An entry in an OCS DMA linked list. + * @src_addr: Source address of the data. + * @src_len: Length of data to be fetched. + * @nxt_desc: Next descriptor to fetch. + * @ll_flags: Flags (Freeze @ terminate) for the DMA engine. + */ +struct ocs_hcu_dma_entry { + u32 src_addr; + u32 src_len; + u32 nxt_desc; + u32 ll_flags; +}; + +/** + * struct ocs_dma_list - OCS-specific DMA linked list. + * @head: The head of the list (points to the array backing the list). + * @tail: The current tail of the list; NULL if the list is empty. + * @dma_addr: The DMA address of @head (i.e., the DMA address of the backing + * array). + * @max_nents: Maximum number of entries in the list (i.e., number of elements + * in the backing array). + * + * The OCS DMA list is an array-backed list of OCS DMA descriptors. The array + * backing the list is allocated with dma_alloc_coherent() and pointed by + * @head. + */ +struct ocs_hcu_dma_list { + struct ocs_hcu_dma_entry *head; + struct ocs_hcu_dma_entry *tail; + dma_addr_t dma_addr; + size_t max_nents; +}; + +static inline u32 ocs_hcu_num_chains(enum ocs_hcu_algo algo) +{ + switch (algo) { + case OCS_HCU_ALGO_SHA224: + case OCS_HCU_ALGO_SHA256: + case OCS_HCU_ALGO_SM3: + return OCS_HCU_NUM_CHAINS_SHA256_224_SM3; + case OCS_HCU_ALGO_SHA384: + case OCS_HCU_ALGO_SHA512: + return OCS_HCU_NUM_CHAINS_SHA384_512; + default: + return 0; + }; +} + +static inline u32 ocs_hcu_digest_size(enum ocs_hcu_algo algo) +{ + switch (algo) { + case OCS_HCU_ALGO_SHA224: + return SHA224_DIGEST_SIZE; + case OCS_HCU_ALGO_SHA256: + case OCS_HCU_ALGO_SM3: + /* SM3 shares the same block size. */ + return SHA256_DIGEST_SIZE; + case OCS_HCU_ALGO_SHA384: + return SHA384_DIGEST_SIZE; + case OCS_HCU_ALGO_SHA512: + return SHA512_DIGEST_SIZE; + default: + return 0; + } +} + +/** + * ocs_hcu_wait_busy() - Wait for HCU OCS hardware to became usable. + * @hcu_dev: OCS HCU device to wait for. + * + * Return: 0 if device free, -ETIMEOUT if device busy and internal timeout has + * expired. + */ +static int ocs_hcu_wait_busy(struct ocs_hcu_dev *hcu_dev) +{ + long val; + + return readl_poll_timeout(hcu_dev->io_base + OCS_HCU_STATUS, val, + !(val & HCU_STATUS_BUSY), + OCS_HCU_WAIT_BUSY_RETRY_DELAY_US, + OCS_HCU_WAIT_BUSY_TIMEOUT_US); +} + +static void ocs_hcu_done_irq_en(struct ocs_hcu_dev *hcu_dev) +{ + /* Clear any pending interrupts. */ + writel(0xFFFFFFFF, hcu_dev->io_base + OCS_HCU_ISR); + hcu_dev->irq_err = false; + /* Enable error and HCU done interrupts. */ + writel(HCU_IRQ_HASH_DONE | HCU_IRQ_HASH_ERR_MASK, + hcu_dev->io_base + OCS_HCU_IER); +} + +static void ocs_hcu_dma_irq_en(struct ocs_hcu_dev *hcu_dev) +{ + /* Clear any pending interrupts. */ + writel(0xFFFFFFFF, hcu_dev->io_base + OCS_HCU_DMA_MSI_ISR); + hcu_dev->irq_err = false; + /* Only operating on DMA source completion and error interrupts. */ + writel(HCU_DMA_IRQ_ERR_MASK | HCU_DMA_IRQ_SRC_DONE, + hcu_dev->io_base + OCS_HCU_DMA_MSI_IER); + /* Unmask */ + writel(HCU_DMA_MSI_UNMASK, hcu_dev->io_base + OCS_HCU_DMA_MSI_MASK); +} + +static void ocs_hcu_irq_dis(struct ocs_hcu_dev *hcu_dev) +{ + writel(HCU_IRQ_DISABLE, hcu_dev->io_base + OCS_HCU_IER); + writel(HCU_DMA_MSI_DISABLE, hcu_dev->io_base + OCS_HCU_DMA_MSI_IER); +} + +static int ocs_hcu_wait_and_disable_irq(struct ocs_hcu_dev *hcu_dev) +{ + int rc; + + rc = wait_for_completion_interruptible(&hcu_dev->irq_done); + if (rc) + goto exit; + + if (hcu_dev->irq_err) { + /* Unset flag and return error. */ + hcu_dev->irq_err = false; + rc = -EIO; + goto exit; + } + +exit: + ocs_hcu_irq_dis(hcu_dev); + + return rc; +} + +/** + * ocs_hcu_get_intermediate_data() - Get intermediate data. + * @hcu_dev: The target HCU device. + * @data: Where to store the intermediate. + * @algo: The algorithm being used. + * + * This function is used to save the current hashing process state in order to + * continue it in the future. + * + * Note: once all data has been processed, the intermediate data actually + * contains the hashing result. So this function is also used to retrieve the + * final result of a hashing process. + * + * Return: 0 on success, negative error code otherwise. + */ +static int ocs_hcu_get_intermediate_data(struct ocs_hcu_dev *hcu_dev, + struct ocs_hcu_idata *data, + enum ocs_hcu_algo algo) +{ + const int n = ocs_hcu_num_chains(algo); + u32 *chain; + int rc; + int i; + + /* Data not requested. */ + if (!data) + return -EINVAL; + + chain = (u32 *)data->digest; + + /* Ensure that the OCS is no longer busy before reading the chains. */ + rc = ocs_hcu_wait_busy(hcu_dev); + if (rc) + return rc; + + /* + * This loops is safe because data->digest is an array of + * SHA512_DIGEST_SIZE bytes and the maximum value returned by + * ocs_hcu_num_chains() is OCS_HCU_NUM_CHAINS_SHA384_512 which is equal + * to SHA512_DIGEST_SIZE / sizeof(u32). + */ + for (i = 0; i < n; i++) + chain[i] = readl(hcu_dev->io_base + OCS_HCU_CHAIN); + + data->msg_len_lo = readl(hcu_dev->io_base + OCS_HCU_MSG_LEN_LO); + data->msg_len_hi = readl(hcu_dev->io_base + OCS_HCU_MSG_LEN_HI); + + return 0; +} + +/** + * ocs_hcu_set_intermediate_data() - Set intermediate data. + * @hcu_dev: The target HCU device. + * @data: The intermediate data to be set. + * @algo: The algorithm being used. + * + * This function is used to continue a previous hashing process. + */ +static void ocs_hcu_set_intermediate_data(struct ocs_hcu_dev *hcu_dev, + const struct ocs_hcu_idata *data, + enum ocs_hcu_algo algo) +{ + const int n = ocs_hcu_num_chains(algo); + u32 *chain = (u32 *)data->digest; + int i; + + /* + * This loops is safe because data->digest is an array of + * SHA512_DIGEST_SIZE bytes and the maximum value returned by + * ocs_hcu_num_chains() is OCS_HCU_NUM_CHAINS_SHA384_512 which is equal + * to SHA512_DIGEST_SIZE / sizeof(u32). + */ + for (i = 0; i < n; i++) + writel(chain[i], hcu_dev->io_base + OCS_HCU_CHAIN); + + writel(data->msg_len_lo, hcu_dev->io_base + OCS_HCU_MSG_LEN_LO); + writel(data->msg_len_hi, hcu_dev->io_base + OCS_HCU_MSG_LEN_HI); +} + +static int ocs_hcu_get_digest(struct ocs_hcu_dev *hcu_dev, + enum ocs_hcu_algo algo, u8 *dgst, size_t dgst_len) +{ + u32 *chain; + int rc; + int i; + + if (!dgst) + return -EINVAL; + + /* Length of the output buffer must match the algo digest size. */ + if (dgst_len != ocs_hcu_digest_size(algo)) + return -EINVAL; + + /* Ensure that the OCS is no longer busy before reading the chains. */ + rc = ocs_hcu_wait_busy(hcu_dev); + if (rc) + return rc; + + chain = (u32 *)dgst; + for (i = 0; i < dgst_len / sizeof(u32); i++) + chain[i] = readl(hcu_dev->io_base + OCS_HCU_CHAIN); + + return 0; +} + +/** + * ocs_hcu_hw_cfg() - Configure the HCU hardware. + * @hcu_dev: The HCU device to configure. + * @algo: The algorithm to be used by the HCU device. + * @use_hmac: Whether or not HW HMAC should be used. + * + * Return: 0 on success, negative error code otherwise. + */ +static int ocs_hcu_hw_cfg(struct ocs_hcu_dev *hcu_dev, enum ocs_hcu_algo algo, + bool use_hmac) +{ + u32 cfg; + int rc; + + if (algo != OCS_HCU_ALGO_SHA256 && algo != OCS_HCU_ALGO_SHA224 && + algo != OCS_HCU_ALGO_SHA384 && algo != OCS_HCU_ALGO_SHA512 && + algo != OCS_HCU_ALGO_SM3) + return -EINVAL; + + rc = ocs_hcu_wait_busy(hcu_dev); + if (rc) + return rc; + + /* Ensure interrupts are disabled. */ + ocs_hcu_irq_dis(hcu_dev); + + /* Configure endianness, hashing algorithm and HW HMAC (if needed) */ + cfg = OCS_HCU_ENDIANNESS_VALUE << HCU_DATA_WRITE_ENDIANNESS_OFFSET; + cfg |= algo << HCU_MODE_ALGO_SHIFT; + if (use_hmac) + cfg |= BIT(HCU_MODE_HMAC_SHIFT); + + writel(cfg, hcu_dev->io_base + OCS_HCU_MODE); + + return 0; +} + +/** + * ocs_hcu_ll_dma_start() - Start OCS HCU hashing via DMA + * @hcu_dev: The OCS HCU device to use. + * @dma_list: The OCS DMA list mapping the data to hash. + * @finalize: Whether or not this is the last hashing operation and therefore + * the final hash should be compute even if data is not + * block-aligned. + * + * Return: 0 on success, negative error code otherwise. + */ +static int ocs_hcu_ll_dma_start(struct ocs_hcu_dev *hcu_dev, + const struct ocs_hcu_dma_list *dma_list, + bool finalize) +{ + u32 cfg = HCU_DMA_SNOOP_MASK | HCU_DMA_SRC_LL_EN | HCU_DMA_EN; + int rc; + + if (!dma_list) + return -EINVAL; + + /* + * For final requests we use HCU_DONE IRQ to be notified when all input + * data has been processed by the HCU; however, we cannot do so for + * non-final requests, because we don't get a HCU_DONE IRQ when we + * don't terminate the operation. + * + * Therefore, for non-final requests, we use the DMA IRQ, which + * triggers when DMA has finishing feeding all the input data to the + * HCU, but the HCU may still be processing it. This is fine, since we + * will wait for the HCU processing to be completed when we try to read + * intermediate results, in ocs_hcu_get_intermediate_data(). + */ + if (finalize) + ocs_hcu_done_irq_en(hcu_dev); + else + ocs_hcu_dma_irq_en(hcu_dev); + + reinit_completion(&hcu_dev->irq_done); + writel(dma_list->dma_addr, hcu_dev->io_base + OCS_HCU_DMA_NEXT_SRC_DESCR); + writel(0, hcu_dev->io_base + OCS_HCU_DMA_SRC_SIZE); + writel(0, hcu_dev->io_base + OCS_HCU_DMA_DST_SIZE); + + writel(OCS_HCU_START, hcu_dev->io_base + OCS_HCU_OPERATION); + + writel(cfg, hcu_dev->io_base + OCS_HCU_DMA_DMA_MODE); + + if (finalize) + writel(OCS_HCU_TERMINATE, hcu_dev->io_base + OCS_HCU_OPERATION); + + rc = ocs_hcu_wait_and_disable_irq(hcu_dev); + if (rc) + return rc; + + return 0; +} + +struct ocs_hcu_dma_list *ocs_hcu_dma_list_alloc(struct ocs_hcu_dev *hcu_dev, + int max_nents) +{ + struct ocs_hcu_dma_list *dma_list; + + dma_list = kmalloc(sizeof(*dma_list), GFP_KERNEL); + if (!dma_list) + return NULL; + + /* Total size of the DMA list to allocate. */ + dma_list->head = dma_alloc_coherent(hcu_dev->dev, + sizeof(*dma_list->head) * max_nents, + &dma_list->dma_addr, GFP_KERNEL); + if (!dma_list->head) { + kfree(dma_list); + return NULL; + } + dma_list->max_nents = max_nents; + dma_list->tail = NULL; + + return dma_list; +} + +void ocs_hcu_dma_list_free(struct ocs_hcu_dev *hcu_dev, + struct ocs_hcu_dma_list *dma_list) +{ + if (!dma_list) + return; + + dma_free_coherent(hcu_dev->dev, + sizeof(*dma_list->head) * dma_list->max_nents, + dma_list->head, dma_list->dma_addr); + + kfree(dma_list); +} + +/* Add a new DMA entry at the end of the OCS DMA list. */ +int ocs_hcu_dma_list_add_tail(struct ocs_hcu_dev *hcu_dev, + struct ocs_hcu_dma_list *dma_list, + dma_addr_t addr, u32 len) +{ + struct device *dev = hcu_dev->dev; + struct ocs_hcu_dma_entry *old_tail; + struct ocs_hcu_dma_entry *new_tail; + + if (!len) + return 0; + + if (!dma_list) + return -EINVAL; + + if (addr & ~OCS_HCU_DMA_BIT_MASK) { + dev_err(dev, + "Unexpected error: Invalid DMA address for OCS HCU\n"); + return -EINVAL; + } + + old_tail = dma_list->tail; + new_tail = old_tail ? old_tail + 1 : dma_list->head; + + /* Check if list is full. */ + if (new_tail - dma_list->head >= dma_list->max_nents) + return -ENOMEM; + + /* + * If there was an old tail (i.e., this is not the first element we are + * adding), un-terminate the old tail and make it point to the new one. + */ + if (old_tail) { + old_tail->ll_flags &= ~OCS_LL_DMA_FLAG_TERMINATE; + /* + * The old tail 'nxt_desc' must point to the DMA address of the + * new tail. + */ + old_tail->nxt_desc = dma_list->dma_addr + + sizeof(*dma_list->tail) * (new_tail - + dma_list->head); + } + + new_tail->src_addr = (u32)addr; + new_tail->src_len = (u32)len; + new_tail->ll_flags = OCS_LL_DMA_FLAG_TERMINATE; + new_tail->nxt_desc = 0; + + /* Update list tail with new tail. */ + dma_list->tail = new_tail; + + return 0; +} + +/** + * ocs_hcu_hash_init() - Initialize hash operation context. + * @ctx: The context to initialize. + * @algo: The hashing algorithm to use. + * + * Return: 0 on success, negative error code otherwise. + */ +int ocs_hcu_hash_init(struct ocs_hcu_hash_ctx *ctx, enum ocs_hcu_algo algo) +{ + if (!ctx) + return -EINVAL; + + ctx->algo = algo; + ctx->idata.msg_len_lo = 0; + ctx->idata.msg_len_hi = 0; + /* No need to set idata.digest to 0. */ + + return 0; +} + +/** + * ocs_hcu_digest() - Perform a hashing iteration. + * @hcu_dev: The OCS HCU device to use. + * @ctx: The OCS HCU hashing context. + * @dma_list: The OCS DMA list mapping the input data to process. + * + * Return: 0 on success; negative error code otherwise. + */ +int ocs_hcu_hash_update(struct ocs_hcu_dev *hcu_dev, + struct ocs_hcu_hash_ctx *ctx, + const struct ocs_hcu_dma_list *dma_list) +{ + int rc; + + if (!hcu_dev || !ctx) + return -EINVAL; + + /* Configure the hardware for the current request. */ + rc = ocs_hcu_hw_cfg(hcu_dev, ctx->algo, false); + if (rc) + return rc; + + /* If we already processed some data, idata needs to be set. */ + if (ctx->idata.msg_len_lo || ctx->idata.msg_len_hi) + ocs_hcu_set_intermediate_data(hcu_dev, &ctx->idata, ctx->algo); + + /* Start linked-list DMA hashing. */ + rc = ocs_hcu_ll_dma_start(hcu_dev, dma_list, false); + if (rc) + return rc; + + /* Update idata and return. */ + return ocs_hcu_get_intermediate_data(hcu_dev, &ctx->idata, ctx->algo); +} + +/** + * ocs_hcu_hash_final() - Update and finalize hash computation. + * @hcu_dev: The OCS HCU device to use. + * @ctx: The OCS HCU hashing context. + * @dma_list: The OCS DMA list mapping the input data to process. + * @dgst: The buffer where to save the computed digest. + * @dgst_len: The length of @dgst. + * + * Return: 0 on success; negative error code otherwise. + */ +int ocs_hcu_hash_finup(struct ocs_hcu_dev *hcu_dev, + const struct ocs_hcu_hash_ctx *ctx, + const struct ocs_hcu_dma_list *dma_list, + u8 *dgst, size_t dgst_len) +{ + int rc; + + if (!hcu_dev || !ctx) + return -EINVAL; + + /* Configure the hardware for the current request. */ + rc = ocs_hcu_hw_cfg(hcu_dev, ctx->algo, false); + if (rc) + return rc; + + /* If we already processed some data, idata needs to be set. */ + if (ctx->idata.msg_len_lo || ctx->idata.msg_len_hi) + ocs_hcu_set_intermediate_data(hcu_dev, &ctx->idata, ctx->algo); + + /* Start linked-list DMA hashing. */ + rc = ocs_hcu_ll_dma_start(hcu_dev, dma_list, true); + if (rc) + return rc; + + /* Get digest and return. */ + return ocs_hcu_get_digest(hcu_dev, ctx->algo, dgst, dgst_len); +} + +/** + * ocs_hcu_hash_final() - Finalize hash computation. + * @hcu_dev: The OCS HCU device to use. + * @ctx: The OCS HCU hashing context. + * @dgst: The buffer where to save the computed digest. + * @dgst_len: The length of @dgst. + * + * Return: 0 on success; negative error code otherwise. + */ +int ocs_hcu_hash_final(struct ocs_hcu_dev *hcu_dev, + const struct ocs_hcu_hash_ctx *ctx, u8 *dgst, + size_t dgst_len) +{ + int rc; + + if (!hcu_dev || !ctx) + return -EINVAL; + + /* Configure the hardware for the current request. */ + rc = ocs_hcu_hw_cfg(hcu_dev, ctx->algo, false); + if (rc) + return rc; + + /* If we already processed some data, idata needs to be set. */ + if (ctx->idata.msg_len_lo || ctx->idata.msg_len_hi) + ocs_hcu_set_intermediate_data(hcu_dev, &ctx->idata, ctx->algo); + + /* + * Enable HCU interrupts, so that HCU_DONE will be triggered once the + * final hash is computed. + */ + ocs_hcu_done_irq_en(hcu_dev); + reinit_completion(&hcu_dev->irq_done); + writel(OCS_HCU_TERMINATE, hcu_dev->io_base + OCS_HCU_OPERATION); + + rc = ocs_hcu_wait_and_disable_irq(hcu_dev); + if (rc) + return rc; + + /* Get digest and return. */ + return ocs_hcu_get_digest(hcu_dev, ctx->algo, dgst, dgst_len); +} + +irqreturn_t ocs_hcu_irq_handler(int irq, void *dev_id) +{ + struct ocs_hcu_dev *hcu_dev = dev_id; + u32 hcu_irq; + u32 dma_irq; + + /* Read and clear the HCU interrupt. */ + hcu_irq = readl(hcu_dev->io_base + OCS_HCU_ISR); + writel(hcu_irq, hcu_dev->io_base + OCS_HCU_ISR); + + /* Read and clear the HCU DMA interrupt. */ + dma_irq = readl(hcu_dev->io_base + OCS_HCU_DMA_MSI_ISR); + writel(dma_irq, hcu_dev->io_base + OCS_HCU_DMA_MSI_ISR); + + /* Check for errors. */ + if (hcu_irq & HCU_IRQ_HASH_ERR_MASK || dma_irq & HCU_DMA_IRQ_ERR_MASK) { + hcu_dev->irq_err = true; + goto complete; + } + + /* Check for DONE IRQs. */ + if (hcu_irq & HCU_IRQ_HASH_DONE || dma_irq & HCU_DMA_IRQ_SRC_DONE) + goto complete; + + return IRQ_NONE; + +complete: + complete(&hcu_dev->irq_done); + + return IRQ_HANDLED; +} + +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/keembay/ocs-hcu.h b/drivers/crypto/keembay/ocs-hcu.h new file mode 100644 index 000000000000..6a467dcaf99c --- /dev/null +++ b/drivers/crypto/keembay/ocs-hcu.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Intel Keem Bay OCS HCU Crypto Driver. + * + * Copyright (C) 2018-2020 Intel Corporation + */ + +#include + +#ifndef _CRYPTO_OCS_HCU_H +#define _CRYPTO_OCS_HCU_H + +#define OCS_HCU_DMA_BIT_MASK DMA_BIT_MASK(32) + +#define OCS_HCU_HW_KEY_LEN 64 + +struct ocs_hcu_dma_list; + +enum ocs_hcu_algo { + OCS_HCU_ALGO_SHA256 = 2, + OCS_HCU_ALGO_SHA224 = 3, + OCS_HCU_ALGO_SHA384 = 4, + OCS_HCU_ALGO_SHA512 = 5, + OCS_HCU_ALGO_SM3 = 6, +}; + +/** + * struct ocs_hcu_dev - OCS HCU device context. + * @list: List of device contexts. + * @dev: OCS HCU device. + * @io_base: Base address of OCS HCU registers. + * @engine: Crypto engine for the device. + * @irq: IRQ number. + * @irq_done: Completion for IRQ. + * @irq_err: Flag indicating an IRQ error has happened. + */ +struct ocs_hcu_dev { + struct list_head list; + struct device *dev; + void __iomem *io_base; + struct crypto_engine *engine; + int irq; + struct completion irq_done; + bool irq_err; +}; + +/** + * struct ocs_hcu_idata - Intermediate data generated by the HCU. + * @msg_len_lo: Length of data the HCU has operated on in bits, low 32b. + * @msg_len_hi: Length of data the HCU has operated on in bits, high 32b. + * @digest: The digest read from the HCU. If the HCU is terminated, it will + * contain the actual hash digest. Otherwise it is the intermediate + * state. + */ +struct ocs_hcu_idata { + u32 msg_len_lo; + u32 msg_len_hi; + u8 digest[SHA512_DIGEST_SIZE]; +}; + +/** + * struct ocs_hcu_hash_ctx - Context for OCS HCU hashing operation. + * @algo: The hashing algorithm being used. + * @idata: The current intermediate data. + */ +struct ocs_hcu_hash_ctx { + enum ocs_hcu_algo algo; + struct ocs_hcu_idata idata; +}; + +irqreturn_t ocs_hcu_irq_handler(int irq, void *dev_id); + +struct ocs_hcu_dma_list *ocs_hcu_dma_list_alloc(struct ocs_hcu_dev *hcu_dev, + int max_nents); + +void ocs_hcu_dma_list_free(struct ocs_hcu_dev *hcu_dev, + struct ocs_hcu_dma_list *dma_list); + +int ocs_hcu_dma_list_add_tail(struct ocs_hcu_dev *hcu_dev, + struct ocs_hcu_dma_list *dma_list, + dma_addr_t addr, u32 len); + +int ocs_hcu_hash_init(struct ocs_hcu_hash_ctx *ctx, enum ocs_hcu_algo algo); + +int ocs_hcu_hash_update(struct ocs_hcu_dev *hcu_dev, + struct ocs_hcu_hash_ctx *ctx, + const struct ocs_hcu_dma_list *dma_list); + +int ocs_hcu_hash_finup(struct ocs_hcu_dev *hcu_dev, + const struct ocs_hcu_hash_ctx *ctx, + const struct ocs_hcu_dma_list *dma_list, + u8 *dgst, size_t dgst_len); + +int ocs_hcu_hash_final(struct ocs_hcu_dev *hcu_dev, + const struct ocs_hcu_hash_ctx *ctx, u8 *dgst, + size_t dgst_len); + +#endif /* _CRYPTO_OCS_HCU_H */ From ae832e329a8d17144e5ae625e1704901f0e0b024 Mon Sep 17 00:00:00 2001 From: Daniele Alessandrelli Date: Wed, 16 Dec 2020 11:46:37 +0000 Subject: [PATCH 020/154] crypto: keembay-ocs-hcu - Add HMAC support Add HMAC support to the Keem Bay OCS HCU driver, thus making it provide the following additional transformations: - hmac(sha256) - hmac(sha384) - hmac(sha512) - hmac(sm3) The Keem Bay OCS HCU hardware does not allow "context-switch" for HMAC operations, i.e., it does not support computing a partial HMAC, save its state and then continue it later. Therefore, full hardware acceleration is provided only when possible (e.g., when crypto_ahash_digest() is called); in all other cases hardware acceleration is only partial (OPAD and IPAD calculation is done in software, while hashing is hardware accelerated). Co-developed-by: Declan Murphy Signed-off-by: Declan Murphy Signed-off-by: Daniele Alessandrelli Signed-off-by: Herbert Xu --- drivers/crypto/keembay/Kconfig | 2 +- drivers/crypto/keembay/keembay-ocs-hcu-core.c | 387 +++++++++++++++++- drivers/crypto/keembay/ocs-hcu.c | 156 +++++++ drivers/crypto/keembay/ocs-hcu.h | 8 + 4 files changed, 544 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/keembay/Kconfig b/drivers/crypto/keembay/Kconfig index e99b5ddf4b94..e5f90cebe679 100644 --- a/drivers/crypto/keembay/Kconfig +++ b/drivers/crypto/keembay/Kconfig @@ -49,7 +49,7 @@ config CRYPTO_DEV_KEEMBAY_OCS_HCU Control Unit (HCU) hardware acceleration for use with Crypto API. Provides OCS HCU hardware acceleration of sha256, sha384, sha512, and - sm3. + sm3, as well as the HMAC variant of these algorithms. Say Y or M if you're building for the Intel Keem Bay SoC. If compiled as a module, the module will be called keembay-ocs-hcu. diff --git a/drivers/crypto/keembay/keembay-ocs-hcu-core.c b/drivers/crypto/keembay/keembay-ocs-hcu-core.c index 388cf9add757..37c4b4a689a8 100644 --- a/drivers/crypto/keembay/keembay-ocs-hcu-core.c +++ b/drivers/crypto/keembay/keembay-ocs-hcu-core.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "ocs-hcu.h" @@ -24,17 +25,29 @@ /* Flag marking a final request. */ #define REQ_FINAL BIT(0) +/* Flag marking a HMAC request. */ +#define REQ_FLAGS_HMAC BIT(1) +/* Flag set when HW HMAC is being used. */ +#define REQ_FLAGS_HMAC_HW BIT(2) +/* Flag set when SW HMAC is being used. */ +#define REQ_FLAGS_HMAC_SW BIT(3) /** * struct ocs_hcu_ctx: OCS HCU Transform context. * @engine_ctx: Crypto Engine context. * @hcu_dev: The OCS HCU device used by the transformation. + * @key: The key (used only for HMAC transformations). + * @key_len: The length of the key. * @is_sm3_tfm: Whether or not this is an SM3 transformation. + * @is_hmac_tfm: Whether or not this is a HMAC transformation. */ struct ocs_hcu_ctx { struct crypto_engine_ctx engine_ctx; struct ocs_hcu_dev *hcu_dev; + u8 key[SHA512_BLOCK_SIZE]; + size_t key_len; bool is_sm3_tfm; + bool is_hmac_tfm; }; /** @@ -46,7 +59,8 @@ struct ocs_hcu_ctx { * @dig_sz: Digest size of the transformation / request. * @dma_list: OCS DMA linked list. * @hash_ctx: OCS HCU hashing context. - * @buffer: Buffer to store partial block of data. + * @buffer: Buffer to store: partial block of data and SW HMAC + * artifacts (ipad, opad, etc.). * @buf_cnt: Number of bytes currently stored in the buffer. * @buf_dma_addr: The DMA address of @buffer (when mapped). * @buf_dma_count: The number of bytes in @buffer currently DMA-mapped. @@ -63,7 +77,13 @@ struct ocs_hcu_rctx { size_t dig_sz; struct ocs_hcu_dma_list *dma_list; struct ocs_hcu_hash_ctx hash_ctx; - u8 buffer[SHA512_BLOCK_SIZE]; + /* + * Buffer is double the block size because we need space for SW HMAC + * artifacts, i.e: + * - ipad (1 block) + a possible partial block of data. + * - opad (1 block) + digest of H(k ^ ipad || m) + */ + u8 buffer[2 * SHA512_BLOCK_SIZE]; size_t buf_cnt; dma_addr_t buf_dma_addr; size_t buf_dma_count; @@ -352,19 +372,82 @@ static int kmb_ocs_hcu_handle_queue(struct ahash_request *req) return crypto_transfer_hash_request_to_engine(hcu_dev->engine, req); } +static int prepare_ipad(struct ahash_request *req) +{ + struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ocs_hcu_ctx *ctx = crypto_ahash_ctx(tfm); + int i; + + WARN(rctx->buf_cnt, "%s: Context buffer is not empty\n", __func__); + WARN(!(rctx->flags & REQ_FLAGS_HMAC_SW), + "%s: HMAC_SW flag is not set\n", __func__); + /* + * Key length must be equal to block size. If key is shorter, + * we pad it with zero (note: key cannot be longer, since + * longer keys are hashed by kmb_ocs_hcu_setkey()). + */ + if (ctx->key_len > rctx->blk_sz) { + WARN("%s: Invalid key length in tfm context\n", __func__); + return -EINVAL; + } + memzero_explicit(&ctx->key[ctx->key_len], + rctx->blk_sz - ctx->key_len); + ctx->key_len = rctx->blk_sz; + /* + * Prepare IPAD for HMAC. Only done for first block. + * HMAC(k,m) = H(k ^ opad || H(k ^ ipad || m)) + * k ^ ipad will be first hashed block. + * k ^ opad will be calculated in the final request. + * Only needed if not using HW HMAC. + */ + for (i = 0; i < rctx->blk_sz; i++) + rctx->buffer[i] = ctx->key[i] ^ HMAC_IPAD_VALUE; + rctx->buf_cnt = rctx->blk_sz; + + return 0; +} + static int kmb_ocs_hcu_do_one_request(struct crypto_engine *engine, void *areq) { struct ahash_request *req = container_of(areq, struct ahash_request, base); struct ocs_hcu_dev *hcu_dev = kmb_ocs_hcu_find_dev(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_ctx *tctx = crypto_ahash_ctx(tfm); int rc; + int i; if (!hcu_dev) { rc = -ENOENT; goto error; } + /* + * If hardware HMAC flag is set, perform HMAC in hardware. + * + * NOTE: this flag implies REQ_FINAL && kmb_get_total_data(rctx) + */ + if (rctx->flags & REQ_FLAGS_HMAC_HW) { + /* Map input data into the HCU DMA linked list. */ + rc = kmb_ocs_dma_prepare(req); + if (rc) + goto error; + + rc = ocs_hcu_hmac(hcu_dev, rctx->algo, tctx->key, tctx->key_len, + rctx->dma_list, req->result, rctx->dig_sz); + + /* Unmap data and free DMA list regardless of return code. */ + kmb_ocs_hcu_dma_cleanup(req, rctx); + + /* Process previous return code. */ + if (rc) + goto error; + + goto done; + } + /* Handle update request case. */ if (!(rctx->flags & REQ_FINAL)) { /* Update should always have input data. */ @@ -433,6 +516,36 @@ static int kmb_ocs_hcu_do_one_request(struct crypto_engine *engine, void *areq) goto error; } + /* + * If we are finalizing a SW HMAC request, we just computed the result + * of: H(k ^ ipad || m). + * + * We now need to complete the HMAC calculation with the OPAD step, + * that is, we need to compute H(k ^ opad || digest), where digest is + * the digest we just obtained, i.e., H(k ^ ipad || m). + */ + if (rctx->flags & REQ_FLAGS_HMAC_SW) { + /* + * Compute k ^ opad and store it in the request buffer (which + * is not used anymore at this point). + * Note: key has been padded / hashed already (so keylen == + * blksz) . + */ + WARN_ON(tctx->key_len != rctx->blk_sz); + for (i = 0; i < rctx->blk_sz; i++) + rctx->buffer[i] = tctx->key[i] ^ HMAC_OPAD_VALUE; + /* Now append the digest to the rest of the buffer. */ + for (i = 0; (i < rctx->dig_sz); i++) + rctx->buffer[rctx->blk_sz + i] = req->result[i]; + + /* Now hash the buffer to obtain the final HMAC. */ + rc = ocs_hcu_digest(hcu_dev, rctx->algo, rctx->buffer, + rctx->blk_sz + rctx->dig_sz, req->result, + rctx->dig_sz); + if (rc) + goto error; + } + /* Perform secure clean-up. */ kmb_ocs_hcu_secure_cleanup(req); done: @@ -486,12 +599,17 @@ static int kmb_ocs_hcu_init(struct ahash_request *req) /* Initialize intermediate data. */ ocs_hcu_hash_init(&rctx->hash_ctx, rctx->algo); + /* If this a HMAC request, set HMAC flag. */ + if (ctx->is_hmac_tfm) + rctx->flags |= REQ_FLAGS_HMAC; + return 0; } static int kmb_ocs_hcu_update(struct ahash_request *req) { struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + int rc; if (!req->nbytes) return 0; @@ -500,6 +618,19 @@ static int kmb_ocs_hcu_update(struct ahash_request *req) rctx->sg_data_offset = 0; rctx->sg = req->src; + /* + * If we are doing HMAC, then we must use SW-assisted HMAC, since HW + * HMAC does not support context switching (there it can only be used + * with finup() or digest()). + */ + if (rctx->flags & REQ_FLAGS_HMAC && + !(rctx->flags & REQ_FLAGS_HMAC_SW)) { + rctx->flags |= REQ_FLAGS_HMAC_SW; + rc = prepare_ipad(req); + if (rc) + return rc; + } + /* * If remaining sg_data fits into ctx buffer, just copy it there; we'll * process it at the next update() or final(). @@ -510,6 +641,44 @@ static int kmb_ocs_hcu_update(struct ahash_request *req) return kmb_ocs_hcu_handle_queue(req); } +/* Common logic for kmb_ocs_hcu_final() and kmb_ocs_hcu_finup(). */ +static int kmb_ocs_hcu_fin_common(struct ahash_request *req) +{ + struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ocs_hcu_ctx *ctx = crypto_ahash_ctx(tfm); + int rc; + + rctx->flags |= REQ_FINAL; + + /* + * If this is a HMAC request and, so far, we didn't have to switch to + * SW HMAC, check if we can use HW HMAC. + */ + if (rctx->flags & REQ_FLAGS_HMAC && + !(rctx->flags & REQ_FLAGS_HMAC_SW)) { + /* + * If we are here, it means we never processed any data so far, + * so we can use HW HMAC, but only if there is some data to + * process (since OCS HW MAC does not support zero-length + * messages) and the key length is supported by the hardware + * (OCS HCU HW only supports length <= 64); if HW HMAC cannot + * be used, fall back to SW-assisted HMAC. + */ + if (kmb_get_total_data(rctx) && + ctx->key_len <= OCS_HCU_HW_KEY_LEN) { + rctx->flags |= REQ_FLAGS_HMAC_HW; + } else { + rctx->flags |= REQ_FLAGS_HMAC_SW; + rc = prepare_ipad(req); + if (rc) + return rc; + } + } + + return kmb_ocs_hcu_handle_queue(req); +} + static int kmb_ocs_hcu_final(struct ahash_request *req) { struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); @@ -518,9 +687,7 @@ static int kmb_ocs_hcu_final(struct ahash_request *req) rctx->sg_data_offset = 0; rctx->sg = NULL; - rctx->flags |= REQ_FINAL; - - return kmb_ocs_hcu_handle_queue(req); + return kmb_ocs_hcu_fin_common(req); } static int kmb_ocs_hcu_finup(struct ahash_request *req) @@ -531,9 +698,7 @@ static int kmb_ocs_hcu_finup(struct ahash_request *req) rctx->sg_data_offset = 0; rctx->sg = req->src; - rctx->flags |= REQ_FINAL; - - return kmb_ocs_hcu_handle_queue(req); + return kmb_ocs_hcu_fin_common(req); } static int kmb_ocs_hcu_digest(struct ahash_request *req) @@ -573,6 +738,76 @@ static int kmb_ocs_hcu_import(struct ahash_request *req, const void *in) return 0; } +static int kmb_ocs_hcu_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + unsigned int digestsize = crypto_ahash_digestsize(tfm); + struct ocs_hcu_ctx *ctx = crypto_ahash_ctx(tfm); + size_t blk_sz = crypto_ahash_blocksize(tfm); + struct crypto_ahash *ahash_tfm; + struct ahash_request *req; + struct crypto_wait wait; + struct scatterlist sg; + const char *alg_name; + int rc; + + /* + * Key length must be equal to block size: + * - If key is shorter, we are done for now (the key will be padded + * later on); this is to maximize the use of HW HMAC (which works + * only for keys <= 64 bytes). + * - If key is longer, we hash it. + */ + if (keylen <= blk_sz) { + memcpy(ctx->key, key, keylen); + ctx->key_len = keylen; + return 0; + } + + switch (digestsize) { + case SHA256_DIGEST_SIZE: + alg_name = ctx->is_sm3_tfm ? "sm3-keembay-ocs" : + "sha256-keembay-ocs"; + break; + case SHA384_DIGEST_SIZE: + alg_name = "sha384-keembay-ocs"; + break; + case SHA512_DIGEST_SIZE: + alg_name = "sha512-keembay-ocs"; + break; + default: + return -EINVAL; + } + + ahash_tfm = crypto_alloc_ahash(alg_name, 0, 0); + if (IS_ERR(ahash_tfm)) + return PTR_ERR(ahash_tfm); + + req = ahash_request_alloc(ahash_tfm, GFP_KERNEL); + if (!req) { + rc = -ENOMEM; + goto err_free_ahash; + } + + crypto_init_wait(&wait); + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + crypto_ahash_clear_flags(ahash_tfm, ~0); + + sg_init_one(&sg, key, keylen); + ahash_request_set_crypt(req, &sg, ctx->key, keylen); + + rc = crypto_wait_req(crypto_ahash_digest(req), &wait); + if (rc == 0) + ctx->key_len = digestsize; + + ahash_request_free(req); +err_free_ahash: + crypto_free_ahash(ahash_tfm); + + return rc; +} + /* Set request size and initialize tfm context. */ static void __cra_init(struct crypto_tfm *tfm, struct ocs_hcu_ctx *ctx) { @@ -605,6 +840,38 @@ static int kmb_ocs_hcu_sm3_cra_init(struct crypto_tfm *tfm) return 0; } +static int kmb_ocs_hcu_hmac_sm3_cra_init(struct crypto_tfm *tfm) +{ + struct ocs_hcu_ctx *ctx = crypto_tfm_ctx(tfm); + + __cra_init(tfm, ctx); + + ctx->is_sm3_tfm = true; + ctx->is_hmac_tfm = true; + + return 0; +} + +static int kmb_ocs_hcu_hmac_cra_init(struct crypto_tfm *tfm) +{ + struct ocs_hcu_ctx *ctx = crypto_tfm_ctx(tfm); + + __cra_init(tfm, ctx); + + ctx->is_hmac_tfm = true; + + return 0; +} + +/* Function called when 'tfm' is de-initialized. */ +static void kmb_ocs_hcu_hmac_cra_exit(struct crypto_tfm *tfm) +{ + struct ocs_hcu_ctx *ctx = crypto_tfm_ctx(tfm); + + /* Clear the key. */ + memzero_explicit(ctx->key, sizeof(ctx->key)); +} + static struct ahash_alg ocs_hcu_algs[] = { { .init = kmb_ocs_hcu_init, @@ -630,6 +897,32 @@ static struct ahash_alg ocs_hcu_algs[] = { } } }, +{ + .init = kmb_ocs_hcu_init, + .update = kmb_ocs_hcu_update, + .final = kmb_ocs_hcu_final, + .finup = kmb_ocs_hcu_finup, + .digest = kmb_ocs_hcu_digest, + .export = kmb_ocs_hcu_export, + .import = kmb_ocs_hcu_import, + .setkey = kmb_ocs_hcu_setkey, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct ocs_hcu_rctx), + .base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "hmac-sha256-keembay-ocs", + .cra_priority = 255, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ocs_hcu_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = kmb_ocs_hcu_hmac_cra_init, + .cra_exit = kmb_ocs_hcu_hmac_cra_exit, + } + } +}, { .init = kmb_ocs_hcu_init, .update = kmb_ocs_hcu_update, @@ -654,6 +947,32 @@ static struct ahash_alg ocs_hcu_algs[] = { } } }, +{ + .init = kmb_ocs_hcu_init, + .update = kmb_ocs_hcu_update, + .final = kmb_ocs_hcu_final, + .finup = kmb_ocs_hcu_finup, + .digest = kmb_ocs_hcu_digest, + .export = kmb_ocs_hcu_export, + .import = kmb_ocs_hcu_import, + .setkey = kmb_ocs_hcu_setkey, + .halg = { + .digestsize = SM3_DIGEST_SIZE, + .statesize = sizeof(struct ocs_hcu_rctx), + .base = { + .cra_name = "hmac(sm3)", + .cra_driver_name = "hmac-sm3-keembay-ocs", + .cra_priority = 255, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SM3_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ocs_hcu_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = kmb_ocs_hcu_hmac_sm3_cra_init, + .cra_exit = kmb_ocs_hcu_hmac_cra_exit, + } + } +}, { .init = kmb_ocs_hcu_init, .update = kmb_ocs_hcu_update, @@ -678,6 +997,32 @@ static struct ahash_alg ocs_hcu_algs[] = { } } }, +{ + .init = kmb_ocs_hcu_init, + .update = kmb_ocs_hcu_update, + .final = kmb_ocs_hcu_final, + .finup = kmb_ocs_hcu_finup, + .digest = kmb_ocs_hcu_digest, + .export = kmb_ocs_hcu_export, + .import = kmb_ocs_hcu_import, + .setkey = kmb_ocs_hcu_setkey, + .halg = { + .digestsize = SHA384_DIGEST_SIZE, + .statesize = sizeof(struct ocs_hcu_rctx), + .base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "hmac-sha384-keembay-ocs", + .cra_priority = 255, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ocs_hcu_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = kmb_ocs_hcu_hmac_cra_init, + .cra_exit = kmb_ocs_hcu_hmac_cra_exit, + } + } +}, { .init = kmb_ocs_hcu_init, .update = kmb_ocs_hcu_update, @@ -702,6 +1047,32 @@ static struct ahash_alg ocs_hcu_algs[] = { } } }, +{ + .init = kmb_ocs_hcu_init, + .update = kmb_ocs_hcu_update, + .final = kmb_ocs_hcu_final, + .finup = kmb_ocs_hcu_finup, + .digest = kmb_ocs_hcu_digest, + .export = kmb_ocs_hcu_export, + .import = kmb_ocs_hcu_import, + .setkey = kmb_ocs_hcu_setkey, + .halg = { + .digestsize = SHA512_DIGEST_SIZE, + .statesize = sizeof(struct ocs_hcu_rctx), + .base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "hmac-sha512-keembay-ocs", + .cra_priority = 255, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ocs_hcu_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = kmb_ocs_hcu_hmac_cra_init, + .cra_exit = kmb_ocs_hcu_hmac_cra_exit, + } + } +}, }; /* Device tree driver match. */ diff --git a/drivers/crypto/keembay/ocs-hcu.c b/drivers/crypto/keembay/ocs-hcu.c index 6a80a31d0b00..81eecacf603a 100644 --- a/drivers/crypto/keembay/ocs-hcu.c +++ b/drivers/crypto/keembay/ocs-hcu.c @@ -367,6 +367,69 @@ static int ocs_hcu_hw_cfg(struct ocs_hcu_dev *hcu_dev, enum ocs_hcu_algo algo, return 0; } +/** + * ocs_hcu_clear_key() - Clear key stored in OCS HMAC KEY registers. + * @hcu_dev: The OCS HCU device whose key registers should be cleared. + */ +static void ocs_hcu_clear_key(struct ocs_hcu_dev *hcu_dev) +{ + int reg_off; + + /* Clear OCS_HCU_KEY_[0..15] */ + for (reg_off = 0; reg_off < OCS_HCU_HW_KEY_LEN; reg_off += sizeof(u32)) + writel(0, hcu_dev->io_base + OCS_HCU_KEY_0 + reg_off); +} + +/** + * ocs_hcu_write_key() - Write key to OCS HMAC KEY registers. + * @hcu_dev: The OCS HCU device the key should be written to. + * @key: The key to be written. + * @len: The size of the key to write. It must be OCS_HCU_HW_KEY_LEN. + * + * Return: 0 on success, negative error code otherwise. + */ +static int ocs_hcu_write_key(struct ocs_hcu_dev *hcu_dev, const u8 *key, size_t len) +{ + u32 key_u32[OCS_HCU_HW_KEY_LEN_U32]; + int i; + + if (len > OCS_HCU_HW_KEY_LEN) + return -EINVAL; + + /* Copy key into temporary u32 array. */ + memcpy(key_u32, key, len); + + /* + * Hardware requires all the bytes of the HW Key vector to be + * written. So pad with zero until we reach OCS_HCU_HW_KEY_LEN. + */ + memzero_explicit((u8 *)key_u32 + len, OCS_HCU_HW_KEY_LEN - len); + + /* + * OCS hardware expects the MSB of the key to be written at the highest + * address of the HCU Key vector; in other word, the key must be + * written in reverse order. + * + * Therefore, we first enable byte swapping for the HCU key vector; + * so that bytes of 32-bit word written to OCS_HCU_KEY_[0..15] will be + * swapped: + * 3 <---> 0, 2 <---> 1. + */ + writel(HCU_BYTE_ORDER_SWAP, + hcu_dev->io_base + OCS_HCU_KEY_BYTE_ORDER_CFG); + /* + * And then we write the 32-bit words composing the key starting from + * the end of the key. + */ + for (i = 0; i < OCS_HCU_HW_KEY_LEN_U32; i++) + writel(key_u32[OCS_HCU_HW_KEY_LEN_U32 - 1 - i], + hcu_dev->io_base + OCS_HCU_KEY_0 + (sizeof(u32) * i)); + + memzero_explicit(key_u32, OCS_HCU_HW_KEY_LEN); + + return 0; +} + /** * ocs_hcu_ll_dma_start() - Start OCS HCU hashing via DMA * @hcu_dev: The OCS HCU device to use. @@ -649,6 +712,99 @@ int ocs_hcu_hash_final(struct ocs_hcu_dev *hcu_dev, return ocs_hcu_get_digest(hcu_dev, ctx->algo, dgst, dgst_len); } +/** + * ocs_hcu_digest() - Compute hash digest. + * @hcu_dev: The OCS HCU device to use. + * @algo: The hash algorithm to use. + * @data: The input data to process. + * @data_len: The length of @data. + * @dgst: The buffer where to save the computed digest. + * @dgst_len: The length of @dgst. + * + * Return: 0 on success; negative error code otherwise. + */ +int ocs_hcu_digest(struct ocs_hcu_dev *hcu_dev, enum ocs_hcu_algo algo, + void *data, size_t data_len, u8 *dgst, size_t dgst_len) +{ + struct device *dev = hcu_dev->dev; + dma_addr_t dma_handle; + u32 reg; + int rc; + + /* Configure the hardware for the current request. */ + rc = ocs_hcu_hw_cfg(hcu_dev, algo, false); + if (rc) + return rc; + + dma_handle = dma_map_single(dev, data, data_len, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_handle)) + return -EIO; + + reg = HCU_DMA_SNOOP_MASK | HCU_DMA_EN; + + ocs_hcu_done_irq_en(hcu_dev); + + reinit_completion(&hcu_dev->irq_done); + + writel(dma_handle, hcu_dev->io_base + OCS_HCU_DMA_SRC_ADDR); + writel(data_len, hcu_dev->io_base + OCS_HCU_DMA_SRC_SIZE); + writel(OCS_HCU_START, hcu_dev->io_base + OCS_HCU_OPERATION); + writel(reg, hcu_dev->io_base + OCS_HCU_DMA_DMA_MODE); + + writel(OCS_HCU_TERMINATE, hcu_dev->io_base + OCS_HCU_OPERATION); + + rc = ocs_hcu_wait_and_disable_irq(hcu_dev); + if (rc) + return rc; + + dma_unmap_single(dev, dma_handle, data_len, DMA_TO_DEVICE); + + return ocs_hcu_get_digest(hcu_dev, algo, dgst, dgst_len); +} + +/** + * ocs_hcu_hmac() - Compute HMAC. + * @hcu_dev: The OCS HCU device to use. + * @algo: The hash algorithm to use with HMAC. + * @key: The key to use. + * @dma_list: The OCS DMA list mapping the input data to process. + * @key_len: The length of @key. + * @dgst: The buffer where to save the computed HMAC. + * @dgst_len: The length of @dgst. + * + * Return: 0 on success; negative error code otherwise. + */ +int ocs_hcu_hmac(struct ocs_hcu_dev *hcu_dev, enum ocs_hcu_algo algo, + const u8 *key, size_t key_len, + const struct ocs_hcu_dma_list *dma_list, + u8 *dgst, size_t dgst_len) +{ + int rc; + + /* Ensure 'key' is not NULL. */ + if (!key || key_len == 0) + return -EINVAL; + + /* Configure the hardware for the current request. */ + rc = ocs_hcu_hw_cfg(hcu_dev, algo, true); + if (rc) + return rc; + + rc = ocs_hcu_write_key(hcu_dev, key, key_len); + if (rc) + return rc; + + rc = ocs_hcu_ll_dma_start(hcu_dev, dma_list, true); + + /* Clear HW key before processing return code. */ + ocs_hcu_clear_key(hcu_dev); + + if (rc) + return rc; + + return ocs_hcu_get_digest(hcu_dev, algo, dgst, dgst_len); +} + irqreturn_t ocs_hcu_irq_handler(int irq, void *dev_id) { struct ocs_hcu_dev *hcu_dev = dev_id; diff --git a/drivers/crypto/keembay/ocs-hcu.h b/drivers/crypto/keembay/ocs-hcu.h index 6a467dcaf99c..fbbbb92a0592 100644 --- a/drivers/crypto/keembay/ocs-hcu.h +++ b/drivers/crypto/keembay/ocs-hcu.h @@ -95,4 +95,12 @@ int ocs_hcu_hash_final(struct ocs_hcu_dev *hcu_dev, const struct ocs_hcu_hash_ctx *ctx, u8 *dgst, size_t dgst_len); +int ocs_hcu_digest(struct ocs_hcu_dev *hcu_dev, enum ocs_hcu_algo algo, + void *data, size_t data_len, u8 *dgst, size_t dgst_len); + +int ocs_hcu_hmac(struct ocs_hcu_dev *hcu_dev, enum ocs_hcu_algo algo, + const u8 *key, size_t key_len, + const struct ocs_hcu_dma_list *dma_list, + u8 *dgst, size_t dgst_len); + #endif /* _CRYPTO_OCS_HCU_H */ From b46f80368869cf46dbfe97ca8dfaf02e6be4510e Mon Sep 17 00:00:00 2001 From: Daniele Alessandrelli Date: Wed, 16 Dec 2020 11:46:38 +0000 Subject: [PATCH 021/154] crypto: keembay-ocs-hcu - Add optional support for sha224 Add optional support of sha224 and hmac(sha224). Co-developed-by: Declan Murphy Signed-off-by: Declan Murphy Signed-off-by: Daniele Alessandrelli Signed-off-by: Herbert Xu --- drivers/crypto/keembay/Kconfig | 12 ++++ drivers/crypto/keembay/keembay-ocs-hcu-core.c | 63 +++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/drivers/crypto/keembay/Kconfig b/drivers/crypto/keembay/Kconfig index e5f90cebe679..e45f1b039380 100644 --- a/drivers/crypto/keembay/Kconfig +++ b/drivers/crypto/keembay/Kconfig @@ -55,3 +55,15 @@ config CRYPTO_DEV_KEEMBAY_OCS_HCU as a module, the module will be called keembay-ocs-hcu. If unsure, say N. + +config CRYPTO_DEV_KEEMBAY_OCS_HCU_HMAC_SHA224 + bool "Enable sha224 and hmac(sha224) support in Intel Keem Bay OCS HCU" + depends on CRYPTO_DEV_KEEMBAY_OCS_HCU + help + Enables support for sha224 and hmac(sha224) algorithms in the Intel + Keem Bay OCS HCU driver. Intel recommends not to use these + algorithms. + + Provides OCS HCU hardware acceleration of sha224 and hmac(224). + + If unsure, say N. diff --git a/drivers/crypto/keembay/keembay-ocs-hcu-core.c b/drivers/crypto/keembay/keembay-ocs-hcu-core.c index 37c4b4a689a8..d547af047131 100644 --- a/drivers/crypto/keembay/keembay-ocs-hcu-core.c +++ b/drivers/crypto/keembay/keembay-ocs-hcu-core.c @@ -575,6 +575,12 @@ static int kmb_ocs_hcu_init(struct ahash_request *req) rctx->dig_sz = crypto_ahash_digestsize(tfm); switch (rctx->dig_sz) { +#ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_HCU_HMAC_SHA224 + case SHA224_DIGEST_SIZE: + rctx->blk_sz = SHA224_BLOCK_SIZE; + rctx->algo = OCS_HCU_ALGO_SHA224; + break; +#endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_HCU_HMAC_SHA224 */ case SHA256_DIGEST_SIZE: rctx->blk_sz = SHA256_BLOCK_SIZE; /* @@ -765,6 +771,11 @@ static int kmb_ocs_hcu_setkey(struct crypto_ahash *tfm, const u8 *key, } switch (digestsize) { +#ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_HCU_HMAC_SHA224 + case SHA224_DIGEST_SIZE: + alg_name = "sha224-keembay-ocs"; + break; +#endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_HCU_HMAC_SHA224 */ case SHA256_DIGEST_SIZE: alg_name = ctx->is_sm3_tfm ? "sm3-keembay-ocs" : "sha256-keembay-ocs"; @@ -873,6 +884,58 @@ static void kmb_ocs_hcu_hmac_cra_exit(struct crypto_tfm *tfm) } static struct ahash_alg ocs_hcu_algs[] = { +#ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_HCU_HMAC_SHA224 +{ + .init = kmb_ocs_hcu_init, + .update = kmb_ocs_hcu_update, + .final = kmb_ocs_hcu_final, + .finup = kmb_ocs_hcu_finup, + .digest = kmb_ocs_hcu_digest, + .export = kmb_ocs_hcu_export, + .import = kmb_ocs_hcu_import, + .halg = { + .digestsize = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct ocs_hcu_rctx), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-keembay-ocs", + .cra_priority = 255, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ocs_hcu_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = kmb_ocs_hcu_sha_cra_init, + } + } +}, +{ + .init = kmb_ocs_hcu_init, + .update = kmb_ocs_hcu_update, + .final = kmb_ocs_hcu_final, + .finup = kmb_ocs_hcu_finup, + .digest = kmb_ocs_hcu_digest, + .export = kmb_ocs_hcu_export, + .import = kmb_ocs_hcu_import, + .setkey = kmb_ocs_hcu_setkey, + .halg = { + .digestsize = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct ocs_hcu_rctx), + .base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "hmac-sha224-keembay-ocs", + .cra_priority = 255, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ocs_hcu_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = kmb_ocs_hcu_hmac_cra_init, + .cra_exit = kmb_ocs_hcu_hmac_cra_exit, + } + } +}, +#endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_HCU_HMAC_SHA224 */ { .init = kmb_ocs_hcu_init, .update = kmb_ocs_hcu_update, From 5a5a27b3e1577dbd63b0ac114d784bc3695e245b Mon Sep 17 00:00:00 2001 From: Daniele Alessandrelli Date: Wed, 16 Dec 2020 11:46:39 +0000 Subject: [PATCH 022/154] MAINTAINERS: Add maintainers for Keem Bay OCS HCU driver Add maintainers for the Intel Keem Bay Offload Crypto Subsystem (OCS) Hash Control Unit (HCU) crypto driver. Signed-off-by: Daniele Alessandrelli Acked-by: Declan Murphy Signed-off-by: Herbert Xu --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index aeb3a118842e..25c27157f11e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9062,6 +9062,17 @@ F: drivers/crypto/keembay/keembay-ocs-aes-core.c F: drivers/crypto/keembay/ocs-aes.c F: drivers/crypto/keembay/ocs-aes.h +INTEL KEEM BAY OCS HCU CRYPTO DRIVER +M: Daniele Alessandrelli +M: Declan Murphy +S: Maintained +F: Documentation/devicetree/bindings/crypto/intel,keembay-ocs-hcu.yaml +F: drivers/crypto/keembay/Kconfig +F: drivers/crypto/keembay/Makefile +F: drivers/crypto/keembay/keembay-ocs-hcu-core.c +F: drivers/crypto/keembay/ocs-hcu.c +F: drivers/crypto/keembay/ocs-hcu.h + INTEL MANAGEMENT ENGINE (mei) M: Tomas Winkler L: linux-kernel@vger.kernel.org From 15deb4333cd6d4e1e3216582e4c531ec40a6b060 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 17 Dec 2020 19:55:15 +0100 Subject: [PATCH 023/154] crypto: arm64/aes-ce - really hide slower algos when faster ones are enabled Commit 69b6f2e817e5b ("crypto: arm64/aes-neon - limit exposed routines if faster driver is enabled") intended to hide modes from the plain NEON driver that are also implemented by the faster bit sliced NEON one if both are enabled. However, the defined() CPP function does not detect if the bit sliced NEON driver is enabled as a module. So instead, let's use IS_ENABLED() here. Fixes: 69b6f2e817e5b ("crypto: arm64/aes-neon - limit exposed routines if ...") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 34b8a89197be..cafb5b96be0e 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -55,7 +55,7 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #define aes_mac_update neon_aes_mac_update MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON"); #endif -#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS) +#if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS) MODULE_ALIAS_CRYPTO("ecb(aes)"); MODULE_ALIAS_CRYPTO("cbc(aes)"); MODULE_ALIAS_CRYPTO("ctr(aes)"); @@ -650,7 +650,7 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req) } static struct skcipher_alg aes_algs[] = { { -#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS) +#if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS) .base = { .cra_name = "__ecb(aes)", .cra_driver_name = "__ecb-aes-" MODE, From 5318d3db465d29efe97b0e18da29ad95156e6142 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 17 Dec 2020 19:55:16 +0100 Subject: [PATCH 024/154] crypto: arm64/aes-ctr - improve tail handling Counter mode is a stream cipher chaining mode that is typically used with inputs that are of arbitrarily length, and so a tail block which is smaller than a full AES block is rule rather than exception. The current ctr(aes) implementation for arm64 always makes a separate call into the assembler routine to process this tail block, which is suboptimal, given that it requires reloading of the AES round keys, and prevents us from handling this tail block using the 5-way stride that we use for better performance on deep pipelines. So let's update the assembler routine so it can handle any input size, and uses NEON permutation instructions and overlapping loads and stores to handle the tail block. This results in a ~16% speedup for 1420 byte blocks on cores with deep pipelines such as ThunderX2. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 46 +++++----- arch/arm64/crypto/aes-modes.S | 165 +++++++++++++++++++++++----------- 2 files changed, 137 insertions(+), 74 deletions(-) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index cafb5b96be0e..e7f116d833b9 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -24,6 +24,7 @@ #ifdef USE_V8_CRYPTO_EXTENSIONS #define MODE "ce" #define PRIO 300 +#define STRIDE 5 #define aes_expandkey ce_aes_expandkey #define aes_ecb_encrypt ce_aes_ecb_encrypt #define aes_ecb_decrypt ce_aes_ecb_decrypt @@ -41,6 +42,7 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #else #define MODE "neon" #define PRIO 200 +#define STRIDE 4 #define aes_ecb_encrypt neon_aes_ecb_encrypt #define aes_ecb_decrypt neon_aes_ecb_decrypt #define aes_cbc_encrypt neon_aes_cbc_encrypt @@ -87,7 +89,7 @@ asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int bytes, u8 const iv[]); asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], - int rounds, int blocks, u8 ctr[]); + int rounds, int bytes, u8 ctr[], u8 finalbuf[]); asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, int bytes, u32 const rk2[], u8 iv[], @@ -448,34 +450,36 @@ static int ctr_encrypt(struct skcipher_request *req) struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); int err, rounds = 6 + ctx->key_length / 4; struct skcipher_walk walk; - int blocks; err = skcipher_walk_virt(&walk, req, false); - while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, rounds, blocks, walk.iv); - kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); - } - if (walk.nbytes) { - u8 __aligned(8) tail[AES_BLOCK_SIZE]; + while (walk.nbytes > 0) { + const u8 *src = walk.src.virt.addr; unsigned int nbytes = walk.nbytes; - u8 *tdst = walk.dst.virt.addr; - u8 *tsrc = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + u8 buf[AES_BLOCK_SIZE]; + unsigned int tail; - /* - * Tell aes_ctr_encrypt() to process a tail block. - */ - blocks = -1; + if (unlikely(nbytes < AES_BLOCK_SIZE)) + src = memcpy(buf, src, nbytes); + else if (nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); kernel_neon_begin(); - aes_ctr_encrypt(tail, NULL, ctx->key_enc, rounds, - blocks, walk.iv); + aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, + walk.iv, buf); kernel_neon_end(); - crypto_xor_cpy(tdst, tsrc, tail, nbytes); - err = skcipher_walk_done(&walk, 0); + + tail = nbytes % (STRIDE * AES_BLOCK_SIZE); + if (tail > 0 && tail < AES_BLOCK_SIZE) + /* + * The final partial block could not be returned using + * an overlapping store, so it was passed via buf[] + * instead. + */ + memcpy(dst + nbytes - tail, buf, tail); + + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index cf618d8f6cec..3d1f97799899 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -321,42 +321,76 @@ AES_FUNC_END(aes_cbc_cts_decrypt) /* * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 ctr[]) + * int bytes, u8 ctr[], u8 finalbuf[]) */ AES_FUNC_START(aes_ctr_encrypt) stp x29, x30, [sp, #-16]! mov x29, sp - enc_prepare w3, x2, x6 + enc_prepare w3, x2, x12 ld1 {vctr.16b}, [x5] - umov x6, vctr.d[1] /* keep swabbed ctr in reg */ - rev x6, x6 - cmn w6, w4 /* 32 bit overflow? */ - bcs .Lctrloop + umov x12, vctr.d[1] /* keep swabbed ctr in reg */ + rev x12, x12 + .LctrloopNx: - subs w4, w4, #MAX_STRIDE - bmi .Lctr1x - add w7, w6, #1 + add w7, w4, #15 + sub w4, w4, #MAX_STRIDE << 4 + lsr w7, w7, #4 + mov w8, #MAX_STRIDE + cmp w7, w8 + csel w7, w7, w8, lt + adds x12, x12, x7 + mov v0.16b, vctr.16b - add w8, w6, #2 mov v1.16b, vctr.16b - add w9, w6, #3 mov v2.16b, vctr.16b - add w9, w6, #3 - rev w7, w7 mov v3.16b, vctr.16b - rev w8, w8 ST5( mov v4.16b, vctr.16b ) - mov v1.s[3], w7 - rev w9, w9 -ST5( add w10, w6, #4 ) - mov v2.s[3], w8 -ST5( rev w10, w10 ) - mov v3.s[3], w9 -ST5( mov v4.s[3], w10 ) - ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ + bcs 0f + + .subsection 1 + /* apply carry to outgoing counter */ +0: umov x8, vctr.d[0] + rev x8, x8 + add x8, x8, #1 + rev x8, x8 + ins vctr.d[0], x8 + + /* apply carry to N counter blocks for N := x12 */ + adr x16, 1f + sub x16, x16, x12, lsl #3 + br x16 + hint 34 // bti c + mov v0.d[0], vctr.d[0] + hint 34 // bti c + mov v1.d[0], vctr.d[0] + hint 34 // bti c + mov v2.d[0], vctr.d[0] + hint 34 // bti c + mov v3.d[0], vctr.d[0] +ST5( hint 34 ) +ST5( mov v4.d[0], vctr.d[0] ) +1: b 2f + .previous + +2: rev x7, x12 + ins vctr.d[1], x7 + sub x7, x12, #MAX_STRIDE - 1 + sub x8, x12, #MAX_STRIDE - 2 + sub x9, x12, #MAX_STRIDE - 3 + rev x7, x7 + rev x8, x8 + mov v1.d[1], x7 + rev x9, x9 +ST5( sub x10, x12, #MAX_STRIDE - 4 ) + mov v2.d[1], x8 +ST5( rev x10, x10 ) + mov v3.d[1], x9 +ST5( mov v4.d[1], x10 ) + tbnz w4, #31, .Lctrtail + ld1 {v5.16b-v7.16b}, [x1], #48 ST4( bl aes_encrypt_block4x ) ST5( bl aes_encrypt_block5x ) eor v0.16b, v5.16b, v0.16b @@ -368,47 +402,72 @@ ST5( ld1 {v5.16b-v6.16b}, [x1], #32 ) ST5( eor v4.16b, v6.16b, v4.16b ) st1 {v0.16b-v3.16b}, [x0], #64 ST5( st1 {v4.16b}, [x0], #16 ) - add x6, x6, #MAX_STRIDE - rev x7, x6 - ins vctr.d[1], x7 cbz w4, .Lctrout b .LctrloopNx -.Lctr1x: - adds w4, w4, #MAX_STRIDE - beq .Lctrout -.Lctrloop: - mov v0.16b, vctr.16b - encrypt_block v0, w3, x2, x8, w7 - - adds x6, x6, #1 /* increment BE ctr */ - rev x7, x6 - ins vctr.d[1], x7 - bcs .Lctrcarry /* overflow? */ - -.Lctrcarrydone: - subs w4, w4, #1 - bmi .Lctrtailblock /* blocks <0 means tail block */ - ld1 {v3.16b}, [x1], #16 - eor v3.16b, v0.16b, v3.16b - st1 {v3.16b}, [x0], #16 - bne .Lctrloop .Lctrout: st1 {vctr.16b}, [x5] /* return next CTR value */ ldp x29, x30, [sp], #16 ret -.Lctrtailblock: - st1 {v0.16b}, [x0] +.Lctrtail: + /* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */ + mov x16, #16 + ands x13, x4, #0xf + csel x13, x13, x16, ne + +ST5( cmp w4, #64 - (MAX_STRIDE << 4) ) +ST5( csel x14, x16, xzr, gt ) + cmp w4, #48 - (MAX_STRIDE << 4) + csel x15, x16, xzr, gt + cmp w4, #32 - (MAX_STRIDE << 4) + csel x16, x16, xzr, gt + cmp w4, #16 - (MAX_STRIDE << 4) + ble .Lctrtail1x + + adr_l x12, .Lcts_permute_table + add x12, x12, x13 + +ST5( ld1 {v5.16b}, [x1], x14 ) + ld1 {v6.16b}, [x1], x15 + ld1 {v7.16b}, [x1], x16 + +ST4( bl aes_encrypt_block4x ) +ST5( bl aes_encrypt_block5x ) + + ld1 {v8.16b}, [x1], x13 + ld1 {v9.16b}, [x1] + ld1 {v10.16b}, [x12] + +ST4( eor v6.16b, v6.16b, v0.16b ) +ST4( eor v7.16b, v7.16b, v1.16b ) +ST4( tbl v3.16b, {v3.16b}, v10.16b ) +ST4( eor v8.16b, v8.16b, v2.16b ) +ST4( eor v9.16b, v9.16b, v3.16b ) + +ST5( eor v5.16b, v5.16b, v0.16b ) +ST5( eor v6.16b, v6.16b, v1.16b ) +ST5( tbl v4.16b, {v4.16b}, v10.16b ) +ST5( eor v7.16b, v7.16b, v2.16b ) +ST5( eor v8.16b, v8.16b, v3.16b ) +ST5( eor v9.16b, v9.16b, v4.16b ) + +ST5( st1 {v5.16b}, [x0], x14 ) + st1 {v6.16b}, [x0], x15 + st1 {v7.16b}, [x0], x16 + add x13, x13, x0 + st1 {v9.16b}, [x13] // overlapping stores + st1 {v8.16b}, [x0] b .Lctrout -.Lctrcarry: - umov x7, vctr.d[0] /* load upper word of ctr */ - rev x7, x7 /* ... to handle the carry */ - add x7, x7, #1 - rev x7, x7 - ins vctr.d[0], x7 - b .Lctrcarrydone +.Lctrtail1x: + csel x0, x0, x6, eq // use finalbuf if less than a full block + ld1 {v5.16b}, [x1] +ST5( mov v3.16b, v4.16b ) + encrypt_block v3, w3, x2, x8, w7 + eor v5.16b, v5.16b, v3.16b + st1 {v5.16b}, [x0] + b .Lctrout AES_FUNC_END(aes_ctr_encrypt) From 96a6af540396ed93ba231d0ae2e6fe196dc22032 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 18 Dec 2020 11:57:07 +0100 Subject: [PATCH 025/154] hwrng: iproc-rng200 - Fix disable of the block. When trying to disable the block we bitwise or the control register with value zero. This is confusing as using bitwise or with value zero doesn't have any effect at all. Drop this as we already set the enable bit to zero by appling inverted RNG_RBGEN_MASK. Signed-off-by: Matthias Brugger Acked-by: Scott Branden Acked-by: Florian Fainelli Signed-off-by: Herbert Xu --- drivers/char/hw_random/iproc-rng200.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/char/hw_random/iproc-rng200.c b/drivers/char/hw_random/iproc-rng200.c index 01583faf9893..70cd818a0f31 100644 --- a/drivers/char/hw_random/iproc-rng200.c +++ b/drivers/char/hw_random/iproc-rng200.c @@ -28,7 +28,6 @@ #define RNG_CTRL_OFFSET 0x00 #define RNG_CTRL_RNG_RBGEN_MASK 0x00001FFF #define RNG_CTRL_RNG_RBGEN_ENABLE 0x00000001 -#define RNG_CTRL_RNG_RBGEN_DISABLE 0x00000000 #define RNG_SOFT_RESET_OFFSET 0x04 #define RNG_SOFT_RESET 0x00000001 @@ -61,7 +60,6 @@ static void iproc_rng200_restart(void __iomem *rng_base) /* Disable RBG */ val = ioread32(rng_base + RNG_CTRL_OFFSET); val &= ~RNG_CTRL_RNG_RBGEN_MASK; - val |= RNG_CTRL_RNG_RBGEN_DISABLE; iowrite32(val, rng_base + RNG_CTRL_OFFSET); /* Clear all interrupt status */ @@ -174,7 +172,6 @@ static void iproc_rng200_cleanup(struct hwrng *rng) /* Disable RNG hardware */ val = ioread32(priv->base + RNG_CTRL_OFFSET); val &= ~RNG_CTRL_RNG_RBGEN_MASK; - val |= RNG_CTRL_RNG_RBGEN_DISABLE; iowrite32(val, priv->base + RNG_CTRL_OFFSET); } From 256693a36203f51b0a3659c8b215a7026a03a3f1 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 18 Dec 2020 11:57:08 +0100 Subject: [PATCH 026/154] hwrng: iproc-rng200 - Move enable/disable in separate function We are calling the same code for enable and disable the block in various parts of the driver. Put that code into a new function to reduce code duplication. Signed-off-by: Matthias Brugger Acked-by: Florian Fainelli Acked-by: Scott Branden Signed-off-by: Herbert Xu --- drivers/char/hw_random/iproc-rng200.c | 37 ++++++++++++--------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/char/hw_random/iproc-rng200.c b/drivers/char/hw_random/iproc-rng200.c index 70cd818a0f31..a43743887db1 100644 --- a/drivers/char/hw_random/iproc-rng200.c +++ b/drivers/char/hw_random/iproc-rng200.c @@ -53,14 +53,24 @@ struct iproc_rng200_dev { #define to_rng_priv(rng) container_of(rng, struct iproc_rng200_dev, rng) +static void iproc_rng200_enable_set(void __iomem *rng_base, bool enable) +{ + u32 val; + + val = ioread32(rng_base + RNG_CTRL_OFFSET); + val &= ~RNG_CTRL_RNG_RBGEN_MASK; + + if (enable) + val |= RNG_CTRL_RNG_RBGEN_ENABLE; + + iowrite32(val, rng_base + RNG_CTRL_OFFSET); +} + static void iproc_rng200_restart(void __iomem *rng_base) { uint32_t val; - /* Disable RBG */ - val = ioread32(rng_base + RNG_CTRL_OFFSET); - val &= ~RNG_CTRL_RNG_RBGEN_MASK; - iowrite32(val, rng_base + RNG_CTRL_OFFSET); + iproc_rng200_enable_set(rng_base, false); /* Clear all interrupt status */ iowrite32(0xFFFFFFFFUL, rng_base + RNG_INT_STATUS_OFFSET); @@ -82,11 +92,7 @@ static void iproc_rng200_restart(void __iomem *rng_base) val &= ~RBG_SOFT_RESET; iowrite32(val, rng_base + RBG_SOFT_RESET_OFFSET); - /* Enable RBG */ - val = ioread32(rng_base + RNG_CTRL_OFFSET); - val &= ~RNG_CTRL_RNG_RBGEN_MASK; - val |= RNG_CTRL_RNG_RBGEN_ENABLE; - iowrite32(val, rng_base + RNG_CTRL_OFFSET); + iproc_rng200_enable_set(rng_base, true); } static int iproc_rng200_read(struct hwrng *rng, void *buf, size_t max, @@ -153,13 +159,8 @@ static int iproc_rng200_read(struct hwrng *rng, void *buf, size_t max, static int iproc_rng200_init(struct hwrng *rng) { struct iproc_rng200_dev *priv = to_rng_priv(rng); - uint32_t val; - /* Setup RNG. */ - val = ioread32(priv->base + RNG_CTRL_OFFSET); - val &= ~RNG_CTRL_RNG_RBGEN_MASK; - val |= RNG_CTRL_RNG_RBGEN_ENABLE; - iowrite32(val, priv->base + RNG_CTRL_OFFSET); + iproc_rng200_enable_set(priv->base, true); return 0; } @@ -167,12 +168,8 @@ static int iproc_rng200_init(struct hwrng *rng) static void iproc_rng200_cleanup(struct hwrng *rng) { struct iproc_rng200_dev *priv = to_rng_priv(rng); - uint32_t val; - /* Disable RNG hardware */ - val = ioread32(priv->base + RNG_CTRL_OFFSET); - val &= ~RNG_CTRL_RNG_RBGEN_MASK; - iowrite32(val, priv->base + RNG_CTRL_OFFSET); + iproc_rng200_enable_set(priv->base, false); } static int iproc_rng200_probe(struct platform_device *pdev) From c4ff41b93d1f10d1b8be258c31a0436c5769fc00 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 19 Dec 2020 08:52:07 +0100 Subject: [PATCH 027/154] hwrng: ingenic - Fix a resource leak in an error handling path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case of error, we should call 'clk_disable_unprepare()' to undo a previous 'clk_prepare_enable()' call, as already done in the remove function. Fixes: 406346d22278 ("hwrng: ingenic - Add hardware TRNG for Ingenic X1830") Signed-off-by: Christophe JAILLET Tested-by: 周琰杰 (Zhou Yanjie) Signed-off-by: Herbert Xu --- drivers/char/hw_random/ingenic-trng.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/ingenic-trng.c b/drivers/char/hw_random/ingenic-trng.c index 954a8411d67d..0eb80f786f4d 100644 --- a/drivers/char/hw_random/ingenic-trng.c +++ b/drivers/char/hw_random/ingenic-trng.c @@ -113,13 +113,17 @@ static int ingenic_trng_probe(struct platform_device *pdev) ret = hwrng_register(&trng->rng); if (ret) { dev_err(&pdev->dev, "Failed to register hwrng\n"); - return ret; + goto err_unprepare_clk; } platform_set_drvdata(pdev, trng); dev_info(&pdev->dev, "Ingenic DTRNG driver registered\n"); return 0; + +err_unprepare_clk: + clk_disable_unprepare(trng->clk); + return ret; } static int ingenic_trng_remove(struct platform_device *pdev) From 0d396058f92ae7e5ac62839fed54bc2bba630ab5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:09:50 -0800 Subject: [PATCH 028/154] crypto: blake2s - define shash_alg structs using macros The shash_alg structs for the four variants of BLAKE2s are identical except for the algorithm name, driver name, and digest size. So, avoid code duplication by using a macro to define these structs. Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/blake2s_generic.c | 86 ++++++++++++---------------------------- 1 file changed, 26 insertions(+), 60 deletions(-) diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c index 005783ff45ad..e3aa6e7ff3d8 100644 --- a/crypto/blake2s_generic.c +++ b/crypto/blake2s_generic.c @@ -83,67 +83,33 @@ static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) return 0; } -static struct shash_alg blake2s_algs[] = {{ - .base.cra_name = "blake2s-128", - .base.cra_driver_name = "blake2s-128-generic", - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), - .base.cra_priority = 200, - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, +#define BLAKE2S_ALG(name, driver_name, digest_size) \ + { \ + .base.cra_name = name, \ + .base.cra_driver_name = driver_name, \ + .base.cra_priority = 100, \ + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ + .base.cra_module = THIS_MODULE, \ + .digestsize = digest_size, \ + .setkey = crypto_blake2s_setkey, \ + .init = crypto_blake2s_init, \ + .update = crypto_blake2s_update, \ + .final = crypto_blake2s_final, \ + .descsize = sizeof(struct blake2s_state), \ + } - .digestsize = BLAKE2S_128_HASH_SIZE, - .setkey = crypto_blake2s_setkey, - .init = crypto_blake2s_init, - .update = crypto_blake2s_update, - .final = crypto_blake2s_final, - .descsize = sizeof(struct blake2s_state), -}, { - .base.cra_name = "blake2s-160", - .base.cra_driver_name = "blake2s-160-generic", - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), - .base.cra_priority = 200, - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - - .digestsize = BLAKE2S_160_HASH_SIZE, - .setkey = crypto_blake2s_setkey, - .init = crypto_blake2s_init, - .update = crypto_blake2s_update, - .final = crypto_blake2s_final, - .descsize = sizeof(struct blake2s_state), -}, { - .base.cra_name = "blake2s-224", - .base.cra_driver_name = "blake2s-224-generic", - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), - .base.cra_priority = 200, - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - - .digestsize = BLAKE2S_224_HASH_SIZE, - .setkey = crypto_blake2s_setkey, - .init = crypto_blake2s_init, - .update = crypto_blake2s_update, - .final = crypto_blake2s_final, - .descsize = sizeof(struct blake2s_state), -}, { - .base.cra_name = "blake2s-256", - .base.cra_driver_name = "blake2s-256-generic", - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), - .base.cra_priority = 200, - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - - .digestsize = BLAKE2S_256_HASH_SIZE, - .setkey = crypto_blake2s_setkey, - .init = crypto_blake2s_init, - .update = crypto_blake2s_update, - .final = crypto_blake2s_final, - .descsize = sizeof(struct blake2s_state), -}}; +static struct shash_alg blake2s_algs[] = { + BLAKE2S_ALG("blake2s-128", "blake2s-128-generic", + BLAKE2S_128_HASH_SIZE), + BLAKE2S_ALG("blake2s-160", "blake2s-160-generic", + BLAKE2S_160_HASH_SIZE), + BLAKE2S_ALG("blake2s-224", "blake2s-224-generic", + BLAKE2S_224_HASH_SIZE), + BLAKE2S_ALG("blake2s-256", "blake2s-256-generic", + BLAKE2S_256_HASH_SIZE), +}; static int __init blake2s_mod_init(void) { From 1aa90f4cf034ed4f016a02330820ac0551a6c13c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:09:51 -0800 Subject: [PATCH 029/154] crypto: x86/blake2s - define shash_alg structs using macros The shash_alg structs for the four variants of BLAKE2s are identical except for the algorithm name, driver name, and digest size. So, avoid code duplication by using a macro to define these structs. Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/blake2s-glue.c | 82 +++++++++------------------------- 1 file changed, 22 insertions(+), 60 deletions(-) diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c index c025a01cf708..4dcb2ee89efc 100644 --- a/arch/x86/crypto/blake2s-glue.c +++ b/arch/x86/crypto/blake2s-glue.c @@ -129,67 +129,29 @@ static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) return 0; } -static struct shash_alg blake2s_algs[] = {{ - .base.cra_name = "blake2s-128", - .base.cra_driver_name = "blake2s-128-x86", - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), - .base.cra_priority = 200, - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, +#define BLAKE2S_ALG(name, driver_name, digest_size) \ + { \ + .base.cra_name = name, \ + .base.cra_driver_name = driver_name, \ + .base.cra_priority = 200, \ + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ + .base.cra_module = THIS_MODULE, \ + .digestsize = digest_size, \ + .setkey = crypto_blake2s_setkey, \ + .init = crypto_blake2s_init, \ + .update = crypto_blake2s_update, \ + .final = crypto_blake2s_final, \ + .descsize = sizeof(struct blake2s_state), \ + } - .digestsize = BLAKE2S_128_HASH_SIZE, - .setkey = crypto_blake2s_setkey, - .init = crypto_blake2s_init, - .update = crypto_blake2s_update, - .final = crypto_blake2s_final, - .descsize = sizeof(struct blake2s_state), -}, { - .base.cra_name = "blake2s-160", - .base.cra_driver_name = "blake2s-160-x86", - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), - .base.cra_priority = 200, - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - - .digestsize = BLAKE2S_160_HASH_SIZE, - .setkey = crypto_blake2s_setkey, - .init = crypto_blake2s_init, - .update = crypto_blake2s_update, - .final = crypto_blake2s_final, - .descsize = sizeof(struct blake2s_state), -}, { - .base.cra_name = "blake2s-224", - .base.cra_driver_name = "blake2s-224-x86", - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), - .base.cra_priority = 200, - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - - .digestsize = BLAKE2S_224_HASH_SIZE, - .setkey = crypto_blake2s_setkey, - .init = crypto_blake2s_init, - .update = crypto_blake2s_update, - .final = crypto_blake2s_final, - .descsize = sizeof(struct blake2s_state), -}, { - .base.cra_name = "blake2s-256", - .base.cra_driver_name = "blake2s-256-x86", - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), - .base.cra_priority = 200, - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - - .digestsize = BLAKE2S_256_HASH_SIZE, - .setkey = crypto_blake2s_setkey, - .init = crypto_blake2s_init, - .update = crypto_blake2s_update, - .final = crypto_blake2s_final, - .descsize = sizeof(struct blake2s_state), -}}; +static struct shash_alg blake2s_algs[] = { + BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE), + BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE), + BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE), + BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE), +}; static int __init blake2s_mod_init(void) { From df412e7efda1e2c5b5fcb06701bba77434cbd1e8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:09:52 -0800 Subject: [PATCH 030/154] crypto: blake2s - remove unneeded includes It doesn't make sense for the generic implementation of BLAKE2s to include and , as these are things that would only be useful in an architecture-specific implementation. Remove these unnecessary includes. Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/blake2s_generic.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c index e3aa6e7ff3d8..b89536c3671c 100644 --- a/crypto/blake2s_generic.c +++ b/crypto/blake2s_generic.c @@ -4,11 +4,9 @@ */ #include -#include #include #include -#include #include #include From 057edc9c8bb2d5ff5b058b521792c392428a0714 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:09:53 -0800 Subject: [PATCH 031/154] crypto: blake2s - move update and final logic to internal/blake2s.h Move most of blake2s_update() and blake2s_final() into new inline functions __blake2s_update() and __blake2s_final() in include/crypto/internal/blake2s.h so that this logic can be shared by the shash helper functions. This will avoid duplicating this logic between the library and shash implementations. Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- include/crypto/internal/blake2s.h | 41 ++++++++++++++++++++++++++ lib/crypto/blake2s.c | 48 ++++++------------------------- 2 files changed, 49 insertions(+), 40 deletions(-) diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index 6e376ae6b6b5..42deba4b8cee 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -4,6 +4,7 @@ #define BLAKE2S_INTERNAL_H #include +#include struct blake2s_tfm_ctx { u8 key[BLAKE2S_KEY_SIZE]; @@ -23,4 +24,44 @@ static inline void blake2s_set_lastblock(struct blake2s_state *state) state->f[0] = -1; } +typedef void (*blake2s_compress_t)(struct blake2s_state *state, + const u8 *block, size_t nblocks, u32 inc); + +static inline void __blake2s_update(struct blake2s_state *state, + const u8 *in, size_t inlen, + blake2s_compress_t compress) +{ + const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; + + if (unlikely(!inlen)) + return; + if (inlen > fill) { + memcpy(state->buf + state->buflen, in, fill); + (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); + state->buflen = 0; + in += fill; + inlen -= fill; + } + if (inlen > BLAKE2S_BLOCK_SIZE) { + const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); + /* Hash one less (full) block than strictly possible */ + (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); + in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); + inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); + } + memcpy(state->buf + state->buflen, in, inlen); + state->buflen += inlen; +} + +static inline void __blake2s_final(struct blake2s_state *state, u8 *out, + blake2s_compress_t compress) +{ + blake2s_set_lastblock(state); + memset(state->buf + state->buflen, 0, + BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ + (*compress)(state, state->buf, 1, state->buflen); + cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); + memcpy(out, state->h, state->outlen); +} + #endif /* BLAKE2S_INTERNAL_H */ diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index 6a4b6b78d630..c64ac8bfb6a9 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -15,55 +15,23 @@ #include #include #include -#include + +#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S) +# define blake2s_compress blake2s_compress_arch +#else +# define blake2s_compress blake2s_compress_generic +#endif void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) { - const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; - - if (unlikely(!inlen)) - return; - if (inlen > fill) { - memcpy(state->buf + state->buflen, in, fill); - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)) - blake2s_compress_arch(state, state->buf, 1, - BLAKE2S_BLOCK_SIZE); - else - blake2s_compress_generic(state, state->buf, 1, - BLAKE2S_BLOCK_SIZE); - state->buflen = 0; - in += fill; - inlen -= fill; - } - if (inlen > BLAKE2S_BLOCK_SIZE) { - const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); - /* Hash one less (full) block than strictly possible */ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)) - blake2s_compress_arch(state, in, nblocks - 1, - BLAKE2S_BLOCK_SIZE); - else - blake2s_compress_generic(state, in, nblocks - 1, - BLAKE2S_BLOCK_SIZE); - in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); - inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); - } - memcpy(state->buf + state->buflen, in, inlen); - state->buflen += inlen; + __blake2s_update(state, in, inlen, blake2s_compress); } EXPORT_SYMBOL(blake2s_update); void blake2s_final(struct blake2s_state *state, u8 *out) { WARN_ON(IS_ENABLED(DEBUG) && !out); - blake2s_set_lastblock(state); - memset(state->buf + state->buflen, 0, - BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)) - blake2s_compress_arch(state, state->buf, 1, state->buflen); - else - blake2s_compress_generic(state, state->buf, 1, state->buflen); - cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); - memcpy(out, state->h, state->outlen); + __blake2s_final(state, out, blake2s_compress); memzero_explicit(state, sizeof(*state)); } EXPORT_SYMBOL(blake2s_final); From 8c4a93a1270ddffc7660ae43fa8030ecfe9c06d9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:09:54 -0800 Subject: [PATCH 032/154] crypto: blake2s - share the "shash" API boilerplate code Add helper functions for shash implementations of BLAKE2s to include/crypto/internal/blake2s.h, taking advantage of __blake2s_update() and __blake2s_final() that were added by the previous patch to share more code between the library and shash implementations. crypto_blake2s_setkey() and crypto_blake2s_init() are usable as shash_alg::setkey and shash_alg::init directly, while crypto_blake2s_update() and crypto_blake2s_final() take an extra 'blake2s_compress_t' function pointer parameter. This allows the implementation of the compression function to be overridden, which is the only part that optimized implementations really care about. The new functions are inline functions (similar to those in sha1_base.h, sha256_base.h, and sm3_base.h) because this avoids needing to add a new module blake2s_helpers.ko, they aren't *too* long, and this avoids indirect calls which are expensive these days. Note that they can't go in blake2s_generic.ko, as that would require selecting CRYPTO_BLAKE2S from CRYPTO_BLAKE2S_X86, which would cause a recursive dependency. Finally, use these new helper functions in the x86 implementation of BLAKE2s. (This part should be a separate patch, but unfortunately the x86 implementation used the exact same function names like "crypto_blake2s_update()", so it had to be updated at the same time.) Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/blake2s-glue.c | 74 +++--------------------------- crypto/blake2s_generic.c | 76 ++++--------------------------- include/crypto/internal/blake2s.h | 65 ++++++++++++++++++++++++-- 3 files changed, 76 insertions(+), 139 deletions(-) diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c index 4dcb2ee89efc..a40365ab301e 100644 --- a/arch/x86/crypto/blake2s-glue.c +++ b/arch/x86/crypto/blake2s-glue.c @@ -58,75 +58,15 @@ void blake2s_compress_arch(struct blake2s_state *state, } EXPORT_SYMBOL(blake2s_compress_arch); -static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) +static int crypto_blake2s_update_x86(struct shash_desc *desc, + const u8 *in, unsigned int inlen) { - struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); - - if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) - return -EINVAL; - - memcpy(tctx->key, key, keylen); - tctx->keylen = keylen; - - return 0; + return crypto_blake2s_update(desc, in, inlen, blake2s_compress_arch); } -static int crypto_blake2s_init(struct shash_desc *desc) +static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out) { - struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); - struct blake2s_state *state = shash_desc_ctx(desc); - const int outlen = crypto_shash_digestsize(desc->tfm); - - if (tctx->keylen) - blake2s_init_key(state, outlen, tctx->key, tctx->keylen); - else - blake2s_init(state, outlen); - - return 0; -} - -static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, - unsigned int inlen) -{ - struct blake2s_state *state = shash_desc_ctx(desc); - const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; - - if (unlikely(!inlen)) - return 0; - if (inlen > fill) { - memcpy(state->buf + state->buflen, in, fill); - blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); - state->buflen = 0; - in += fill; - inlen -= fill; - } - if (inlen > BLAKE2S_BLOCK_SIZE) { - const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); - /* Hash one less (full) block than strictly possible */ - blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); - in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); - inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); - } - memcpy(state->buf + state->buflen, in, inlen); - state->buflen += inlen; - - return 0; -} - -static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) -{ - struct blake2s_state *state = shash_desc_ctx(desc); - - blake2s_set_lastblock(state); - memset(state->buf + state->buflen, 0, - BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ - blake2s_compress_arch(state, state->buf, 1, state->buflen); - cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); - memcpy(out, state->h, state->outlen); - memzero_explicit(state, sizeof(*state)); - - return 0; + return crypto_blake2s_final(desc, out, blake2s_compress_arch); } #define BLAKE2S_ALG(name, driver_name, digest_size) \ @@ -141,8 +81,8 @@ static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) .digestsize = digest_size, \ .setkey = crypto_blake2s_setkey, \ .init = crypto_blake2s_init, \ - .update = crypto_blake2s_update, \ - .final = crypto_blake2s_final, \ + .update = crypto_blake2s_update_x86, \ + .final = crypto_blake2s_final_x86, \ .descsize = sizeof(struct blake2s_state), \ } diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c index b89536c3671c..72fe480f9bd6 100644 --- a/crypto/blake2s_generic.c +++ b/crypto/blake2s_generic.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /* + * shash interface to the generic implementation of BLAKE2s + * * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. */ @@ -10,75 +12,15 @@ #include #include -static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) +static int crypto_blake2s_update_generic(struct shash_desc *desc, + const u8 *in, unsigned int inlen) { - struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); - - if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) - return -EINVAL; - - memcpy(tctx->key, key, keylen); - tctx->keylen = keylen; - - return 0; + return crypto_blake2s_update(desc, in, inlen, blake2s_compress_generic); } -static int crypto_blake2s_init(struct shash_desc *desc) +static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out) { - struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); - struct blake2s_state *state = shash_desc_ctx(desc); - const int outlen = crypto_shash_digestsize(desc->tfm); - - if (tctx->keylen) - blake2s_init_key(state, outlen, tctx->key, tctx->keylen); - else - blake2s_init(state, outlen); - - return 0; -} - -static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, - unsigned int inlen) -{ - struct blake2s_state *state = shash_desc_ctx(desc); - const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; - - if (unlikely(!inlen)) - return 0; - if (inlen > fill) { - memcpy(state->buf + state->buflen, in, fill); - blake2s_compress_generic(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); - state->buflen = 0; - in += fill; - inlen -= fill; - } - if (inlen > BLAKE2S_BLOCK_SIZE) { - const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); - /* Hash one less (full) block than strictly possible */ - blake2s_compress_generic(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); - in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); - inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); - } - memcpy(state->buf + state->buflen, in, inlen); - state->buflen += inlen; - - return 0; -} - -static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) -{ - struct blake2s_state *state = shash_desc_ctx(desc); - - blake2s_set_lastblock(state); - memset(state->buf + state->buflen, 0, - BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ - blake2s_compress_generic(state, state->buf, 1, state->buflen); - cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); - memcpy(out, state->h, state->outlen); - memzero_explicit(state, sizeof(*state)); - - return 0; + return crypto_blake2s_final(desc, out, blake2s_compress_generic); } #define BLAKE2S_ALG(name, driver_name, digest_size) \ @@ -93,8 +35,8 @@ static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) .digestsize = digest_size, \ .setkey = crypto_blake2s_setkey, \ .init = crypto_blake2s_init, \ - .update = crypto_blake2s_update, \ - .final = crypto_blake2s_final, \ + .update = crypto_blake2s_update_generic, \ + .final = crypto_blake2s_final_generic, \ .descsize = sizeof(struct blake2s_state), \ } diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index 42deba4b8cee..2ea0a8f5e7f4 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -1,16 +1,16 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Helper functions for BLAKE2s implementations. + * Keep this in sync with the corresponding BLAKE2b header. + */ #ifndef BLAKE2S_INTERNAL_H #define BLAKE2S_INTERNAL_H #include +#include #include -struct blake2s_tfm_ctx { - u8 key[BLAKE2S_KEY_SIZE]; - unsigned int keylen; -}; - void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, size_t nblocks, const u32 inc); @@ -27,6 +27,8 @@ static inline void blake2s_set_lastblock(struct blake2s_state *state) typedef void (*blake2s_compress_t)(struct blake2s_state *state, const u8 *block, size_t nblocks, u32 inc); +/* Helper functions for BLAKE2s shared by the library and shash APIs */ + static inline void __blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen, blake2s_compress_t compress) @@ -64,4 +66,57 @@ static inline void __blake2s_final(struct blake2s_state *state, u8 *out, memcpy(out, state->h, state->outlen); } +/* Helper functions for shash implementations of BLAKE2s */ + +struct blake2s_tfm_ctx { + u8 key[BLAKE2S_KEY_SIZE]; + unsigned int keylen; +}; + +static inline int crypto_blake2s_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); + + if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) + return -EINVAL; + + memcpy(tctx->key, key, keylen); + tctx->keylen = keylen; + + return 0; +} + +static inline int crypto_blake2s_init(struct shash_desc *desc) +{ + const struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct blake2s_state *state = shash_desc_ctx(desc); + unsigned int outlen = crypto_shash_digestsize(desc->tfm); + + if (tctx->keylen) + blake2s_init_key(state, outlen, tctx->key, tctx->keylen); + else + blake2s_init(state, outlen); + return 0; +} + +static inline int crypto_blake2s_update(struct shash_desc *desc, + const u8 *in, unsigned int inlen, + blake2s_compress_t compress) +{ + struct blake2s_state *state = shash_desc_ctx(desc); + + __blake2s_update(state, in, inlen, compress); + return 0; +} + +static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out, + blake2s_compress_t compress) +{ + struct blake2s_state *state = shash_desc_ctx(desc); + + __blake2s_final(state, out, compress); + return 0; +} + #endif /* BLAKE2S_INTERNAL_H */ From 42ad8cf821f0d8564c393e9ad7d00a1a271d18ae Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:09:55 -0800 Subject: [PATCH 033/154] crypto: blake2s - optimize blake2s initialization If no key was provided, then don't waste time initializing the block buffer, as its initial contents won't be used. Also, make crypto_blake2s_init() and blake2s() call a single internal function __blake2s_init() which treats the key as optional, rather than conditionally calling blake2s_init() or blake2s_init_key(). This reduces the compiled code size, as previously both blake2s_init() and blake2s_init_key() were being inlined into these two callers, except when the key size passed to blake2s() was a compile-time constant. These optimizations aren't that significant for BLAKE2s. However, the equivalent optimizations will be more significant for BLAKE2b, as everything is twice as big in BLAKE2b. And it's good to keep things consistent rather than making optimizations for BLAKE2b but not BLAKE2s. Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- include/crypto/blake2s.h | 53 ++++++++++++++++--------------- include/crypto/internal/blake2s.h | 5 +-- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h index b471deac28ff..734ed22b7a6a 100644 --- a/include/crypto/blake2s.h +++ b/include/crypto/blake2s.h @@ -43,29 +43,34 @@ enum blake2s_iv { BLAKE2S_IV7 = 0x5BE0CD19UL, }; -void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen); -void blake2s_final(struct blake2s_state *state, u8 *out); - -static inline void blake2s_init_param(struct blake2s_state *state, - const u32 param) +static inline void __blake2s_init(struct blake2s_state *state, size_t outlen, + const void *key, size_t keylen) { - *state = (struct blake2s_state){{ - BLAKE2S_IV0 ^ param, - BLAKE2S_IV1, - BLAKE2S_IV2, - BLAKE2S_IV3, - BLAKE2S_IV4, - BLAKE2S_IV5, - BLAKE2S_IV6, - BLAKE2S_IV7, - }}; + state->h[0] = BLAKE2S_IV0 ^ (0x01010000 | keylen << 8 | outlen); + state->h[1] = BLAKE2S_IV1; + state->h[2] = BLAKE2S_IV2; + state->h[3] = BLAKE2S_IV3; + state->h[4] = BLAKE2S_IV4; + state->h[5] = BLAKE2S_IV5; + state->h[6] = BLAKE2S_IV6; + state->h[7] = BLAKE2S_IV7; + state->t[0] = 0; + state->t[1] = 0; + state->f[0] = 0; + state->f[1] = 0; + state->buflen = 0; + state->outlen = outlen; + if (keylen) { + memcpy(state->buf, key, keylen); + memset(&state->buf[keylen], 0, BLAKE2S_BLOCK_SIZE - keylen); + state->buflen = BLAKE2S_BLOCK_SIZE; + } } static inline void blake2s_init(struct blake2s_state *state, const size_t outlen) { - blake2s_init_param(state, 0x01010000 | outlen); - state->outlen = outlen; + __blake2s_init(state, outlen, NULL, 0); } static inline void blake2s_init_key(struct blake2s_state *state, @@ -75,12 +80,12 @@ static inline void blake2s_init_key(struct blake2s_state *state, WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE || !key || !keylen || keylen > BLAKE2S_KEY_SIZE)); - blake2s_init_param(state, 0x01010000 | keylen << 8 | outlen); - memcpy(state->buf, key, keylen); - state->buflen = BLAKE2S_BLOCK_SIZE; - state->outlen = outlen; + __blake2s_init(state, outlen, key, keylen); } +void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen); +void blake2s_final(struct blake2s_state *state, u8 *out); + static inline void blake2s(u8 *out, const u8 *in, const u8 *key, const size_t outlen, const size_t inlen, const size_t keylen) @@ -91,11 +96,7 @@ static inline void blake2s(u8 *out, const u8 *in, const u8 *key, outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE || (!key && keylen))); - if (keylen) - blake2s_init_key(&state, outlen, key, keylen); - else - blake2s_init(&state, outlen); - + __blake2s_init(&state, outlen, key, keylen); blake2s_update(&state, in, inlen); blake2s_final(&state, out); } diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index 2ea0a8f5e7f4..867ef3753f5c 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -93,10 +93,7 @@ static inline int crypto_blake2s_init(struct shash_desc *desc) struct blake2s_state *state = shash_desc_ctx(desc); unsigned int outlen = crypto_shash_digestsize(desc->tfm); - if (tctx->keylen) - blake2s_init_key(state, outlen, tctx->key, tctx->keylen); - else - blake2s_init(state, outlen); + __blake2s_init(state, outlen, tctx->key, tctx->keylen); return 0; } From 7d87131fadd53a0401b5c078dd64e58c3ea6994c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:09:56 -0800 Subject: [PATCH 034/154] crypto: blake2s - add comment for blake2s_state fields The first three fields of 'struct blake2s_state' are used in assembly code, which isn't immediately obvious, so add a comment to this effect. Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- include/crypto/blake2s.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h index 734ed22b7a6a..f1c8330a61a9 100644 --- a/include/crypto/blake2s.h +++ b/include/crypto/blake2s.h @@ -24,6 +24,7 @@ enum blake2s_lengths { }; struct blake2s_state { + /* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */ u32 h[8]; u32 t[2]; u32 f[2]; From 8786841bc2020f7f2513a6c74e64912f07b9c0dc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:09:57 -0800 Subject: [PATCH 035/154] crypto: blake2s - adjust include guard naming Use the full path in the include guards for the BLAKE2s headers to avoid ambiguity and to match the convention for most files in include/crypto/. Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- include/crypto/blake2s.h | 6 +++--- include/crypto/internal/blake2s.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h index f1c8330a61a9..3f06183c2d80 100644 --- a/include/crypto/blake2s.h +++ b/include/crypto/blake2s.h @@ -3,8 +3,8 @@ * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. */ -#ifndef BLAKE2S_H -#define BLAKE2S_H +#ifndef _CRYPTO_BLAKE2S_H +#define _CRYPTO_BLAKE2S_H #include #include @@ -105,4 +105,4 @@ static inline void blake2s(u8 *out, const u8 *in, const u8 *key, void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, const size_t keylen); -#endif /* BLAKE2S_H */ +#endif /* _CRYPTO_BLAKE2S_H */ diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index 867ef3753f5c..8e50d487500f 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -4,8 +4,8 @@ * Keep this in sync with the corresponding BLAKE2b header. */ -#ifndef BLAKE2S_INTERNAL_H -#define BLAKE2S_INTERNAL_H +#ifndef _CRYPTO_INTERNAL_BLAKE2S_H +#define _CRYPTO_INTERNAL_BLAKE2S_H #include #include @@ -116,4 +116,4 @@ static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out, return 0; } -#endif /* BLAKE2S_INTERNAL_H */ +#endif /* _CRYPTO_INTERNAL_BLAKE2S_H */ From bbda6e0f1303953c855ee3669655a81b69fbe899 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:09:58 -0800 Subject: [PATCH 036/154] crypto: blake2s - include instead of Address the following checkpatch warning: WARNING: Use #include instead of Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- include/crypto/blake2s.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h index 3f06183c2d80..bc3fb59442ce 100644 --- a/include/crypto/blake2s.h +++ b/include/crypto/blake2s.h @@ -6,12 +6,11 @@ #ifndef _CRYPTO_BLAKE2S_H #define _CRYPTO_BLAKE2S_H +#include #include #include #include -#include - enum blake2s_lengths { BLAKE2S_BLOCK_SIZE = 64, BLAKE2S_HASH_SIZE = 32, From 5172d322d34c30fb926b29aeb5a064e1fd8a5e13 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:09:59 -0800 Subject: [PATCH 037/154] crypto: arm/blake2s - add ARM scalar optimized BLAKE2s Add an ARM scalar optimized implementation of BLAKE2s. NEON isn't very useful for BLAKE2s because the BLAKE2s block size is too small for NEON to help. Each NEON instruction would depend on the previous one, resulting in poor performance. With scalar instructions, on the other hand, we can take advantage of ARM's "free" rotations (like I did in chacha-scalar-core.S) to get an implementation get runs much faster than the C implementation. Performance results on Cortex-A7 in cycles per byte using the shash API: 4096-byte messages: blake2s-256-arm: 18.8 blake2s-256-generic: 26.0 500-byte messages: blake2s-256-arm: 20.3 blake2s-256-generic: 27.9 100-byte messages: blake2s-256-arm: 29.7 blake2s-256-generic: 39.2 32-byte messages: blake2s-256-arm: 50.6 blake2s-256-generic: 66.2 Except on very short messages, this is still slower than the NEON implementation of BLAKE2b which I've written; that is 14.0, 16.4, 25.8, and 76.1 cpb on 4096, 500, 100, and 32-byte messages, respectively. However, optimized BLAKE2s is useful for cases where BLAKE2s is used instead of BLAKE2b, such as WireGuard. This new implementation is added in the form of a new module blake2s-arm.ko, which is analogous to blake2s-x86_64.ko in that it provides blake2s_compress_arch() for use by the library API as well as optionally register the algorithms with the shash API. Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Tested-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 9 ++ arch/arm/crypto/Makefile | 2 + arch/arm/crypto/blake2s-core.S | 285 +++++++++++++++++++++++++++++++++ arch/arm/crypto/blake2s-glue.c | 78 +++++++++ 4 files changed, 374 insertions(+) create mode 100644 arch/arm/crypto/blake2s-core.S create mode 100644 arch/arm/crypto/blake2s-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index c9bf2df85cb9..281c829c12d0 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -62,6 +62,15 @@ config CRYPTO_SHA512_ARM SHA-512 secure hash standard (DFIPS 180-2) implemented using optimized ARM assembler and NEON, when available. +config CRYPTO_BLAKE2S_ARM + tristate "BLAKE2s digest algorithm (ARM)" + select CRYPTO_ARCH_HAVE_LIB_BLAKE2S + help + BLAKE2s digest algorithm optimized with ARM scalar instructions. This + is faster than the generic implementations of BLAKE2s and BLAKE2b, but + slower than the NEON implementation of BLAKE2b. (There is no NEON + implementation of BLAKE2s, since NEON doesn't really help with it.) + config CRYPTO_AES_ARM tristate "Scalar AES cipher for ARM" select CRYPTO_ALGAPI diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index b745c17d356f..5ad1e985a718 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o +obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += blake2s-arm.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o @@ -29,6 +30,7 @@ sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y) +blake2s-arm-y := blake2s-core.o blake2s-glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o diff --git a/arch/arm/crypto/blake2s-core.S b/arch/arm/crypto/blake2s-core.S new file mode 100644 index 000000000000..bed897e9a181 --- /dev/null +++ b/arch/arm/crypto/blake2s-core.S @@ -0,0 +1,285 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * BLAKE2s digest algorithm, ARM scalar implementation + * + * Copyright 2020 Google LLC + * + * Author: Eric Biggers + */ + +#include + + // Registers used to hold message words temporarily. There aren't + // enough ARM registers to hold the whole message block, so we have to + // load the words on-demand. + M_0 .req r12 + M_1 .req r14 + +// The BLAKE2s initialization vector +.Lblake2s_IV: + .word 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A + .word 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 + +.macro __ldrd a, b, src, offset +#if __LINUX_ARM_ARCH__ >= 6 + ldrd \a, \b, [\src, #\offset] +#else + ldr \a, [\src, #\offset] + ldr \b, [\src, #\offset + 4] +#endif +.endm + +.macro __strd a, b, dst, offset +#if __LINUX_ARM_ARCH__ >= 6 + strd \a, \b, [\dst, #\offset] +#else + str \a, [\dst, #\offset] + str \b, [\dst, #\offset + 4] +#endif +.endm + +// Execute a quarter-round of BLAKE2s by mixing two columns or two diagonals. +// (a0, b0, c0, d0) and (a1, b1, c1, d1) give the registers containing the two +// columns/diagonals. s0-s1 are the word offsets to the message words the first +// column/diagonal needs, and likewise s2-s3 for the second column/diagonal. +// M_0 and M_1 are free to use, and the message block can be found at sp + 32. +// +// Note that to save instructions, the rotations don't happen when the +// pseudocode says they should, but rather they are delayed until the values are +// used. See the comment above _blake2s_round(). +.macro _blake2s_quarterround a0, b0, c0, d0, a1, b1, c1, d1, s0, s1, s2, s3 + + ldr M_0, [sp, #32 + 4 * \s0] + ldr M_1, [sp, #32 + 4 * \s2] + + // a += b + m[blake2s_sigma[r][2*i + 0]]; + add \a0, \a0, \b0, ror #brot + add \a1, \a1, \b1, ror #brot + add \a0, \a0, M_0 + add \a1, \a1, M_1 + + // d = ror32(d ^ a, 16); + eor \d0, \a0, \d0, ror #drot + eor \d1, \a1, \d1, ror #drot + + // c += d; + add \c0, \c0, \d0, ror #16 + add \c1, \c1, \d1, ror #16 + + // b = ror32(b ^ c, 12); + eor \b0, \c0, \b0, ror #brot + eor \b1, \c1, \b1, ror #brot + + ldr M_0, [sp, #32 + 4 * \s1] + ldr M_1, [sp, #32 + 4 * \s3] + + // a += b + m[blake2s_sigma[r][2*i + 1]]; + add \a0, \a0, \b0, ror #12 + add \a1, \a1, \b1, ror #12 + add \a0, \a0, M_0 + add \a1, \a1, M_1 + + // d = ror32(d ^ a, 8); + eor \d0, \a0, \d0, ror#16 + eor \d1, \a1, \d1, ror#16 + + // c += d; + add \c0, \c0, \d0, ror#8 + add \c1, \c1, \d1, ror#8 + + // b = ror32(b ^ c, 7); + eor \b0, \c0, \b0, ror#12 + eor \b1, \c1, \b1, ror#12 +.endm + +// Execute one round of BLAKE2s by updating the state matrix v[0..15]. v[0..9] +// are in r0..r9. The stack pointer points to 8 bytes of scratch space for +// spilling v[8..9], then to v[9..15], then to the message block. r10-r12 and +// r14 are free to use. The macro arguments s0-s15 give the order in which the +// message words are used in this round. +// +// All rotates are performed using the implicit rotate operand accepted by the +// 'add' and 'eor' instructions. This is faster than using explicit rotate +// instructions. To make this work, we allow the values in the second and last +// rows of the BLAKE2s state matrix (rows 'b' and 'd') to temporarily have the +// wrong rotation amount. The rotation amount is then fixed up just in time +// when the values are used. 'brot' is the number of bits the values in row 'b' +// need to be rotated right to arrive at the correct values, and 'drot' +// similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such +// that they end up as (7, 8) after every round. +.macro _blake2s_round s0, s1, s2, s3, s4, s5, s6, s7, \ + s8, s9, s10, s11, s12, s13, s14, s15 + + // Mix first two columns: + // (v[0], v[4], v[8], v[12]) and (v[1], v[5], v[9], v[13]). + __ldrd r10, r11, sp, 16 // load v[12] and v[13] + _blake2s_quarterround r0, r4, r8, r10, r1, r5, r9, r11, \ + \s0, \s1, \s2, \s3 + __strd r8, r9, sp, 0 + __strd r10, r11, sp, 16 + + // Mix second two columns: + // (v[2], v[6], v[10], v[14]) and (v[3], v[7], v[11], v[15]). + __ldrd r8, r9, sp, 8 // load v[10] and v[11] + __ldrd r10, r11, sp, 24 // load v[14] and v[15] + _blake2s_quarterround r2, r6, r8, r10, r3, r7, r9, r11, \ + \s4, \s5, \s6, \s7 + str r10, [sp, #24] // store v[14] + // v[10], v[11], and v[15] are used below, so no need to store them yet. + + .set brot, 7 + .set drot, 8 + + // Mix first two diagonals: + // (v[0], v[5], v[10], v[15]) and (v[1], v[6], v[11], v[12]). + ldr r10, [sp, #16] // load v[12] + _blake2s_quarterround r0, r5, r8, r11, r1, r6, r9, r10, \ + \s8, \s9, \s10, \s11 + __strd r8, r9, sp, 8 + str r11, [sp, #28] + str r10, [sp, #16] + + // Mix second two diagonals: + // (v[2], v[7], v[8], v[13]) and (v[3], v[4], v[9], v[14]). + __ldrd r8, r9, sp, 0 // load v[8] and v[9] + __ldrd r10, r11, sp, 20 // load v[13] and v[14] + _blake2s_quarterround r2, r7, r8, r10, r3, r4, r9, r11, \ + \s12, \s13, \s14, \s15 + __strd r10, r11, sp, 20 +.endm + +// +// void blake2s_compress_arch(struct blake2s_state *state, +// const u8 *block, size_t nblocks, u32 inc); +// +// Only the first three fields of struct blake2s_state are used: +// u32 h[8]; (inout) +// u32 t[2]; (inout) +// u32 f[2]; (in) +// + .align 5 +ENTRY(blake2s_compress_arch) + push {r0-r2,r4-r11,lr} // keep this an even number + +.Lnext_block: + // r0 is 'state' + // r1 is 'block' + // r3 is 'inc' + + // Load and increment the counter t[0..1]. + __ldrd r10, r11, r0, 32 + adds r10, r10, r3 + adc r11, r11, #0 + __strd r10, r11, r0, 32 + + // _blake2s_round is very short on registers, so copy the message block + // to the stack to save a register during the rounds. This also has the + // advantage that misalignment only needs to be dealt with in one place. + sub sp, sp, #64 + mov r12, sp + tst r1, #3 + bne .Lcopy_block_misaligned + ldmia r1!, {r2-r9} + stmia r12!, {r2-r9} + ldmia r1!, {r2-r9} + stmia r12, {r2-r9} +.Lcopy_block_done: + str r1, [sp, #68] // Update message pointer + + // Calculate v[8..15]. Push v[9..15] onto the stack, and leave space + // for spilling v[8..9]. Leave v[8..9] in r8-r9. + mov r14, r0 // r14 = state + adr r12, .Lblake2s_IV + ldmia r12!, {r8-r9} // load IV[0..1] + __ldrd r0, r1, r14, 40 // load f[0..1] + ldm r12, {r2-r7} // load IV[3..7] + eor r4, r4, r10 // v[12] = IV[4] ^ t[0] + eor r5, r5, r11 // v[13] = IV[5] ^ t[1] + eor r6, r6, r0 // v[14] = IV[6] ^ f[0] + eor r7, r7, r1 // v[15] = IV[7] ^ f[1] + push {r2-r7} // push v[9..15] + sub sp, sp, #8 // leave space for v[8..9] + + // Load h[0..7] == v[0..7]. + ldm r14, {r0-r7} + + // Execute the rounds. Each round is provided the order in which it + // needs to use the message words. + .set brot, 0 + .set drot, 0 + _blake2s_round 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + _blake2s_round 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 + _blake2s_round 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 + _blake2s_round 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 + _blake2s_round 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 + _blake2s_round 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 + _blake2s_round 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 + _blake2s_round 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 + _blake2s_round 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 + _blake2s_round 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 + + // Fold the final state matrix into the hash chaining value: + // + // for (i = 0; i < 8; i++) + // h[i] ^= v[i] ^ v[i + 8]; + // + ldr r14, [sp, #96] // r14 = &h[0] + add sp, sp, #8 // v[8..9] are already loaded. + pop {r10-r11} // load v[10..11] + eor r0, r0, r8 + eor r1, r1, r9 + eor r2, r2, r10 + eor r3, r3, r11 + ldm r14, {r8-r11} // load h[0..3] + eor r0, r0, r8 + eor r1, r1, r9 + eor r2, r2, r10 + eor r3, r3, r11 + stmia r14!, {r0-r3} // store new h[0..3] + ldm r14, {r0-r3} // load old h[4..7] + pop {r8-r11} // load v[12..15] + eor r0, r0, r4, ror #brot + eor r1, r1, r5, ror #brot + eor r2, r2, r6, ror #brot + eor r3, r3, r7, ror #brot + eor r0, r0, r8, ror #drot + eor r1, r1, r9, ror #drot + eor r2, r2, r10, ror #drot + eor r3, r3, r11, ror #drot + add sp, sp, #64 // skip copy of message block + stm r14, {r0-r3} // store new h[4..7] + + // Advance to the next block, if there is one. Note that if there are + // multiple blocks, then 'inc' (the counter increment amount) must be + // 64. So we can simply set it to 64 without re-loading it. + ldm sp, {r0, r1, r2} // load (state, block, nblocks) + mov r3, #64 // set 'inc' + subs r2, r2, #1 // nblocks-- + str r2, [sp, #8] + bne .Lnext_block // nblocks != 0? + + pop {r0-r2,r4-r11,pc} + + // The next message block (pointed to by r1) isn't 4-byte aligned, so it + // can't be loaded using ldmia. Copy it to the stack buffer (pointed to + // by r12) using an alternative method. r2-r9 are free to use. +.Lcopy_block_misaligned: + mov r2, #64 +1: +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + ldr r3, [r1], #4 +#else + ldrb r3, [r1, #0] + ldrb r4, [r1, #1] + ldrb r5, [r1, #2] + ldrb r6, [r1, #3] + add r1, r1, #4 + orr r3, r3, r4, lsl #8 + orr r3, r3, r5, lsl #16 + orr r3, r3, r6, lsl #24 +#endif + subs r2, r2, #4 + str r3, [r12], #4 + bne 1b + b .Lcopy_block_done +ENDPROC(blake2s_compress_arch) diff --git a/arch/arm/crypto/blake2s-glue.c b/arch/arm/crypto/blake2s-glue.c new file mode 100644 index 000000000000..f2cc1e5fc9ec --- /dev/null +++ b/arch/arm/crypto/blake2s-glue.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * BLAKE2s digest algorithm, ARM scalar implementation + * + * Copyright 2020 Google LLC + */ + +#include +#include + +#include + +/* defined in blake2s-core.S */ +EXPORT_SYMBOL(blake2s_compress_arch); + +static int crypto_blake2s_update_arm(struct shash_desc *desc, + const u8 *in, unsigned int inlen) +{ + return crypto_blake2s_update(desc, in, inlen, blake2s_compress_arch); +} + +static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out) +{ + return crypto_blake2s_final(desc, out, blake2s_compress_arch); +} + +#define BLAKE2S_ALG(name, driver_name, digest_size) \ + { \ + .base.cra_name = name, \ + .base.cra_driver_name = driver_name, \ + .base.cra_priority = 200, \ + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ + .base.cra_module = THIS_MODULE, \ + .digestsize = digest_size, \ + .setkey = crypto_blake2s_setkey, \ + .init = crypto_blake2s_init, \ + .update = crypto_blake2s_update_arm, \ + .final = crypto_blake2s_final_arm, \ + .descsize = sizeof(struct blake2s_state), \ + } + +static struct shash_alg blake2s_arm_algs[] = { + BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE), + BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE), + BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE), + BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE), +}; + +static int __init blake2s_arm_mod_init(void) +{ + return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? + crypto_register_shashes(blake2s_arm_algs, + ARRAY_SIZE(blake2s_arm_algs)) : 0; +} + +static void __exit blake2s_arm_mod_exit(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) + crypto_unregister_shashes(blake2s_arm_algs, + ARRAY_SIZE(blake2s_arm_algs)); +} + +module_init(blake2s_arm_mod_init); +module_exit(blake2s_arm_mod_exit); + +MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("blake2s-128"); +MODULE_ALIAS_CRYPTO("blake2s-128-arm"); +MODULE_ALIAS_CRYPTO("blake2s-160"); +MODULE_ALIAS_CRYPTO("blake2s-160-arm"); +MODULE_ALIAS_CRYPTO("blake2s-224"); +MODULE_ALIAS_CRYPTO("blake2s-224-arm"); +MODULE_ALIAS_CRYPTO("blake2s-256"); +MODULE_ALIAS_CRYPTO("blake2s-256-arm"); From a64bfe7ad42e329a1c63575d52c7927ad0f9e202 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:10:00 -0800 Subject: [PATCH 038/154] wireguard: Kconfig: select CRYPTO_BLAKE2S_ARM When available, select the new implementation of BLAKE2s for 32-bit ARM. This is faster than the generic C implementation. Reviewed-by: Jason A. Donenfeld Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- drivers/net/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 260f9f46668b..672fcdd9aecb 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -90,6 +90,7 @@ config WIREGUARD select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON select CRYPTO_POLY1305_ARM if ARM + select CRYPTO_BLAKE2S_ARM if ARM select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2 select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT) From 28dcca4cc0c01e2467549a36b1b0eacfdb01236c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:10:01 -0800 Subject: [PATCH 039/154] crypto: blake2b - sync with blake2s implementation Sync the BLAKE2b code with the BLAKE2s code as much as possible: - Move a lot of code into new headers and , and adjust it to be like the corresponding BLAKE2s code, i.e. like and . - Rename constants, e.g. BLAKE2B_*_DIGEST_SIZE => BLAKE2B_*_HASH_SIZE. - Use a macro BLAKE2B_ALG() to define the shash_alg structs. - Export blake2b_compress_generic() for use as a fallback. This makes it much easier to add optimized implementations of BLAKE2b, as optimized implementations can use the helper functions crypto_blake2b_{setkey,init,update,final}() and blake2b_compress_generic(). The ARM implementation will use these. But this change is also helpful because it eliminates unnecessary differences between the BLAKE2b and BLAKE2s code, so that the same improvements can easily be made to both. (The two algorithms are basically identical, except for the word size and constants.) It also makes it straightforward to add a library API for BLAKE2b in the future if/when it's needed. This change does make the BLAKE2b code slightly more complicated than it needs to be, as it doesn't actually provide a library API yet. For example, __blake2b_update() doesn't really need to exist yet; it could just be inlined into crypto_blake2b_update(). But I believe this is outweighed by the benefits of keeping the code in sync. Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/blake2b_generic.c | 230 +++++++----------------------- include/crypto/blake2b.h | 67 +++++++++ include/crypto/internal/blake2b.h | 115 +++++++++++++++ 3 files changed, 232 insertions(+), 180 deletions(-) create mode 100644 include/crypto/blake2b.h create mode 100644 include/crypto/internal/blake2b.h diff --git a/crypto/blake2b_generic.c b/crypto/blake2b_generic.c index a2ffe60e06d3..963f7fe0e4ea 100644 --- a/crypto/blake2b_generic.c +++ b/crypto/blake2b_generic.c @@ -20,36 +20,11 @@ #include #include -#include #include #include +#include #include -#define BLAKE2B_160_DIGEST_SIZE (160 / 8) -#define BLAKE2B_256_DIGEST_SIZE (256 / 8) -#define BLAKE2B_384_DIGEST_SIZE (384 / 8) -#define BLAKE2B_512_DIGEST_SIZE (512 / 8) - -enum blake2b_constant { - BLAKE2B_BLOCKBYTES = 128, - BLAKE2B_KEYBYTES = 64, -}; - -struct blake2b_state { - u64 h[8]; - u64 t[2]; - u64 f[2]; - u8 buf[BLAKE2B_BLOCKBYTES]; - size_t buflen; -}; - -static const u64 blake2b_IV[8] = { - 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, - 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, - 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, - 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL -}; - static const u8 blake2b_sigma[12][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, @@ -95,8 +70,8 @@ static void blake2b_increment_counter(struct blake2b_state *S, const u64 inc) G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ } while (0) -static void blake2b_compress(struct blake2b_state *S, - const u8 block[BLAKE2B_BLOCKBYTES]) +static void blake2b_compress_one_generic(struct blake2b_state *S, + const u8 block[BLAKE2B_BLOCK_SIZE]) { u64 m[16]; u64 v[16]; @@ -108,14 +83,14 @@ static void blake2b_compress(struct blake2b_state *S, for (i = 0; i < 8; ++i) v[i] = S->h[i]; - v[ 8] = blake2b_IV[0]; - v[ 9] = blake2b_IV[1]; - v[10] = blake2b_IV[2]; - v[11] = blake2b_IV[3]; - v[12] = blake2b_IV[4] ^ S->t[0]; - v[13] = blake2b_IV[5] ^ S->t[1]; - v[14] = blake2b_IV[6] ^ S->f[0]; - v[15] = blake2b_IV[7] ^ S->f[1]; + v[ 8] = BLAKE2B_IV0; + v[ 9] = BLAKE2B_IV1; + v[10] = BLAKE2B_IV2; + v[11] = BLAKE2B_IV3; + v[12] = BLAKE2B_IV4 ^ S->t[0]; + v[13] = BLAKE2B_IV5 ^ S->t[1]; + v[14] = BLAKE2B_IV6 ^ S->f[0]; + v[15] = BLAKE2B_IV7 ^ S->f[1]; ROUND(0); ROUND(1); @@ -139,159 +114,54 @@ static void blake2b_compress(struct blake2b_state *S, #undef G #undef ROUND -struct blake2b_tfm_ctx { - u8 key[BLAKE2B_KEYBYTES]; - unsigned int keylen; -}; - -static int blake2b_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) +void blake2b_compress_generic(struct blake2b_state *state, + const u8 *block, size_t nblocks, u32 inc) { - struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(tfm); + do { + blake2b_increment_counter(state, inc); + blake2b_compress_one_generic(state, block); + block += BLAKE2B_BLOCK_SIZE; + } while (--nblocks); +} +EXPORT_SYMBOL(blake2b_compress_generic); - if (keylen == 0 || keylen > BLAKE2B_KEYBYTES) - return -EINVAL; - - memcpy(tctx->key, key, keylen); - tctx->keylen = keylen; - - return 0; +static int crypto_blake2b_update_generic(struct shash_desc *desc, + const u8 *in, unsigned int inlen) +{ + return crypto_blake2b_update(desc, in, inlen, blake2b_compress_generic); } -static int blake2b_init(struct shash_desc *desc) +static int crypto_blake2b_final_generic(struct shash_desc *desc, u8 *out) { - struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); - struct blake2b_state *state = shash_desc_ctx(desc); - const int digestsize = crypto_shash_digestsize(desc->tfm); + return crypto_blake2b_final(desc, out, blake2b_compress_generic); +} - memset(state, 0, sizeof(*state)); - memcpy(state->h, blake2b_IV, sizeof(state->h)); - - /* Parameter block is all zeros except index 0, no xor for 1..7 */ - state->h[0] ^= 0x01010000 | tctx->keylen << 8 | digestsize; - - if (tctx->keylen) { - /* - * Prefill the buffer with the key, next call to _update or - * _final will process it - */ - memcpy(state->buf, tctx->key, tctx->keylen); - state->buflen = BLAKE2B_BLOCKBYTES; +#define BLAKE2B_ALG(name, driver_name, digest_size) \ + { \ + .base.cra_name = name, \ + .base.cra_driver_name = driver_name, \ + .base.cra_priority = 100, \ + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ + .base.cra_blocksize = BLAKE2B_BLOCK_SIZE, \ + .base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), \ + .base.cra_module = THIS_MODULE, \ + .digestsize = digest_size, \ + .setkey = crypto_blake2b_setkey, \ + .init = crypto_blake2b_init, \ + .update = crypto_blake2b_update_generic, \ + .final = crypto_blake2b_final_generic, \ + .descsize = sizeof(struct blake2b_state), \ } - return 0; -} - -static int blake2b_update(struct shash_desc *desc, const u8 *in, - unsigned int inlen) -{ - struct blake2b_state *state = shash_desc_ctx(desc); - const size_t left = state->buflen; - const size_t fill = BLAKE2B_BLOCKBYTES - left; - - if (!inlen) - return 0; - - if (inlen > fill) { - state->buflen = 0; - /* Fill buffer */ - memcpy(state->buf + left, in, fill); - blake2b_increment_counter(state, BLAKE2B_BLOCKBYTES); - /* Compress */ - blake2b_compress(state, state->buf); - in += fill; - inlen -= fill; - while (inlen > BLAKE2B_BLOCKBYTES) { - blake2b_increment_counter(state, BLAKE2B_BLOCKBYTES); - blake2b_compress(state, in); - in += BLAKE2B_BLOCKBYTES; - inlen -= BLAKE2B_BLOCKBYTES; - } - } - memcpy(state->buf + state->buflen, in, inlen); - state->buflen += inlen; - - return 0; -} - -static int blake2b_final(struct shash_desc *desc, u8 *out) -{ - struct blake2b_state *state = shash_desc_ctx(desc); - const int digestsize = crypto_shash_digestsize(desc->tfm); - size_t i; - - blake2b_increment_counter(state, state->buflen); - /* Set last block */ - state->f[0] = (u64)-1; - /* Padding */ - memset(state->buf + state->buflen, 0, BLAKE2B_BLOCKBYTES - state->buflen); - blake2b_compress(state, state->buf); - - /* Avoid temporary buffer and switch the internal output to LE order */ - for (i = 0; i < ARRAY_SIZE(state->h); i++) - __cpu_to_le64s(&state->h[i]); - - memcpy(out, state->h, digestsize); - return 0; -} static struct shash_alg blake2b_algs[] = { - { - .base.cra_name = "blake2b-160", - .base.cra_driver_name = "blake2b-160-generic", - .base.cra_priority = 100, - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize = BLAKE2B_BLOCKBYTES, - .base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), - .base.cra_module = THIS_MODULE, - .digestsize = BLAKE2B_160_DIGEST_SIZE, - .setkey = blake2b_setkey, - .init = blake2b_init, - .update = blake2b_update, - .final = blake2b_final, - .descsize = sizeof(struct blake2b_state), - }, { - .base.cra_name = "blake2b-256", - .base.cra_driver_name = "blake2b-256-generic", - .base.cra_priority = 100, - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize = BLAKE2B_BLOCKBYTES, - .base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), - .base.cra_module = THIS_MODULE, - .digestsize = BLAKE2B_256_DIGEST_SIZE, - .setkey = blake2b_setkey, - .init = blake2b_init, - .update = blake2b_update, - .final = blake2b_final, - .descsize = sizeof(struct blake2b_state), - }, { - .base.cra_name = "blake2b-384", - .base.cra_driver_name = "blake2b-384-generic", - .base.cra_priority = 100, - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize = BLAKE2B_BLOCKBYTES, - .base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), - .base.cra_module = THIS_MODULE, - .digestsize = BLAKE2B_384_DIGEST_SIZE, - .setkey = blake2b_setkey, - .init = blake2b_init, - .update = blake2b_update, - .final = blake2b_final, - .descsize = sizeof(struct blake2b_state), - }, { - .base.cra_name = "blake2b-512", - .base.cra_driver_name = "blake2b-512-generic", - .base.cra_priority = 100, - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize = BLAKE2B_BLOCKBYTES, - .base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), - .base.cra_module = THIS_MODULE, - .digestsize = BLAKE2B_512_DIGEST_SIZE, - .setkey = blake2b_setkey, - .init = blake2b_init, - .update = blake2b_update, - .final = blake2b_final, - .descsize = sizeof(struct blake2b_state), - } + BLAKE2B_ALG("blake2b-160", "blake2b-160-generic", + BLAKE2B_160_HASH_SIZE), + BLAKE2B_ALG("blake2b-256", "blake2b-256-generic", + BLAKE2B_256_HASH_SIZE), + BLAKE2B_ALG("blake2b-384", "blake2b-384-generic", + BLAKE2B_384_HASH_SIZE), + BLAKE2B_ALG("blake2b-512", "blake2b-512-generic", + BLAKE2B_512_HASH_SIZE), }; static int __init blake2b_mod_init(void) diff --git a/include/crypto/blake2b.h b/include/crypto/blake2b.h new file mode 100644 index 000000000000..18875f16f8ca --- /dev/null +++ b/include/crypto/blake2b.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +#ifndef _CRYPTO_BLAKE2B_H +#define _CRYPTO_BLAKE2B_H + +#include +#include +#include +#include + +enum blake2b_lengths { + BLAKE2B_BLOCK_SIZE = 128, + BLAKE2B_HASH_SIZE = 64, + BLAKE2B_KEY_SIZE = 64, + + BLAKE2B_160_HASH_SIZE = 20, + BLAKE2B_256_HASH_SIZE = 32, + BLAKE2B_384_HASH_SIZE = 48, + BLAKE2B_512_HASH_SIZE = 64, +}; + +struct blake2b_state { + /* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */ + u64 h[8]; + u64 t[2]; + u64 f[2]; + u8 buf[BLAKE2B_BLOCK_SIZE]; + unsigned int buflen; + unsigned int outlen; +}; + +enum blake2b_iv { + BLAKE2B_IV0 = 0x6A09E667F3BCC908ULL, + BLAKE2B_IV1 = 0xBB67AE8584CAA73BULL, + BLAKE2B_IV2 = 0x3C6EF372FE94F82BULL, + BLAKE2B_IV3 = 0xA54FF53A5F1D36F1ULL, + BLAKE2B_IV4 = 0x510E527FADE682D1ULL, + BLAKE2B_IV5 = 0x9B05688C2B3E6C1FULL, + BLAKE2B_IV6 = 0x1F83D9ABFB41BD6BULL, + BLAKE2B_IV7 = 0x5BE0CD19137E2179ULL, +}; + +static inline void __blake2b_init(struct blake2b_state *state, size_t outlen, + const void *key, size_t keylen) +{ + state->h[0] = BLAKE2B_IV0 ^ (0x01010000 | keylen << 8 | outlen); + state->h[1] = BLAKE2B_IV1; + state->h[2] = BLAKE2B_IV2; + state->h[3] = BLAKE2B_IV3; + state->h[4] = BLAKE2B_IV4; + state->h[5] = BLAKE2B_IV5; + state->h[6] = BLAKE2B_IV6; + state->h[7] = BLAKE2B_IV7; + state->t[0] = 0; + state->t[1] = 0; + state->f[0] = 0; + state->f[1] = 0; + state->buflen = 0; + state->outlen = outlen; + if (keylen) { + memcpy(state->buf, key, keylen); + memset(&state->buf[keylen], 0, BLAKE2B_BLOCK_SIZE - keylen); + state->buflen = BLAKE2B_BLOCK_SIZE; + } +} + +#endif /* _CRYPTO_BLAKE2B_H */ diff --git a/include/crypto/internal/blake2b.h b/include/crypto/internal/blake2b.h new file mode 100644 index 000000000000..982fe5e8471c --- /dev/null +++ b/include/crypto/internal/blake2b.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Helper functions for BLAKE2b implementations. + * Keep this in sync with the corresponding BLAKE2s header. + */ + +#ifndef _CRYPTO_INTERNAL_BLAKE2B_H +#define _CRYPTO_INTERNAL_BLAKE2B_H + +#include +#include +#include + +void blake2b_compress_generic(struct blake2b_state *state, + const u8 *block, size_t nblocks, u32 inc); + +static inline void blake2b_set_lastblock(struct blake2b_state *state) +{ + state->f[0] = -1; +} + +typedef void (*blake2b_compress_t)(struct blake2b_state *state, + const u8 *block, size_t nblocks, u32 inc); + +static inline void __blake2b_update(struct blake2b_state *state, + const u8 *in, size_t inlen, + blake2b_compress_t compress) +{ + const size_t fill = BLAKE2B_BLOCK_SIZE - state->buflen; + + if (unlikely(!inlen)) + return; + if (inlen > fill) { + memcpy(state->buf + state->buflen, in, fill); + (*compress)(state, state->buf, 1, BLAKE2B_BLOCK_SIZE); + state->buflen = 0; + in += fill; + inlen -= fill; + } + if (inlen > BLAKE2B_BLOCK_SIZE) { + const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2B_BLOCK_SIZE); + /* Hash one less (full) block than strictly possible */ + (*compress)(state, in, nblocks - 1, BLAKE2B_BLOCK_SIZE); + in += BLAKE2B_BLOCK_SIZE * (nblocks - 1); + inlen -= BLAKE2B_BLOCK_SIZE * (nblocks - 1); + } + memcpy(state->buf + state->buflen, in, inlen); + state->buflen += inlen; +} + +static inline void __blake2b_final(struct blake2b_state *state, u8 *out, + blake2b_compress_t compress) +{ + int i; + + blake2b_set_lastblock(state); + memset(state->buf + state->buflen, 0, + BLAKE2B_BLOCK_SIZE - state->buflen); /* Padding */ + (*compress)(state, state->buf, 1, state->buflen); + for (i = 0; i < ARRAY_SIZE(state->h); i++) + __cpu_to_le64s(&state->h[i]); + memcpy(out, state->h, state->outlen); +} + +/* Helper functions for shash implementations of BLAKE2b */ + +struct blake2b_tfm_ctx { + u8 key[BLAKE2B_KEY_SIZE]; + unsigned int keylen; +}; + +static inline int crypto_blake2b_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(tfm); + + if (keylen == 0 || keylen > BLAKE2B_KEY_SIZE) + return -EINVAL; + + memcpy(tctx->key, key, keylen); + tctx->keylen = keylen; + + return 0; +} + +static inline int crypto_blake2b_init(struct shash_desc *desc) +{ + const struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct blake2b_state *state = shash_desc_ctx(desc); + unsigned int outlen = crypto_shash_digestsize(desc->tfm); + + __blake2b_init(state, outlen, tctx->key, tctx->keylen); + return 0; +} + +static inline int crypto_blake2b_update(struct shash_desc *desc, + const u8 *in, unsigned int inlen, + blake2b_compress_t compress) +{ + struct blake2b_state *state = shash_desc_ctx(desc); + + __blake2b_update(state, in, inlen, compress); + return 0; +} + +static inline int crypto_blake2b_final(struct shash_desc *desc, u8 *out, + blake2b_compress_t compress) +{ + struct blake2b_state *state = shash_desc_ctx(desc); + + __blake2b_final(state, out, compress); + return 0; +} + +#endif /* _CRYPTO_INTERNAL_BLAKE2B_H */ From 0cdc438e6e13436b0190910ef7da49ce4f5a44f4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:10:02 -0800 Subject: [PATCH 040/154] crypto: blake2b - update file comment The file comment for blake2b_generic.c makes it sound like it's the reference implementation of BLAKE2b with only minor changes. But it's actually been changed a lot. Update the comment to make this clearer. Reviewed-by: David Sterba Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/blake2b_generic.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/crypto/blake2b_generic.c b/crypto/blake2b_generic.c index 963f7fe0e4ea..6704c0355889 100644 --- a/crypto/blake2b_generic.c +++ b/crypto/blake2b_generic.c @@ -1,21 +1,18 @@ // SPDX-License-Identifier: (GPL-2.0-only OR Apache-2.0) /* - * BLAKE2b reference source code package - reference C implementations + * Generic implementation of the BLAKE2b digest algorithm. Based on the BLAKE2b + * reference implementation, but it has been heavily modified for use in the + * kernel. The reference implementation was: * - * Copyright 2012, Samuel Neves . You may use this under the - * terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at - * your option. The terms of these licenses can be found at: + * Copyright 2012, Samuel Neves . You may use this under + * the terms of the CC0, the OpenSSL Licence, or the Apache Public License + * 2.0, at your option. The terms of these licenses can be found at: * - * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - * - OpenSSL license : https://www.openssl.org/source/license.html - * - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0 + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - OpenSSL license : https://www.openssl.org/source/license.html + * - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0 * - * More information about the BLAKE2 hash function can be found at - * https://blake2.net. - * - * Note: the original sources have been modified for inclusion in linux kernel - * in terms of coding style, using generic helpers and simplifications of error - * handling. + * More information about BLAKE2 can be found at https://blake2.net. */ #include From 1862eb007367f9e4cfd52d0406742de337b28ebf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Dec 2020 00:10:03 -0800 Subject: [PATCH 041/154] crypto: arm/blake2b - add NEON-accelerated BLAKE2b Add a NEON-accelerated implementation of BLAKE2b. On Cortex-A7 (which these days is the most common ARM processor that doesn't have the ARMv8 Crypto Extensions), this is over twice as fast as SHA-256, and slightly faster than SHA-1. It is also almost three times as fast as the generic implementation of BLAKE2b: Algorithm Cycles per byte (on 4096-byte messages) =================== ======================================= blake2b-256-neon 14.0 sha1-neon 16.3 blake2s-256-arm 18.8 sha1-asm 20.8 blake2s-256-generic 26.0 sha256-neon 28.9 sha256-asm 32.0 blake2b-256-generic 38.9 This implementation isn't directly based on any other implementation, but it borrows some ideas from previous NEON code I've written as well as from chacha-neon-core.S. At least on Cortex-A7, it is faster than the other NEON implementations of BLAKE2b I'm aware of (the implementation in the BLAKE2 official repository using intrinsics, and Andrew Moon's implementation which can be found in SUPERCOP). It does only one block at a time, so it performs well on short messages too. NEON-accelerated BLAKE2b is useful because there is interest in using BLAKE2b-256 for dm-verity on low-end Android devices (specifically, devices that lack the ARMv8 Crypto Extensions) to replace SHA-1. On these devices, the performance cost of upgrading to SHA-256 may be unacceptable, whereas BLAKE2b-256 would actually improve performance. Although BLAKE2b is intended for 64-bit platforms (unlike BLAKE2s which is intended for 32-bit platforms), on 32-bit ARM processors with NEON, BLAKE2b is actually faster than BLAKE2s. This is because NEON supports 64-bit operations, and because BLAKE2s's block size is too small for NEON to be helpful for it. The best I've been able to do with BLAKE2s on Cortex-A7 is 18.8 cpb with an optimized scalar implementation. (I didn't try BLAKE2sp and BLAKE3, which in theory would be faster, but they're more complex as they require running multiple hashes at once. Note that BLAKE2b already uses all the NEON bandwidth on the Cortex-A7, so I expect that any speedup from BLAKE2sp or BLAKE3 would come only from the smaller number of rounds, not from the extra parallelism.) For now this BLAKE2b implementation is only wired up to the shash API, since there is no library API for BLAKE2b yet. However, I've tried to keep things consistent with BLAKE2s, e.g. by defining blake2b_compress_arch() which is analogous to blake2s_compress_arch() and could be exported for use by the library API later if needed. Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Tested-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 10 + arch/arm/crypto/Makefile | 2 + arch/arm/crypto/blake2b-neon-core.S | 347 ++++++++++++++++++++++++++++ arch/arm/crypto/blake2b-neon-glue.c | 105 +++++++++ 4 files changed, 464 insertions(+) create mode 100644 arch/arm/crypto/blake2b-neon-core.S create mode 100644 arch/arm/crypto/blake2b-neon-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 281c829c12d0..2b575792363e 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -71,6 +71,16 @@ config CRYPTO_BLAKE2S_ARM slower than the NEON implementation of BLAKE2b. (There is no NEON implementation of BLAKE2s, since NEON doesn't really help with it.) +config CRYPTO_BLAKE2B_NEON + tristate "BLAKE2b digest algorithm (ARM NEON)" + depends on KERNEL_MODE_NEON + select CRYPTO_BLAKE2B + help + BLAKE2b digest algorithm optimized with ARM NEON instructions. + On ARM processors that have NEON support but not the ARMv8 + Crypto Extensions, typically this BLAKE2b implementation is + much faster than SHA-2 and slightly faster than SHA-1. + config CRYPTO_AES_ARM tristate "Scalar AES cipher for ARM" select CRYPTO_ALGAPI diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 5ad1e985a718..8f26c454ea12 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += blake2s-arm.o +obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o @@ -31,6 +32,7 @@ sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y) blake2s-arm-y := blake2s-core.o blake2s-glue.o +blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o diff --git a/arch/arm/crypto/blake2b-neon-core.S b/arch/arm/crypto/blake2b-neon-core.S new file mode 100644 index 000000000000..0406a186377f --- /dev/null +++ b/arch/arm/crypto/blake2b-neon-core.S @@ -0,0 +1,347 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * BLAKE2b digest algorithm, NEON accelerated + * + * Copyright 2020 Google LLC + * + * Author: Eric Biggers + */ + +#include + + .text + .fpu neon + + // The arguments to blake2b_compress_neon() + STATE .req r0 + BLOCK .req r1 + NBLOCKS .req r2 + INC .req r3 + + // Pointers to the rotation tables + ROR24_TABLE .req r4 + ROR16_TABLE .req r5 + + // The original stack pointer + ORIG_SP .req r6 + + // NEON registers which contain the message words of the current block. + // M_0-M_3 are occasionally used for other purposes too. + M_0 .req d16 + M_1 .req d17 + M_2 .req d18 + M_3 .req d19 + M_4 .req d20 + M_5 .req d21 + M_6 .req d22 + M_7 .req d23 + M_8 .req d24 + M_9 .req d25 + M_10 .req d26 + M_11 .req d27 + M_12 .req d28 + M_13 .req d29 + M_14 .req d30 + M_15 .req d31 + + .align 4 + // Tables for computing ror64(x, 24) and ror64(x, 16) using the vtbl.8 + // instruction. This is the most efficient way to implement these + // rotation amounts with NEON. (On Cortex-A53 it's the same speed as + // vshr.u64 + vsli.u64, while on Cortex-A7 it's faster.) +.Lror24_table: + .byte 3, 4, 5, 6, 7, 0, 1, 2 +.Lror16_table: + .byte 2, 3, 4, 5, 6, 7, 0, 1 + // The BLAKE2b initialization vector +.Lblake2b_IV: + .quad 0x6a09e667f3bcc908, 0xbb67ae8584caa73b + .quad 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1 + .quad 0x510e527fade682d1, 0x9b05688c2b3e6c1f + .quad 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179 + +// Execute one round of BLAKE2b by updating the state matrix v[0..15] in the +// NEON registers q0-q7. The message block is in q8..q15 (M_0-M_15). The stack +// pointer points to a 32-byte aligned buffer containing a copy of q8 and q9 +// (M_0-M_3), so that they can be reloaded if they are used as temporary +// registers. The macro arguments s0-s15 give the order in which the message +// words are used in this round. 'final' is 1 if this is the final round. +.macro _blake2b_round s0, s1, s2, s3, s4, s5, s6, s7, \ + s8, s9, s10, s11, s12, s13, s14, s15, final=0 + + // Mix the columns: + // (v[0], v[4], v[8], v[12]), (v[1], v[5], v[9], v[13]), + // (v[2], v[6], v[10], v[14]), and (v[3], v[7], v[11], v[15]). + + // a += b + m[blake2b_sigma[r][2*i + 0]]; + vadd.u64 q0, q0, q2 + vadd.u64 q1, q1, q3 + vadd.u64 d0, d0, M_\s0 + vadd.u64 d1, d1, M_\s2 + vadd.u64 d2, d2, M_\s4 + vadd.u64 d3, d3, M_\s6 + + // d = ror64(d ^ a, 32); + veor q6, q6, q0 + veor q7, q7, q1 + vrev64.32 q6, q6 + vrev64.32 q7, q7 + + // c += d; + vadd.u64 q4, q4, q6 + vadd.u64 q5, q5, q7 + + // b = ror64(b ^ c, 24); + vld1.8 {M_0}, [ROR24_TABLE, :64] + veor q2, q2, q4 + veor q3, q3, q5 + vtbl.8 d4, {d4}, M_0 + vtbl.8 d5, {d5}, M_0 + vtbl.8 d6, {d6}, M_0 + vtbl.8 d7, {d7}, M_0 + + // a += b + m[blake2b_sigma[r][2*i + 1]]; + // + // M_0 got clobbered above, so we have to reload it if any of the four + // message words this step needs happens to be M_0. Otherwise we don't + // need to reload it here, as it will just get clobbered again below. +.if \s1 == 0 || \s3 == 0 || \s5 == 0 || \s7 == 0 + vld1.8 {M_0}, [sp, :64] +.endif + vadd.u64 q0, q0, q2 + vadd.u64 q1, q1, q3 + vadd.u64 d0, d0, M_\s1 + vadd.u64 d1, d1, M_\s3 + vadd.u64 d2, d2, M_\s5 + vadd.u64 d3, d3, M_\s7 + + // d = ror64(d ^ a, 16); + vld1.8 {M_0}, [ROR16_TABLE, :64] + veor q6, q6, q0 + veor q7, q7, q1 + vtbl.8 d12, {d12}, M_0 + vtbl.8 d13, {d13}, M_0 + vtbl.8 d14, {d14}, M_0 + vtbl.8 d15, {d15}, M_0 + + // c += d; + vadd.u64 q4, q4, q6 + vadd.u64 q5, q5, q7 + + // b = ror64(b ^ c, 63); + // + // This rotation amount isn't a multiple of 8, so it has to be + // implemented using a pair of shifts, which requires temporary + // registers. Use q8-q9 (M_0-M_3) for this, and reload them afterwards. + veor q8, q2, q4 + veor q9, q3, q5 + vshr.u64 q2, q8, #63 + vshr.u64 q3, q9, #63 + vsli.u64 q2, q8, #1 + vsli.u64 q3, q9, #1 + vld1.8 {q8-q9}, [sp, :256] + + // Mix the diagonals: + // (v[0], v[5], v[10], v[15]), (v[1], v[6], v[11], v[12]), + // (v[2], v[7], v[8], v[13]), and (v[3], v[4], v[9], v[14]). + // + // There are two possible ways to do this: use 'vext' instructions to + // shift the rows of the matrix so that the diagonals become columns, + // and undo it afterwards; or just use 64-bit operations on 'd' + // registers instead of 128-bit operations on 'q' registers. We use the + // latter approach, as it performs much better on Cortex-A7. + + // a += b + m[blake2b_sigma[r][2*i + 0]]; + vadd.u64 d0, d0, d5 + vadd.u64 d1, d1, d6 + vadd.u64 d2, d2, d7 + vadd.u64 d3, d3, d4 + vadd.u64 d0, d0, M_\s8 + vadd.u64 d1, d1, M_\s10 + vadd.u64 d2, d2, M_\s12 + vadd.u64 d3, d3, M_\s14 + + // d = ror64(d ^ a, 32); + veor d15, d15, d0 + veor d12, d12, d1 + veor d13, d13, d2 + veor d14, d14, d3 + vrev64.32 d15, d15 + vrev64.32 d12, d12 + vrev64.32 d13, d13 + vrev64.32 d14, d14 + + // c += d; + vadd.u64 d10, d10, d15 + vadd.u64 d11, d11, d12 + vadd.u64 d8, d8, d13 + vadd.u64 d9, d9, d14 + + // b = ror64(b ^ c, 24); + vld1.8 {M_0}, [ROR24_TABLE, :64] + veor d5, d5, d10 + veor d6, d6, d11 + veor d7, d7, d8 + veor d4, d4, d9 + vtbl.8 d5, {d5}, M_0 + vtbl.8 d6, {d6}, M_0 + vtbl.8 d7, {d7}, M_0 + vtbl.8 d4, {d4}, M_0 + + // a += b + m[blake2b_sigma[r][2*i + 1]]; +.if \s9 == 0 || \s11 == 0 || \s13 == 0 || \s15 == 0 + vld1.8 {M_0}, [sp, :64] +.endif + vadd.u64 d0, d0, d5 + vadd.u64 d1, d1, d6 + vadd.u64 d2, d2, d7 + vadd.u64 d3, d3, d4 + vadd.u64 d0, d0, M_\s9 + vadd.u64 d1, d1, M_\s11 + vadd.u64 d2, d2, M_\s13 + vadd.u64 d3, d3, M_\s15 + + // d = ror64(d ^ a, 16); + vld1.8 {M_0}, [ROR16_TABLE, :64] + veor d15, d15, d0 + veor d12, d12, d1 + veor d13, d13, d2 + veor d14, d14, d3 + vtbl.8 d12, {d12}, M_0 + vtbl.8 d13, {d13}, M_0 + vtbl.8 d14, {d14}, M_0 + vtbl.8 d15, {d15}, M_0 + + // c += d; + vadd.u64 d10, d10, d15 + vadd.u64 d11, d11, d12 + vadd.u64 d8, d8, d13 + vadd.u64 d9, d9, d14 + + // b = ror64(b ^ c, 63); + veor d16, d4, d9 + veor d17, d5, d10 + veor d18, d6, d11 + veor d19, d7, d8 + vshr.u64 q2, q8, #63 + vshr.u64 q3, q9, #63 + vsli.u64 q2, q8, #1 + vsli.u64 q3, q9, #1 + // Reloading q8-q9 can be skipped on the final round. +.if ! \final + vld1.8 {q8-q9}, [sp, :256] +.endif +.endm + +// +// void blake2b_compress_neon(struct blake2b_state *state, +// const u8 *block, size_t nblocks, u32 inc); +// +// Only the first three fields of struct blake2b_state are used: +// u64 h[8]; (inout) +// u64 t[2]; (inout) +// u64 f[2]; (in) +// + .align 5 +ENTRY(blake2b_compress_neon) + push {r4-r10} + + // Allocate a 32-byte stack buffer that is 32-byte aligned. + mov ORIG_SP, sp + sub ip, sp, #32 + bic ip, ip, #31 + mov sp, ip + + adr ROR24_TABLE, .Lror24_table + adr ROR16_TABLE, .Lror16_table + + mov ip, STATE + vld1.64 {q0-q1}, [ip]! // Load h[0..3] + vld1.64 {q2-q3}, [ip]! // Load h[4..7] +.Lnext_block: + adr r10, .Lblake2b_IV + vld1.64 {q14-q15}, [ip] // Load t[0..1] and f[0..1] + vld1.64 {q4-q5}, [r10]! // Load IV[0..3] + vmov r7, r8, d28 // Copy t[0] to (r7, r8) + vld1.64 {q6-q7}, [r10] // Load IV[4..7] + adds r7, r7, INC // Increment counter + bcs .Lslow_inc_ctr + vmov.i32 d28[0], r7 + vst1.64 {d28}, [ip] // Update t[0] +.Linc_ctr_done: + + // Load the next message block and finish initializing the state matrix + // 'v'. Fortunately, there are exactly enough NEON registers to fit the + // entire state matrix in q0-q7 and the entire message block in q8-15. + // + // However, _blake2b_round also needs some extra registers for rotates, + // so we have to spill some registers. It's better to spill the message + // registers than the state registers, as the message doesn't change. + // Therefore we store a copy of the first 32 bytes of the message block + // (q8-q9) in an aligned buffer on the stack so that they can be + // reloaded when needed. (We could just reload directly from the + // message buffer, but it's faster to use aligned loads.) + vld1.8 {q8-q9}, [BLOCK]! + veor q6, q6, q14 // v[12..13] = IV[4..5] ^ t[0..1] + vld1.8 {q10-q11}, [BLOCK]! + veor q7, q7, q15 // v[14..15] = IV[6..7] ^ f[0..1] + vld1.8 {q12-q13}, [BLOCK]! + vst1.8 {q8-q9}, [sp, :256] + mov ip, STATE + vld1.8 {q14-q15}, [BLOCK]! + + // Execute the rounds. Each round is provided the order in which it + // needs to use the message words. + _blake2b_round 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + _blake2b_round 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 + _blake2b_round 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 + _blake2b_round 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 + _blake2b_round 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 + _blake2b_round 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 + _blake2b_round 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 + _blake2b_round 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 + _blake2b_round 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 + _blake2b_round 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 + _blake2b_round 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + _blake2b_round 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 \ + final=1 + + // Fold the final state matrix into the hash chaining value: + // + // for (i = 0; i < 8; i++) + // h[i] ^= v[i] ^ v[i + 8]; + // + vld1.64 {q8-q9}, [ip]! // Load old h[0..3] + veor q0, q0, q4 // v[0..1] ^= v[8..9] + veor q1, q1, q5 // v[2..3] ^= v[10..11] + vld1.64 {q10-q11}, [ip] // Load old h[4..7] + veor q2, q2, q6 // v[4..5] ^= v[12..13] + veor q3, q3, q7 // v[6..7] ^= v[14..15] + veor q0, q0, q8 // v[0..1] ^= h[0..1] + veor q1, q1, q9 // v[2..3] ^= h[2..3] + mov ip, STATE + subs NBLOCKS, NBLOCKS, #1 // nblocks-- + vst1.64 {q0-q1}, [ip]! // Store new h[0..3] + veor q2, q2, q10 // v[4..5] ^= h[4..5] + veor q3, q3, q11 // v[6..7] ^= h[6..7] + vst1.64 {q2-q3}, [ip]! // Store new h[4..7] + + // Advance to the next block, if there is one. + bne .Lnext_block // nblocks != 0? + + mov sp, ORIG_SP + pop {r4-r10} + mov pc, lr + +.Lslow_inc_ctr: + // Handle the case where the counter overflowed its low 32 bits, by + // carrying the overflow bit into the full 128-bit counter. + vmov r9, r10, d29 + adcs r8, r8, #0 + adcs r9, r9, #0 + adc r10, r10, #0 + vmov d28, r7, r8 + vmov d29, r9, r10 + vst1.64 {q14}, [ip] // Update t[0] and t[1] + b .Linc_ctr_done +ENDPROC(blake2b_compress_neon) diff --git a/arch/arm/crypto/blake2b-neon-glue.c b/arch/arm/crypto/blake2b-neon-glue.c new file mode 100644 index 000000000000..34d73200e7fa --- /dev/null +++ b/arch/arm/crypto/blake2b-neon-glue.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * BLAKE2b digest algorithm, NEON accelerated + * + * Copyright 2020 Google LLC + */ + +#include +#include +#include + +#include +#include + +#include +#include + +asmlinkage void blake2b_compress_neon(struct blake2b_state *state, + const u8 *block, size_t nblocks, u32 inc); + +static void blake2b_compress_arch(struct blake2b_state *state, + const u8 *block, size_t nblocks, u32 inc) +{ + if (!crypto_simd_usable()) { + blake2b_compress_generic(state, block, nblocks, inc); + return; + } + + do { + const size_t blocks = min_t(size_t, nblocks, + SZ_4K / BLAKE2B_BLOCK_SIZE); + + kernel_neon_begin(); + blake2b_compress_neon(state, block, blocks, inc); + kernel_neon_end(); + + nblocks -= blocks; + block += blocks * BLAKE2B_BLOCK_SIZE; + } while (nblocks); +} + +static int crypto_blake2b_update_neon(struct shash_desc *desc, + const u8 *in, unsigned int inlen) +{ + return crypto_blake2b_update(desc, in, inlen, blake2b_compress_arch); +} + +static int crypto_blake2b_final_neon(struct shash_desc *desc, u8 *out) +{ + return crypto_blake2b_final(desc, out, blake2b_compress_arch); +} + +#define BLAKE2B_ALG(name, driver_name, digest_size) \ + { \ + .base.cra_name = name, \ + .base.cra_driver_name = driver_name, \ + .base.cra_priority = 200, \ + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ + .base.cra_blocksize = BLAKE2B_BLOCK_SIZE, \ + .base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), \ + .base.cra_module = THIS_MODULE, \ + .digestsize = digest_size, \ + .setkey = crypto_blake2b_setkey, \ + .init = crypto_blake2b_init, \ + .update = crypto_blake2b_update_neon, \ + .final = crypto_blake2b_final_neon, \ + .descsize = sizeof(struct blake2b_state), \ + } + +static struct shash_alg blake2b_neon_algs[] = { + BLAKE2B_ALG("blake2b-160", "blake2b-160-neon", BLAKE2B_160_HASH_SIZE), + BLAKE2B_ALG("blake2b-256", "blake2b-256-neon", BLAKE2B_256_HASH_SIZE), + BLAKE2B_ALG("blake2b-384", "blake2b-384-neon", BLAKE2B_384_HASH_SIZE), + BLAKE2B_ALG("blake2b-512", "blake2b-512-neon", BLAKE2B_512_HASH_SIZE), +}; + +static int __init blake2b_neon_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + + return crypto_register_shashes(blake2b_neon_algs, + ARRAY_SIZE(blake2b_neon_algs)); +} + +static void __exit blake2b_neon_mod_exit(void) +{ + return crypto_unregister_shashes(blake2b_neon_algs, + ARRAY_SIZE(blake2b_neon_algs)); +} + +module_init(blake2b_neon_mod_init); +module_exit(blake2b_neon_mod_exit); + +MODULE_DESCRIPTION("BLAKE2b digest algorithm, NEON accelerated"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("blake2b-160"); +MODULE_ALIAS_CRYPTO("blake2b-160-neon"); +MODULE_ALIAS_CRYPTO("blake2b-256"); +MODULE_ALIAS_CRYPTO("blake2b-256-neon"); +MODULE_ALIAS_CRYPTO("blake2b-384"); +MODULE_ALIAS_CRYPTO("blake2b-384-neon"); +MODULE_ALIAS_CRYPTO("blake2b-512"); +MODULE_ALIAS_CRYPTO("blake2b-512-neon"); From fecff3b931a52c8d5263fb1537161f0214acb44a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 10 Dec 2020 14:03:14 -0600 Subject: [PATCH 042/154] crypto: picoxcell - Remove PicoXcell driver PicoXcell has had nothing but treewide cleanups for at least the last 8 years and no signs of activity. The most recent activity is a yocto vendor kernel based on v3.0 in 2015. Cc: Jamie Iles Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Signed-off-by: Rob Herring Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 18 - drivers/crypto/Makefile | 1 - drivers/crypto/picoxcell_crypto.c | 1807 ------------------------ drivers/crypto/picoxcell_crypto_regs.h | 115 -- 4 files changed, 1941 deletions(-) delete mode 100644 drivers/crypto/picoxcell_crypto.c delete mode 100644 drivers/crypto/picoxcell_crypto_regs.h diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 857b7956feca..cc29bc3f6a6c 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -403,24 +403,6 @@ config CRYPTO_DEV_OMAP_DES endif # CRYPTO_DEV_OMAP -config CRYPTO_DEV_PICOXCELL - tristate "Support for picoXcell IPSEC and Layer2 crypto engines" - depends on (ARCH_PICOXCELL || COMPILE_TEST) && HAVE_CLK - select CRYPTO_AEAD - select CRYPTO_AES - select CRYPTO_AUTHENC - select CRYPTO_SKCIPHER - select CRYPTO_LIB_DES - select CRYPTO_CBC - select CRYPTO_ECB - select CRYPTO_SEQIV - help - This option enables support for the hardware offload engines in the - Picochip picoXcell SoC devices. Select this for IPSEC ESP offload - and for 3gpp Layer 2 ciphering support. - - Saying m here will build a module named picoxcell_crypto. - config CRYPTO_DEV_SAHARA tristate "Support for SAHARA crypto accelerator" depends on ARCH_MXC && OF diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 367630e7e888..fa22cb19e242 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -30,7 +30,6 @@ obj-$(CONFIG_CRYPTO_DEV_OMAP_DES) += omap-des.o obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o -obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c deleted file mode 100644 index 84f9c16d984c..000000000000 --- a/drivers/crypto/picoxcell_crypto.c +++ /dev/null @@ -1,1807 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2010-2011 Picochip Ltd., Jamie Iles - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "picoxcell_crypto_regs.h" - -/* - * The threshold for the number of entries in the CMD FIFO available before - * the CMD0_CNT interrupt is raised. Increasing this value will reduce the - * number of interrupts raised to the CPU. - */ -#define CMD0_IRQ_THRESHOLD 1 - -/* - * The timeout period (in jiffies) for a PDU. When the the number of PDUs in - * flight is greater than the STAT_IRQ_THRESHOLD or 0 the timer is disabled. - * When there are packets in flight but lower than the threshold, we enable - * the timer and at expiry, attempt to remove any processed packets from the - * queue and if there are still packets left, schedule the timer again. - */ -#define PACKET_TIMEOUT 1 - -/* The priority to register each algorithm with. */ -#define SPACC_CRYPTO_ALG_PRIORITY 10000 - -#define SPACC_CRYPTO_KASUMI_F8_KEY_LEN 16 -#define SPACC_CRYPTO_IPSEC_CIPHER_PG_SZ 64 -#define SPACC_CRYPTO_IPSEC_HASH_PG_SZ 64 -#define SPACC_CRYPTO_IPSEC_MAX_CTXS 32 -#define SPACC_CRYPTO_IPSEC_FIFO_SZ 32 -#define SPACC_CRYPTO_L2_CIPHER_PG_SZ 64 -#define SPACC_CRYPTO_L2_HASH_PG_SZ 64 -#define SPACC_CRYPTO_L2_MAX_CTXS 128 -#define SPACC_CRYPTO_L2_FIFO_SZ 128 - -#define MAX_DDT_LEN 16 - -/* DDT format. This must match the hardware DDT format exactly. */ -struct spacc_ddt { - dma_addr_t p; - u32 len; -}; - -/* - * Asynchronous crypto request structure. - * - * This structure defines a request that is either queued for processing or - * being processed. - */ -struct spacc_req { - struct list_head list; - struct spacc_engine *engine; - struct crypto_async_request *req; - int result; - bool is_encrypt; - unsigned ctx_id; - dma_addr_t src_addr, dst_addr; - struct spacc_ddt *src_ddt, *dst_ddt; - void (*complete)(struct spacc_req *req); - struct skcipher_request fallback_req; // keep at the end -}; - -struct spacc_aead { - unsigned long ctrl_default; - unsigned long type; - struct aead_alg alg; - struct spacc_engine *engine; - struct list_head entry; - int key_offs; - int iv_offs; -}; - -struct spacc_engine { - void __iomem *regs; - struct list_head pending; - int next_ctx; - spinlock_t hw_lock; - int in_flight; - struct list_head completed; - struct list_head in_progress; - struct tasklet_struct complete; - unsigned long fifo_sz; - void __iomem *cipher_ctx_base; - void __iomem *hash_key_base; - struct spacc_alg *algs; - unsigned num_algs; - struct list_head registered_algs; - struct spacc_aead *aeads; - unsigned num_aeads; - struct list_head registered_aeads; - size_t cipher_pg_sz; - size_t hash_pg_sz; - const char *name; - struct clk *clk; - struct device *dev; - unsigned max_ctxs; - struct timer_list packet_timeout; - unsigned stat_irq_thresh; - struct dma_pool *req_pool; -}; - -/* Algorithm type mask. */ -#define SPACC_CRYPTO_ALG_MASK 0x7 - -/* SPACC definition of a crypto algorithm. */ -struct spacc_alg { - unsigned long ctrl_default; - unsigned long type; - struct skcipher_alg alg; - struct spacc_engine *engine; - struct list_head entry; - int key_offs; - int iv_offs; -}; - -/* Generic context structure for any algorithm type. */ -struct spacc_generic_ctx { - struct spacc_engine *engine; - int flags; - int key_offs; - int iv_offs; -}; - -/* Block cipher context. */ -struct spacc_ablk_ctx { - struct spacc_generic_ctx generic; - u8 key[AES_MAX_KEY_SIZE]; - u8 key_len; - /* - * The fallback cipher. If the operation can't be done in hardware, - * fallback to a software version. - */ - struct crypto_skcipher *sw_cipher; -}; - -/* AEAD cipher context. */ -struct spacc_aead_ctx { - struct spacc_generic_ctx generic; - u8 cipher_key[AES_MAX_KEY_SIZE]; - u8 hash_ctx[SPACC_CRYPTO_IPSEC_HASH_PG_SZ]; - u8 cipher_key_len; - u8 hash_key_len; - struct crypto_aead *sw_cipher; -}; - -static int spacc_ablk_submit(struct spacc_req *req); - -static inline struct spacc_alg *to_spacc_skcipher(struct skcipher_alg *alg) -{ - return alg ? container_of(alg, struct spacc_alg, alg) : NULL; -} - -static inline struct spacc_aead *to_spacc_aead(struct aead_alg *alg) -{ - return container_of(alg, struct spacc_aead, alg); -} - -static inline int spacc_fifo_cmd_full(struct spacc_engine *engine) -{ - u32 fifo_stat = readl(engine->regs + SPA_FIFO_STAT_REG_OFFSET); - - return fifo_stat & SPA_FIFO_CMD_FULL; -} - -/* - * Given a cipher context, and a context number, get the base address of the - * context page. - * - * Returns the address of the context page where the key/context may - * be written. - */ -static inline void __iomem *spacc_ctx_page_addr(struct spacc_generic_ctx *ctx, - unsigned indx, - bool is_cipher_ctx) -{ - return is_cipher_ctx ? ctx->engine->cipher_ctx_base + - (indx * ctx->engine->cipher_pg_sz) : - ctx->engine->hash_key_base + (indx * ctx->engine->hash_pg_sz); -} - -/* The context pages can only be written with 32-bit accesses. */ -static inline void memcpy_toio32(u32 __iomem *dst, const void *src, - unsigned count) -{ - const u32 *src32 = (const u32 *) src; - - while (count--) - writel(*src32++, dst++); -} - -static void spacc_cipher_write_ctx(struct spacc_generic_ctx *ctx, - void __iomem *page_addr, const u8 *key, - size_t key_len, const u8 *iv, size_t iv_len) -{ - void __iomem *key_ptr = page_addr + ctx->key_offs; - void __iomem *iv_ptr = page_addr + ctx->iv_offs; - - memcpy_toio32(key_ptr, key, key_len / 4); - memcpy_toio32(iv_ptr, iv, iv_len / 4); -} - -/* - * Load a context into the engines context memory. - * - * Returns the index of the context page where the context was loaded. - */ -static unsigned spacc_load_ctx(struct spacc_generic_ctx *ctx, - const u8 *ciph_key, size_t ciph_len, - const u8 *iv, size_t ivlen, const u8 *hash_key, - size_t hash_len) -{ - unsigned indx = ctx->engine->next_ctx++; - void __iomem *ciph_page_addr, *hash_page_addr; - - ciph_page_addr = spacc_ctx_page_addr(ctx, indx, 1); - hash_page_addr = spacc_ctx_page_addr(ctx, indx, 0); - - ctx->engine->next_ctx &= ctx->engine->fifo_sz - 1; - spacc_cipher_write_ctx(ctx, ciph_page_addr, ciph_key, ciph_len, iv, - ivlen); - writel(ciph_len | (indx << SPA_KEY_SZ_CTX_INDEX_OFFSET) | - (1 << SPA_KEY_SZ_CIPHER_OFFSET), - ctx->engine->regs + SPA_KEY_SZ_REG_OFFSET); - - if (hash_key) { - memcpy_toio32(hash_page_addr, hash_key, hash_len / 4); - writel(hash_len | (indx << SPA_KEY_SZ_CTX_INDEX_OFFSET), - ctx->engine->regs + SPA_KEY_SZ_REG_OFFSET); - } - - return indx; -} - -static inline void ddt_set(struct spacc_ddt *ddt, dma_addr_t phys, size_t len) -{ - ddt->p = phys; - ddt->len = len; -} - -/* - * Take a crypto request and scatterlists for the data and turn them into DDTs - * for passing to the crypto engines. This also DMA maps the data so that the - * crypto engines can DMA to/from them. - */ -static struct spacc_ddt *spacc_sg_to_ddt(struct spacc_engine *engine, - struct scatterlist *payload, - unsigned nbytes, - enum dma_data_direction dir, - dma_addr_t *ddt_phys) -{ - unsigned mapped_ents; - struct scatterlist *cur; - struct spacc_ddt *ddt; - int i; - int nents; - - nents = sg_nents_for_len(payload, nbytes); - if (nents < 0) { - dev_err(engine->dev, "Invalid numbers of SG.\n"); - return NULL; - } - mapped_ents = dma_map_sg(engine->dev, payload, nents, dir); - - if (mapped_ents + 1 > MAX_DDT_LEN) - goto out; - - ddt = dma_pool_alloc(engine->req_pool, GFP_ATOMIC, ddt_phys); - if (!ddt) - goto out; - - for_each_sg(payload, cur, mapped_ents, i) - ddt_set(&ddt[i], sg_dma_address(cur), sg_dma_len(cur)); - ddt_set(&ddt[mapped_ents], 0, 0); - - return ddt; - -out: - dma_unmap_sg(engine->dev, payload, nents, dir); - return NULL; -} - -static int spacc_aead_make_ddts(struct aead_request *areq) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(areq); - struct spacc_req *req = aead_request_ctx(areq); - struct spacc_engine *engine = req->engine; - struct spacc_ddt *src_ddt, *dst_ddt; - unsigned total; - int src_nents, dst_nents; - struct scatterlist *cur; - int i, dst_ents, src_ents; - - total = areq->assoclen + areq->cryptlen; - if (req->is_encrypt) - total += crypto_aead_authsize(aead); - - src_nents = sg_nents_for_len(areq->src, total); - if (src_nents < 0) { - dev_err(engine->dev, "Invalid numbers of src SG.\n"); - return src_nents; - } - if (src_nents + 1 > MAX_DDT_LEN) - return -E2BIG; - - dst_nents = 0; - if (areq->src != areq->dst) { - dst_nents = sg_nents_for_len(areq->dst, total); - if (dst_nents < 0) { - dev_err(engine->dev, "Invalid numbers of dst SG.\n"); - return dst_nents; - } - if (src_nents + 1 > MAX_DDT_LEN) - return -E2BIG; - } - - src_ddt = dma_pool_alloc(engine->req_pool, GFP_ATOMIC, &req->src_addr); - if (!src_ddt) - goto err; - - dst_ddt = dma_pool_alloc(engine->req_pool, GFP_ATOMIC, &req->dst_addr); - if (!dst_ddt) - goto err_free_src; - - req->src_ddt = src_ddt; - req->dst_ddt = dst_ddt; - - if (dst_nents) { - src_ents = dma_map_sg(engine->dev, areq->src, src_nents, - DMA_TO_DEVICE); - if (!src_ents) - goto err_free_dst; - - dst_ents = dma_map_sg(engine->dev, areq->dst, dst_nents, - DMA_FROM_DEVICE); - - if (!dst_ents) { - dma_unmap_sg(engine->dev, areq->src, src_nents, - DMA_TO_DEVICE); - goto err_free_dst; - } - } else { - src_ents = dma_map_sg(engine->dev, areq->src, src_nents, - DMA_BIDIRECTIONAL); - if (!src_ents) - goto err_free_dst; - dst_ents = src_ents; - } - - /* - * Now map in the payload for the source and destination and terminate - * with the NULL pointers. - */ - for_each_sg(areq->src, cur, src_ents, i) - ddt_set(src_ddt++, sg_dma_address(cur), sg_dma_len(cur)); - - /* For decryption we need to skip the associated data. */ - total = req->is_encrypt ? 0 : areq->assoclen; - for_each_sg(areq->dst, cur, dst_ents, i) { - unsigned len = sg_dma_len(cur); - - if (len <= total) { - total -= len; - continue; - } - - ddt_set(dst_ddt++, sg_dma_address(cur) + total, len - total); - } - - ddt_set(src_ddt, 0, 0); - ddt_set(dst_ddt, 0, 0); - - return 0; - -err_free_dst: - dma_pool_free(engine->req_pool, dst_ddt, req->dst_addr); -err_free_src: - dma_pool_free(engine->req_pool, src_ddt, req->src_addr); -err: - return -ENOMEM; -} - -static void spacc_aead_free_ddts(struct spacc_req *req) -{ - struct aead_request *areq = container_of(req->req, struct aead_request, - base); - struct crypto_aead *aead = crypto_aead_reqtfm(areq); - unsigned total = areq->assoclen + areq->cryptlen + - (req->is_encrypt ? crypto_aead_authsize(aead) : 0); - struct spacc_aead_ctx *aead_ctx = crypto_aead_ctx(aead); - struct spacc_engine *engine = aead_ctx->generic.engine; - int nents = sg_nents_for_len(areq->src, total); - - /* sg_nents_for_len should not fail since it works when mapping sg */ - if (unlikely(nents < 0)) { - dev_err(engine->dev, "Invalid numbers of src SG.\n"); - return; - } - - if (areq->src != areq->dst) { - dma_unmap_sg(engine->dev, areq->src, nents, DMA_TO_DEVICE); - nents = sg_nents_for_len(areq->dst, total); - if (unlikely(nents < 0)) { - dev_err(engine->dev, "Invalid numbers of dst SG.\n"); - return; - } - dma_unmap_sg(engine->dev, areq->dst, nents, DMA_FROM_DEVICE); - } else - dma_unmap_sg(engine->dev, areq->src, nents, DMA_BIDIRECTIONAL); - - dma_pool_free(engine->req_pool, req->src_ddt, req->src_addr); - dma_pool_free(engine->req_pool, req->dst_ddt, req->dst_addr); -} - -static void spacc_free_ddt(struct spacc_req *req, struct spacc_ddt *ddt, - dma_addr_t ddt_addr, struct scatterlist *payload, - unsigned nbytes, enum dma_data_direction dir) -{ - int nents = sg_nents_for_len(payload, nbytes); - - if (nents < 0) { - dev_err(req->engine->dev, "Invalid numbers of SG.\n"); - return; - } - - dma_unmap_sg(req->engine->dev, payload, nents, dir); - dma_pool_free(req->engine->req_pool, ddt, ddt_addr); -} - -static int spacc_aead_setkey(struct crypto_aead *tfm, const u8 *key, - unsigned int keylen) -{ - struct spacc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct crypto_authenc_keys keys; - int err; - - crypto_aead_clear_flags(ctx->sw_cipher, CRYPTO_TFM_REQ_MASK); - crypto_aead_set_flags(ctx->sw_cipher, crypto_aead_get_flags(tfm) & - CRYPTO_TFM_REQ_MASK); - err = crypto_aead_setkey(ctx->sw_cipher, key, keylen); - if (err) - return err; - - if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) - goto badkey; - - if (keys.enckeylen > AES_MAX_KEY_SIZE) - goto badkey; - - if (keys.authkeylen > sizeof(ctx->hash_ctx)) - goto badkey; - - memcpy(ctx->cipher_key, keys.enckey, keys.enckeylen); - ctx->cipher_key_len = keys.enckeylen; - - memcpy(ctx->hash_ctx, keys.authkey, keys.authkeylen); - ctx->hash_key_len = keys.authkeylen; - - memzero_explicit(&keys, sizeof(keys)); - return 0; - -badkey: - memzero_explicit(&keys, sizeof(keys)); - return -EINVAL; -} - -static int spacc_aead_setauthsize(struct crypto_aead *tfm, - unsigned int authsize) -{ - struct spacc_aead_ctx *ctx = crypto_tfm_ctx(crypto_aead_tfm(tfm)); - - return crypto_aead_setauthsize(ctx->sw_cipher, authsize); -} - -/* - * Check if an AEAD request requires a fallback operation. Some requests can't - * be completed in hardware because the hardware may not support certain key - * sizes. In these cases we need to complete the request in software. - */ -static int spacc_aead_need_fallback(struct aead_request *aead_req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); - struct aead_alg *alg = crypto_aead_alg(aead); - struct spacc_aead *spacc_alg = to_spacc_aead(alg); - struct spacc_aead_ctx *ctx = crypto_aead_ctx(aead); - - /* - * If we have a non-supported key-length, then we need to do a - * software fallback. - */ - if ((spacc_alg->ctrl_default & SPACC_CRYPTO_ALG_MASK) == - SPA_CTRL_CIPH_ALG_AES && - ctx->cipher_key_len != AES_KEYSIZE_128 && - ctx->cipher_key_len != AES_KEYSIZE_256) - return 1; - - return 0; -} - -static int spacc_aead_do_fallback(struct aead_request *req, unsigned alg_type, - bool is_encrypt) -{ - struct crypto_tfm *old_tfm = crypto_aead_tfm(crypto_aead_reqtfm(req)); - struct spacc_aead_ctx *ctx = crypto_tfm_ctx(old_tfm); - struct aead_request *subreq = aead_request_ctx(req); - - aead_request_set_tfm(subreq, ctx->sw_cipher); - aead_request_set_callback(subreq, req->base.flags, - req->base.complete, req->base.data); - aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, - req->iv); - aead_request_set_ad(subreq, req->assoclen); - - return is_encrypt ? crypto_aead_encrypt(subreq) : - crypto_aead_decrypt(subreq); -} - -static void spacc_aead_complete(struct spacc_req *req) -{ - spacc_aead_free_ddts(req); - req->req->complete(req->req, req->result); -} - -static int spacc_aead_submit(struct spacc_req *req) -{ - struct aead_request *aead_req = - container_of(req->req, struct aead_request, base); - struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); - unsigned int authsize = crypto_aead_authsize(aead); - struct spacc_aead_ctx *ctx = crypto_aead_ctx(aead); - struct aead_alg *alg = crypto_aead_alg(aead); - struct spacc_aead *spacc_alg = to_spacc_aead(alg); - struct spacc_engine *engine = ctx->generic.engine; - u32 ctrl, proc_len, assoc_len; - - req->result = -EINPROGRESS; - req->ctx_id = spacc_load_ctx(&ctx->generic, ctx->cipher_key, - ctx->cipher_key_len, aead_req->iv, crypto_aead_ivsize(aead), - ctx->hash_ctx, ctx->hash_key_len); - - /* Set the source and destination DDT pointers. */ - writel(req->src_addr, engine->regs + SPA_SRC_PTR_REG_OFFSET); - writel(req->dst_addr, engine->regs + SPA_DST_PTR_REG_OFFSET); - writel(0, engine->regs + SPA_OFFSET_REG_OFFSET); - - assoc_len = aead_req->assoclen; - proc_len = aead_req->cryptlen + assoc_len; - - /* - * If we are decrypting, we need to take the length of the ICV out of - * the processing length. - */ - if (!req->is_encrypt) - proc_len -= authsize; - - writel(proc_len, engine->regs + SPA_PROC_LEN_REG_OFFSET); - writel(assoc_len, engine->regs + SPA_AAD_LEN_REG_OFFSET); - writel(authsize, engine->regs + SPA_ICV_LEN_REG_OFFSET); - writel(0, engine->regs + SPA_ICV_OFFSET_REG_OFFSET); - writel(0, engine->regs + SPA_AUX_INFO_REG_OFFSET); - - ctrl = spacc_alg->ctrl_default | (req->ctx_id << SPA_CTRL_CTX_IDX) | - (1 << SPA_CTRL_ICV_APPEND); - if (req->is_encrypt) - ctrl |= (1 << SPA_CTRL_ENCRYPT_IDX) | (1 << SPA_CTRL_AAD_COPY); - else - ctrl |= (1 << SPA_CTRL_KEY_EXP); - - mod_timer(&engine->packet_timeout, jiffies + PACKET_TIMEOUT); - - writel(ctrl, engine->regs + SPA_CTRL_REG_OFFSET); - - return -EINPROGRESS; -} - -static int spacc_req_submit(struct spacc_req *req); - -static void spacc_push(struct spacc_engine *engine) -{ - struct spacc_req *req; - - while (!list_empty(&engine->pending) && - engine->in_flight + 1 <= engine->fifo_sz) { - - ++engine->in_flight; - req = list_first_entry(&engine->pending, struct spacc_req, - list); - list_move_tail(&req->list, &engine->in_progress); - - req->result = spacc_req_submit(req); - } -} - -/* - * Setup an AEAD request for processing. This will configure the engine, load - * the context and then start the packet processing. - */ -static int spacc_aead_setup(struct aead_request *req, - unsigned alg_type, bool is_encrypt) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct aead_alg *alg = crypto_aead_alg(aead); - struct spacc_engine *engine = to_spacc_aead(alg)->engine; - struct spacc_req *dev_req = aead_request_ctx(req); - int err; - unsigned long flags; - - dev_req->req = &req->base; - dev_req->is_encrypt = is_encrypt; - dev_req->result = -EBUSY; - dev_req->engine = engine; - dev_req->complete = spacc_aead_complete; - - if (unlikely(spacc_aead_need_fallback(req) || - ((err = spacc_aead_make_ddts(req)) == -E2BIG))) - return spacc_aead_do_fallback(req, alg_type, is_encrypt); - - if (err) - goto out; - - err = -EINPROGRESS; - spin_lock_irqsave(&engine->hw_lock, flags); - if (unlikely(spacc_fifo_cmd_full(engine)) || - engine->in_flight + 1 > engine->fifo_sz) { - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { - err = -EBUSY; - spin_unlock_irqrestore(&engine->hw_lock, flags); - goto out_free_ddts; - } - list_add_tail(&dev_req->list, &engine->pending); - } else { - list_add_tail(&dev_req->list, &engine->pending); - spacc_push(engine); - } - spin_unlock_irqrestore(&engine->hw_lock, flags); - - goto out; - -out_free_ddts: - spacc_aead_free_ddts(dev_req); -out: - return err; -} - -static int spacc_aead_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct spacc_aead *alg = to_spacc_aead(crypto_aead_alg(aead)); - - return spacc_aead_setup(req, alg->type, 1); -} - -static int spacc_aead_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct spacc_aead *alg = to_spacc_aead(crypto_aead_alg(aead)); - - return spacc_aead_setup(req, alg->type, 0); -} - -/* - * Initialise a new AEAD context. This is responsible for allocating the - * fallback cipher and initialising the context. - */ -static int spacc_aead_cra_init(struct crypto_aead *tfm) -{ - struct spacc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_alg *alg = crypto_aead_alg(tfm); - struct spacc_aead *spacc_alg = to_spacc_aead(alg); - struct spacc_engine *engine = spacc_alg->engine; - - ctx->generic.flags = spacc_alg->type; - ctx->generic.engine = engine; - ctx->sw_cipher = crypto_alloc_aead(alg->base.cra_name, 0, - CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(ctx->sw_cipher)) - return PTR_ERR(ctx->sw_cipher); - ctx->generic.key_offs = spacc_alg->key_offs; - ctx->generic.iv_offs = spacc_alg->iv_offs; - - crypto_aead_set_reqsize( - tfm, - max(sizeof(struct spacc_req), - sizeof(struct aead_request) + - crypto_aead_reqsize(ctx->sw_cipher))); - - return 0; -} - -/* - * Destructor for an AEAD context. This is called when the transform is freed - * and must free the fallback cipher. - */ -static void spacc_aead_cra_exit(struct crypto_aead *tfm) -{ - struct spacc_aead_ctx *ctx = crypto_aead_ctx(tfm); - - crypto_free_aead(ctx->sw_cipher); -} - -/* - * Set the DES key for a block cipher transform. This also performs weak key - * checking if the transform has requested it. - */ -static int spacc_des_setkey(struct crypto_skcipher *cipher, const u8 *key, - unsigned int len) -{ - struct spacc_ablk_ctx *ctx = crypto_skcipher_ctx(cipher); - int err; - - err = verify_skcipher_des_key(cipher, key); - if (err) - return err; - - memcpy(ctx->key, key, len); - ctx->key_len = len; - - return 0; -} - -/* - * Set the 3DES key for a block cipher transform. This also performs weak key - * checking if the transform has requested it. - */ -static int spacc_des3_setkey(struct crypto_skcipher *cipher, const u8 *key, - unsigned int len) -{ - struct spacc_ablk_ctx *ctx = crypto_skcipher_ctx(cipher); - int err; - - err = verify_skcipher_des3_key(cipher, key); - if (err) - return err; - - memcpy(ctx->key, key, len); - ctx->key_len = len; - - return 0; -} - -/* - * Set the key for an AES block cipher. Some key lengths are not supported in - * hardware so this must also check whether a fallback is needed. - */ -static int spacc_aes_setkey(struct crypto_skcipher *cipher, const u8 *key, - unsigned int len) -{ - struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); - struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm); - int err = 0; - - if (len > AES_MAX_KEY_SIZE) - return -EINVAL; - - /* - * IPSec engine only supports 128 and 256 bit AES keys. If we get a - * request for any other size (192 bits) then we need to do a software - * fallback. - */ - if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256) { - if (!ctx->sw_cipher) - return -EINVAL; - - /* - * Set the fallback transform to use the same request flags as - * the hardware transform. - */ - crypto_skcipher_clear_flags(ctx->sw_cipher, - CRYPTO_TFM_REQ_MASK); - crypto_skcipher_set_flags(ctx->sw_cipher, - cipher->base.crt_flags & - CRYPTO_TFM_REQ_MASK); - - err = crypto_skcipher_setkey(ctx->sw_cipher, key, len); - if (err) - goto sw_setkey_failed; - } - - memcpy(ctx->key, key, len); - ctx->key_len = len; - -sw_setkey_failed: - return err; -} - -static int spacc_kasumi_f8_setkey(struct crypto_skcipher *cipher, - const u8 *key, unsigned int len) -{ - struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); - struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm); - int err = 0; - - if (len > AES_MAX_KEY_SIZE) { - err = -EINVAL; - goto out; - } - - memcpy(ctx->key, key, len); - ctx->key_len = len; - -out: - return err; -} - -static int spacc_ablk_need_fallback(struct spacc_req *req) -{ - struct skcipher_request *ablk_req = skcipher_request_cast(req->req); - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(ablk_req); - struct spacc_alg *spacc_alg = to_spacc_skcipher(crypto_skcipher_alg(tfm)); - struct spacc_ablk_ctx *ctx; - - ctx = crypto_skcipher_ctx(tfm); - - return (spacc_alg->ctrl_default & SPACC_CRYPTO_ALG_MASK) == - SPA_CTRL_CIPH_ALG_AES && - ctx->key_len != AES_KEYSIZE_128 && - ctx->key_len != AES_KEYSIZE_256; -} - -static void spacc_ablk_complete(struct spacc_req *req) -{ - struct skcipher_request *ablk_req = skcipher_request_cast(req->req); - - if (ablk_req->src != ablk_req->dst) { - spacc_free_ddt(req, req->src_ddt, req->src_addr, ablk_req->src, - ablk_req->cryptlen, DMA_TO_DEVICE); - spacc_free_ddt(req, req->dst_ddt, req->dst_addr, ablk_req->dst, - ablk_req->cryptlen, DMA_FROM_DEVICE); - } else - spacc_free_ddt(req, req->dst_ddt, req->dst_addr, ablk_req->dst, - ablk_req->cryptlen, DMA_BIDIRECTIONAL); - - req->req->complete(req->req, req->result); -} - -static int spacc_ablk_submit(struct spacc_req *req) -{ - struct skcipher_request *ablk_req = skcipher_request_cast(req->req); - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(ablk_req); - struct skcipher_alg *alg = crypto_skcipher_alg(tfm); - struct spacc_alg *spacc_alg = to_spacc_skcipher(alg); - struct spacc_ablk_ctx *ctx = crypto_skcipher_ctx(tfm); - struct spacc_engine *engine = ctx->generic.engine; - u32 ctrl; - - req->ctx_id = spacc_load_ctx(&ctx->generic, ctx->key, - ctx->key_len, ablk_req->iv, alg->ivsize, - NULL, 0); - - writel(req->src_addr, engine->regs + SPA_SRC_PTR_REG_OFFSET); - writel(req->dst_addr, engine->regs + SPA_DST_PTR_REG_OFFSET); - writel(0, engine->regs + SPA_OFFSET_REG_OFFSET); - - writel(ablk_req->cryptlen, engine->regs + SPA_PROC_LEN_REG_OFFSET); - writel(0, engine->regs + SPA_ICV_OFFSET_REG_OFFSET); - writel(0, engine->regs + SPA_AUX_INFO_REG_OFFSET); - writel(0, engine->regs + SPA_AAD_LEN_REG_OFFSET); - - ctrl = spacc_alg->ctrl_default | (req->ctx_id << SPA_CTRL_CTX_IDX) | - (req->is_encrypt ? (1 << SPA_CTRL_ENCRYPT_IDX) : - (1 << SPA_CTRL_KEY_EXP)); - - mod_timer(&engine->packet_timeout, jiffies + PACKET_TIMEOUT); - - writel(ctrl, engine->regs + SPA_CTRL_REG_OFFSET); - - return -EINPROGRESS; -} - -static int spacc_ablk_do_fallback(struct skcipher_request *req, - unsigned alg_type, bool is_encrypt) -{ - struct crypto_tfm *old_tfm = - crypto_skcipher_tfm(crypto_skcipher_reqtfm(req)); - struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(old_tfm); - struct spacc_req *dev_req = skcipher_request_ctx(req); - int err; - - /* - * Change the request to use the software fallback transform, and once - * the ciphering has completed, put the old transform back into the - * request. - */ - skcipher_request_set_tfm(&dev_req->fallback_req, ctx->sw_cipher); - skcipher_request_set_callback(&dev_req->fallback_req, req->base.flags, - req->base.complete, req->base.data); - skcipher_request_set_crypt(&dev_req->fallback_req, req->src, req->dst, - req->cryptlen, req->iv); - err = is_encrypt ? crypto_skcipher_encrypt(&dev_req->fallback_req) : - crypto_skcipher_decrypt(&dev_req->fallback_req); - - return err; -} - -static int spacc_ablk_setup(struct skcipher_request *req, unsigned alg_type, - bool is_encrypt) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct skcipher_alg *alg = crypto_skcipher_alg(tfm); - struct spacc_engine *engine = to_spacc_skcipher(alg)->engine; - struct spacc_req *dev_req = skcipher_request_ctx(req); - unsigned long flags; - int err = -ENOMEM; - - dev_req->req = &req->base; - dev_req->is_encrypt = is_encrypt; - dev_req->engine = engine; - dev_req->complete = spacc_ablk_complete; - dev_req->result = -EINPROGRESS; - - if (unlikely(spacc_ablk_need_fallback(dev_req))) - return spacc_ablk_do_fallback(req, alg_type, is_encrypt); - - /* - * Create the DDT's for the engine. If we share the same source and - * destination then we can optimize by reusing the DDT's. - */ - if (req->src != req->dst) { - dev_req->src_ddt = spacc_sg_to_ddt(engine, req->src, - req->cryptlen, DMA_TO_DEVICE, &dev_req->src_addr); - if (!dev_req->src_ddt) - goto out; - - dev_req->dst_ddt = spacc_sg_to_ddt(engine, req->dst, - req->cryptlen, DMA_FROM_DEVICE, &dev_req->dst_addr); - if (!dev_req->dst_ddt) - goto out_free_src; - } else { - dev_req->dst_ddt = spacc_sg_to_ddt(engine, req->dst, - req->cryptlen, DMA_BIDIRECTIONAL, &dev_req->dst_addr); - if (!dev_req->dst_ddt) - goto out; - - dev_req->src_ddt = NULL; - dev_req->src_addr = dev_req->dst_addr; - } - - err = -EINPROGRESS; - spin_lock_irqsave(&engine->hw_lock, flags); - /* - * Check if the engine will accept the operation now. If it won't then - * we either stick it on the end of a pending list if we can backlog, - * or bailout with an error if not. - */ - if (unlikely(spacc_fifo_cmd_full(engine)) || - engine->in_flight + 1 > engine->fifo_sz) { - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { - err = -EBUSY; - spin_unlock_irqrestore(&engine->hw_lock, flags); - goto out_free_ddts; - } - list_add_tail(&dev_req->list, &engine->pending); - } else { - list_add_tail(&dev_req->list, &engine->pending); - spacc_push(engine); - } - spin_unlock_irqrestore(&engine->hw_lock, flags); - - goto out; - -out_free_ddts: - spacc_free_ddt(dev_req, dev_req->dst_ddt, dev_req->dst_addr, req->dst, - req->cryptlen, req->src == req->dst ? - DMA_BIDIRECTIONAL : DMA_FROM_DEVICE); -out_free_src: - if (req->src != req->dst) - spacc_free_ddt(dev_req, dev_req->src_ddt, dev_req->src_addr, - req->src, req->cryptlen, DMA_TO_DEVICE); -out: - return err; -} - -static int spacc_ablk_init_tfm(struct crypto_skcipher *tfm) -{ - struct spacc_ablk_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_alg *alg = crypto_skcipher_alg(tfm); - struct spacc_alg *spacc_alg = to_spacc_skcipher(alg); - struct spacc_engine *engine = spacc_alg->engine; - - ctx->generic.flags = spacc_alg->type; - ctx->generic.engine = engine; - if (alg->base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) { - ctx->sw_cipher = crypto_alloc_skcipher(alg->base.cra_name, 0, - CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(ctx->sw_cipher)) { - dev_warn(engine->dev, "failed to allocate fallback for %s\n", - alg->base.cra_name); - return PTR_ERR(ctx->sw_cipher); - } - crypto_skcipher_set_reqsize(tfm, sizeof(struct spacc_req) + - crypto_skcipher_reqsize(ctx->sw_cipher)); - } else { - /* take the size without the fallback skcipher_request at the end */ - crypto_skcipher_set_reqsize(tfm, offsetof(struct spacc_req, - fallback_req)); - } - - ctx->generic.key_offs = spacc_alg->key_offs; - ctx->generic.iv_offs = spacc_alg->iv_offs; - - return 0; -} - -static void spacc_ablk_exit_tfm(struct crypto_skcipher *tfm) -{ - struct spacc_ablk_ctx *ctx = crypto_skcipher_ctx(tfm); - - crypto_free_skcipher(ctx->sw_cipher); -} - -static int spacc_ablk_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); - struct skcipher_alg *alg = crypto_skcipher_alg(cipher); - struct spacc_alg *spacc_alg = to_spacc_skcipher(alg); - - return spacc_ablk_setup(req, spacc_alg->type, 1); -} - -static int spacc_ablk_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); - struct skcipher_alg *alg = crypto_skcipher_alg(cipher); - struct spacc_alg *spacc_alg = to_spacc_skcipher(alg); - - return spacc_ablk_setup(req, spacc_alg->type, 0); -} - -static inline int spacc_fifo_stat_empty(struct spacc_engine *engine) -{ - return readl(engine->regs + SPA_FIFO_STAT_REG_OFFSET) & - SPA_FIFO_STAT_EMPTY; -} - -static void spacc_process_done(struct spacc_engine *engine) -{ - struct spacc_req *req; - unsigned long flags; - - spin_lock_irqsave(&engine->hw_lock, flags); - - while (!spacc_fifo_stat_empty(engine)) { - req = list_first_entry(&engine->in_progress, struct spacc_req, - list); - list_move_tail(&req->list, &engine->completed); - --engine->in_flight; - - /* POP the status register. */ - writel(~0, engine->regs + SPA_STAT_POP_REG_OFFSET); - req->result = (readl(engine->regs + SPA_STATUS_REG_OFFSET) & - SPA_STATUS_RES_CODE_MASK) >> SPA_STATUS_RES_CODE_OFFSET; - - /* - * Convert the SPAcc error status into the standard POSIX error - * codes. - */ - if (unlikely(req->result)) { - switch (req->result) { - case SPA_STATUS_ICV_FAIL: - req->result = -EBADMSG; - break; - - case SPA_STATUS_MEMORY_ERROR: - dev_warn(engine->dev, - "memory error triggered\n"); - req->result = -EFAULT; - break; - - case SPA_STATUS_BLOCK_ERROR: - dev_warn(engine->dev, - "block error triggered\n"); - req->result = -EIO; - break; - } - } - } - - tasklet_schedule(&engine->complete); - - spin_unlock_irqrestore(&engine->hw_lock, flags); -} - -static irqreturn_t spacc_spacc_irq(int irq, void *dev) -{ - struct spacc_engine *engine = (struct spacc_engine *)dev; - u32 spacc_irq_stat = readl(engine->regs + SPA_IRQ_STAT_REG_OFFSET); - - writel(spacc_irq_stat, engine->regs + SPA_IRQ_STAT_REG_OFFSET); - spacc_process_done(engine); - - return IRQ_HANDLED; -} - -static void spacc_packet_timeout(struct timer_list *t) -{ - struct spacc_engine *engine = from_timer(engine, t, packet_timeout); - - spacc_process_done(engine); -} - -static int spacc_req_submit(struct spacc_req *req) -{ - struct crypto_alg *alg = req->req->tfm->__crt_alg; - - if (CRYPTO_ALG_TYPE_AEAD == (CRYPTO_ALG_TYPE_MASK & alg->cra_flags)) - return spacc_aead_submit(req); - else - return spacc_ablk_submit(req); -} - -static void spacc_spacc_complete(unsigned long data) -{ - struct spacc_engine *engine = (struct spacc_engine *)data; - struct spacc_req *req, *tmp; - unsigned long flags; - LIST_HEAD(completed); - - spin_lock_irqsave(&engine->hw_lock, flags); - - list_splice_init(&engine->completed, &completed); - spacc_push(engine); - if (engine->in_flight) - mod_timer(&engine->packet_timeout, jiffies + PACKET_TIMEOUT); - - spin_unlock_irqrestore(&engine->hw_lock, flags); - - list_for_each_entry_safe(req, tmp, &completed, list) { - list_del(&req->list); - req->complete(req); - } -} - -#ifdef CONFIG_PM -static int spacc_suspend(struct device *dev) -{ - struct spacc_engine *engine = dev_get_drvdata(dev); - - /* - * We only support standby mode. All we have to do is gate the clock to - * the spacc. The hardware will preserve state until we turn it back - * on again. - */ - clk_disable(engine->clk); - - return 0; -} - -static int spacc_resume(struct device *dev) -{ - struct spacc_engine *engine = dev_get_drvdata(dev); - - return clk_enable(engine->clk); -} - -static const struct dev_pm_ops spacc_pm_ops = { - .suspend = spacc_suspend, - .resume = spacc_resume, -}; -#endif /* CONFIG_PM */ - -static inline struct spacc_engine *spacc_dev_to_engine(struct device *dev) -{ - return dev ? dev_get_drvdata(dev) : NULL; -} - -static ssize_t spacc_stat_irq_thresh_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct spacc_engine *engine = spacc_dev_to_engine(dev); - - return snprintf(buf, PAGE_SIZE, "%u\n", engine->stat_irq_thresh); -} - -static ssize_t spacc_stat_irq_thresh_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) -{ - struct spacc_engine *engine = spacc_dev_to_engine(dev); - unsigned long thresh; - - if (kstrtoul(buf, 0, &thresh)) - return -EINVAL; - - thresh = clamp(thresh, 1UL, engine->fifo_sz - 1); - - engine->stat_irq_thresh = thresh; - writel(engine->stat_irq_thresh << SPA_IRQ_CTRL_STAT_CNT_OFFSET, - engine->regs + SPA_IRQ_CTRL_REG_OFFSET); - - return len; -} -static DEVICE_ATTR(stat_irq_thresh, 0644, spacc_stat_irq_thresh_show, - spacc_stat_irq_thresh_store); - -static struct spacc_alg ipsec_engine_algs[] = { - { - .ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_CBC, - .key_offs = 0, - .iv_offs = AES_MAX_KEY_SIZE, - .alg = { - .base.cra_name = "cbc(aes)", - .base.cra_driver_name = "cbc-aes-picoxcell", - .base.cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY | - CRYPTO_ALG_NEED_FALLBACK, - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct spacc_ablk_ctx), - .base.cra_module = THIS_MODULE, - - .setkey = spacc_aes_setkey, - .encrypt = spacc_ablk_encrypt, - .decrypt = spacc_ablk_decrypt, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .init = spacc_ablk_init_tfm, - .exit = spacc_ablk_exit_tfm, - }, - }, - { - .key_offs = 0, - .iv_offs = AES_MAX_KEY_SIZE, - .ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_ECB, - .alg = { - .base.cra_name = "ecb(aes)", - .base.cra_driver_name = "ecb-aes-picoxcell", - .base.cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY | - CRYPTO_ALG_NEED_FALLBACK, - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct spacc_ablk_ctx), - .base.cra_module = THIS_MODULE, - - .setkey = spacc_aes_setkey, - .encrypt = spacc_ablk_encrypt, - .decrypt = spacc_ablk_decrypt, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .init = spacc_ablk_init_tfm, - .exit = spacc_ablk_exit_tfm, - }, - }, - { - .key_offs = DES_BLOCK_SIZE, - .iv_offs = 0, - .ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_CBC, - .alg = { - .base.cra_name = "cbc(des)", - .base.cra_driver_name = "cbc-des-picoxcell", - .base.cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY, - .base.cra_blocksize = DES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct spacc_ablk_ctx), - .base.cra_module = THIS_MODULE, - - .setkey = spacc_des_setkey, - .encrypt = spacc_ablk_encrypt, - .decrypt = spacc_ablk_decrypt, - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, - .init = spacc_ablk_init_tfm, - .exit = spacc_ablk_exit_tfm, - }, - }, - { - .key_offs = DES_BLOCK_SIZE, - .iv_offs = 0, - .ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_ECB, - .alg = { - .base.cra_name = "ecb(des)", - .base.cra_driver_name = "ecb-des-picoxcell", - .base.cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY, - .base.cra_blocksize = DES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct spacc_ablk_ctx), - .base.cra_module = THIS_MODULE, - - .setkey = spacc_des_setkey, - .encrypt = spacc_ablk_encrypt, - .decrypt = spacc_ablk_decrypt, - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .init = spacc_ablk_init_tfm, - .exit = spacc_ablk_exit_tfm, - }, - }, - { - .key_offs = DES_BLOCK_SIZE, - .iv_offs = 0, - .ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_CBC, - .alg = { - .base.cra_name = "cbc(des3_ede)", - .base.cra_driver_name = "cbc-des3-ede-picoxcell", - .base.cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .base.cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct spacc_ablk_ctx), - .base.cra_module = THIS_MODULE, - - .setkey = spacc_des3_setkey, - .encrypt = spacc_ablk_encrypt, - .decrypt = spacc_ablk_decrypt, - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - .init = spacc_ablk_init_tfm, - .exit = spacc_ablk_exit_tfm, - }, - }, - { - .key_offs = DES_BLOCK_SIZE, - .iv_offs = 0, - .ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_ECB, - .alg = { - .base.cra_name = "ecb(des3_ede)", - .base.cra_driver_name = "ecb-des3-ede-picoxcell", - .base.cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .base.cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct spacc_ablk_ctx), - .base.cra_module = THIS_MODULE, - - .setkey = spacc_des3_setkey, - .encrypt = spacc_ablk_encrypt, - .decrypt = spacc_ablk_decrypt, - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .init = spacc_ablk_init_tfm, - .exit = spacc_ablk_exit_tfm, - }, - }, -}; - -static struct spacc_aead ipsec_engine_aeads[] = { - { - .ctrl_default = SPA_CTRL_CIPH_ALG_AES | - SPA_CTRL_CIPH_MODE_CBC | - SPA_CTRL_HASH_ALG_SHA | - SPA_CTRL_HASH_MODE_HMAC, - .key_offs = 0, - .iv_offs = AES_MAX_KEY_SIZE, - .alg = { - .base = { - .cra_name = "authenc(hmac(sha1),cbc(aes))", - .cra_driver_name = "authenc-hmac-sha1-" - "cbc-aes-picoxcell", - .cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY | - CRYPTO_ALG_NEED_FALLBACK | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct spacc_aead_ctx), - .cra_module = THIS_MODULE, - }, - .setkey = spacc_aead_setkey, - .setauthsize = spacc_aead_setauthsize, - .encrypt = spacc_aead_encrypt, - .decrypt = spacc_aead_decrypt, - .ivsize = AES_BLOCK_SIZE, - .maxauthsize = SHA1_DIGEST_SIZE, - .init = spacc_aead_cra_init, - .exit = spacc_aead_cra_exit, - }, - }, - { - .ctrl_default = SPA_CTRL_CIPH_ALG_AES | - SPA_CTRL_CIPH_MODE_CBC | - SPA_CTRL_HASH_ALG_SHA256 | - SPA_CTRL_HASH_MODE_HMAC, - .key_offs = 0, - .iv_offs = AES_MAX_KEY_SIZE, - .alg = { - .base = { - .cra_name = "authenc(hmac(sha256),cbc(aes))", - .cra_driver_name = "authenc-hmac-sha256-" - "cbc-aes-picoxcell", - .cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY | - CRYPTO_ALG_NEED_FALLBACK | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct spacc_aead_ctx), - .cra_module = THIS_MODULE, - }, - .setkey = spacc_aead_setkey, - .setauthsize = spacc_aead_setauthsize, - .encrypt = spacc_aead_encrypt, - .decrypt = spacc_aead_decrypt, - .ivsize = AES_BLOCK_SIZE, - .maxauthsize = SHA256_DIGEST_SIZE, - .init = spacc_aead_cra_init, - .exit = spacc_aead_cra_exit, - }, - }, - { - .key_offs = 0, - .iv_offs = AES_MAX_KEY_SIZE, - .ctrl_default = SPA_CTRL_CIPH_ALG_AES | - SPA_CTRL_CIPH_MODE_CBC | - SPA_CTRL_HASH_ALG_MD5 | - SPA_CTRL_HASH_MODE_HMAC, - .alg = { - .base = { - .cra_name = "authenc(hmac(md5),cbc(aes))", - .cra_driver_name = "authenc-hmac-md5-" - "cbc-aes-picoxcell", - .cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY | - CRYPTO_ALG_NEED_FALLBACK | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct spacc_aead_ctx), - .cra_module = THIS_MODULE, - }, - .setkey = spacc_aead_setkey, - .setauthsize = spacc_aead_setauthsize, - .encrypt = spacc_aead_encrypt, - .decrypt = spacc_aead_decrypt, - .ivsize = AES_BLOCK_SIZE, - .maxauthsize = MD5_DIGEST_SIZE, - .init = spacc_aead_cra_init, - .exit = spacc_aead_cra_exit, - }, - }, - { - .key_offs = DES_BLOCK_SIZE, - .iv_offs = 0, - .ctrl_default = SPA_CTRL_CIPH_ALG_DES | - SPA_CTRL_CIPH_MODE_CBC | - SPA_CTRL_HASH_ALG_SHA | - SPA_CTRL_HASH_MODE_HMAC, - .alg = { - .base = { - .cra_name = "authenc(hmac(sha1),cbc(des3_ede))", - .cra_driver_name = "authenc-hmac-sha1-" - "cbc-3des-picoxcell", - .cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY | - CRYPTO_ALG_NEED_FALLBACK | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct spacc_aead_ctx), - .cra_module = THIS_MODULE, - }, - .setkey = spacc_aead_setkey, - .setauthsize = spacc_aead_setauthsize, - .encrypt = spacc_aead_encrypt, - .decrypt = spacc_aead_decrypt, - .ivsize = DES3_EDE_BLOCK_SIZE, - .maxauthsize = SHA1_DIGEST_SIZE, - .init = spacc_aead_cra_init, - .exit = spacc_aead_cra_exit, - }, - }, - { - .key_offs = DES_BLOCK_SIZE, - .iv_offs = 0, - .ctrl_default = SPA_CTRL_CIPH_ALG_AES | - SPA_CTRL_CIPH_MODE_CBC | - SPA_CTRL_HASH_ALG_SHA256 | - SPA_CTRL_HASH_MODE_HMAC, - .alg = { - .base = { - .cra_name = "authenc(hmac(sha256)," - "cbc(des3_ede))", - .cra_driver_name = "authenc-hmac-sha256-" - "cbc-3des-picoxcell", - .cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY | - CRYPTO_ALG_NEED_FALLBACK | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct spacc_aead_ctx), - .cra_module = THIS_MODULE, - }, - .setkey = spacc_aead_setkey, - .setauthsize = spacc_aead_setauthsize, - .encrypt = spacc_aead_encrypt, - .decrypt = spacc_aead_decrypt, - .ivsize = DES3_EDE_BLOCK_SIZE, - .maxauthsize = SHA256_DIGEST_SIZE, - .init = spacc_aead_cra_init, - .exit = spacc_aead_cra_exit, - }, - }, - { - .key_offs = DES_BLOCK_SIZE, - .iv_offs = 0, - .ctrl_default = SPA_CTRL_CIPH_ALG_DES | - SPA_CTRL_CIPH_MODE_CBC | - SPA_CTRL_HASH_ALG_MD5 | - SPA_CTRL_HASH_MODE_HMAC, - .alg = { - .base = { - .cra_name = "authenc(hmac(md5),cbc(des3_ede))", - .cra_driver_name = "authenc-hmac-md5-" - "cbc-3des-picoxcell", - .cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY | - CRYPTO_ALG_NEED_FALLBACK | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct spacc_aead_ctx), - .cra_module = THIS_MODULE, - }, - .setkey = spacc_aead_setkey, - .setauthsize = spacc_aead_setauthsize, - .encrypt = spacc_aead_encrypt, - .decrypt = spacc_aead_decrypt, - .ivsize = DES3_EDE_BLOCK_SIZE, - .maxauthsize = MD5_DIGEST_SIZE, - .init = spacc_aead_cra_init, - .exit = spacc_aead_cra_exit, - }, - }, -}; - -static struct spacc_alg l2_engine_algs[] = { - { - .key_offs = 0, - .iv_offs = SPACC_CRYPTO_KASUMI_F8_KEY_LEN, - .ctrl_default = SPA_CTRL_CIPH_ALG_KASUMI | - SPA_CTRL_CIPH_MODE_F8, - .alg = { - .base.cra_name = "f8(kasumi)", - .base.cra_driver_name = "f8-kasumi-picoxcell", - .base.cra_priority = SPACC_CRYPTO_ALG_PRIORITY, - .base.cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .base.cra_blocksize = 8, - .base.cra_ctxsize = sizeof(struct spacc_ablk_ctx), - .base.cra_module = THIS_MODULE, - - .setkey = spacc_kasumi_f8_setkey, - .encrypt = spacc_ablk_encrypt, - .decrypt = spacc_ablk_decrypt, - .min_keysize = 16, - .max_keysize = 16, - .ivsize = 8, - .init = spacc_ablk_init_tfm, - .exit = spacc_ablk_exit_tfm, - }, - }, -}; - -#ifdef CONFIG_OF -static const struct of_device_id spacc_of_id_table[] = { - { .compatible = "picochip,spacc-ipsec" }, - { .compatible = "picochip,spacc-l2" }, - {} -}; -MODULE_DEVICE_TABLE(of, spacc_of_id_table); -#endif /* CONFIG_OF */ - -static void spacc_tasklet_kill(void *data) -{ - tasklet_kill(data); -} - -static int spacc_probe(struct platform_device *pdev) -{ - int i, err, ret; - struct resource *irq; - struct device_node *np = pdev->dev.of_node; - struct spacc_engine *engine = devm_kzalloc(&pdev->dev, sizeof(*engine), - GFP_KERNEL); - if (!engine) - return -ENOMEM; - - if (of_device_is_compatible(np, "picochip,spacc-ipsec")) { - engine->max_ctxs = SPACC_CRYPTO_IPSEC_MAX_CTXS; - engine->cipher_pg_sz = SPACC_CRYPTO_IPSEC_CIPHER_PG_SZ; - engine->hash_pg_sz = SPACC_CRYPTO_IPSEC_HASH_PG_SZ; - engine->fifo_sz = SPACC_CRYPTO_IPSEC_FIFO_SZ; - engine->algs = ipsec_engine_algs; - engine->num_algs = ARRAY_SIZE(ipsec_engine_algs); - engine->aeads = ipsec_engine_aeads; - engine->num_aeads = ARRAY_SIZE(ipsec_engine_aeads); - } else if (of_device_is_compatible(np, "picochip,spacc-l2")) { - engine->max_ctxs = SPACC_CRYPTO_L2_MAX_CTXS; - engine->cipher_pg_sz = SPACC_CRYPTO_L2_CIPHER_PG_SZ; - engine->hash_pg_sz = SPACC_CRYPTO_L2_HASH_PG_SZ; - engine->fifo_sz = SPACC_CRYPTO_L2_FIFO_SZ; - engine->algs = l2_engine_algs; - engine->num_algs = ARRAY_SIZE(l2_engine_algs); - } else { - return -EINVAL; - } - - engine->name = dev_name(&pdev->dev); - - engine->regs = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(engine->regs)) - return PTR_ERR(engine->regs); - - irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!irq) { - dev_err(&pdev->dev, "no memory/irq resource for engine\n"); - return -ENXIO; - } - - tasklet_init(&engine->complete, spacc_spacc_complete, - (unsigned long)engine); - - ret = devm_add_action(&pdev->dev, spacc_tasklet_kill, - &engine->complete); - if (ret) - return ret; - - if (devm_request_irq(&pdev->dev, irq->start, spacc_spacc_irq, 0, - engine->name, engine)) { - dev_err(engine->dev, "failed to request IRQ\n"); - return -EBUSY; - } - - engine->dev = &pdev->dev; - engine->cipher_ctx_base = engine->regs + SPA_CIPH_KEY_BASE_REG_OFFSET; - engine->hash_key_base = engine->regs + SPA_HASH_KEY_BASE_REG_OFFSET; - - engine->req_pool = dmam_pool_create(engine->name, engine->dev, - MAX_DDT_LEN * sizeof(struct spacc_ddt), 8, SZ_64K); - if (!engine->req_pool) - return -ENOMEM; - - spin_lock_init(&engine->hw_lock); - - engine->clk = clk_get(&pdev->dev, "ref"); - if (IS_ERR(engine->clk)) { - dev_info(&pdev->dev, "clk unavailable\n"); - return PTR_ERR(engine->clk); - } - - if (clk_prepare_enable(engine->clk)) { - dev_info(&pdev->dev, "unable to prepare/enable clk\n"); - ret = -EIO; - goto err_clk_put; - } - - /* - * Use an IRQ threshold of 50% as a default. This seems to be a - * reasonable trade off of latency against throughput but can be - * changed at runtime. - */ - engine->stat_irq_thresh = (engine->fifo_sz / 2); - - ret = device_create_file(&pdev->dev, &dev_attr_stat_irq_thresh); - if (ret) - goto err_clk_disable; - - /* - * Configure the interrupts. We only use the STAT_CNT interrupt as we - * only submit a new packet for processing when we complete another in - * the queue. This minimizes time spent in the interrupt handler. - */ - writel(engine->stat_irq_thresh << SPA_IRQ_CTRL_STAT_CNT_OFFSET, - engine->regs + SPA_IRQ_CTRL_REG_OFFSET); - writel(SPA_IRQ_EN_STAT_EN | SPA_IRQ_EN_GLBL_EN, - engine->regs + SPA_IRQ_EN_REG_OFFSET); - - timer_setup(&engine->packet_timeout, spacc_packet_timeout, 0); - - INIT_LIST_HEAD(&engine->pending); - INIT_LIST_HEAD(&engine->completed); - INIT_LIST_HEAD(&engine->in_progress); - engine->in_flight = 0; - - platform_set_drvdata(pdev, engine); - - ret = -EINVAL; - INIT_LIST_HEAD(&engine->registered_algs); - for (i = 0; i < engine->num_algs; ++i) { - engine->algs[i].engine = engine; - err = crypto_register_skcipher(&engine->algs[i].alg); - if (!err) { - list_add_tail(&engine->algs[i].entry, - &engine->registered_algs); - ret = 0; - } - if (err) - dev_err(engine->dev, "failed to register alg \"%s\"\n", - engine->algs[i].alg.base.cra_name); - else - dev_dbg(engine->dev, "registered alg \"%s\"\n", - engine->algs[i].alg.base.cra_name); - } - - INIT_LIST_HEAD(&engine->registered_aeads); - for (i = 0; i < engine->num_aeads; ++i) { - engine->aeads[i].engine = engine; - err = crypto_register_aead(&engine->aeads[i].alg); - if (!err) { - list_add_tail(&engine->aeads[i].entry, - &engine->registered_aeads); - ret = 0; - } - if (err) - dev_err(engine->dev, "failed to register alg \"%s\"\n", - engine->aeads[i].alg.base.cra_name); - else - dev_dbg(engine->dev, "registered alg \"%s\"\n", - engine->aeads[i].alg.base.cra_name); - } - - if (!ret) - return 0; - - del_timer_sync(&engine->packet_timeout); - device_remove_file(&pdev->dev, &dev_attr_stat_irq_thresh); -err_clk_disable: - clk_disable_unprepare(engine->clk); -err_clk_put: - clk_put(engine->clk); - - return ret; -} - -static int spacc_remove(struct platform_device *pdev) -{ - struct spacc_aead *aead, *an; - struct spacc_alg *alg, *next; - struct spacc_engine *engine = platform_get_drvdata(pdev); - - del_timer_sync(&engine->packet_timeout); - device_remove_file(&pdev->dev, &dev_attr_stat_irq_thresh); - - list_for_each_entry_safe(aead, an, &engine->registered_aeads, entry) { - list_del(&aead->entry); - crypto_unregister_aead(&aead->alg); - } - - list_for_each_entry_safe(alg, next, &engine->registered_algs, entry) { - list_del(&alg->entry); - crypto_unregister_skcipher(&alg->alg); - } - - clk_disable_unprepare(engine->clk); - clk_put(engine->clk); - - return 0; -} - -static struct platform_driver spacc_driver = { - .probe = spacc_probe, - .remove = spacc_remove, - .driver = { - .name = "picochip,spacc", -#ifdef CONFIG_PM - .pm = &spacc_pm_ops, -#endif /* CONFIG_PM */ - .of_match_table = of_match_ptr(spacc_of_id_table), - }, -}; - -module_platform_driver(spacc_driver); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jamie Iles"); diff --git a/drivers/crypto/picoxcell_crypto_regs.h b/drivers/crypto/picoxcell_crypto_regs.h deleted file mode 100644 index b870a50238ba..000000000000 --- a/drivers/crypto/picoxcell_crypto_regs.h +++ /dev/null @@ -1,115 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2010 Picochip Ltd., Jamie Iles - */ -#ifndef __PICOXCELL_CRYPTO_REGS_H__ -#define __PICOXCELL_CRYPTO_REGS_H__ - -#define SPA_STATUS_OK 0 -#define SPA_STATUS_ICV_FAIL 1 -#define SPA_STATUS_MEMORY_ERROR 2 -#define SPA_STATUS_BLOCK_ERROR 3 - -#define SPA_IRQ_CTRL_STAT_CNT_OFFSET 16 -#define SPA_IRQ_STAT_STAT_MASK (1 << 4) -#define SPA_FIFO_STAT_STAT_OFFSET 16 -#define SPA_FIFO_STAT_STAT_CNT_MASK (0x3F << SPA_FIFO_STAT_STAT_OFFSET) -#define SPA_STATUS_RES_CODE_OFFSET 24 -#define SPA_STATUS_RES_CODE_MASK (0x3 << SPA_STATUS_RES_CODE_OFFSET) -#define SPA_KEY_SZ_CTX_INDEX_OFFSET 8 -#define SPA_KEY_SZ_CIPHER_OFFSET 31 - -#define SPA_IRQ_EN_REG_OFFSET 0x00000000 -#define SPA_IRQ_STAT_REG_OFFSET 0x00000004 -#define SPA_IRQ_CTRL_REG_OFFSET 0x00000008 -#define SPA_FIFO_STAT_REG_OFFSET 0x0000000C -#define SPA_SDMA_BRST_SZ_REG_OFFSET 0x00000010 -#define SPA_SRC_PTR_REG_OFFSET 0x00000020 -#define SPA_DST_PTR_REG_OFFSET 0x00000024 -#define SPA_OFFSET_REG_OFFSET 0x00000028 -#define SPA_AAD_LEN_REG_OFFSET 0x0000002C -#define SPA_PROC_LEN_REG_OFFSET 0x00000030 -#define SPA_ICV_LEN_REG_OFFSET 0x00000034 -#define SPA_ICV_OFFSET_REG_OFFSET 0x00000038 -#define SPA_SW_CTRL_REG_OFFSET 0x0000003C -#define SPA_CTRL_REG_OFFSET 0x00000040 -#define SPA_AUX_INFO_REG_OFFSET 0x0000004C -#define SPA_STAT_POP_REG_OFFSET 0x00000050 -#define SPA_STATUS_REG_OFFSET 0x00000054 -#define SPA_KEY_SZ_REG_OFFSET 0x00000100 -#define SPA_CIPH_KEY_BASE_REG_OFFSET 0x00004000 -#define SPA_HASH_KEY_BASE_REG_OFFSET 0x00008000 -#define SPA_RC4_CTX_BASE_REG_OFFSET 0x00020000 - -#define SPA_IRQ_EN_REG_RESET 0x00000000 -#define SPA_IRQ_CTRL_REG_RESET 0x00000000 -#define SPA_FIFO_STAT_REG_RESET 0x00000000 -#define SPA_SDMA_BRST_SZ_REG_RESET 0x00000000 -#define SPA_SRC_PTR_REG_RESET 0x00000000 -#define SPA_DST_PTR_REG_RESET 0x00000000 -#define SPA_OFFSET_REG_RESET 0x00000000 -#define SPA_AAD_LEN_REG_RESET 0x00000000 -#define SPA_PROC_LEN_REG_RESET 0x00000000 -#define SPA_ICV_LEN_REG_RESET 0x00000000 -#define SPA_ICV_OFFSET_REG_RESET 0x00000000 -#define SPA_SW_CTRL_REG_RESET 0x00000000 -#define SPA_CTRL_REG_RESET 0x00000000 -#define SPA_AUX_INFO_REG_RESET 0x00000000 -#define SPA_STAT_POP_REG_RESET 0x00000000 -#define SPA_STATUS_REG_RESET 0x00000000 -#define SPA_KEY_SZ_REG_RESET 0x00000000 - -#define SPA_CTRL_HASH_ALG_IDX 4 -#define SPA_CTRL_CIPH_MODE_IDX 8 -#define SPA_CTRL_HASH_MODE_IDX 12 -#define SPA_CTRL_CTX_IDX 16 -#define SPA_CTRL_ENCRYPT_IDX 24 -#define SPA_CTRL_AAD_COPY 25 -#define SPA_CTRL_ICV_PT 26 -#define SPA_CTRL_ICV_ENC 27 -#define SPA_CTRL_ICV_APPEND 28 -#define SPA_CTRL_KEY_EXP 29 - -#define SPA_KEY_SZ_CXT_IDX 8 -#define SPA_KEY_SZ_CIPHER_IDX 31 - -#define SPA_IRQ_EN_CMD0_EN (1 << 0) -#define SPA_IRQ_EN_STAT_EN (1 << 4) -#define SPA_IRQ_EN_GLBL_EN (1 << 31) - -#define SPA_CTRL_CIPH_ALG_NULL 0x00 -#define SPA_CTRL_CIPH_ALG_DES 0x01 -#define SPA_CTRL_CIPH_ALG_AES 0x02 -#define SPA_CTRL_CIPH_ALG_RC4 0x03 -#define SPA_CTRL_CIPH_ALG_MULTI2 0x04 -#define SPA_CTRL_CIPH_ALG_KASUMI 0x05 - -#define SPA_CTRL_HASH_ALG_NULL (0x00 << SPA_CTRL_HASH_ALG_IDX) -#define SPA_CTRL_HASH_ALG_MD5 (0x01 << SPA_CTRL_HASH_ALG_IDX) -#define SPA_CTRL_HASH_ALG_SHA (0x02 << SPA_CTRL_HASH_ALG_IDX) -#define SPA_CTRL_HASH_ALG_SHA224 (0x03 << SPA_CTRL_HASH_ALG_IDX) -#define SPA_CTRL_HASH_ALG_SHA256 (0x04 << SPA_CTRL_HASH_ALG_IDX) -#define SPA_CTRL_HASH_ALG_SHA384 (0x05 << SPA_CTRL_HASH_ALG_IDX) -#define SPA_CTRL_HASH_ALG_SHA512 (0x06 << SPA_CTRL_HASH_ALG_IDX) -#define SPA_CTRL_HASH_ALG_AESMAC (0x07 << SPA_CTRL_HASH_ALG_IDX) -#define SPA_CTRL_HASH_ALG_AESCMAC (0x08 << SPA_CTRL_HASH_ALG_IDX) -#define SPA_CTRL_HASH_ALG_KASF9 (0x09 << SPA_CTRL_HASH_ALG_IDX) - -#define SPA_CTRL_CIPH_MODE_NULL (0x00 << SPA_CTRL_CIPH_MODE_IDX) -#define SPA_CTRL_CIPH_MODE_ECB (0x00 << SPA_CTRL_CIPH_MODE_IDX) -#define SPA_CTRL_CIPH_MODE_CBC (0x01 << SPA_CTRL_CIPH_MODE_IDX) -#define SPA_CTRL_CIPH_MODE_CTR (0x02 << SPA_CTRL_CIPH_MODE_IDX) -#define SPA_CTRL_CIPH_MODE_CCM (0x03 << SPA_CTRL_CIPH_MODE_IDX) -#define SPA_CTRL_CIPH_MODE_GCM (0x05 << SPA_CTRL_CIPH_MODE_IDX) -#define SPA_CTRL_CIPH_MODE_OFB (0x07 << SPA_CTRL_CIPH_MODE_IDX) -#define SPA_CTRL_CIPH_MODE_CFB (0x08 << SPA_CTRL_CIPH_MODE_IDX) -#define SPA_CTRL_CIPH_MODE_F8 (0x09 << SPA_CTRL_CIPH_MODE_IDX) - -#define SPA_CTRL_HASH_MODE_RAW (0x00 << SPA_CTRL_HASH_MODE_IDX) -#define SPA_CTRL_HASH_MODE_SSLMAC (0x01 << SPA_CTRL_HASH_MODE_IDX) -#define SPA_CTRL_HASH_MODE_HMAC (0x02 << SPA_CTRL_HASH_MODE_IDX) - -#define SPA_FIFO_STAT_EMPTY (1 << 31) -#define SPA_FIFO_CMD_FULL (1 << 7) - -#endif /* __PICOXCELL_CRYPTO_REGS_H__ */ From 86ad60a65f29dd862a11c22bb4b5be28d6c5cef1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 31 Dec 2020 17:41:54 +0100 Subject: [PATCH 043/154] crypto: x86/aes-ni-xts - use direct calls to and 4-way stride The XTS asm helper arrangement is a bit odd: the 8-way stride helper consists of back-to-back calls to the 4-way core transforms, which are called indirectly, based on a boolean that indicates whether we are performing encryption or decryption. Given how costly indirect calls are on x86, let's switch to direct calls, and given how the 8-way stride doesn't really add anything substantial, use a 4-way stride instead, and make the asm core routine deal with any multiple of 4 blocks. Since 512 byte sectors or 4 KB blocks are the typical quantities XTS operates on, increase the stride exported to the glue helper to 512 bytes as well. As a result, the number of indirect calls is reduced from 3 per 64 bytes of in/output to 1 per 512 bytes of in/output, which produces a 65% speedup when operating on 1 KB blocks (measured on a Intel(R) Core(TM) i7-8650U CPU) Fixes: 9697fa39efd3f ("x86/retpoline/crypto: Convert crypto assembler indirect jumps") Tested-by: Eric Biggers # x86_64 Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 133 +++++++++++++++++------------ arch/x86/crypto/aesni-intel_glue.c | 25 +++--- 2 files changed, 93 insertions(+), 65 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index a2710f76862f..84d8a156cdcd 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2842,25 +2842,18 @@ SYM_FUNC_END(aesni_ctr_enc) pxor CTR, IV; /* - * void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *dst, - * const u8 *src, bool enc, le128 *iv) + * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) */ -SYM_FUNC_START(aesni_xts_crypt8) +SYM_FUNC_START(aesni_xts_encrypt) FRAME_BEGIN - testb %cl, %cl - movl $0, %ecx - movl $240, %r10d - leaq _aesni_enc4, %r11 - leaq _aesni_dec4, %rax - cmovel %r10d, %ecx - cmoveq %rax, %r11 movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK movups (IVP), IV mov 480(KEYP), KLEN - addq %rcx, KEYP +.Lxts_enc_loop4: movdqa IV, STATE1 movdqu 0x00(INP), INC pxor INC, STATE1 @@ -2884,71 +2877,103 @@ SYM_FUNC_START(aesni_xts_crypt8) pxor INC, STATE4 movdqu IV, 0x30(OUTP) - CALL_NOSPEC r11 + call _aesni_enc4 movdqu 0x00(OUTP), INC pxor INC, STATE1 movdqu STATE1, 0x00(OUTP) - _aesni_gf128mul_x_ble() - movdqa IV, STATE1 - movdqu 0x40(INP), INC - pxor INC, STATE1 - movdqu IV, 0x40(OUTP) - movdqu 0x10(OUTP), INC pxor INC, STATE2 movdqu STATE2, 0x10(OUTP) - _aesni_gf128mul_x_ble() - movdqa IV, STATE2 - movdqu 0x50(INP), INC - pxor INC, STATE2 - movdqu IV, 0x50(OUTP) - movdqu 0x20(OUTP), INC pxor INC, STATE3 movdqu STATE3, 0x20(OUTP) - _aesni_gf128mul_x_ble() - movdqa IV, STATE3 - movdqu 0x60(INP), INC - pxor INC, STATE3 - movdqu IV, 0x60(OUTP) - movdqu 0x30(OUTP), INC pxor INC, STATE4 movdqu STATE4, 0x30(OUTP) _aesni_gf128mul_x_ble() - movdqa IV, STATE4 - movdqu 0x70(INP), INC - pxor INC, STATE4 - movdqu IV, 0x70(OUTP) - _aesni_gf128mul_x_ble() + add $64, INP + add $64, OUTP + sub $64, LEN + ja .Lxts_enc_loop4 + movups IV, (IVP) - CALL_NOSPEC r11 - - movdqu 0x40(OUTP), INC - pxor INC, STATE1 - movdqu STATE1, 0x40(OUTP) - - movdqu 0x50(OUTP), INC - pxor INC, STATE2 - movdqu STATE2, 0x50(OUTP) - - movdqu 0x60(OUTP), INC - pxor INC, STATE3 - movdqu STATE3, 0x60(OUTP) - - movdqu 0x70(OUTP), INC - pxor INC, STATE4 - movdqu STATE4, 0x70(OUTP) - FRAME_END ret -SYM_FUNC_END(aesni_xts_crypt8) +SYM_FUNC_END(aesni_xts_encrypt) + +/* + * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) + */ +SYM_FUNC_START(aesni_xts_decrypt) + FRAME_BEGIN + + movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK + movups (IVP), IV + + mov 480(KEYP), KLEN + add $240, KEYP + +.Lxts_dec_loop4: + movdqa IV, STATE1 + movdqu 0x00(INP), INC + pxor INC, STATE1 + movdqu IV, 0x00(OUTP) + + _aesni_gf128mul_x_ble() + movdqa IV, STATE2 + movdqu 0x10(INP), INC + pxor INC, STATE2 + movdqu IV, 0x10(OUTP) + + _aesni_gf128mul_x_ble() + movdqa IV, STATE3 + movdqu 0x20(INP), INC + pxor INC, STATE3 + movdqu IV, 0x20(OUTP) + + _aesni_gf128mul_x_ble() + movdqa IV, STATE4 + movdqu 0x30(INP), INC + pxor INC, STATE4 + movdqu IV, 0x30(OUTP) + + call _aesni_dec4 + + movdqu 0x00(OUTP), INC + pxor INC, STATE1 + movdqu STATE1, 0x00(OUTP) + + movdqu 0x10(OUTP), INC + pxor INC, STATE2 + movdqu STATE2, 0x10(OUTP) + + movdqu 0x20(OUTP), INC + pxor INC, STATE3 + movdqu STATE3, 0x20(OUTP) + + movdqu 0x30(OUTP), INC + pxor INC, STATE4 + movdqu STATE4, 0x30(OUTP) + + _aesni_gf128mul_x_ble() + + add $64, INP + add $64, OUTP + sub $64, LEN + ja .Lxts_dec_loop4 + + movups IV, (IVP) + + FRAME_END + ret +SYM_FUNC_END(aesni_xts_decrypt) #endif diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 96bdc1584215..84e3ed49b35d 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -101,6 +101,12 @@ asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, #define AVX_GEN2_OPTSIZE 640 #define AVX_GEN4_OPTSIZE 4096 +asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + +asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + #ifdef CONFIG_X86_64 static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out, @@ -108,9 +114,6 @@ static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out, asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); -asmlinkage void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, bool enc, le128 *iv); - /* asmlinkage void aesni_gcm_enc() * void *ctx, AES Key schedule. Starts on a 16 byte boundary. * struct gcm_context_data. May be uninitialized. @@ -663,14 +666,14 @@ static void aesni_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec); } -static void aesni_xts_enc8(const void *ctx, u8 *dst, const u8 *src, le128 *iv) +static void aesni_xts_enc32(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - aesni_xts_crypt8(ctx, dst, src, true, iv); + aesni_xts_encrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv); } -static void aesni_xts_dec8(const void *ctx, u8 *dst, const u8 *src, le128 *iv) +static void aesni_xts_dec32(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - aesni_xts_crypt8(ctx, dst, src, false, iv); + aesni_xts_decrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv); } static const struct common_glue_ctx aesni_enc_xts = { @@ -678,8 +681,8 @@ static const struct common_glue_ctx aesni_enc_xts = { .fpu_blocks_limit = 1, .funcs = { { - .num_blocks = 8, - .fn_u = { .xts = aesni_xts_enc8 } + .num_blocks = 32, + .fn_u = { .xts = aesni_xts_enc32 } }, { .num_blocks = 1, .fn_u = { .xts = aesni_xts_enc } @@ -691,8 +694,8 @@ static const struct common_glue_ctx aesni_dec_xts = { .fpu_blocks_limit = 1, .funcs = { { - .num_blocks = 8, - .fn_u = { .xts = aesni_xts_dec8 } + .num_blocks = 32, + .fn_u = { .xts = aesni_xts_dec32 } }, { .num_blocks = 1, .fn_u = { .xts = aesni_xts_dec } From 2481104fe98d5b016fdd95d649b1235f21e491ba Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 31 Dec 2020 17:41:55 +0100 Subject: [PATCH 044/154] crypto: x86/aes-ni-xts - rewrite and drop indirections via glue helper The AES-NI driver implements XTS via the glue helper, which consumes a struct with sets of function pointers which are invoked on chunks of input data of the appropriate size, as annotated in the struct. Let's get rid of this indirection, so that we can perform direct calls to the assembler helpers. Instead, let's adopt the arm64 strategy, i.e., provide a helper which can consume inputs of any size, provided that the penultimate, full block is passed via the last call if ciphertext stealing needs to be applied. This also allows us to enable the XTS mode for i386. Tested-by: Eric Biggers # x86_64 Signed-off-by: Ard Biesheuvel Reported-by: kernel test robot Reported-by: kernel test robot Reported-by: kernel test robot Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 280 ++++++++++++++++++++++++----- arch/x86/crypto/aesni-intel_glue.c | 220 +++++++++++++---------- crypto/Kconfig | 1 - 3 files changed, 356 insertions(+), 145 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 84d8a156cdcd..4e3972570916 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -43,10 +43,6 @@ #ifdef __x86_64__ # constants in mergeable sections, linker can reorder and merge -.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16 -.align 16 -.Lgf128mul_x_ble_mask: - .octa 0x00000000000000010000000000000087 .section .rodata.cst16.POLY, "aM", @progbits, 16 .align 16 POLY: .octa 0xC2000000000000000000000000000001 @@ -146,7 +142,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define CTR %xmm11 #define INC %xmm12 -#define GF128MUL_MASK %xmm10 +#define GF128MUL_MASK %xmm7 #ifdef __x86_64__ #define AREG %rax @@ -2823,6 +2819,14 @@ SYM_FUNC_START(aesni_ctr_enc) ret SYM_FUNC_END(aesni_ctr_enc) +#endif + +.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16 +.align 16 +.Lgf128mul_x_ble_mask: + .octa 0x00000000000000010000000000000087 +.previous + /* * _aesni_gf128mul_x_ble: internal ABI * Multiply in GF(2^128) for XTS IVs @@ -2835,11 +2839,11 @@ SYM_FUNC_END(aesni_ctr_enc) * CTR: == temporary value */ #define _aesni_gf128mul_x_ble() \ - pshufd $0x13, IV, CTR; \ + pshufd $0x13, IV, KEY; \ paddq IV, IV; \ - psrad $31, CTR; \ - pand GF128MUL_MASK, CTR; \ - pxor CTR, IV; + psrad $31, KEY; \ + pand GF128MUL_MASK, KEY; \ + pxor KEY, IV; /* * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, @@ -2847,65 +2851,153 @@ SYM_FUNC_END(aesni_ctr_enc) */ SYM_FUNC_START(aesni_xts_encrypt) FRAME_BEGIN - +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+20)(%esp), KEYP # ctx + movl (FRAME_OFFSET+24)(%esp), OUTP # dst + movl (FRAME_OFFSET+28)(%esp), INP # src + movl (FRAME_OFFSET+32)(%esp), LEN # len + movl (FRAME_OFFSET+36)(%esp), IVP # iv movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK +#else + movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK +#endif movups (IVP), IV mov 480(KEYP), KLEN .Lxts_enc_loop4: + sub $64, LEN + jl .Lxts_enc_1x + movdqa IV, STATE1 - movdqu 0x00(INP), INC - pxor INC, STATE1 + movdqu 0x00(INP), IN + pxor IN, STATE1 movdqu IV, 0x00(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE2 - movdqu 0x10(INP), INC - pxor INC, STATE2 + movdqu 0x10(INP), IN + pxor IN, STATE2 movdqu IV, 0x10(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE3 - movdqu 0x20(INP), INC - pxor INC, STATE3 + movdqu 0x20(INP), IN + pxor IN, STATE3 movdqu IV, 0x20(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE4 - movdqu 0x30(INP), INC - pxor INC, STATE4 + movdqu 0x30(INP), IN + pxor IN, STATE4 movdqu IV, 0x30(OUTP) call _aesni_enc4 - movdqu 0x00(OUTP), INC - pxor INC, STATE1 + movdqu 0x00(OUTP), IN + pxor IN, STATE1 movdqu STATE1, 0x00(OUTP) - movdqu 0x10(OUTP), INC - pxor INC, STATE2 + movdqu 0x10(OUTP), IN + pxor IN, STATE2 movdqu STATE2, 0x10(OUTP) - movdqu 0x20(OUTP), INC - pxor INC, STATE3 + movdqu 0x20(OUTP), IN + pxor IN, STATE3 movdqu STATE3, 0x20(OUTP) - movdqu 0x30(OUTP), INC - pxor INC, STATE4 + movdqu 0x30(OUTP), IN + pxor IN, STATE4 movdqu STATE4, 0x30(OUTP) _aesni_gf128mul_x_ble() add $64, INP add $64, OUTP - sub $64, LEN - ja .Lxts_enc_loop4 + test LEN, LEN + jnz .Lxts_enc_loop4 +.Lxts_enc_ret_iv: movups IV, (IVP) +.Lxts_enc_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif FRAME_END ret + +.Lxts_enc_1x: + add $64, LEN + jz .Lxts_enc_ret_iv + sub $16, LEN + jl .Lxts_enc_cts4 + +.Lxts_enc_loop1: + movdqu (INP), STATE + pxor IV, STATE + call _aesni_enc1 + pxor IV, STATE + _aesni_gf128mul_x_ble() + + test LEN, LEN + jz .Lxts_enc_out + + add $16, INP + sub $16, LEN + jl .Lxts_enc_cts1 + + movdqu STATE, (OUTP) + add $16, OUTP + jmp .Lxts_enc_loop1 + +.Lxts_enc_out: + movdqu STATE, (OUTP) + jmp .Lxts_enc_ret_iv + +.Lxts_enc_cts4: + movdqa STATE4, STATE + sub $16, OUTP + +.Lxts_enc_cts1: +#ifndef __x86_64__ + lea .Lcts_permute_table, T1 +#else + lea .Lcts_permute_table(%rip), T1 +#endif + add LEN, INP /* rewind input pointer */ + add $16, LEN /* # bytes in final block */ + movups (INP), IN1 + + mov T1, IVP + add $32, IVP + add LEN, T1 + sub LEN, IVP + add OUTP, LEN + + movups (T1), %xmm4 + movaps STATE, IN2 + pshufb %xmm4, STATE + movups STATE, (LEN) + + movups (IVP), %xmm0 + pshufb %xmm0, IN1 + pblendvb IN2, IN1 + movaps IN1, STATE + + pxor IV, STATE + call _aesni_enc1 + pxor IV, STATE + + movups STATE, (OUTP) + jmp .Lxts_enc_ret SYM_FUNC_END(aesni_xts_encrypt) /* @@ -2914,66 +3006,158 @@ SYM_FUNC_END(aesni_xts_encrypt) */ SYM_FUNC_START(aesni_xts_decrypt) FRAME_BEGIN - +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+20)(%esp), KEYP # ctx + movl (FRAME_OFFSET+24)(%esp), OUTP # dst + movl (FRAME_OFFSET+28)(%esp), INP # src + movl (FRAME_OFFSET+32)(%esp), LEN # len + movl (FRAME_OFFSET+36)(%esp), IVP # iv movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK +#else + movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK +#endif movups (IVP), IV mov 480(KEYP), KLEN add $240, KEYP + test $15, LEN + jz .Lxts_dec_loop4 + sub $16, LEN + .Lxts_dec_loop4: + sub $64, LEN + jl .Lxts_dec_1x + movdqa IV, STATE1 - movdqu 0x00(INP), INC - pxor INC, STATE1 + movdqu 0x00(INP), IN + pxor IN, STATE1 movdqu IV, 0x00(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE2 - movdqu 0x10(INP), INC - pxor INC, STATE2 + movdqu 0x10(INP), IN + pxor IN, STATE2 movdqu IV, 0x10(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE3 - movdqu 0x20(INP), INC - pxor INC, STATE3 + movdqu 0x20(INP), IN + pxor IN, STATE3 movdqu IV, 0x20(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE4 - movdqu 0x30(INP), INC - pxor INC, STATE4 + movdqu 0x30(INP), IN + pxor IN, STATE4 movdqu IV, 0x30(OUTP) call _aesni_dec4 - movdqu 0x00(OUTP), INC - pxor INC, STATE1 + movdqu 0x00(OUTP), IN + pxor IN, STATE1 movdqu STATE1, 0x00(OUTP) - movdqu 0x10(OUTP), INC - pxor INC, STATE2 + movdqu 0x10(OUTP), IN + pxor IN, STATE2 movdqu STATE2, 0x10(OUTP) - movdqu 0x20(OUTP), INC - pxor INC, STATE3 + movdqu 0x20(OUTP), IN + pxor IN, STATE3 movdqu STATE3, 0x20(OUTP) - movdqu 0x30(OUTP), INC - pxor INC, STATE4 + movdqu 0x30(OUTP), IN + pxor IN, STATE4 movdqu STATE4, 0x30(OUTP) _aesni_gf128mul_x_ble() add $64, INP add $64, OUTP - sub $64, LEN - ja .Lxts_dec_loop4 + test LEN, LEN + jnz .Lxts_dec_loop4 +.Lxts_dec_ret_iv: movups IV, (IVP) +.Lxts_dec_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif FRAME_END ret -SYM_FUNC_END(aesni_xts_decrypt) +.Lxts_dec_1x: + add $64, LEN + jz .Lxts_dec_ret_iv + +.Lxts_dec_loop1: + movdqu (INP), STATE + + add $16, INP + sub $16, LEN + jl .Lxts_dec_cts1 + + pxor IV, STATE + call _aesni_dec1 + pxor IV, STATE + _aesni_gf128mul_x_ble() + + test LEN, LEN + jz .Lxts_dec_out + + movdqu STATE, (OUTP) + add $16, OUTP + jmp .Lxts_dec_loop1 + +.Lxts_dec_out: + movdqu STATE, (OUTP) + jmp .Lxts_dec_ret_iv + +.Lxts_dec_cts1: + movdqa IV, STATE4 + _aesni_gf128mul_x_ble() + + pxor IV, STATE + call _aesni_dec1 + pxor IV, STATE + +#ifndef __x86_64__ + lea .Lcts_permute_table, T1 +#else + lea .Lcts_permute_table(%rip), T1 #endif + add LEN, INP /* rewind input pointer */ + add $16, LEN /* # bytes in final block */ + movups (INP), IN1 + + mov T1, IVP + add $32, IVP + add LEN, T1 + sub LEN, IVP + add OUTP, LEN + + movups (T1), %xmm4 + movaps STATE, IN2 + pshufb %xmm4, STATE + movups STATE, (LEN) + + movups (IVP), %xmm0 + pshufb %xmm0, IN1 + pblendvb IN2, IN1 + movaps IN1, STATE + + pxor STATE4, STATE + call _aesni_dec1 + pxor STATE4, STATE + + movups STATE, (OUTP) + jmp .Lxts_dec_ret +SYM_FUNC_END(aesni_xts_decrypt) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 84e3ed49b35d..2116bc2b9507 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -33,9 +33,6 @@ #include #include #include -#ifdef CONFIG_X86_64 -#include -#endif #define AESNI_ALIGN 16 @@ -632,98 +629,6 @@ static int ctr_crypt(struct skcipher_request *req) return err; } -static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - keylen /= 2; - - /* first half of xts-key is for crypt */ - err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx, - key, keylen); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx, - key + keylen, keylen); -} - - -static void aesni_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_enc); -} - -static void aesni_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec); -} - -static void aesni_xts_enc32(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - aesni_xts_encrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv); -} - -static void aesni_xts_dec32(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - aesni_xts_decrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv); -} - -static const struct common_glue_ctx aesni_enc_xts = { - .num_funcs = 2, - .fpu_blocks_limit = 1, - - .funcs = { { - .num_blocks = 32, - .fn_u = { .xts = aesni_xts_enc32 } - }, { - .num_blocks = 1, - .fn_u = { .xts = aesni_xts_enc } - } } -}; - -static const struct common_glue_ctx aesni_dec_xts = { - .num_funcs = 2, - .fpu_blocks_limit = 1, - - .funcs = { { - .num_blocks = 32, - .fn_u = { .xts = aesni_xts_dec32 } - }, { - .num_blocks = 1, - .fn_u = { .xts = aesni_xts_dec } - } } -}; - -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&aesni_enc_xts, req, aesni_enc, - aes_ctx(ctx->raw_tweak_ctx), - aes_ctx(ctx->raw_crypt_ctx), - false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&aesni_dec_xts, req, aesni_enc, - aes_ctx(ctx->raw_tweak_ctx), - aes_ctx(ctx->raw_crypt_ctx), - true); -} - static int rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) { @@ -996,6 +901,128 @@ static int helper_rfc4106_decrypt(struct aead_request *req) } #endif +static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + keylen /= 2; + + /* first half of xts-key is for crypt */ + err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx, + key, keylen); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx, + key + keylen, keylen); +} + +static int xts_crypt(struct skcipher_request *req, bool encrypt) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct skcipher_request subreq; + struct skcipher_walk walk; + int err; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + err = skcipher_walk_virt(&walk, req, false); + + if (unlikely(tail > 0 && walk.nbytes < walk.total)) { + int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + blocks * AES_BLOCK_SIZE, req->iv); + req = &subreq; + err = skcipher_walk_virt(&walk, req, false); + } else { + tail = 0; + } + + kernel_fpu_begin(); + + /* calculate first value of T */ + aesni_enc(aes_ctx(ctx->raw_tweak_ctx), walk.iv, walk.iv); + + while (walk.nbytes > 0) { + int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); + + if (encrypt) + aesni_xts_encrypt(aes_ctx(ctx->raw_crypt_ctx), + walk.dst.virt.addr, walk.src.virt.addr, + nbytes, walk.iv); + else + aesni_xts_decrypt(aes_ctx(ctx->raw_crypt_ctx), + walk.dst.virt.addr, walk.src.virt.addr, + nbytes, walk.iv); + kernel_fpu_end(); + + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + + if (walk.nbytes > 0) + kernel_fpu_begin(); + } + + if (unlikely(tail > 0 && !err)) { + struct scatterlist sg_src[2], sg_dst[2]; + struct scatterlist *src, *dst; + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_fpu_begin(); + if (encrypt) + aesni_xts_encrypt(aes_ctx(ctx->raw_crypt_ctx), + walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes, walk.iv); + else + aesni_xts_decrypt(aes_ctx(ctx->raw_crypt_ctx), + walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes, walk.iv); + kernel_fpu_end(); + + err = skcipher_walk_done(&walk, 0); + } + return err; +} + +static int xts_encrypt(struct skcipher_request *req) +{ + return xts_crypt(req, true); +} + +static int xts_decrypt(struct skcipher_request *req) +{ + return xts_crypt(req, false); +} + static struct crypto_alg aesni_cipher_alg = { .cra_name = "aes", .cra_driver_name = "aes-aesni", @@ -1082,6 +1109,7 @@ static struct skcipher_alg aesni_skciphers[] = { .setkey = aesni_skcipher_setkey, .encrypt = ctr_crypt, .decrypt = ctr_crypt, +#endif }, { .base = { .cra_name = "__xts(aes)", @@ -1095,10 +1123,10 @@ static struct skcipher_alg aesni_skciphers[] = { .min_keysize = 2 * AES_MIN_KEY_SIZE, .max_keysize = 2 * AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, + .walksize = 2 * AES_BLOCK_SIZE, .setkey = xts_aesni_setkey, .encrypt = xts_encrypt, .decrypt = xts_decrypt, -#endif } }; diff --git a/crypto/Kconfig b/crypto/Kconfig index a367fcfeb5d4..c48ca26e2169 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1133,7 +1133,6 @@ config CRYPTO_AES_NI_INTEL select CRYPTO_LIB_AES select CRYPTO_ALGAPI select CRYPTO_SKCIPHER - select CRYPTO_GLUE_HELPER_X86 if 64BIT select CRYPTO_SIMD help Use Intel AES-NI instructions for AES algorithm. From 622aae879c1d9449562e0cae353691a2a1f9eec0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 3 Jan 2021 08:56:18 +1100 Subject: [PATCH 045/154] crypto: vmx - Move extern declarations into header file This patch moves the extern algorithm declarations into a header file so that a number of compiler warnings are silenced. Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aesp8-ppc.h | 6 ++++++ drivers/crypto/vmx/vmx.c | 6 +----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h index 01774a4d26a2..5764d4438388 100644 --- a/drivers/crypto/vmx/aesp8-ppc.h +++ b/drivers/crypto/vmx/aesp8-ppc.h @@ -7,6 +7,12 @@ struct aes_key { int rounds; }; +extern struct shash_alg p8_ghash_alg; +extern struct crypto_alg p8_aes_alg; +extern struct skcipher_alg p8_aes_cbc_alg; +extern struct skcipher_alg p8_aes_ctr_alg; +extern struct skcipher_alg p8_aes_xts_alg; + int aes_p8_set_encrypt_key(const u8 *userKey, const int bits, struct aes_key *key); int aes_p8_set_decrypt_key(const u8 *userKey, const int bits, diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c index 87a194455d6a..a40d08e75fc0 100644 --- a/drivers/crypto/vmx/vmx.c +++ b/drivers/crypto/vmx/vmx.c @@ -17,11 +17,7 @@ #include #include -extern struct shash_alg p8_ghash_alg; -extern struct crypto_alg p8_aes_alg; -extern struct skcipher_alg p8_aes_cbc_alg; -extern struct skcipher_alg p8_aes_ctr_alg; -extern struct skcipher_alg p8_aes_xts_alg; +#include "aesp8-ppc.h" static int __init p8_init(void) { From 81064c96d88180ad6995d52419e94a78968308a2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 4 Jan 2021 17:15:45 +1100 Subject: [PATCH 046/154] crypto: stm32 - Fix last sparse warning in stm32_cryp_check_ctr_counter This patch changes the cast in stm32_cryp_check_ctr_counter from u32 to __be32 to match the prototype of stm32_cryp_hw_write_iv correctly. Reported-by: kernel test robot Signed-off-by: Herbert Xu --- drivers/crypto/stm32/stm32-cryp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c index 2670c30332fa..2a4793176c71 100644 --- a/drivers/crypto/stm32/stm32-cryp.c +++ b/drivers/crypto/stm32/stm32-cryp.c @@ -1229,7 +1229,7 @@ static void stm32_cryp_check_ctr_counter(struct stm32_cryp *cryp) cr = stm32_cryp_read(cryp, CRYP_CR); stm32_cryp_write(cryp, CRYP_CR, cr & ~CR_CRYPEN); - stm32_cryp_hw_write_iv(cryp, (u32 *)cryp->last_ctr); + stm32_cryp_hw_write_iv(cryp, (__be32 *)cryp->last_ctr); stm32_cryp_write(cryp, CRYP_CR, cr); } From 4f1a02e75a2eedfddd10222c0fe61d2a04d80099 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Mon, 4 Jan 2021 15:35:15 +0000 Subject: [PATCH 047/154] crypto: qat - replace CRYPTO_AES with CRYPTO_LIB_AES in Kconfig Use CRYPTO_LIB_AES in place of CRYPTO_AES in the dependences for the QAT common code. Fixes: c0e583ab2016 ("crypto: qat - add CRYPTO_AES to Kconfig dependencies") Reported-by: Ard Biesheuvel Signed-off-by: Marco Chiappero Acked-by: Ard Biesheuvel Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig index 846a3d90b41a..77783feb62b2 100644 --- a/drivers/crypto/qat/Kconfig +++ b/drivers/crypto/qat/Kconfig @@ -11,7 +11,7 @@ config CRYPTO_DEV_QAT select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 - select CRYPTO_AES + select CRYPTO_LIB_AES select FW_LOADER config CRYPTO_DEV_QAT_DH895xCC From a13ed1d15b07a04b1f74b2df61ff7a5e47f45dd8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Jan 2021 16:55:46 +0100 Subject: [PATCH 048/154] crypto: aesni - prevent misaligned buffers on the stack The GCM mode driver uses 16 byte aligned buffers on the stack to pass the IV to the asm helpers, but unfortunately, the x86 port does not guarantee that the stack pointer is 16 byte aligned upon entry in the first place. Since the compiler is not aware of this, it will not emit the additional stack realignment sequence that is needed, and so the alignment is not guaranteed to be more than 8 bytes. So instead, allocate some padding on the stack, and realign the IV pointer by hand. Cc: Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 2116bc2b9507..880f9f8b5153 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -710,7 +710,8 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, struct crypto_aead *tfm = crypto_aead_reqtfm(req); unsigned long auth_tag_len = crypto_aead_authsize(tfm); const struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm; - struct gcm_context_data data AESNI_ALIGN_ATTR; + u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8); + struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN); struct scatter_walk dst_sg_walk = {}; unsigned long left = req->cryptlen; unsigned long len, srclen, dstlen; @@ -759,8 +760,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, } kernel_fpu_begin(); - gcm_tfm->init(aes_ctx, &data, iv, - hash_subkey, assoc, assoclen); + gcm_tfm->init(aes_ctx, data, iv, hash_subkey, assoc, assoclen); if (req->src != req->dst) { while (left) { src = scatterwalk_map(&src_sg_walk); @@ -770,10 +770,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, len = min(srclen, dstlen); if (len) { if (enc) - gcm_tfm->enc_update(aes_ctx, &data, + gcm_tfm->enc_update(aes_ctx, data, dst, src, len); else - gcm_tfm->dec_update(aes_ctx, &data, + gcm_tfm->dec_update(aes_ctx, data, dst, src, len); } left -= len; @@ -791,10 +791,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, len = scatterwalk_clamp(&src_sg_walk, left); if (len) { if (enc) - gcm_tfm->enc_update(aes_ctx, &data, + gcm_tfm->enc_update(aes_ctx, data, src, src, len); else - gcm_tfm->dec_update(aes_ctx, &data, + gcm_tfm->dec_update(aes_ctx, data, src, src, len); } left -= len; @@ -803,7 +803,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, scatterwalk_done(&src_sg_walk, 1, left); } } - gcm_tfm->finalize(aes_ctx, &data, authTag, auth_tag_len); + gcm_tfm->finalize(aes_ctx, data, authTag, auth_tag_len); kernel_fpu_end(); if (!assocmem) @@ -852,7 +852,8 @@ static int helper_rfc4106_encrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); unsigned int i; __be32 counter = cpu_to_be32(1); @@ -879,7 +880,8 @@ static int helper_rfc4106_decrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); unsigned int i; if (unlikely(req->assoclen != 16 && req->assoclen != 20)) @@ -1149,7 +1151,8 @@ static int generic_gcmaes_encrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); __be32 counter = cpu_to_be32(1); memcpy(iv, req->iv, 12); @@ -1165,7 +1168,8 @@ static int generic_gcmaes_decrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); memcpy(iv, req->iv, 12); *((__be32 *)(iv+12)) = counter; From 30f2c18eb564acdc1c2c31f8cea9c7d38f46c681 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Jan 2021 16:55:47 +0100 Subject: [PATCH 049/154] crypto: aesni - drop unused asm prototypes Drop some prototypes that are declared but never called. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 67 ------------------------------ 1 file changed, 67 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 880f9f8b5153..0f124d72e6b4 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -111,49 +111,6 @@ static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out, asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); -/* asmlinkage void aesni_gcm_enc() - * void *ctx, AES Key schedule. Starts on a 16 byte boundary. - * struct gcm_context_data. May be uninitialized. - * u8 *out, Ciphertext output. Encrypt in-place is allowed. - * const u8 *in, Plaintext input - * unsigned long plaintext_len, Length of data in bytes for encryption. - * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001. - * 16-byte aligned pointer. - * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. - * const u8 *aad, Additional Authentication Data (AAD) - * unsigned long aad_len, Length of AAD in bytes. - * u8 *auth_tag, Authenticated Tag output. - * unsigned long auth_tag_len), Authenticated Tag Length in bytes. - * Valid values are 16 (most likely), 12 or 8. - */ -asmlinkage void aesni_gcm_enc(void *ctx, - struct gcm_context_data *gdata, u8 *out, - const u8 *in, unsigned long plaintext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); - -/* asmlinkage void aesni_gcm_dec() - * void *ctx, AES Key schedule. Starts on a 16 byte boundary. - * struct gcm_context_data. May be uninitialized. - * u8 *out, Plaintext output. Decrypt in-place is allowed. - * const u8 *in, Ciphertext input - * unsigned long ciphertext_len, Length of data in bytes for decryption. - * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001. - * 16-byte aligned pointer. - * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. - * const u8 *aad, Additional Authentication Data (AAD) - * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going - * to be 8 or 12 bytes - * u8 *auth_tag, Authenticated Tag output. - * unsigned long auth_tag_len) Authenticated Tag Length in bytes. - * Valid values are 16 (most likely), 12 or 8. - */ -asmlinkage void aesni_gcm_dec(void *ctx, - struct gcm_context_data *gdata, u8 *out, - const u8 *in, unsigned long ciphertext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); - /* Scatter / Gather routines, with args similar to above */ asmlinkage void aesni_gcm_init(void *ctx, struct gcm_context_data *gdata, @@ -218,18 +175,6 @@ asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx, struct gcm_context_data *gdata, u8 *auth_tag, unsigned long auth_tag_len); -asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx, - struct gcm_context_data *gdata, u8 *out, - const u8 *in, unsigned long plaintext_len, u8 *iv, - const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); - -asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, - struct gcm_context_data *gdata, u8 *out, - const u8 *in, unsigned long ciphertext_len, u8 *iv, - const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); - static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = { .init = &aesni_gcm_init_avx_gen2, .enc_update = &aesni_gcm_enc_update_avx_gen2, @@ -260,18 +205,6 @@ asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx, struct gcm_context_data *gdata, u8 *auth_tag, unsigned long auth_tag_len); -asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx, - struct gcm_context_data *gdata, u8 *out, - const u8 *in, unsigned long plaintext_len, u8 *iv, - const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); - -asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, - struct gcm_context_data *gdata, u8 *out, - const u8 *in, unsigned long ciphertext_len, u8 *iv, - const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); - static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = { .init = &aesni_gcm_init_avx_gen4, .enc_update = &aesni_gcm_enc_update_avx_gen4, From 2694e23ffd210cbbc05cd45bec77dc1c11bb72a2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Jan 2021 16:55:48 +0100 Subject: [PATCH 050/154] crypto: aesni - clean up mapping of associated data The gcm(aes-ni) driver is only built for x86_64, which does not make use of highmem. So testing for PageHighMem is pointless and can be omitted. While at it, replace GFP_ATOMIC with the appropriate runtime decided value based on the context. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 0f124d72e6b4..efef6e6b1d34 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -667,14 +667,15 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, gcm_tfm = &aesni_gcm_tfm_sse; /* Linearize assoc, if not already linear */ - if (req->src->length >= assoclen && req->src->length && - (!PageHighMem(sg_page(req->src)) || - req->src->offset + req->src->length <= PAGE_SIZE)) { + if (req->src->length >= assoclen && req->src->length) { scatterwalk_start(&assoc_sg_walk, req->src); assoc = scatterwalk_map(&assoc_sg_walk); } else { + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + /* assoc can be any length, so must be on heap */ - assocmem = kmalloc(assoclen, GFP_ATOMIC); + assocmem = kmalloc(assoclen, flags); if (unlikely(!assocmem)) return -ENOMEM; assoc = assocmem; From 83c83e658863e4e57f4defe6cc1bc05f3d968e2a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Jan 2021 16:55:49 +0100 Subject: [PATCH 051/154] crypto: aesni - refactor scatterlist processing Currently, the gcm(aes-ni) driver open codes the scatterlist handling that is encapsulated by the skcipher walk API. So let's switch to that instead. Also, move the handling at the end of gcmaes_crypt_by_sg() that is dependent on whether we are encrypting or decrypting into the callers, which always do one or the other. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 139 ++++++++++++----------------- 1 file changed, 56 insertions(+), 83 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index efef6e6b1d34..180fa79a0727 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -638,25 +638,18 @@ static int generic_gcmaes_set_authsize(struct crypto_aead *tfm, static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, unsigned int assoclen, u8 *hash_subkey, - u8 *iv, void *aes_ctx) + u8 *iv, void *aes_ctx, u8 *auth_tag, + unsigned long auth_tag_len) { - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - unsigned long auth_tag_len = crypto_aead_authsize(tfm); const struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm; u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8); struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN); - struct scatter_walk dst_sg_walk = {}; unsigned long left = req->cryptlen; - unsigned long len, srclen, dstlen; struct scatter_walk assoc_sg_walk; - struct scatter_walk src_sg_walk; - struct scatterlist src_start[2]; - struct scatterlist dst_start[2]; - struct scatterlist *src_sg; - struct scatterlist *dst_sg; - u8 *src, *dst, *assoc; + struct skcipher_walk walk; u8 *assocmem = NULL; - u8 authTag[16]; + u8 *assoc; + int err; if (!enc) left -= auth_tag_len; @@ -683,61 +676,8 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0); } - if (left) { - src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen); - scatterwalk_start(&src_sg_walk, src_sg); - if (req->src != req->dst) { - dst_sg = scatterwalk_ffwd(dst_start, req->dst, - req->assoclen); - scatterwalk_start(&dst_sg_walk, dst_sg); - } - } - kernel_fpu_begin(); gcm_tfm->init(aes_ctx, data, iv, hash_subkey, assoc, assoclen); - if (req->src != req->dst) { - while (left) { - src = scatterwalk_map(&src_sg_walk); - dst = scatterwalk_map(&dst_sg_walk); - srclen = scatterwalk_clamp(&src_sg_walk, left); - dstlen = scatterwalk_clamp(&dst_sg_walk, left); - len = min(srclen, dstlen); - if (len) { - if (enc) - gcm_tfm->enc_update(aes_ctx, data, - dst, src, len); - else - gcm_tfm->dec_update(aes_ctx, data, - dst, src, len); - } - left -= len; - - scatterwalk_unmap(src); - scatterwalk_unmap(dst); - scatterwalk_advance(&src_sg_walk, len); - scatterwalk_advance(&dst_sg_walk, len); - scatterwalk_done(&src_sg_walk, 0, left); - scatterwalk_done(&dst_sg_walk, 1, left); - } - } else { - while (left) { - dst = src = scatterwalk_map(&src_sg_walk); - len = scatterwalk_clamp(&src_sg_walk, left); - if (len) { - if (enc) - gcm_tfm->enc_update(aes_ctx, data, - src, src, len); - else - gcm_tfm->dec_update(aes_ctx, data, - src, src, len); - } - left -= len; - scatterwalk_unmap(src); - scatterwalk_advance(&src_sg_walk, len); - scatterwalk_done(&src_sg_walk, 1, left); - } - } - gcm_tfm->finalize(aes_ctx, data, authTag, auth_tag_len); kernel_fpu_end(); if (!assocmem) @@ -745,24 +685,25 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, else kfree(assocmem); - if (!enc) { - u8 authTagMsg[16]; + err = enc ? skcipher_walk_aead_encrypt(&walk, req, false) + : skcipher_walk_aead_decrypt(&walk, req, false); - /* Copy out original authTag */ - scatterwalk_map_and_copy(authTagMsg, req->src, - req->assoclen + req->cryptlen - - auth_tag_len, - auth_tag_len, 0); + while (walk.nbytes > 0) { + kernel_fpu_begin(); + (enc ? gcm_tfm->enc_update + : gcm_tfm->dec_update)(aes_ctx, data, walk.dst.virt.addr, + walk.src.virt.addr, walk.nbytes); + kernel_fpu_end(); - /* Compare generated tag with passed in tag. */ - return crypto_memneq(authTagMsg, authTag, auth_tag_len) ? - -EBADMSG : 0; + err = skcipher_walk_done(&walk, 0); } - /* Copy in the authTag */ - scatterwalk_map_and_copy(authTag, req->dst, - req->assoclen + req->cryptlen, - auth_tag_len, 1); + if (err) + return err; + + kernel_fpu_begin(); + gcm_tfm->finalize(aes_ctx, data, auth_tag, auth_tag_len); + kernel_fpu_end(); return 0; } @@ -770,15 +711,47 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, u8 *hash_subkey, u8 *iv, void *aes_ctx) { - return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, - aes_ctx); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + u8 auth_tag[16]; + int err; + + err = gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, aes_ctx, + auth_tag, auth_tag_len); + if (err) + return err; + + scatterwalk_map_and_copy(auth_tag, req->dst, + req->assoclen + req->cryptlen, + auth_tag_len, 1); + return 0; } static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, u8 *hash_subkey, u8 *iv, void *aes_ctx) { - return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, - aes_ctx); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + u8 auth_tag_msg[16]; + u8 auth_tag[16]; + int err; + + err = gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, aes_ctx, + auth_tag, auth_tag_len); + if (err) + return err; + + /* Copy out original auth_tag */ + scatterwalk_map_and_copy(auth_tag_msg, req->src, + req->assoclen + req->cryptlen - auth_tag_len, + auth_tag_len, 0); + + /* Compare generated tag with passed in tag. */ + if (crypto_memneq(auth_tag_msg, auth_tag, auth_tag_len)) { + memzero_explicit(auth_tag, sizeof(auth_tag)); + return -EBADMSG; + } + return 0; } static int helper_rfc4106_encrypt(struct aead_request *req) From d6cbf4eaa46794b173c691a71211d882398d7977 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 4 Jan 2021 16:55:50 +0100 Subject: [PATCH 052/154] crypto: aesni - replace function pointers with static branches Replace the function pointers in the GCM implementation with static branches, which are based on code patching, which occurs only at module load time. This avoids the severe performance penalty caused by the use of retpolines. In order to retain the ability to switch between different versions of the implementation based on the input size on cores that support AVX and AVX2, use static branches instead of static calls. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 98 ++++++++++++++++-------------- 1 file changed, 54 insertions(+), 44 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 180fa79a0727..a548fdbc3073 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -128,24 +129,6 @@ asmlinkage void aesni_gcm_finalize(void *ctx, struct gcm_context_data *gdata, u8 *auth_tag, unsigned long auth_tag_len); -static const struct aesni_gcm_tfm_s { - void (*init)(void *ctx, struct gcm_context_data *gdata, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len); - void (*enc_update)(void *ctx, struct gcm_context_data *gdata, u8 *out, - const u8 *in, unsigned long plaintext_len); - void (*dec_update)(void *ctx, struct gcm_context_data *gdata, u8 *out, - const u8 *in, unsigned long ciphertext_len); - void (*finalize)(void *ctx, struct gcm_context_data *gdata, - u8 *auth_tag, unsigned long auth_tag_len); -} *aesni_gcm_tfm; - -static const struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = { - .init = &aesni_gcm_init, - .enc_update = &aesni_gcm_enc_update, - .dec_update = &aesni_gcm_dec_update, - .finalize = &aesni_gcm_finalize, -}; - asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv, void *keys, u8 *out, unsigned int num_bytes); asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv, @@ -175,13 +158,6 @@ asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx, struct gcm_context_data *gdata, u8 *auth_tag, unsigned long auth_tag_len); -static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = { - .init = &aesni_gcm_init_avx_gen2, - .enc_update = &aesni_gcm_enc_update_avx_gen2, - .dec_update = &aesni_gcm_dec_update_avx_gen2, - .finalize = &aesni_gcm_finalize_avx_gen2, -}; - /* * asmlinkage void aesni_gcm_init_avx_gen4() * gcm_data *my_ctx_data, context data @@ -205,12 +181,8 @@ asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx, struct gcm_context_data *gdata, u8 *auth_tag, unsigned long auth_tag_len); -static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = { - .init = &aesni_gcm_init_avx_gen4, - .enc_update = &aesni_gcm_enc_update_avx_gen4, - .dec_update = &aesni_gcm_dec_update_avx_gen4, - .finalize = &aesni_gcm_finalize_avx_gen4, -}; +static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx2); static inline struct aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) @@ -641,12 +613,12 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, u8 *iv, void *aes_ctx, u8 *auth_tag, unsigned long auth_tag_len) { - const struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm; u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8); struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN); unsigned long left = req->cryptlen; struct scatter_walk assoc_sg_walk; struct skcipher_walk walk; + bool do_avx, do_avx2; u8 *assocmem = NULL; u8 *assoc; int err; @@ -654,10 +626,8 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, if (!enc) left -= auth_tag_len; - if (left < AVX_GEN4_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen4) - gcm_tfm = &aesni_gcm_tfm_avx_gen2; - if (left < AVX_GEN2_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen2) - gcm_tfm = &aesni_gcm_tfm_sse; + do_avx = (left >= AVX_GEN2_OPTSIZE); + do_avx2 = (left >= AVX_GEN4_OPTSIZE); /* Linearize assoc, if not already linear */ if (req->src->length >= assoclen && req->src->length) { @@ -677,7 +647,14 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, } kernel_fpu_begin(); - gcm_tfm->init(aes_ctx, data, iv, hash_subkey, assoc, assoclen); + if (static_branch_likely(&gcm_use_avx2) && do_avx2) + aesni_gcm_init_avx_gen4(aes_ctx, data, iv, hash_subkey, assoc, + assoclen); + else if (static_branch_likely(&gcm_use_avx) && do_avx) + aesni_gcm_init_avx_gen2(aes_ctx, data, iv, hash_subkey, assoc, + assoclen); + else + aesni_gcm_init(aes_ctx, data, iv, hash_subkey, assoc, assoclen); kernel_fpu_end(); if (!assocmem) @@ -690,9 +667,35 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, while (walk.nbytes > 0) { kernel_fpu_begin(); - (enc ? gcm_tfm->enc_update - : gcm_tfm->dec_update)(aes_ctx, data, walk.dst.virt.addr, - walk.src.virt.addr, walk.nbytes); + if (static_branch_likely(&gcm_use_avx2) && do_avx2) { + if (enc) + aesni_gcm_enc_update_avx_gen4(aes_ctx, data, + walk.dst.virt.addr, + walk.src.virt.addr, + walk.nbytes); + else + aesni_gcm_dec_update_avx_gen4(aes_ctx, data, + walk.dst.virt.addr, + walk.src.virt.addr, + walk.nbytes); + } else if (static_branch_likely(&gcm_use_avx) && do_avx) { + if (enc) + aesni_gcm_enc_update_avx_gen2(aes_ctx, data, + walk.dst.virt.addr, + walk.src.virt.addr, + walk.nbytes); + else + aesni_gcm_dec_update_avx_gen2(aes_ctx, data, + walk.dst.virt.addr, + walk.src.virt.addr, + walk.nbytes); + } else if (enc) { + aesni_gcm_enc_update(aes_ctx, data, walk.dst.virt.addr, + walk.src.virt.addr, walk.nbytes); + } else { + aesni_gcm_dec_update(aes_ctx, data, walk.dst.virt.addr, + walk.src.virt.addr, walk.nbytes); + } kernel_fpu_end(); err = skcipher_walk_done(&walk, 0); @@ -702,7 +705,14 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, return err; kernel_fpu_begin(); - gcm_tfm->finalize(aes_ctx, data, auth_tag, auth_tag_len); + if (static_branch_likely(&gcm_use_avx2) && do_avx2) + aesni_gcm_finalize_avx_gen4(aes_ctx, data, auth_tag, + auth_tag_len); + else if (static_branch_likely(&gcm_use_avx) && do_avx) + aesni_gcm_finalize_avx_gen2(aes_ctx, data, auth_tag, + auth_tag_len); + else + aesni_gcm_finalize(aes_ctx, data, auth_tag, auth_tag_len); kernel_fpu_end(); return 0; @@ -1141,14 +1151,14 @@ static int __init aesni_init(void) #ifdef CONFIG_X86_64 if (boot_cpu_has(X86_FEATURE_AVX2)) { pr_info("AVX2 version of gcm_enc/dec engaged.\n"); - aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen4; + static_branch_enable(&gcm_use_avx); + static_branch_enable(&gcm_use_avx2); } else if (boot_cpu_has(X86_FEATURE_AVX)) { pr_info("AVX version of gcm_enc/dec engaged.\n"); - aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen2; + static_branch_enable(&gcm_use_avx); } else { pr_info("SSE version of gcm_enc/dec engaged.\n"); - aesni_gcm_tfm = &aesni_gcm_tfm_sse; } aesni_ctr_enc_tfm = aesni_ctr_enc; if (boot_cpu_has(X86_FEATURE_AVX)) { From 0db0d797abca574a3a4fa141a82ea44c270c2dd8 Mon Sep 17 00:00:00 2001 From: Wojciech Ziemba Date: Mon, 4 Jan 2021 16:55:46 +0000 Subject: [PATCH 053/154] crypto: qat - configure arbiter mapping based on engines enabled The hardware specific function adf_get_arbiter_mapping() modifies the static array thrd_to_arb_map to disable mappings for AEs that are disabled. This static array is used for each device of the same type. If the ae mask is not identical for all devices of the same type then the arbiter mapping returned by adf_get_arbiter_mapping() may be wrong. This patch fixes this problem by ensuring the static arbiter mapping is unchanged and the device arbiter mapping is re-calculated each time based on the static mapping. Signed-off-by: Wojciech Ziemba Reviewed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 14 ++-------- .../crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 17 +++-------- .../crypto/qat/qat_c62x/adf_c62x_hw_data.c | 25 +++-------------- .../crypto/qat/qat_common/adf_accel_devices.h | 3 +- .../crypto/qat/qat_common/adf_hw_arbiter.c | 8 ++---- .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 28 +++---------------- 6 files changed, 19 insertions(+), 76 deletions(-) diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index 344bfae45bff..6a9be01fdf33 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -19,7 +19,7 @@ static struct adf_fw_config adf_4xxx_fw_config[] = { }; /* Worker thread to service arbiter mappings */ -static u32 thrd_to_arb_map[] = { +static const u32 thrd_to_arb_map[ADF_4XXX_MAX_ACCELENGINES] = { 0x5555555, 0x5555555, 0x5555555, 0x5555555, 0xAAAAAAA, 0xAAAAAAA, 0xAAAAAAA, 0xAAAAAAA, 0x0 @@ -119,17 +119,9 @@ static enum dev_sku_info get_sku(struct adf_hw_device_data *self) return DEV_SKU_1; } -static void adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev, - u32 const **arb_map_config) +static const u32 *adf_get_arbiter_mapping(void) { - struct adf_hw_device_data *hw_device = accel_dev->hw_device; - unsigned long ae_mask = hw_device->ae_mask; - int i; - - for_each_clear_bit(i, &ae_mask, ADF_4XXX_MAX_ACCELENGINES) - thrd_to_arb_map[i] = 0; - - *arb_map_config = thrd_to_arb_map; + return thrd_to_arb_map; } static void get_arb_info(struct arb_info *arb_info) diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index eb45f1b1ae3e..f5990d042c9a 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -7,8 +7,8 @@ #include "adf_c3xxx_hw_data.h" #include "icp_qat_hw.h" -/* Worker thread to service arbiter mappings based on dev SKUs */ -static const u32 thrd_to_arb_map_6_me_sku[] = { +/* Worker thread to service arbiter mappings */ +static const u32 thrd_to_arb_map[ADF_C3XXX_MAX_ACCELENGINES] = { 0x12222AAA, 0x11222AAA, 0x12222AAA, 0x11222AAA, 0x12222AAA, 0x11222AAA }; @@ -101,18 +101,9 @@ static enum dev_sku_info get_sku(struct adf_hw_device_data *self) return DEV_SKU_UNKNOWN; } -static void adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev, - u32 const **arb_map_config) +static const u32 *adf_get_arbiter_mapping(void) { - switch (accel_dev->accel_pci_dev.sku) { - case DEV_SKU_4: - *arb_map_config = thrd_to_arb_map_6_me_sku; - break; - default: - dev_err(&GET_DEV(accel_dev), - "The configuration doesn't match any SKU"); - *arb_map_config = NULL; - } + return thrd_to_arb_map; } static u32 get_pf2vf_offset(u32 i) diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index babdffbcb846..cadcf12884c8 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -7,13 +7,8 @@ #include "adf_c62x_hw_data.h" #include "icp_qat_hw.h" -/* Worker thread to service arbiter mappings based on dev SKUs */ -static const u32 thrd_to_arb_map_8_me_sku[] = { - 0x12222AAA, 0x11222AAA, 0x12222AAA, 0x11222AAA, 0x12222AAA, - 0x11222AAA, 0x12222AAA, 0x11222AAA, 0, 0 -}; - -static const u32 thrd_to_arb_map_10_me_sku[] = { +/* Worker thread to service arbiter mappings */ +static const u32 thrd_to_arb_map[ADF_C62X_MAX_ACCELENGINES] = { 0x12222AAA, 0x11222AAA, 0x12222AAA, 0x11222AAA, 0x12222AAA, 0x11222AAA, 0x12222AAA, 0x11222AAA, 0x12222AAA, 0x11222AAA }; @@ -108,21 +103,9 @@ static enum dev_sku_info get_sku(struct adf_hw_device_data *self) return DEV_SKU_UNKNOWN; } -static void adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev, - u32 const **arb_map_config) +static const u32 *adf_get_arbiter_mapping(void) { - switch (accel_dev->accel_pci_dev.sku) { - case DEV_SKU_2: - *arb_map_config = thrd_to_arb_map_8_me_sku; - break; - case DEV_SKU_4: - *arb_map_config = thrd_to_arb_map_10_me_sku; - break; - default: - dev_err(&GET_DEV(accel_dev), - "The configuration doesn't match any SKU"); - *arb_map_config = NULL; - } + return thrd_to_arb_map; } static u32 get_pf2vf_offset(u32 i) diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index c46a5805b294..5527344546e5 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -168,8 +168,7 @@ struct adf_hw_device_data { int (*send_admin_init)(struct adf_accel_dev *accel_dev); int (*init_arb)(struct adf_accel_dev *accel_dev); void (*exit_arb)(struct adf_accel_dev *accel_dev); - void (*get_arb_mapping)(struct adf_accel_dev *accel_dev, - const u32 **cfg); + const u32 *(*get_arb_mapping)(void); void (*disable_iov)(struct adf_accel_dev *accel_dev); void (*configure_iov_threads)(struct adf_accel_dev *accel_dev, bool enable); diff --git a/drivers/crypto/qat/qat_common/adf_hw_arbiter.c b/drivers/crypto/qat/qat_common/adf_hw_arbiter.c index 9f5240d9488b..64e4596a24f4 100644 --- a/drivers/crypto/qat/qat_common/adf_hw_arbiter.c +++ b/drivers/crypto/qat/qat_common/adf_hw_arbiter.c @@ -19,6 +19,7 @@ int adf_init_arb(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; void __iomem *csr = accel_dev->transport->banks[0].csr_addr; + unsigned long ae_mask = hw_data->ae_mask; u32 arb_off, wt_off, arb_cfg; const u32 *thd_2_arb_cfg; struct arb_info info; @@ -35,12 +36,9 @@ int adf_init_arb(struct adf_accel_dev *accel_dev) WRITE_CSR_ARB_SARCONFIG(csr, arb_off, arb, arb_cfg); /* Map worker threads to service arbiters */ - hw_data->get_arb_mapping(accel_dev, &thd_2_arb_cfg); + thd_2_arb_cfg = hw_data->get_arb_mapping(); - if (!thd_2_arb_cfg) - return -EFAULT; - - for (i = 0; i < hw_data->num_engines; i++) + for_each_set_bit(i, &ae_mask, hw_data->num_engines) WRITE_CSR_ARB_WT2SAM(csr, arb_off, wt_off, i, thd_2_arb_cfg[i]); return 0; diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 1e83d9397b11..7dd7cd6c3ef8 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -7,14 +7,8 @@ #include "adf_dh895xcc_hw_data.h" #include "icp_qat_hw.h" -/* Worker thread to service arbiter mappings based on dev SKUs */ -static const u32 thrd_to_arb_map_sku4[] = { - 0x12222AAA, 0x11666666, 0x12222AAA, 0x11666666, - 0x12222AAA, 0x11222222, 0x12222AAA, 0x11222222, - 0x00000000, 0x00000000, 0x00000000, 0x00000000 -}; - -static const u32 thrd_to_arb_map_sku6[] = { +/* Worker thread to service arbiter mappings */ +static const u32 thrd_to_arb_map[ADF_DH895XCC_MAX_ACCELENGINES] = { 0x12222AAA, 0x11666666, 0x12222AAA, 0x11666666, 0x12222AAA, 0x11222222, 0x12222AAA, 0x11222222, 0x12222AAA, 0x11222222, 0x12222AAA, 0x11222222 @@ -127,23 +121,9 @@ static enum dev_sku_info get_sku(struct adf_hw_device_data *self) return DEV_SKU_UNKNOWN; } -static void adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev, - u32 const **arb_map_config) +static const u32 *adf_get_arbiter_mapping(void) { - switch (accel_dev->accel_pci_dev.sku) { - case DEV_SKU_1: - *arb_map_config = thrd_to_arb_map_sku4; - break; - - case DEV_SKU_2: - case DEV_SKU_4: - *arb_map_config = thrd_to_arb_map_sku6; - break; - default: - dev_err(&GET_DEV(accel_dev), - "The configuration doesn't match any SKU"); - *arb_map_config = NULL; - } + return thrd_to_arb_map; } static u32 get_pf2vf_offset(u32 i) From 1aaae055d48e8f9c841dcce07d90fa5f8b6acf2e Mon Sep 17 00:00:00 2001 From: Adam Guerin Date: Mon, 4 Jan 2021 17:21:57 +0000 Subject: [PATCH 054/154] crypto: qat - fix potential spectre issue Sanitize ring_num value coming from configuration (and potentially from user space) before it is used as index in the banks array. This issue was detected by smatch: drivers/crypto/qat/qat_common/adf_transport.c:233 adf_create_ring() warn: potential spectre issue 'bank->rings' [r] (local cap) Signed-off-by: Adam Guerin Reviewed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_transport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/qat/qat_common/adf_transport.c b/drivers/crypto/qat/qat_common/adf_transport.c index 5a7030acdc33..888c1e047295 100644 --- a/drivers/crypto/qat/qat_common/adf_transport.c +++ b/drivers/crypto/qat/qat_common/adf_transport.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) /* Copyright(c) 2014 - 2020 Intel Corporation */ #include +#include #include "adf_accel_devices.h" #include "adf_transport_internal.h" #include "adf_transport_access_macros.h" @@ -246,6 +247,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, return -EFAULT; } + ring_num = array_index_nospec(ring_num, num_rings_per_bank); bank = &transport_data->banks[bank_num]; if (adf_reserve_ring(bank, ring_num)) { dev_err(&GET_DEV(accel_dev), "Ring %d, %s already exists.\n", From 80fccf18fec399de2151f84276d799ee0f704141 Mon Sep 17 00:00:00 2001 From: Adam Guerin Date: Mon, 4 Jan 2021 17:21:58 +0000 Subject: [PATCH 055/154] crypto: qat - change format string and cast ring size Cast ADF_SIZE_TO_RING_SIZE_IN_BYTES() so it can return a 64 bit value. This issue was detected by smatch: drivers/crypto/qat/qat_common/adf_transport_debug.c:65 adf_ring_show() warn: should '(1 << (ring->ring_size - 1)) << 7' be a 64 bit type? Signed-off-by: Adam Guerin Reviewed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_transport_debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_transport_debug.c b/drivers/crypto/qat/qat_common/adf_transport_debug.c index 1205186ad51e..e69e5907f595 100644 --- a/drivers/crypto/qat/qat_common/adf_transport_debug.c +++ b/drivers/crypto/qat/qat_common/adf_transport_debug.c @@ -62,8 +62,8 @@ static int adf_ring_show(struct seq_file *sfile, void *v) seq_printf(sfile, "head %x, tail %x, empty: %d\n", head, tail, (empty & 1 << ring->ring_number) >> ring->ring_number); - seq_printf(sfile, "ring size %d, msg size %d\n", - ADF_SIZE_TO_RING_SIZE_IN_BYTES(ring->ring_size), + seq_printf(sfile, "ring size %lld, msg size %d\n", + (long long)ADF_SIZE_TO_RING_SIZE_IN_BYTES(ring->ring_size), ADF_MSG_SIZE_TO_BYTES(ring->msg_size)); seq_puts(sfile, "----------- Ring data ------------\n"); return 0; From e48767c17718067ba21fb2ef461779ec2506f845 Mon Sep 17 00:00:00 2001 From: Adam Guerin Date: Mon, 4 Jan 2021 17:21:59 +0000 Subject: [PATCH 056/154] crypto: qat - reduce size of mapped region Restrict size of field to what is required by the operation. This issue was detected by smatch: drivers/crypto/qat/qat_common/qat_asym_algs.c:328 qat_dh_compute_value() error: dma_map_single_attrs() '&qat_req->in.dh.in.b' too small (8 vs 64) Signed-off-by: Adam Guerin Reviewed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 2c863d25327a..b0b78445418b 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -321,13 +321,13 @@ static int qat_dh_compute_value(struct kpp_request *req) qat_req->out.dh.out_tab[1] = 0; /* Mapping in.in.b or in.in_g2.xa is the same */ qat_req->phy_in = dma_map_single(dev, &qat_req->in.dh.in.b, - sizeof(struct qat_dh_input_params), + sizeof(qat_req->in.dh.in.b), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) goto unmap_dst; qat_req->phy_out = dma_map_single(dev, &qat_req->out.dh.r, - sizeof(struct qat_dh_output_params), + sizeof(qat_req->out.dh.r), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) goto unmap_in_params; @@ -716,13 +716,13 @@ static int qat_rsa_enc(struct akcipher_request *req) qat_req->in.rsa.in_tab[3] = 0; qat_req->out.rsa.out_tab[1] = 0; qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m, - sizeof(struct qat_rsa_input_params), + sizeof(qat_req->in.rsa.enc.m), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) goto unmap_dst; qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.enc.c, - sizeof(struct qat_rsa_output_params), + sizeof(qat_req->out.rsa.enc.c), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) goto unmap_in_params; @@ -864,13 +864,13 @@ static int qat_rsa_dec(struct akcipher_request *req) qat_req->in.rsa.in_tab[3] = 0; qat_req->out.rsa.out_tab[1] = 0; qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.dec.c, - sizeof(struct qat_rsa_input_params), + sizeof(qat_req->in.rsa.dec.c), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) goto unmap_dst; qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.dec.m, - sizeof(struct qat_rsa_output_params), + sizeof(qat_req->out.rsa.dec.m), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) goto unmap_in_params; From f7f2b43eaf6b4cfe54c75100709be31d5c4b52c8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 5 Jan 2021 00:02:37 +0100 Subject: [PATCH 057/154] crypto: bcm - Rename struct device_private to bcm_device_private Renaming 'struct device_private' to 'struct bcm_device_private', because it clashes with 'struct device_private' from 'drivers/base/base.h'. While it's not a functional problem, it's causing two distinct type hierarchies in BTF data. It also breaks build with options: CONFIG_DEBUG_INFO_BTF=y CONFIG_CRYPTO_DEV_BCM_SPU=y as reported by Qais Yousef [1]. [1] https://lore.kernel.org/lkml/20201229151352.6hzmjvu3qh6p2qgg@e107158-lin/ Fixes: 9d12ba86f818 ("crypto: brcm - Add Broadcom SPU driver") Signed-off-by: Jiri Olsa Tested-by: Qais Yousef Signed-off-by: Herbert Xu --- drivers/crypto/bcm/cipher.c | 2 +- drivers/crypto/bcm/cipher.h | 4 ++-- drivers/crypto/bcm/util.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index 30390a7324b2..0e5537838ef3 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -42,7 +42,7 @@ /* ================= Device Structure ================== */ -struct device_private iproc_priv; +struct bcm_device_private iproc_priv; /* ==================== Parameters ===================== */ diff --git a/drivers/crypto/bcm/cipher.h b/drivers/crypto/bcm/cipher.h index 0ad5892b445d..71281a3bdbdc 100644 --- a/drivers/crypto/bcm/cipher.h +++ b/drivers/crypto/bcm/cipher.h @@ -420,7 +420,7 @@ struct spu_hw { u32 num_chan; }; -struct device_private { +struct bcm_device_private { struct platform_device *pdev; struct spu_hw spu; @@ -467,6 +467,6 @@ struct device_private { struct mbox_chan **mbox; }; -extern struct device_private iproc_priv; +extern struct bcm_device_private iproc_priv; #endif diff --git a/drivers/crypto/bcm/util.c b/drivers/crypto/bcm/util.c index 2b304fc78059..77aeedb84055 100644 --- a/drivers/crypto/bcm/util.c +++ b/drivers/crypto/bcm/util.c @@ -348,7 +348,7 @@ char *spu_alg_name(enum spu_cipher_alg alg, enum spu_cipher_mode mode) static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf, size_t count, loff_t *offp) { - struct device_private *ipriv; + struct bcm_device_private *ipriv; char *buf; ssize_t ret, out_offset, out_count; int i; From 0d61c3f1449a70fbf70f99648c4075b1e758be4d Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Tue, 5 Jan 2021 14:12:03 +0800 Subject: [PATCH 058/154] crypto: hisilicon/qm - SVA bugfixed on Kunpeng920 Kunpeng920 SEC/HPRE/ZIP cannot support running user space SVA and kernel Crypto at the same time. Therefore, the algorithms should not be registered to Crypto as user space SVA is enabled. Signed-off-by: Kai Ye Reviewed-by: Zaibo Xu Reviewed-by: Zhou Wang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index f21ccae0e8ea..10a04ab15570 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4004,6 +4004,9 @@ int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list) { int flag = 0; int ret = 0; + /* HW V2 not support both use uacce sva mode and hardware crypto algs */ + if (qm->ver <= QM_HW_V2 && qm->use_sva) + return 0; mutex_lock(&qm_list->lock); if (list_empty(&qm_list->list)) @@ -4035,6 +4038,9 @@ EXPORT_SYMBOL_GPL(hisi_qm_alg_register); */ void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list) { + if (qm->ver <= QM_HW_V2 && qm->use_sva) + return; + mutex_lock(&qm_list->lock); list_del(&qm->list); mutex_unlock(&qm_list->lock); From f8408d2b79b834f79b6c578817e84f74a85d2190 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Tue, 5 Jan 2021 14:16:42 +0800 Subject: [PATCH 059/154] crypto: hisilicon - add ZIP device using mode parameter Add 'uacce_mode' parameter for ZIP, which can be set as 0(default) or 1. '0' means ZIP is only registered to kernel crypto, and '1' means it's registered to both kernel crypto and UACCE. Signed-off-by: Kai Ye Reviewed-by: Zhou Wang Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 2 +- drivers/crypto/hisilicon/qm.h | 27 +++++++++++++++++++++++++ drivers/crypto/hisilicon/zip/zip_main.c | 14 +++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 10a04ab15570..904b99a22442 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -2206,7 +2206,7 @@ static int qm_alloc_uacce(struct hisi_qm *qm) if (IS_ERR(uacce)) return PTR_ERR(uacce); - if (uacce->flags & UACCE_DEV_SVA) { + if (uacce->flags & UACCE_DEV_SVA && qm->mode == UACCE_MODE_SVA) { qm->use_sva = true; } else { /* only consider sva case */ diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 8624d1288afe..c1dd0fcf5beb 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -85,6 +85,11 @@ /* page number for queue file region */ #define QM_DOORBELL_PAGE_NR 1 +/* uacce mode of the driver */ +#define UACCE_MODE_NOUACCE 0 /* don't use uacce */ +#define UACCE_MODE_SVA 1 /* use uacce sva mode */ +#define UACCE_MODE_DESC "0(default) means only register to crypto, 1 means both register to crypto and uacce" + enum qm_stop_reason { QM_NORMAL, QM_SOFT_RESET, @@ -249,6 +254,7 @@ struct hisi_qm { resource_size_t phys_base; resource_size_t phys_size; struct uacce_device *uacce; + int mode; }; struct hisi_qp_status { @@ -333,6 +339,27 @@ static inline int vfs_num_set(const char *val, const struct kernel_param *kp) return param_set_int(val, kp); } +static inline int mode_set(const char *val, const struct kernel_param *kp) +{ + u32 n; + int ret; + + if (!val) + return -EINVAL; + + ret = kstrtou32(val, 10, &n); + if (ret != 0 || (n != UACCE_MODE_SVA && + n != UACCE_MODE_NOUACCE)) + return -EINVAL; + + return param_set_int(val, kp); +} + +static inline int uacce_mode_set(const char *val, const struct kernel_param *kp) +{ + return mode_set(val, kp); +} + static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list) { INIT_LIST_HEAD(&qm_list->list); diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 4fb5a32bf830..9cdecff01bcb 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -211,6 +211,19 @@ static const struct debugfs_reg32 hzip_dfx_regs[] = { {"HZIP_DECOMP_LZ77_CURR_ST ", 0x9cull}, }; +static const struct kernel_param_ops zip_uacce_mode_ops = { + .set = uacce_mode_set, + .get = param_get_int, +}; + +/* + * uacce_mode = 0 means zip only register to crypto, + * uacce_mode = 1 means zip both register to crypto and uacce. + */ +static u32 uacce_mode = UACCE_MODE_NOUACCE; +module_param_cb(uacce_mode, &zip_uacce_mode_ops, &uacce_mode, 0444); +MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC); + static int pf_q_num_set(const char *val, const struct kernel_param *kp) { return q_num_set(val, kp, PCI_DEVICE_ID_ZIP_PF); @@ -752,6 +765,7 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) qm->pdev = pdev; qm->ver = pdev->revision; qm->algs = "zlib\ngzip"; + qm->mode = uacce_mode; qm->sqe_size = HZIP_SQE_SIZE; qm->dev_name = hisi_zip_name; From bedd04e4aa1434d2f0f038e15bb6c48ac36876e1 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Tue, 5 Jan 2021 14:16:43 +0800 Subject: [PATCH 060/154] crypto: hisilicon/hpre - register HPRE device to uacce Register HPRE device to uacce framework for user space. Signed-off-by: Kai Ye Reviewed-by: Zhou Wang Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 54 +++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index e5c991913f09..ad8b691887a6 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "hpre.h" #define HPRE_QUEUE_NUM_V2 1024 @@ -178,6 +179,19 @@ static const char *hpre_dfx_files[HPRE_DFX_FILE_NUM] = { "invalid_req_cnt" }; +static const struct kernel_param_ops hpre_uacce_mode_ops = { + .set = uacce_mode_set, + .get = param_get_int, +}; + +/* + * uacce_mode = 0 means hpre only register to crypto, + * uacce_mode = 1 means hpre both register to crypto and uacce. + */ +static u32 uacce_mode = UACCE_MODE_NOUACCE; +module_param_cb(uacce_mode, &hpre_uacce_mode_ops, &uacce_mode, 0444); +MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC); + static int pf_q_num_set(const char *val, const struct kernel_param *kp) { return q_num_set(val, kp, HPRE_PCI_DEVICE_ID); @@ -214,6 +228,30 @@ struct hisi_qp *hpre_create_qp(void) return NULL; } +static void hpre_pasid_enable(struct hisi_qm *qm) +{ + u32 val; + + val = readl_relaxed(qm->io_base + HPRE_DATA_RUSER_CFG); + val |= BIT(HPRE_PASID_EN_BIT); + writel_relaxed(val, qm->io_base + HPRE_DATA_RUSER_CFG); + val = readl_relaxed(qm->io_base + HPRE_DATA_WUSER_CFG); + val |= BIT(HPRE_PASID_EN_BIT); + writel_relaxed(val, qm->io_base + HPRE_DATA_WUSER_CFG); +} + +static void hpre_pasid_disable(struct hisi_qm *qm) +{ + u32 val; + + val = readl_relaxed(qm->io_base + HPRE_DATA_RUSER_CFG); + val &= ~BIT(HPRE_PASID_EN_BIT); + writel_relaxed(val, qm->io_base + HPRE_DATA_RUSER_CFG); + val = readl_relaxed(qm->io_base + HPRE_DATA_WUSER_CFG); + val &= ~BIT(HPRE_PASID_EN_BIT); + writel_relaxed(val, qm->io_base + HPRE_DATA_WUSER_CFG); +} + static int hpre_cfg_by_dsm(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; @@ -279,6 +317,10 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) writel(0x0, HPRE_ADDR(qm, HPRE_COMM_CNT_CLR_CE)); writel(0x0, HPRE_ADDR(qm, HPRE_ECC_BYPASS)); + /* Enable data buffer pasid */ + if (qm->use_sva) + hpre_pasid_enable(qm); + writel(HPRE_BD_USR_MASK, HPRE_ADDR(qm, HPRE_BD_ARUSR_CFG)); writel(HPRE_BD_USR_MASK, HPRE_ADDR(qm, HPRE_BD_AWUSR_CFG)); writel(0x1, HPRE_ADDR(qm, HPRE_RDCHN_INI_CFG)); @@ -734,6 +776,8 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) return -EINVAL; } + qm->algs = "rsa\ndh\n"; + qm->mode = uacce_mode; qm->pdev = pdev; qm->ver = pdev->revision; qm->sqe_size = HPRE_SQE_SIZE; @@ -872,6 +916,14 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_with_qm_start; } + if (qm->uacce) { + ret = uacce_register(qm->uacce); + if (ret) { + pci_err(pdev, "failed to register uacce (%d)!\n", ret); + goto err_with_alg_register; + } + } + if (qm->fun_type == QM_HW_PF && vfs_num) { ret = hisi_qm_sriov_enable(pdev, vfs_num); if (ret < 0) @@ -911,6 +963,8 @@ static void hpre_remove(struct pci_dev *pdev) } } if (qm->fun_type == QM_HW_PF) { + if (qm->use_sva) + hpre_pasid_disable(qm); hpre_cnt_regs_clear(qm); qm->debug.curr_qm_qp_num = 0; } From 34932a6033be3c0088935c334e4dc5ad43dcb0cc Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Tue, 5 Jan 2021 14:16:44 +0800 Subject: [PATCH 061/154] crypto: hisilicon/sec - register SEC device to uacce Register SEC device to uacce framework for user space. Signed-off-by: Kai Ye Reviewed-by: Zhou Wang Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_main.c | 39 +++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index b35c1c2271a3..4809c19dcdc8 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "sec.h" @@ -74,6 +75,16 @@ #define SEC_USER0_SMMU_NORMAL (BIT(23) | BIT(15)) #define SEC_USER1_SMMU_NORMAL (BIT(31) | BIT(23) | BIT(15) | BIT(7)) +#define SEC_USER1_ENABLE_CONTEXT_SSV BIT(24) +#define SEC_USER1_ENABLE_DATA_SSV BIT(16) +#define SEC_USER1_WB_CONTEXT_SSV BIT(8) +#define SEC_USER1_WB_DATA_SSV BIT(0) +#define SEC_USER1_SVA_SET (SEC_USER1_ENABLE_CONTEXT_SSV | \ + SEC_USER1_ENABLE_DATA_SSV | \ + SEC_USER1_WB_CONTEXT_SSV | \ + SEC_USER1_WB_DATA_SSV) +#define SEC_USER1_SMMU_SVA (SEC_USER1_SMMU_NORMAL | SEC_USER1_SVA_SET) +#define SEC_USER1_SMMU_MASK (~SEC_USER1_SVA_SET) #define SEC_CORE_INT_STATUS_M_ECC BIT(2) #define SEC_DELAY_10_US 10 @@ -233,6 +244,18 @@ struct hisi_qp **sec_create_qps(void) return NULL; } +static const struct kernel_param_ops sec_uacce_mode_ops = { + .set = uacce_mode_set, + .get = param_get_int, +}; + +/* + * uacce_mode = 0 means sec only register to crypto, + * uacce_mode = 1 means sec both register to crypto and uacce. + */ +static u32 uacce_mode = UACCE_MODE_NOUACCE; +module_param_cb(uacce_mode, &sec_uacce_mode_ops, &uacce_mode, 0444); +MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC); static const struct pci_device_id sec_dev_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_PF_PCI_DEVICE_ID) }, @@ -299,7 +322,11 @@ static int sec_engine_init(struct hisi_qm *qm) writel_relaxed(reg, SEC_ADDR(qm, SEC_INTERFACE_USER_CTRL0_REG)); reg = readl_relaxed(SEC_ADDR(qm, SEC_INTERFACE_USER_CTRL1_REG)); - reg |= SEC_USER1_SMMU_NORMAL; + reg &= SEC_USER1_SMMU_MASK; + if (qm->use_sva) + reg |= SEC_USER1_SMMU_SVA; + else + reg |= SEC_USER1_SMMU_NORMAL; writel_relaxed(reg, SEC_ADDR(qm, SEC_INTERFACE_USER_CTRL1_REG)); writel(SEC_SINGLE_PORT_MAX_TRANS, @@ -758,6 +785,8 @@ static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) qm->pdev = pdev; qm->ver = pdev->revision; + qm->algs = "cipher\ndigest\naead\n"; + qm->mode = uacce_mode; qm->sqe_size = SEC_SQE_SIZE; qm->dev_name = sec_name; @@ -885,6 +914,14 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_qm_stop; } + if (qm->uacce) { + ret = uacce_register(qm->uacce); + if (ret) { + pci_err(pdev, "failed to register uacce (%d)!\n", ret); + goto err_alg_unregister; + } + } + if (qm->fun_type == QM_HW_PF && vfs_num) { ret = hisi_qm_sriov_enable(pdev, vfs_num); if (ret < 0) From 4d6a5a4b1e4a7606bf666ce694671f6897bdabaa Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Tue, 5 Jan 2021 15:31:08 +0530 Subject: [PATCH 062/154] crypto: marvell/cesa - Fix a spelling s/fautly/faultly/ in comment s/fautly/faulty/p Signed-off-by: Bhaskar Chowdhury Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa/tdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/marvell/cesa/tdma.c b/drivers/crypto/marvell/cesa/tdma.c index 5d9c48fb72b2..0e0d63359798 100644 --- a/drivers/crypto/marvell/cesa/tdma.c +++ b/drivers/crypto/marvell/cesa/tdma.c @@ -177,7 +177,7 @@ int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status) /* * Save the last request in error to engine->req, so that the core - * knows which request was fautly + * knows which request was faulty */ if (res) { spin_lock_bh(&engine->lock); From 55a7e88f016873ef1717295d8460416b1ccd05a5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:47:49 +0100 Subject: [PATCH 063/154] crypto: x86/camellia - switch to XTS template Now that the XTS template can wrap accelerated ECB modes, it can be used to implement Camellia in XTS mode as well, which turns out to be at least as fast, and sometimes even faster. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 181 ---------------- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 207 ------------------- arch/x86/crypto/camellia_aesni_avx2_glue.c | 70 ------- arch/x86/crypto/camellia_aesni_avx_glue.c | 101 +-------- arch/x86/include/asm/crypto/camellia.h | 18 -- crypto/Kconfig | 2 +- 6 files changed, 2 insertions(+), 577 deletions(-) diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index ecc0a9a905c4..471c34e6cac2 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -17,7 +17,6 @@ #include #include -#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -593,10 +592,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -/* For XTS mode IV generation */ -.Lxts_gf128mul_and_shl1_mask: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 - /* * pre-SubByte transform * @@ -1111,179 +1106,3 @@ SYM_FUNC_START(camellia_ctr_16way) FRAME_END ret; SYM_FUNC_END(camellia_ctr_16way) - -#define gf128mul_x_ble(iv, mask, tmp) \ - vpsrad $31, iv, tmp; \ - vpaddq iv, iv, iv; \ - vpshufd $0x13, tmp, tmp; \ - vpand mask, tmp, tmp; \ - vpxor tmp, iv, iv; - -.align 8 -SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - * %r8: index for input whitening key - * %r9: pointer to __camellia_enc_blk16 or __camellia_dec_blk16 - */ - FRAME_BEGIN - - subq $(16 * 16), %rsp; - movq %rsp, %rax; - - vmovdqa .Lxts_gf128mul_and_shl1_mask, %xmm14; - - /* load IV */ - vmovdqu (%rcx), %xmm0; - vpxor 0 * 16(%rdx), %xmm0, %xmm15; - vmovdqu %xmm15, 15 * 16(%rax); - vmovdqu %xmm0, 0 * 16(%rsi); - - /* construct IVs */ - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 1 * 16(%rdx), %xmm0, %xmm15; - vmovdqu %xmm15, 14 * 16(%rax); - vmovdqu %xmm0, 1 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 2 * 16(%rdx), %xmm0, %xmm13; - vmovdqu %xmm0, 2 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 3 * 16(%rdx), %xmm0, %xmm12; - vmovdqu %xmm0, 3 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 4 * 16(%rdx), %xmm0, %xmm11; - vmovdqu %xmm0, 4 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 5 * 16(%rdx), %xmm0, %xmm10; - vmovdqu %xmm0, 5 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 6 * 16(%rdx), %xmm0, %xmm9; - vmovdqu %xmm0, 6 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 7 * 16(%rdx), %xmm0, %xmm8; - vmovdqu %xmm0, 7 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 8 * 16(%rdx), %xmm0, %xmm7; - vmovdqu %xmm0, 8 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 9 * 16(%rdx), %xmm0, %xmm6; - vmovdqu %xmm0, 9 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 10 * 16(%rdx), %xmm0, %xmm5; - vmovdqu %xmm0, 10 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 11 * 16(%rdx), %xmm0, %xmm4; - vmovdqu %xmm0, 11 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 12 * 16(%rdx), %xmm0, %xmm3; - vmovdqu %xmm0, 12 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 13 * 16(%rdx), %xmm0, %xmm2; - vmovdqu %xmm0, 13 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 14 * 16(%rdx), %xmm0, %xmm1; - vmovdqu %xmm0, 14 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vpxor 15 * 16(%rdx), %xmm0, %xmm15; - vmovdqu %xmm15, 0 * 16(%rax); - vmovdqu %xmm0, 15 * 16(%rsi); - - gf128mul_x_ble(%xmm0, %xmm14, %xmm15); - vmovdqu %xmm0, (%rcx); - - /* inpack16_pre: */ - vmovq (key_table)(CTX, %r8, 8), %xmm15; - vpshufb .Lpack_bswap, %xmm15, %xmm15; - vpxor 0 * 16(%rax), %xmm15, %xmm0; - vpxor %xmm1, %xmm15, %xmm1; - vpxor %xmm2, %xmm15, %xmm2; - vpxor %xmm3, %xmm15, %xmm3; - vpxor %xmm4, %xmm15, %xmm4; - vpxor %xmm5, %xmm15, %xmm5; - vpxor %xmm6, %xmm15, %xmm6; - vpxor %xmm7, %xmm15, %xmm7; - vpxor %xmm8, %xmm15, %xmm8; - vpxor %xmm9, %xmm15, %xmm9; - vpxor %xmm10, %xmm15, %xmm10; - vpxor %xmm11, %xmm15, %xmm11; - vpxor %xmm12, %xmm15, %xmm12; - vpxor %xmm13, %xmm15, %xmm13; - vpxor 14 * 16(%rax), %xmm15, %xmm14; - vpxor 15 * 16(%rax), %xmm15, %xmm15; - - CALL_NOSPEC r9; - - addq $(16 * 16), %rsp; - - vpxor 0 * 16(%rsi), %xmm7, %xmm7; - vpxor 1 * 16(%rsi), %xmm6, %xmm6; - vpxor 2 * 16(%rsi), %xmm5, %xmm5; - vpxor 3 * 16(%rsi), %xmm4, %xmm4; - vpxor 4 * 16(%rsi), %xmm3, %xmm3; - vpxor 5 * 16(%rsi), %xmm2, %xmm2; - vpxor 6 * 16(%rsi), %xmm1, %xmm1; - vpxor 7 * 16(%rsi), %xmm0, %xmm0; - vpxor 8 * 16(%rsi), %xmm15, %xmm15; - vpxor 9 * 16(%rsi), %xmm14, %xmm14; - vpxor 10 * 16(%rsi), %xmm13, %xmm13; - vpxor 11 * 16(%rsi), %xmm12, %xmm12; - vpxor 12 * 16(%rsi), %xmm11, %xmm11; - vpxor 13 * 16(%rsi), %xmm10, %xmm10; - vpxor 14 * 16(%rsi), %xmm9, %xmm9; - vpxor 15 * 16(%rsi), %xmm8, %xmm8; - write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, - %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, - %xmm8, %rsi); - - FRAME_END - ret; -SYM_FUNC_END(camellia_xts_crypt_16way) - -SYM_FUNC_START(camellia_xts_enc_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - xorl %r8d, %r8d; /* input whitening key, 0 for enc */ - - leaq __camellia_enc_blk16, %r9; - - jmp camellia_xts_crypt_16way; -SYM_FUNC_END(camellia_xts_enc_16way) - -SYM_FUNC_START(camellia_xts_dec_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - - cmpl $16, key_length(CTX); - movl $32, %r8d; - movl $24, %eax; - cmovel %eax, %r8d; /* input whitening key, last for dec */ - - leaq __camellia_dec_blk16, %r9; - - jmp camellia_xts_crypt_16way; -SYM_FUNC_END(camellia_xts_dec_16way) diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 0907243c501c..9561dee52de0 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -7,7 +7,6 @@ #include #include -#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -629,12 +628,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -/* For XTS mode */ -.Lxts_gf128mul_and_shl1_mask_0: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 -.Lxts_gf128mul_and_shl1_mask_1: - .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 - /* * pre-SubByte transform * @@ -1201,203 +1194,3 @@ SYM_FUNC_START(camellia_ctr_32way) FRAME_END ret; SYM_FUNC_END(camellia_ctr_32way) - -#define gf128mul_x_ble(iv, mask, tmp) \ - vpsrad $31, iv, tmp; \ - vpaddq iv, iv, iv; \ - vpshufd $0x13, tmp, tmp; \ - vpand mask, tmp, tmp; \ - vpxor tmp, iv, iv; - -#define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) \ - vpsrad $31, iv, tmp0; \ - vpaddq iv, iv, tmp1; \ - vpsllq $2, iv, iv; \ - vpshufd $0x13, tmp0, tmp0; \ - vpsrad $31, tmp1, tmp1; \ - vpand mask2, tmp0, tmp0; \ - vpshufd $0x13, tmp1, tmp1; \ - vpxor tmp0, iv, iv; \ - vpand mask1, tmp1, tmp1; \ - vpxor tmp1, iv, iv; - -.align 8 -SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (32 blocks) - * %rdx: src (32 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - * %r8: index for input whitening key - * %r9: pointer to __camellia_enc_blk32 or __camellia_dec_blk32 - */ - FRAME_BEGIN - - vzeroupper; - - subq $(16 * 32), %rsp; - movq %rsp, %rax; - - vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_0, %ymm12; - - /* load IV and construct second IV */ - vmovdqu (%rcx), %xmm0; - vmovdqa %xmm0, %xmm15; - gf128mul_x_ble(%xmm0, %xmm12, %xmm13); - vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_1, %ymm13; - vinserti128 $1, %xmm0, %ymm15, %ymm0; - vpxor 0 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 15 * 32(%rax); - vmovdqu %ymm0, 0 * 32(%rsi); - - /* construct IVs */ - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 1 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 14 * 32(%rax); - vmovdqu %ymm0, 1 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 2 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 13 * 32(%rax); - vmovdqu %ymm0, 2 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 3 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 12 * 32(%rax); - vmovdqu %ymm0, 3 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 4 * 32(%rdx), %ymm0, %ymm11; - vmovdqu %ymm0, 4 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 5 * 32(%rdx), %ymm0, %ymm10; - vmovdqu %ymm0, 5 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 6 * 32(%rdx), %ymm0, %ymm9; - vmovdqu %ymm0, 6 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 7 * 32(%rdx), %ymm0, %ymm8; - vmovdqu %ymm0, 7 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 8 * 32(%rdx), %ymm0, %ymm7; - vmovdqu %ymm0, 8 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 9 * 32(%rdx), %ymm0, %ymm6; - vmovdqu %ymm0, 9 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 10 * 32(%rdx), %ymm0, %ymm5; - vmovdqu %ymm0, 10 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 11 * 32(%rdx), %ymm0, %ymm4; - vmovdqu %ymm0, 11 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 12 * 32(%rdx), %ymm0, %ymm3; - vmovdqu %ymm0, 12 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 13 * 32(%rdx), %ymm0, %ymm2; - vmovdqu %ymm0, 13 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 14 * 32(%rdx), %ymm0, %ymm1; - vmovdqu %ymm0, 14 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 15 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 0 * 32(%rax); - vmovdqu %ymm0, 15 * 32(%rsi); - - vextracti128 $1, %ymm0, %xmm0; - gf128mul_x_ble(%xmm0, %xmm12, %xmm15); - vmovdqu %xmm0, (%rcx); - - /* inpack32_pre: */ - vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15; - vpshufb .Lpack_bswap, %ymm15, %ymm15; - vpxor 0 * 32(%rax), %ymm15, %ymm0; - vpxor %ymm1, %ymm15, %ymm1; - vpxor %ymm2, %ymm15, %ymm2; - vpxor %ymm3, %ymm15, %ymm3; - vpxor %ymm4, %ymm15, %ymm4; - vpxor %ymm5, %ymm15, %ymm5; - vpxor %ymm6, %ymm15, %ymm6; - vpxor %ymm7, %ymm15, %ymm7; - vpxor %ymm8, %ymm15, %ymm8; - vpxor %ymm9, %ymm15, %ymm9; - vpxor %ymm10, %ymm15, %ymm10; - vpxor %ymm11, %ymm15, %ymm11; - vpxor 12 * 32(%rax), %ymm15, %ymm12; - vpxor 13 * 32(%rax), %ymm15, %ymm13; - vpxor 14 * 32(%rax), %ymm15, %ymm14; - vpxor 15 * 32(%rax), %ymm15, %ymm15; - - CALL_NOSPEC r9; - - addq $(16 * 32), %rsp; - - vpxor 0 * 32(%rsi), %ymm7, %ymm7; - vpxor 1 * 32(%rsi), %ymm6, %ymm6; - vpxor 2 * 32(%rsi), %ymm5, %ymm5; - vpxor 3 * 32(%rsi), %ymm4, %ymm4; - vpxor 4 * 32(%rsi), %ymm3, %ymm3; - vpxor 5 * 32(%rsi), %ymm2, %ymm2; - vpxor 6 * 32(%rsi), %ymm1, %ymm1; - vpxor 7 * 32(%rsi), %ymm0, %ymm0; - vpxor 8 * 32(%rsi), %ymm15, %ymm15; - vpxor 9 * 32(%rsi), %ymm14, %ymm14; - vpxor 10 * 32(%rsi), %ymm13, %ymm13; - vpxor 11 * 32(%rsi), %ymm12, %ymm12; - vpxor 12 * 32(%rsi), %ymm11, %ymm11; - vpxor 13 * 32(%rsi), %ymm10, %ymm10; - vpxor 14 * 32(%rsi), %ymm9, %ymm9; - vpxor 15 * 32(%rsi), %ymm8, %ymm8; - write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, - %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, - %ymm8, %rsi); - - vzeroupper; - - FRAME_END - ret; -SYM_FUNC_END(camellia_xts_crypt_32way) - -SYM_FUNC_START(camellia_xts_enc_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (32 blocks) - * %rdx: src (32 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - - xorl %r8d, %r8d; /* input whitening key, 0 for enc */ - - leaq __camellia_enc_blk32, %r9; - - jmp camellia_xts_crypt_32way; -SYM_FUNC_END(camellia_xts_enc_32way) - -SYM_FUNC_START(camellia_xts_dec_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (32 blocks) - * %rdx: src (32 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - - cmpl $16, key_length(CTX); - movl $32, %r8d; - movl $24, %eax; - cmovel %eax, %r8d; /* input whitening key, last for dec */ - - leaq __camellia_dec_blk32, %r9; - - jmp camellia_xts_crypt_32way; -SYM_FUNC_END(camellia_xts_dec_32way) diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index ccda647422d6..d956d0473668 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -26,11 +25,6 @@ asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void camellia_xts_enc_32way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -asmlinkage void camellia_xts_dec_32way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - static const struct common_glue_ctx camellia_enc = { .num_funcs = 4, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -69,22 +63,6 @@ static const struct common_glue_ctx camellia_ctr = { } } }; -static const struct common_glue_ctx camellia_enc_xts = { - .num_funcs = 3, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .xts = camellia_xts_enc_32way } - }, { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = camellia_xts_enc_16way } - }, { - .num_blocks = 1, - .fn_u = { .xts = camellia_xts_enc } - } } -}; - static const struct common_glue_ctx camellia_dec = { .num_funcs = 4, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -123,22 +101,6 @@ static const struct common_glue_ctx camellia_dec_cbc = { } } }; -static const struct common_glue_ctx camellia_dec_xts = { - .num_funcs = 3, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .xts = camellia_xts_dec_32way } - }, { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = camellia_xts_dec_16way } - }, { - .num_blocks = 1, - .fn_u = { .xts = camellia_xts_dec } - } } -}; - static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -170,24 +132,6 @@ static int ctr_crypt(struct skcipher_request *req) return glue_ctr_req_128bit(&camellia_ctr, req); } -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk, - &ctx->tweak_ctx, &ctx->crypt_ctx, false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk, - &ctx->tweak_ctx, &ctx->crypt_ctx, true); -} - static struct skcipher_alg camellia_algs[] = { { .base.cra_name = "__ecb(camellia)", @@ -231,20 +175,6 @@ static struct skcipher_alg camellia_algs[] = { .setkey = camellia_setkey, .encrypt = ctr_crypt, .decrypt = ctr_crypt, - }, { - .base.cra_name = "__xts(camellia)", - .base.cra_driver_name = "__xts-camellia-aesni-avx2", - .base.cra_priority = 500, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct camellia_xts_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE, - .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, }, }; diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 4e5de6ef206e..44614f8a452c 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -31,26 +30,6 @@ asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); EXPORT_SYMBOL_GPL(camellia_ctr_16way); -asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -EXPORT_SYMBOL_GPL(camellia_xts_enc_16way); - -asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -EXPORT_SYMBOL_GPL(camellia_xts_dec_16way); - -void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_enc_blk); -} -EXPORT_SYMBOL_GPL(camellia_xts_enc); - -void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_dec_blk); -} -EXPORT_SYMBOL_GPL(camellia_xts_dec); - static const struct common_glue_ctx camellia_enc = { .num_funcs = 3, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -83,19 +62,6 @@ static const struct common_glue_ctx camellia_ctr = { } } }; -static const struct common_glue_ctx camellia_enc_xts = { - .num_funcs = 2, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = camellia_xts_enc_16way } - }, { - .num_blocks = 1, - .fn_u = { .xts = camellia_xts_enc } - } } -}; - static const struct common_glue_ctx camellia_dec = { .num_funcs = 3, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -128,19 +94,6 @@ static const struct common_glue_ctx camellia_dec_cbc = { } } }; -static const struct common_glue_ctx camellia_dec_xts = { - .num_funcs = 2, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = camellia_xts_dec_16way } - }, { - .num_blocks = 1, - .fn_u = { .xts = camellia_xts_dec } - } } -}; - static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -172,44 +125,6 @@ static int ctr_crypt(struct skcipher_request *req) return glue_ctr_req_128bit(&camellia_ctr, req); } -int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} -EXPORT_SYMBOL_GPL(xts_camellia_setkey); - -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk, - &ctx->tweak_ctx, &ctx->crypt_ctx, false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk, - &ctx->tweak_ctx, &ctx->crypt_ctx, true); -} - static struct skcipher_alg camellia_algs[] = { { .base.cra_name = "__ecb(camellia)", @@ -253,21 +168,7 @@ static struct skcipher_alg camellia_algs[] = { .setkey = camellia_setkey, .encrypt = ctr_crypt, .decrypt = ctr_crypt, - }, { - .base.cra_name = "__xts(camellia)", - .base.cra_driver_name = "__xts-camellia-aesni", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct camellia_xts_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE, - .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, + } }; static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)]; diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index f6d91861cb14..0e5f82adbaf9 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h @@ -19,18 +19,10 @@ struct camellia_ctx { u32 key_length; }; -struct camellia_xts_ctx { - struct camellia_ctx tweak_ctx; - struct camellia_ctx crypt_ctx; -}; - extern int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, unsigned int key_len); -extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen); - /* regular block cipher functions */ asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, bool xor); @@ -49,11 +41,6 @@ asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src) { __camellia_enc_blk(ctx, dst, src, false); @@ -83,9 +70,4 @@ extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src, extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -extern void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - #endif /* ASM_X86_CAMELLIA_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index c48ca26e2169..b9ea4e262ebe 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1305,7 +1305,7 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64 select CRYPTO_CAMELLIA_X86_64 select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD - select CRYPTO_XTS + imply CRYPTO_XTS help Camellia cipher algorithm module (x86_64/AES-NI/AVX). From 2cc0fedb8124ac7a75d132988f1e11f5de30c61f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:47:50 +0100 Subject: [PATCH 064/154] crypto: x86/cast6 - switch to XTS template Now that the XTS template can wrap accelerated ECB modes, it can be used to implement CAST6 in XTS mode as well, which turns out to be at least as fast, and sometimes even faster Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 56 ------------- arch/x86/crypto/cast6_avx_glue.c | 98 ----------------------- crypto/Kconfig | 2 +- 3 files changed, 1 insertion(+), 155 deletions(-) diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 932a3ce32a88..0c1ea836215a 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -212,8 +212,6 @@ .section .rodata.cst16, "aM", @progbits, 16 .align 16 -.Lxts_gf128mul_and_shl1_mask: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 .Lbswap_mask: .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 .Lbswap128_mask: @@ -440,57 +438,3 @@ SYM_FUNC_START(cast6_ctr_8way) FRAME_END ret; SYM_FUNC_END(cast6_ctr_8way) - -SYM_FUNC_START(cast6_xts_enc_8way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - pushq %r15; - - movq %rdi, CTX - movq %rsi, %r11; - - /* regs <= src, dst <= IVs, regs <= regs xor IVs */ - load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, - RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask); - - call __cast6_enc_blk8; - - /* dst <= regs xor IVs(in dst) */ - store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - popq %r15; - FRAME_END - ret; -SYM_FUNC_END(cast6_xts_enc_8way) - -SYM_FUNC_START(cast6_xts_dec_8way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - pushq %r15; - - movq %rdi, CTX - movq %rsi, %r11; - - /* regs <= src, dst <= IVs, regs <= regs xor IVs */ - load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, - RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask); - - call __cast6_dec_blk8; - - /* dst <= regs xor IVs(in dst) */ - store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - popq %r15; - FRAME_END - ret; -SYM_FUNC_END(cast6_xts_dec_8way) diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 48e0f37796fa..5a21d3e9041c 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #define CAST6_PARALLEL_BLOCKS 8 @@ -27,27 +26,12 @@ asmlinkage void cast6_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void cast6_ctr_8way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void cast6_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -asmlinkage void cast6_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { return cast6_setkey(&tfm->base, key, keylen); } -static void cast6_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_encrypt); -} - -static void cast6_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_decrypt); -} - static void cast6_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblk; @@ -87,19 +71,6 @@ static const struct common_glue_ctx cast6_ctr = { } } }; -static const struct common_glue_ctx cast6_enc_xts = { - .num_funcs = 2, - .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .xts = cast6_xts_enc_8way } - }, { - .num_blocks = 1, - .fn_u = { .xts = cast6_xts_enc } - } } -}; - static const struct common_glue_ctx cast6_dec = { .num_funcs = 2, .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, @@ -126,19 +97,6 @@ static const struct common_glue_ctx cast6_dec_cbc = { } } }; -static const struct common_glue_ctx cast6_dec_xts = { - .num_funcs = 2, - .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .xts = cast6_xts_dec_8way } - }, { - .num_blocks = 1, - .fn_u = { .xts = cast6_xts_dec } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { return glue_ecb_req_128bit(&cast6_enc, req); @@ -164,48 +122,6 @@ static int ctr_crypt(struct skcipher_request *req) return glue_ctr_req_128bit(&cast6_ctr, req); } -struct cast6_xts_ctx { - struct cast6_ctx tweak_ctx; - struct cast6_ctx crypt_ctx; -}; - -static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} - -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&cast6_enc_xts, req, __cast6_encrypt, - &ctx->tweak_ctx, &ctx->crypt_ctx, false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&cast6_dec_xts, req, __cast6_encrypt, - &ctx->tweak_ctx, &ctx->crypt_ctx, true); -} - static struct skcipher_alg cast6_algs[] = { { .base.cra_name = "__ecb(cast6)", @@ -249,20 +165,6 @@ static struct skcipher_alg cast6_algs[] = { .setkey = cast6_setkey_skcipher, .encrypt = ctr_crypt, .decrypt = ctr_crypt, - }, { - .base.cra_name = "__xts(cast6)", - .base.cra_driver_name = "__xts-cast6-avx", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = CAST6_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct cast6_xts_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * CAST6_MIN_KEY_SIZE, - .max_keysize = 2 * CAST6_MAX_KEY_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = xts_cast6_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, }, }; diff --git a/crypto/Kconfig b/crypto/Kconfig index b9ea4e262ebe..03e8468e57df 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1394,7 +1394,7 @@ config CRYPTO_CAST6_AVX_X86_64 select CRYPTO_CAST_COMMON select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD - select CRYPTO_XTS + imply CRYPTO_XTS help The CAST6 encryption algorithm (synonymous with CAST-256) is described in RFC2612. From 9ec0af8aa6038163e7cd01dea3b8e085712d19fc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:47:51 +0100 Subject: [PATCH 065/154] crypto: x86/serpent- switch to XTS template Now that the XTS template can wrap accelerated ECB modes, it can be used to implement Serpent in XTS mode as well, which turns out to be at least as fast, and sometimes even faster Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 48 ---------- arch/x86/crypto/serpent-avx2-asm_64.S | 62 ------------ arch/x86/crypto/serpent_avx2_glue.c | 72 -------------- arch/x86/crypto/serpent_avx_glue.c | 101 -------------------- arch/x86/include/asm/crypto/serpent-avx.h | 21 ---- crypto/Kconfig | 2 +- 6 files changed, 1 insertion(+), 305 deletions(-) diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index ba9e4c1e7f5c..6b41f46bcc76 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -18,10 +18,6 @@ .align 16 .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16 -.align 16 -.Lxts_gf128mul_and_shl1_mask: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 .text @@ -735,47 +731,3 @@ SYM_FUNC_START(serpent_ctr_8way_avx) FRAME_END ret; SYM_FUNC_END(serpent_ctr_8way_avx) - -SYM_FUNC_START(serpent_xts_enc_8way_avx) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - - /* regs <= src, dst <= IVs, regs <= regs xor IVs */ - load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, - RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask); - - call __serpent_enc_blk8_avx; - - /* dst <= regs xor IVs(in dst) */ - store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - FRAME_END - ret; -SYM_FUNC_END(serpent_xts_enc_8way_avx) - -SYM_FUNC_START(serpent_xts_dec_8way_avx) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - - /* regs <= src, dst <= IVs, regs <= regs xor IVs */ - load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, - RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask); - - call __serpent_dec_blk8_avx; - - /* dst <= regs xor IVs(in dst) */ - store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); - - FRAME_END - ret; -SYM_FUNC_END(serpent_xts_dec_8way_avx) diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index c9648aeae705..a510a949f02f 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -20,16 +20,6 @@ .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -.section .rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16 -.align 16 -.Lxts_gf128mul_and_shl1_mask_0: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 - -.section .rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16 -.align 16 -.Lxts_gf128mul_and_shl1_mask_1: - .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 - .text #define CTX %rdi @@ -759,55 +749,3 @@ SYM_FUNC_START(serpent_ctr_16way) FRAME_END ret; SYM_FUNC_END(serpent_ctr_16way) - -SYM_FUNC_START(serpent_xts_enc_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - - vzeroupper; - - load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, - RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, - .Lxts_gf128mul_and_shl1_mask_0, - .Lxts_gf128mul_and_shl1_mask_1); - - call __serpent_enc_blk16; - - store_xts_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - vzeroupper; - - FRAME_END - ret; -SYM_FUNC_END(serpent_xts_enc_16way) - -SYM_FUNC_START(serpent_xts_dec_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - - vzeroupper; - - load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, - RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, - .Lxts_gf128mul_and_shl1_mask_0, - .Lxts_gf128mul_and_shl1_mask_1); - - call __serpent_dec_blk16; - - store_xts_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); - - vzeroupper; - - FRAME_END - ret; -SYM_FUNC_END(serpent_xts_dec_16way) diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index f973ace44ad3..9cdf2c078e21 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -25,11 +24,6 @@ asmlinkage void serpent_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void serpent_ctr_16way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void serpent_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -asmlinkage void serpent_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -68,22 +62,6 @@ static const struct common_glue_ctx serpent_ctr = { } } }; -static const struct common_glue_ctx serpent_enc_xts = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .xts = serpent_xts_enc_16way } - }, { - .num_blocks = 8, - .fn_u = { .xts = serpent_xts_enc_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .xts = serpent_xts_enc } - } } -}; - static const struct common_glue_ctx serpent_dec = { .num_funcs = 3, .fpu_blocks_limit = 8, @@ -116,22 +94,6 @@ static const struct common_glue_ctx serpent_dec_cbc = { } } }; -static const struct common_glue_ctx serpent_dec_xts = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .xts = serpent_xts_dec_16way } - }, { - .num_blocks = 8, - .fn_u = { .xts = serpent_xts_dec_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .xts = serpent_xts_dec } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { return glue_ecb_req_128bit(&serpent_enc, req); @@ -157,26 +119,6 @@ static int ctr_crypt(struct skcipher_request *req) return glue_ctr_req_128bit(&serpent_ctr, req); } -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&serpent_enc_xts, req, - __serpent_encrypt, &ctx->tweak_ctx, - &ctx->crypt_ctx, false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&serpent_dec_xts, req, - __serpent_encrypt, &ctx->tweak_ctx, - &ctx->crypt_ctx, true); -} - static struct skcipher_alg serpent_algs[] = { { .base.cra_name = "__ecb(serpent)", @@ -220,20 +162,6 @@ static struct skcipher_alg serpent_algs[] = { .setkey = serpent_setkey_skcipher, .encrypt = ctr_crypt, .decrypt = ctr_crypt, - }, { - .base.cra_name = "__xts(serpent)", - .base.cra_driver_name = "__xts-serpent-avx2", - .base.cra_priority = 600, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = SERPENT_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct serpent_xts_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * SERPENT_MIN_KEY_SIZE, - .max_keysize = 2 * SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, }, }; diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 7806d1cbe854..b17a08b57a91 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -36,14 +35,6 @@ asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src, le128 *iv); EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx); -asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst, - const u8 *src, le128 *iv); -EXPORT_SYMBOL_GPL(serpent_xts_enc_8way_avx); - -asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst, - const u8 *src, le128 *iv); -EXPORT_SYMBOL_GPL(serpent_xts_dec_8way_avx); - void __serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblk; @@ -58,44 +49,12 @@ void __serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) } EXPORT_SYMBOL_GPL(__serpent_crypt_ctr); -void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_encrypt); -} -EXPORT_SYMBOL_GPL(serpent_xts_enc); - -void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_decrypt); -} -EXPORT_SYMBOL_GPL(serpent_xts_dec); - static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); } -int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} -EXPORT_SYMBOL_GPL(xts_serpent_setkey); - static const struct common_glue_ctx serpent_enc = { .num_funcs = 2, .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, @@ -122,19 +81,6 @@ static const struct common_glue_ctx serpent_ctr = { } } }; -static const struct common_glue_ctx serpent_enc_xts = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .xts = serpent_xts_enc_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .xts = serpent_xts_enc } - } } -}; - static const struct common_glue_ctx serpent_dec = { .num_funcs = 2, .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, @@ -161,19 +107,6 @@ static const struct common_glue_ctx serpent_dec_cbc = { } } }; -static const struct common_glue_ctx serpent_dec_xts = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .xts = serpent_xts_dec_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .xts = serpent_xts_dec } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { return glue_ecb_req_128bit(&serpent_enc, req); @@ -199,26 +132,6 @@ static int ctr_crypt(struct skcipher_request *req) return glue_ctr_req_128bit(&serpent_ctr, req); } -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&serpent_enc_xts, req, - __serpent_encrypt, &ctx->tweak_ctx, - &ctx->crypt_ctx, false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&serpent_dec_xts, req, - __serpent_encrypt, &ctx->tweak_ctx, - &ctx->crypt_ctx, true); -} - static struct skcipher_alg serpent_algs[] = { { .base.cra_name = "__ecb(serpent)", @@ -262,20 +175,6 @@ static struct skcipher_alg serpent_algs[] = { .setkey = serpent_setkey_skcipher, .encrypt = ctr_crypt, .decrypt = ctr_crypt, - }, { - .base.cra_name = "__xts(serpent)", - .base.cra_driver_name = "__xts-serpent-avx", - .base.cra_priority = 500, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = SERPENT_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct serpent_xts_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * SERPENT_MIN_KEY_SIZE, - .max_keysize = 2 * SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, }, }; diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h index 251c2c89d7cf..23f3361a0e72 100644 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ b/arch/x86/include/asm/crypto/serpent-avx.h @@ -10,11 +10,6 @@ struct crypto_skcipher; #define SERPENT_PARALLEL_BLOCKS 8 -struct serpent_xts_ctx { - struct serpent_ctx tweak_ctx; - struct serpent_ctx crypt_ctx; -}; - asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst, const u8 *src); asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst, @@ -22,21 +17,5 @@ asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst, asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - -asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst, - const u8 *src, le128 *iv); - -extern void __serpent_crypt_ctr(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - -extern void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv); - -extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen); #endif diff --git a/crypto/Kconfig b/crypto/Kconfig index 03e8468e57df..ce69a5ae26b5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1576,7 +1576,7 @@ config CRYPTO_SERPENT_AVX_X86_64 select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD - select CRYPTO_XTS + imply CRYPTO_XTS help Serpent cipher algorithm, by Anderson, Biham & Knudsen. From da4df93a94a5aa7c5a599959d79ee99cdbe4c6b7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:47:52 +0100 Subject: [PATCH 066/154] crypto: x86/twofish - switch to XTS template Now that the XTS template can wrap accelerated ECB modes, it can be used to implement Twofish in XTS mode as well, which turns out to be at least as fast, and sometimes even faster Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 53 ----------- arch/x86/crypto/twofish_avx_glue.c | 98 --------------------- crypto/Kconfig | 1 + 3 files changed, 1 insertion(+), 151 deletions(-) diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index a5151393bb2f..84e61ef03638 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -19,11 +19,6 @@ .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16 -.align 16 -.Lxts_gf128mul_and_shl1_mask: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 - .text /* structure of crypto context */ @@ -406,51 +401,3 @@ SYM_FUNC_START(twofish_ctr_8way) FRAME_END ret; SYM_FUNC_END(twofish_ctr_8way) - -SYM_FUNC_START(twofish_xts_enc_8way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - - movq %rsi, %r11; - - /* regs <= src, dst <= IVs, regs <= regs xor IVs */ - load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, - RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask); - - call __twofish_enc_blk8; - - /* dst <= regs xor IVs(in dst) */ - store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); - - FRAME_END - ret; -SYM_FUNC_END(twofish_xts_enc_8way) - -SYM_FUNC_START(twofish_xts_dec_8way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - FRAME_BEGIN - - movq %rsi, %r11; - - /* regs <= src, dst <= IVs, regs <= regs xor IVs */ - load_xts_8way(%rcx, %rdx, %rsi, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2, - RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask); - - call __twofish_dec_blk8; - - /* dst <= regs xor IVs(in dst) */ - store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - FRAME_END - ret; -SYM_FUNC_END(twofish_xts_dec_8way) diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 2dbc8ce3730e..7b539bbb108f 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -29,11 +28,6 @@ asmlinkage void twofish_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void twofish_ctr_8way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void twofish_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -asmlinkage void twofish_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); - static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -45,40 +39,6 @@ static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) __twofish_enc_blk_3way(ctx, dst, src, false); } -static void twofish_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_enc_blk); -} - -static void twofish_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_dec_blk); -} - -struct twofish_xts_ctx { - struct twofish_ctx tweak_ctx; - struct twofish_ctx crypt_ctx; -}; - -static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} - static const struct common_glue_ctx twofish_enc = { .num_funcs = 3, .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, @@ -111,19 +71,6 @@ static const struct common_glue_ctx twofish_ctr = { } } }; -static const struct common_glue_ctx twofish_enc_xts = { - .num_funcs = 2, - .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .xts = twofish_xts_enc_8way } - }, { - .num_blocks = 1, - .fn_u = { .xts = twofish_xts_enc } - } } -}; - static const struct common_glue_ctx twofish_dec = { .num_funcs = 3, .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, @@ -156,19 +103,6 @@ static const struct common_glue_ctx twofish_dec_cbc = { } } }; -static const struct common_glue_ctx twofish_dec_xts = { - .num_funcs = 2, - .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .xts = twofish_xts_dec_8way } - }, { - .num_blocks = 1, - .fn_u = { .xts = twofish_xts_dec } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { return glue_ecb_req_128bit(&twofish_enc, req); @@ -194,24 +128,6 @@ static int ctr_crypt(struct skcipher_request *req) return glue_ctr_req_128bit(&twofish_ctr, req); } -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&twofish_enc_xts, req, twofish_enc_blk, - &ctx->tweak_ctx, &ctx->crypt_ctx, false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&twofish_dec_xts, req, twofish_enc_blk, - &ctx->tweak_ctx, &ctx->crypt_ctx, true); -} - static struct skcipher_alg twofish_algs[] = { { .base.cra_name = "__ecb(twofish)", @@ -255,20 +171,6 @@ static struct skcipher_alg twofish_algs[] = { .setkey = twofish_setkey_skcipher, .encrypt = ctr_crypt, .decrypt = ctr_crypt, - }, { - .base.cra_name = "__xts(twofish)", - .base.cra_driver_name = "__xts-twofish-avx", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = TF_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct twofish_xts_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * TF_MIN_KEY_SIZE, - .max_keysize = 2 * TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = xts_twofish_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, }, }; diff --git a/crypto/Kconfig b/crypto/Kconfig index ce69a5ae26b5..7ad9bf84f4a0 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1731,6 +1731,7 @@ config CRYPTO_TWOFISH_AVX_X86_64 select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 select CRYPTO_TWOFISH_X86_64_3WAY + imply CRYPTO_XTS help Twofish cipher algorithm (x86_64/AVX). From 31d49c448ab8556ce8d340eb28da2484e5b5629c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:47:53 +0100 Subject: [PATCH 067/154] crypto: x86/glue-helper - drop XTS helper routines The glue helper's XTS routines are no longer used, so drop them. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/glue_helper-asm-avx.S | 59 --------- arch/x86/crypto/glue_helper-asm-avx2.S | 78 ----------- arch/x86/crypto/glue_helper.c | 154 ---------------------- arch/x86/include/asm/crypto/glue_helper.h | 12 -- 4 files changed, 303 deletions(-) diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S index d08fc575ef7f..a94511432803 100644 --- a/arch/x86/crypto/glue_helper-asm-avx.S +++ b/arch/x86/crypto/glue_helper-asm-avx.S @@ -79,62 +79,3 @@ vpxor (6*16)(src), x6, x6; \ vpxor (7*16)(src), x7, x7; \ store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); - -#define gf128mul_x_ble(iv, mask, tmp) \ - vpsrad $31, iv, tmp; \ - vpaddq iv, iv, iv; \ - vpshufd $0x13, tmp, tmp; \ - vpand mask, tmp, tmp; \ - vpxor tmp, iv, iv; - -#define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \ - t1, xts_gf128mul_and_shl1_mask) \ - vmovdqa xts_gf128mul_and_shl1_mask, t0; \ - \ - /* load IV */ \ - vmovdqu (iv), tiv; \ - vpxor (0*16)(src), tiv, x0; \ - vmovdqu tiv, (0*16)(dst); \ - \ - /* construct and store IVs, also xor with source */ \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (1*16)(src), tiv, x1; \ - vmovdqu tiv, (1*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (2*16)(src), tiv, x2; \ - vmovdqu tiv, (2*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (3*16)(src), tiv, x3; \ - vmovdqu tiv, (3*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (4*16)(src), tiv, x4; \ - vmovdqu tiv, (4*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (5*16)(src), tiv, x5; \ - vmovdqu tiv, (5*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (6*16)(src), tiv, x6; \ - vmovdqu tiv, (6*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vpxor (7*16)(src), tiv, x7; \ - vmovdqu tiv, (7*16)(dst); \ - \ - gf128mul_x_ble(tiv, t0, t1); \ - vmovdqu tiv, (iv); - -#define store_xts_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \ - vpxor (0*16)(dst), x0, x0; \ - vpxor (1*16)(dst), x1, x1; \ - vpxor (2*16)(dst), x2, x2; \ - vpxor (3*16)(dst), x3, x3; \ - vpxor (4*16)(dst), x4, x4; \ - vpxor (5*16)(dst), x5, x5; \ - vpxor (6*16)(dst), x6, x6; \ - vpxor (7*16)(dst), x7, x7; \ - store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); diff --git a/arch/x86/crypto/glue_helper-asm-avx2.S b/arch/x86/crypto/glue_helper-asm-avx2.S index d84508c85c13..456bface1e5d 100644 --- a/arch/x86/crypto/glue_helper-asm-avx2.S +++ b/arch/x86/crypto/glue_helper-asm-avx2.S @@ -95,81 +95,3 @@ vpxor (6*32)(src), x6, x6; \ vpxor (7*32)(src), x7, x7; \ store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); - -#define gf128mul_x_ble(iv, mask, tmp) \ - vpsrad $31, iv, tmp; \ - vpaddq iv, iv, iv; \ - vpshufd $0x13, tmp, tmp; \ - vpand mask, tmp, tmp; \ - vpxor tmp, iv, iv; - -#define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) \ - vpsrad $31, iv, tmp0; \ - vpaddq iv, iv, tmp1; \ - vpsllq $2, iv, iv; \ - vpshufd $0x13, tmp0, tmp0; \ - vpsrad $31, tmp1, tmp1; \ - vpand mask2, tmp0, tmp0; \ - vpshufd $0x13, tmp1, tmp1; \ - vpxor tmp0, iv, iv; \ - vpand mask1, tmp1, tmp1; \ - vpxor tmp1, iv, iv; - -#define load_xts_16way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, \ - tivx, t0, t0x, t1, t1x, t2, t2x, t3, \ - xts_gf128mul_and_shl1_mask_0, \ - xts_gf128mul_and_shl1_mask_1) \ - vbroadcasti128 xts_gf128mul_and_shl1_mask_0, t1; \ - \ - /* load IV and construct second IV */ \ - vmovdqu (iv), tivx; \ - vmovdqa tivx, t0x; \ - gf128mul_x_ble(tivx, t1x, t2x); \ - vbroadcasti128 xts_gf128mul_and_shl1_mask_1, t2; \ - vinserti128 $1, tivx, t0, tiv; \ - vpxor (0*32)(src), tiv, x0; \ - vmovdqu tiv, (0*32)(dst); \ - \ - /* construct and store IVs, also xor with source */ \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (1*32)(src), tiv, x1; \ - vmovdqu tiv, (1*32)(dst); \ - \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (2*32)(src), tiv, x2; \ - vmovdqu tiv, (2*32)(dst); \ - \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (3*32)(src), tiv, x3; \ - vmovdqu tiv, (3*32)(dst); \ - \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (4*32)(src), tiv, x4; \ - vmovdqu tiv, (4*32)(dst); \ - \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (5*32)(src), tiv, x5; \ - vmovdqu tiv, (5*32)(dst); \ - \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (6*32)(src), tiv, x6; \ - vmovdqu tiv, (6*32)(dst); \ - \ - gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ - vpxor (7*32)(src), tiv, x7; \ - vmovdqu tiv, (7*32)(dst); \ - \ - vextracti128 $1, tiv, tivx; \ - gf128mul_x_ble(tivx, t1x, t2x); \ - vmovdqu tivx, (iv); - -#define store_xts_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \ - vpxor (0*32)(dst), x0, x0; \ - vpxor (1*32)(dst), x1, x1; \ - vpxor (2*32)(dst), x2, x2; \ - vpxor (3*32)(dst), x3, x3; \ - vpxor (4*32)(dst), x4, x4; \ - vpxor (5*32)(dst), x5, x5; \ - vpxor (6*32)(dst), x6, x6; \ - vpxor (7*32)(dst), x7, x7; \ - store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index d3d91a0abf88..786ffda1caf4 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -12,10 +12,8 @@ #include #include -#include #include #include -#include #include int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, @@ -226,156 +224,4 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, } EXPORT_SYMBOL_GPL(glue_ctr_req_128bit); -static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx, - void *ctx, - struct skcipher_walk *walk) -{ - const unsigned int bsize = 128 / 8; - unsigned int nbytes = walk->nbytes; - u128 *src = walk->src.virt.addr; - u128 *dst = walk->dst.virt.addr; - unsigned int num_blocks, func_bytes; - unsigned int i; - - /* Process multi-block batch */ - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; - - if (nbytes >= func_bytes) { - do { - gctx->funcs[i].fn_u.xts(ctx, (u8 *)dst, - (const u8 *)src, - walk->iv); - - src += num_blocks; - dst += num_blocks; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); - - if (nbytes < bsize) - goto done; - } - } - -done: - return nbytes; -} - -int glue_xts_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx, bool decrypt) -{ - const bool cts = (req->cryptlen % XTS_BLOCK_SIZE); - const unsigned int bsize = 128 / 8; - struct skcipher_request subreq; - struct skcipher_walk walk; - bool fpu_enabled = false; - unsigned int nbytes, tail; - int err; - - if (req->cryptlen < XTS_BLOCK_SIZE) - return -EINVAL; - - if (unlikely(cts)) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - - tail = req->cryptlen % XTS_BLOCK_SIZE + XTS_BLOCK_SIZE; - - skcipher_request_set_tfm(&subreq, tfm); - skcipher_request_set_callback(&subreq, - crypto_skcipher_get_flags(tfm), - NULL, NULL); - skcipher_request_set_crypt(&subreq, req->src, req->dst, - req->cryptlen - tail, req->iv); - req = &subreq; - } - - err = skcipher_walk_virt(&walk, req, false); - nbytes = walk.nbytes; - if (err) - return err; - - /* set minimum length to bsize, for tweak_fn */ - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - &walk, fpu_enabled, - nbytes < bsize ? bsize : nbytes); - - /* calculate first value of T */ - tweak_fn(tweak_ctx, walk.iv, walk.iv); - - while (nbytes) { - nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk); - - err = skcipher_walk_done(&walk, nbytes); - nbytes = walk.nbytes; - } - - if (unlikely(cts)) { - u8 *next_tweak, *final_tweak = req->iv; - struct scatterlist *src, *dst; - struct scatterlist s[2], d[2]; - le128 b[2]; - - dst = src = scatterwalk_ffwd(s, req->src, req->cryptlen); - if (req->dst != req->src) - dst = scatterwalk_ffwd(d, req->dst, req->cryptlen); - - if (decrypt) { - next_tweak = memcpy(b, req->iv, XTS_BLOCK_SIZE); - gf128mul_x_ble(b, b); - } else { - next_tweak = req->iv; - } - - skcipher_request_set_crypt(&subreq, src, dst, XTS_BLOCK_SIZE, - next_tweak); - - err = skcipher_walk_virt(&walk, req, false) ?: - skcipher_walk_done(&walk, - __glue_xts_req_128bit(gctx, crypt_ctx, &walk)); - if (err) - goto out; - - scatterwalk_map_and_copy(b, dst, 0, XTS_BLOCK_SIZE, 0); - memcpy(b + 1, b, tail - XTS_BLOCK_SIZE); - scatterwalk_map_and_copy(b, src, XTS_BLOCK_SIZE, - tail - XTS_BLOCK_SIZE, 0); - scatterwalk_map_and_copy(b, dst, 0, tail, 1); - - skcipher_request_set_crypt(&subreq, dst, dst, XTS_BLOCK_SIZE, - final_tweak); - - err = skcipher_walk_virt(&walk, req, false) ?: - skcipher_walk_done(&walk, - __glue_xts_req_128bit(gctx, crypt_ctx, &walk)); - } - -out: - glue_fpu_end(fpu_enabled); - - return err; -} -EXPORT_SYMBOL_GPL(glue_xts_req_128bit); - -void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst, const u8 *src, - le128 *iv, common_glue_func_t fn) -{ - le128 ivblk = *iv; - - /* generate next IV */ - gf128mul_x_ble(iv, &ivblk); - - /* CC <- T xor C */ - u128_xor((u128 *)dst, (const u128 *)src, (u128 *)&ivblk); - - /* PP <- D(Key2,CC) */ - fn(ctx, dst, dst); - - /* P <- T xor PP */ - u128_xor((u128 *)dst, (u128 *)dst, (u128 *)&ivblk); -} -EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one); - MODULE_LICENSE("GPL"); diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 777c0f63418c..62680775d189 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -15,8 +15,6 @@ typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src); typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src); typedef void (*common_glue_ctr_func_t)(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -typedef void (*common_glue_xts_func_t)(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); struct common_glue_func_entry { unsigned int num_blocks; /* number of blocks that @fn will process */ @@ -24,7 +22,6 @@ struct common_glue_func_entry { common_glue_func_t ecb; common_glue_cbc_func_t cbc; common_glue_ctr_func_t ctr; - common_glue_xts_func_t xts; } fn_u; }; @@ -106,13 +103,4 @@ extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req); -extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx, bool decrypt); - -extern void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst, - const u8 *src, le128 *iv, - common_glue_func_t fn); - #endif /* _CRYPTO_GLUE_HELPER_H */ From a1f91ecf812ac333ee2897f3eb2d8f4f6b4ce942 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:47:54 +0100 Subject: [PATCH 068/154] crypto: x86/camellia - drop CTR mode implementation Camellia in CTR mode is never used by the kernel directly, and is highly unlikely to be relied upon by dm-crypt or algif_skcipher. So let's drop the accelerated CTR mode implementation, and instead, rely on the CTR template and the bare cipher. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 117 --------------- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 144 ------------------- arch/x86/crypto/camellia_aesni_avx2_glue.c | 41 ------ arch/x86/crypto/camellia_aesni_avx_glue.c | 40 ------ arch/x86/crypto/camellia_glue.c | 68 --------- arch/x86/include/asm/crypto/camellia.h | 6 - crypto/Kconfig | 1 + 7 files changed, 1 insertion(+), 416 deletions(-) diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index 471c34e6cac2..e2a0e0f4bf9d 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -588,10 +588,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .long 0x80808080 .long 0x80808080 -/* For CTR-mode IV byteswap */ -.Lbswap128_mask: - .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 - /* * pre-SubByte transform * @@ -993,116 +989,3 @@ SYM_FUNC_START(camellia_cbc_dec_16way) FRAME_END ret; SYM_FUNC_END(camellia_cbc_dec_16way) - -#define inc_le128(x, minus_one, tmp) \ - vpcmpeqq minus_one, x, tmp; \ - vpsubq minus_one, x, x; \ - vpslldq $8, tmp, tmp; \ - vpsubq tmp, x, x; - -SYM_FUNC_START(camellia_ctr_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (little endian, 128bit) - */ - FRAME_BEGIN - - subq $(16 * 16), %rsp; - movq %rsp, %rax; - - vmovdqa .Lbswap128_mask, %xmm14; - - /* load IV and byteswap */ - vmovdqu (%rcx), %xmm0; - vpshufb %xmm14, %xmm0, %xmm15; - vmovdqu %xmm15, 15 * 16(%rax); - - vpcmpeqd %xmm15, %xmm15, %xmm15; - vpsrldq $8, %xmm15, %xmm15; /* low: -1, high: 0 */ - - /* construct IVs */ - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm13; - vmovdqu %xmm13, 14 * 16(%rax); - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm13; - vmovdqu %xmm13, 13 * 16(%rax); - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm12; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm11; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm10; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm9; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm8; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm7; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm6; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm5; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm4; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm3; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm2; - inc_le128(%xmm0, %xmm15, %xmm13); - vpshufb %xmm14, %xmm0, %xmm1; - inc_le128(%xmm0, %xmm15, %xmm13); - vmovdqa %xmm0, %xmm13; - vpshufb %xmm14, %xmm0, %xmm0; - inc_le128(%xmm13, %xmm15, %xmm14); - vmovdqu %xmm13, (%rcx); - - /* inpack16_pre: */ - vmovq (key_table)(CTX), %xmm15; - vpshufb .Lpack_bswap, %xmm15, %xmm15; - vpxor %xmm0, %xmm15, %xmm0; - vpxor %xmm1, %xmm15, %xmm1; - vpxor %xmm2, %xmm15, %xmm2; - vpxor %xmm3, %xmm15, %xmm3; - vpxor %xmm4, %xmm15, %xmm4; - vpxor %xmm5, %xmm15, %xmm5; - vpxor %xmm6, %xmm15, %xmm6; - vpxor %xmm7, %xmm15, %xmm7; - vpxor %xmm8, %xmm15, %xmm8; - vpxor %xmm9, %xmm15, %xmm9; - vpxor %xmm10, %xmm15, %xmm10; - vpxor %xmm11, %xmm15, %xmm11; - vpxor %xmm12, %xmm15, %xmm12; - vpxor 13 * 16(%rax), %xmm15, %xmm13; - vpxor 14 * 16(%rax), %xmm15, %xmm14; - vpxor 15 * 16(%rax), %xmm15, %xmm15; - - call __camellia_enc_blk16; - - addq $(16 * 16), %rsp; - - vpxor 0 * 16(%rdx), %xmm7, %xmm7; - vpxor 1 * 16(%rdx), %xmm6, %xmm6; - vpxor 2 * 16(%rdx), %xmm5, %xmm5; - vpxor 3 * 16(%rdx), %xmm4, %xmm4; - vpxor 4 * 16(%rdx), %xmm3, %xmm3; - vpxor 5 * 16(%rdx), %xmm2, %xmm2; - vpxor 6 * 16(%rdx), %xmm1, %xmm1; - vpxor 7 * 16(%rdx), %xmm0, %xmm0; - vpxor 8 * 16(%rdx), %xmm15, %xmm15; - vpxor 9 * 16(%rdx), %xmm14, %xmm14; - vpxor 10 * 16(%rdx), %xmm13, %xmm13; - vpxor 11 * 16(%rdx), %xmm12, %xmm12; - vpxor 12 * 16(%rdx), %xmm11, %xmm11; - vpxor 13 * 16(%rdx), %xmm10, %xmm10; - vpxor 14 * 16(%rdx), %xmm9, %xmm9; - vpxor 15 * 16(%rdx), %xmm8, %xmm8; - write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, - %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, - %xmm8, %rsi); - - FRAME_END - ret; -SYM_FUNC_END(camellia_ctr_16way) diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 9561dee52de0..782e9712a1ec 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -624,10 +624,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .section .rodata.cst16, "aM", @progbits, 16 .align 16 -/* For CTR-mode IV byteswap */ -.Lbswap128_mask: - .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 - /* * pre-SubByte transform * @@ -1054,143 +1050,3 @@ SYM_FUNC_START(camellia_cbc_dec_32way) FRAME_END ret; SYM_FUNC_END(camellia_cbc_dec_32way) - -#define inc_le128(x, minus_one, tmp) \ - vpcmpeqq minus_one, x, tmp; \ - vpsubq minus_one, x, x; \ - vpslldq $8, tmp, tmp; \ - vpsubq tmp, x, x; - -#define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \ - vpcmpeqq minus_one, x, tmp1; \ - vpcmpeqq minus_two, x, tmp2; \ - vpsubq minus_two, x, x; \ - vpor tmp2, tmp1, tmp1; \ - vpslldq $8, tmp1, tmp1; \ - vpsubq tmp1, x, x; - -SYM_FUNC_START(camellia_ctr_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (32 blocks) - * %rdx: src (32 blocks) - * %rcx: iv (little endian, 128bit) - */ - FRAME_BEGIN - - vzeroupper; - - movq %rsp, %r10; - cmpq %rsi, %rdx; - je .Lctr_use_stack; - - /* dst can be used as temporary storage, src is not overwritten. */ - movq %rsi, %rax; - jmp .Lctr_continue; - -.Lctr_use_stack: - subq $(16 * 32), %rsp; - movq %rsp, %rax; - -.Lctr_continue: - vpcmpeqd %ymm15, %ymm15, %ymm15; - vpsrldq $8, %ymm15, %ymm15; /* ab: -1:0 ; cd: -1:0 */ - vpaddq %ymm15, %ymm15, %ymm12; /* ab: -2:0 ; cd: -2:0 */ - - /* load IV and byteswap */ - vmovdqu (%rcx), %xmm0; - vmovdqa %xmm0, %xmm1; - inc_le128(%xmm0, %xmm15, %xmm14); - vbroadcasti128 .Lbswap128_mask, %ymm14; - vinserti128 $1, %xmm0, %ymm1, %ymm0; - vpshufb %ymm14, %ymm0, %ymm13; - vmovdqu %ymm13, 15 * 32(%rax); - - /* construct IVs */ - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); /* ab:le2 ; cd:le3 */ - vpshufb %ymm14, %ymm0, %ymm13; - vmovdqu %ymm13, 14 * 32(%rax); - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm13; - vmovdqu %ymm13, 13 * 32(%rax); - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm13; - vmovdqu %ymm13, 12 * 32(%rax); - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm13; - vmovdqu %ymm13, 11 * 32(%rax); - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm10; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm9; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm8; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm7; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm6; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm5; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm4; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm3; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm2; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vpshufb %ymm14, %ymm0, %ymm1; - add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); - vextracti128 $1, %ymm0, %xmm13; - vpshufb %ymm14, %ymm0, %ymm0; - inc_le128(%xmm13, %xmm15, %xmm14); - vmovdqu %xmm13, (%rcx); - - /* inpack32_pre: */ - vpbroadcastq (key_table)(CTX), %ymm15; - vpshufb .Lpack_bswap, %ymm15, %ymm15; - vpxor %ymm0, %ymm15, %ymm0; - vpxor %ymm1, %ymm15, %ymm1; - vpxor %ymm2, %ymm15, %ymm2; - vpxor %ymm3, %ymm15, %ymm3; - vpxor %ymm4, %ymm15, %ymm4; - vpxor %ymm5, %ymm15, %ymm5; - vpxor %ymm6, %ymm15, %ymm6; - vpxor %ymm7, %ymm15, %ymm7; - vpxor %ymm8, %ymm15, %ymm8; - vpxor %ymm9, %ymm15, %ymm9; - vpxor %ymm10, %ymm15, %ymm10; - vpxor 11 * 32(%rax), %ymm15, %ymm11; - vpxor 12 * 32(%rax), %ymm15, %ymm12; - vpxor 13 * 32(%rax), %ymm15, %ymm13; - vpxor 14 * 32(%rax), %ymm15, %ymm14; - vpxor 15 * 32(%rax), %ymm15, %ymm15; - - call __camellia_enc_blk32; - - movq %r10, %rsp; - - vpxor 0 * 32(%rdx), %ymm7, %ymm7; - vpxor 1 * 32(%rdx), %ymm6, %ymm6; - vpxor 2 * 32(%rdx), %ymm5, %ymm5; - vpxor 3 * 32(%rdx), %ymm4, %ymm4; - vpxor 4 * 32(%rdx), %ymm3, %ymm3; - vpxor 5 * 32(%rdx), %ymm2, %ymm2; - vpxor 6 * 32(%rdx), %ymm1, %ymm1; - vpxor 7 * 32(%rdx), %ymm0, %ymm0; - vpxor 8 * 32(%rdx), %ymm15, %ymm15; - vpxor 9 * 32(%rdx), %ymm14, %ymm14; - vpxor 10 * 32(%rdx), %ymm13, %ymm13; - vpxor 11 * 32(%rdx), %ymm12, %ymm12; - vpxor 12 * 32(%rdx), %ymm11, %ymm11; - vpxor 13 * 32(%rdx), %ymm10, %ymm10; - vpxor 14 * 32(%rdx), %ymm9, %ymm9; - vpxor 15 * 32(%rdx), %ymm8, %ymm8; - write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, - %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, - %ymm8, %rsi); - - vzeroupper; - - FRAME_END - ret; -SYM_FUNC_END(camellia_ctr_32way) diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index d956d0473668..8f25a2a6222e 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -22,8 +22,6 @@ asmlinkage void camellia_ecb_enc_32way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); static const struct common_glue_ctx camellia_enc = { .num_funcs = 4, @@ -44,25 +42,6 @@ static const struct common_glue_ctx camellia_enc = { } } }; -static const struct common_glue_ctx camellia_ctr = { - .num_funcs = 4, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .ctr = camellia_ctr_32way } - }, { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ctr = camellia_ctr_16way } - }, { - .num_blocks = 2, - .fn_u = { .ctr = camellia_crypt_ctr_2way } - }, { - .num_blocks = 1, - .fn_u = { .ctr = camellia_crypt_ctr } - } } -}; - static const struct common_glue_ctx camellia_dec = { .num_funcs = 4, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -127,11 +106,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&camellia_ctr, req); -} - static struct skcipher_alg camellia_algs[] = { { .base.cra_name = "__ecb(camellia)", @@ -160,21 +134,6 @@ static struct skcipher_alg camellia_algs[] = { .setkey = camellia_setkey, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(camellia)", - .base.cra_driver_name = "__ctr-camellia-aesni-avx2", - .base.cra_priority = 500, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct camellia_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .chunksize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 44614f8a452c..22a89cdfedfb 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -26,10 +26,6 @@ EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way); asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way); -asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -EXPORT_SYMBOL_GPL(camellia_ctr_16way); - static const struct common_glue_ctx camellia_enc = { .num_funcs = 3, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -46,22 +42,6 @@ static const struct common_glue_ctx camellia_enc = { } } }; -static const struct common_glue_ctx camellia_ctr = { - .num_funcs = 3, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ctr = camellia_ctr_16way } - }, { - .num_blocks = 2, - .fn_u = { .ctr = camellia_crypt_ctr_2way } - }, { - .num_blocks = 1, - .fn_u = { .ctr = camellia_crypt_ctr } - } } -}; - static const struct common_glue_ctx camellia_dec = { .num_funcs = 3, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, @@ -120,11 +100,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&camellia_ctr, req); -} - static struct skcipher_alg camellia_algs[] = { { .base.cra_name = "__ecb(camellia)", @@ -153,21 +128,6 @@ static struct skcipher_alg camellia_algs[] = { .setkey = camellia_setkey, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(camellia)", - .base.cra_driver_name = "__ctr-camellia-aesni", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct camellia_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .chunksize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, } }; diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 242c056e5fa8..fefeedf2b33d 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -1274,42 +1274,6 @@ void camellia_decrypt_cbc_2way(const void *ctx, u8 *d, const u8 *s) } EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way); -void camellia_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblk; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - if (dst != src) - *dst = *src; - - le128_to_be128(&ctrblk, iv); - le128_inc(iv); - - camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk); -} -EXPORT_SYMBOL_GPL(camellia_crypt_ctr); - -void camellia_crypt_ctr_2way(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblks[2]; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - if (dst != src) { - dst[0] = src[0]; - dst[1] = src[1]; - } - - le128_to_be128(&ctrblks[0], iv); - le128_inc(iv); - le128_to_be128(&ctrblks[1], iv); - le128_inc(iv); - - camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks); -} -EXPORT_SYMBOL_GPL(camellia_crypt_ctr_2way); - static const struct common_glue_ctx camellia_enc = { .num_funcs = 2, .fpu_blocks_limit = -1, @@ -1323,19 +1287,6 @@ static const struct common_glue_ctx camellia_enc = { } } }; -static const struct common_glue_ctx camellia_ctr = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 2, - .fn_u = { .ctr = camellia_crypt_ctr_2way } - }, { - .num_blocks = 1, - .fn_u = { .ctr = camellia_crypt_ctr } - } } -}; - static const struct common_glue_ctx camellia_dec = { .num_funcs = 2, .fpu_blocks_limit = -1, @@ -1382,11 +1333,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&camellia_ctr, req); -} - static struct crypto_alg camellia_cipher_alg = { .cra_name = "camellia", .cra_driver_name = "camellia-asm", @@ -1433,20 +1379,6 @@ static struct skcipher_alg camellia_skcipher_algs[] = { .setkey = camellia_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "ctr(camellia)", - .base.cra_driver_name = "ctr-camellia-asm", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct camellia_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .chunksize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, } }; diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index 0e5f82adbaf9..1dcea79e8f8e 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h @@ -38,8 +38,6 @@ asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src) { @@ -65,9 +63,5 @@ static inline void camellia_enc_blk_xor_2way(const void *ctx, u8 *dst, /* glue helpers */ extern void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src); -extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); #endif /* ASM_X86_CAMELLIA_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index 7ad9bf84f4a0..ea788cab8c7d 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1286,6 +1286,7 @@ config CRYPTO_CAMELLIA_X86_64 depends on CRYPTO select CRYPTO_SKCIPHER select CRYPTO_GLUE_HELPER_X86 + imply CRYPTO_CTR help Camellia cipher algorithm module (x86_64). From 2e9440ae6eab492572463d8cb266381264867723 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:47:55 +0100 Subject: [PATCH 069/154] crypto: x86/serpent - drop CTR mode implementation Serpent in CTR mode is never used by the kernel directly, and is highly unlikely to be relied upon by dm-crypt or algif_skcipher. So let's drop the accelerated CTR mode implementation, and instead, rely on the CTR template and the bare cipher. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 20 ------ arch/x86/crypto/serpent-avx2-asm_64.S | 25 -------- arch/x86/crypto/serpent_avx2_glue.c | 38 ------------ arch/x86/crypto/serpent_avx_glue.c | 51 ---------------- arch/x86/crypto/serpent_sse2_glue.c | 67 --------------------- crypto/Kconfig | 3 + 6 files changed, 3 insertions(+), 201 deletions(-) diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 6b41f46bcc76..b7ee24df7fba 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -711,23 +711,3 @@ SYM_FUNC_START(serpent_cbc_dec_8way_avx) FRAME_END ret; SYM_FUNC_END(serpent_cbc_dec_8way_avx) - -SYM_FUNC_START(serpent_ctr_8way_avx) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (little endian, 128bit) - */ - FRAME_BEGIN - - load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, - RD2, RK0, RK1, RK2); - - call __serpent_enc_blk8_avx; - - store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - FRAME_END - ret; -SYM_FUNC_END(serpent_ctr_8way_avx) diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index a510a949f02f..9161b6e441f3 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -724,28 +724,3 @@ SYM_FUNC_START(serpent_cbc_dec_16way) FRAME_END ret; SYM_FUNC_END(serpent_cbc_dec_16way) - -SYM_FUNC_START(serpent_ctr_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (little endian, 128bit) - */ - FRAME_BEGIN - - vzeroupper; - - load_ctr_16way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, - RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, - tp); - - call __serpent_enc_blk16; - - store_ctr_16way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - vzeroupper; - - FRAME_END - ret; -SYM_FUNC_END(serpent_ctr_16way) diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 9cdf2c078e21..28e542c6512a 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -22,8 +22,6 @@ asmlinkage void serpent_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void serpent_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void serpent_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void serpent_ctr_16way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -46,22 +44,6 @@ static const struct common_glue_ctx serpent_enc = { } } }; -static const struct common_glue_ctx serpent_ctr = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .ctr = serpent_ctr_16way } - }, { - .num_blocks = 8, - .fn_u = { .ctr = serpent_ctr_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .ctr = __serpent_crypt_ctr } - } } -}; - static const struct common_glue_ctx serpent_dec = { .num_funcs = 3, .fpu_blocks_limit = 8, @@ -114,11 +96,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&serpent_ctr, req); -} - static struct skcipher_alg serpent_algs[] = { { .base.cra_name = "__ecb(serpent)", @@ -147,21 +124,6 @@ static struct skcipher_alg serpent_algs[] = { .setkey = serpent_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(serpent)", - .base.cra_driver_name = "__ctr-serpent-avx2", - .base.cra_priority = 600, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct serpent_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .chunksize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index b17a08b57a91..aa4605baf9d4 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -31,24 +31,6 @@ asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx); -asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx); - -void __serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblk; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - le128_to_be128(&ctrblk, iv); - le128_inc(iv); - - __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); - u128_xor(dst, src, (u128 *)&ctrblk); -} -EXPORT_SYMBOL_GPL(__serpent_crypt_ctr); - static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -68,19 +50,6 @@ static const struct common_glue_ctx serpent_enc = { } } }; -static const struct common_glue_ctx serpent_ctr = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ctr = serpent_ctr_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .ctr = __serpent_crypt_ctr } - } } -}; - static const struct common_glue_ctx serpent_dec = { .num_funcs = 2, .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, @@ -127,11 +96,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&serpent_ctr, req); -} - static struct skcipher_alg serpent_algs[] = { { .base.cra_name = "__ecb(serpent)", @@ -160,21 +124,6 @@ static struct skcipher_alg serpent_algs[] = { .setkey = serpent_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(serpent)", - .base.cra_driver_name = "__ctr-serpent-avx", - .base.cra_priority = 500, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct serpent_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .chunksize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 4fed8d26b91a..9acb3bf28feb 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -10,8 +10,6 @@ * * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: * Copyright (c) 2006 Herbert Xu - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten */ #include @@ -47,38 +45,6 @@ static void serpent_decrypt_cbc_xway(const void *ctx, u8 *d, const u8 *s) u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); } -static void serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblk; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - le128_to_be128(&ctrblk, iv); - le128_inc(iv); - - __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); - u128_xor(dst, src, (u128 *)&ctrblk); -} - -static void serpent_crypt_ctr_xway(const void *ctx, u8 *d, const u8 *s, - le128 *iv) -{ - be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - unsigned int i; - - for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { - if (dst != src) - dst[i] = src[i]; - - le128_to_be128(&ctrblks[i], iv); - le128_inc(iv); - } - - serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); -} - static const struct common_glue_ctx serpent_enc = { .num_funcs = 2, .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, @@ -92,19 +58,6 @@ static const struct common_glue_ctx serpent_enc = { } } }; -static const struct common_glue_ctx serpent_ctr = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ctr = serpent_crypt_ctr_xway } - }, { - .num_blocks = 1, - .fn_u = { .ctr = serpent_crypt_ctr } - } } -}; - static const struct common_glue_ctx serpent_dec = { .num_funcs = 2, .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, @@ -152,11 +105,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&serpent_ctr, req); -} - static struct skcipher_alg serpent_algs[] = { { .base.cra_name = "__ecb(serpent)", @@ -185,21 +133,6 @@ static struct skcipher_alg serpent_algs[] = { .setkey = serpent_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(serpent)", - .base.cra_driver_name = "__ctr-serpent-sse2", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct serpent_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .chunksize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/crypto/Kconfig b/crypto/Kconfig index ea788cab8c7d..dd48c3bab3f5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1539,6 +1539,7 @@ config CRYPTO_SERPENT_SSE2_X86_64 select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD + imply CRYPTO_CTR help Serpent cipher algorithm, by Anderson, Biham & Knudsen. @@ -1558,6 +1559,7 @@ config CRYPTO_SERPENT_SSE2_586 select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD + imply CRYPTO_CTR help Serpent cipher algorithm, by Anderson, Biham & Knudsen. @@ -1578,6 +1580,7 @@ config CRYPTO_SERPENT_AVX_X86_64 select CRYPTO_SERPENT select CRYPTO_SIMD imply CRYPTO_XTS + imply CRYPTO_CTR help Serpent cipher algorithm, by Anderson, Biham & Knudsen. From e2d60e2f597a5b2a0a8724989742784bb83ada5d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:47:56 +0100 Subject: [PATCH 070/154] crypto: x86/cast5 - drop CTR mode implementation CAST5 in CTR mode is never used by the kernel directly, and is highly unlikely to be relied upon by dm-crypt or algif_skcipher. So let's drop the accelerated CTR mode implementation, and instead, rely on the CTR template and the bare cipher. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/cast5_avx_glue.c | 103 ------------------------------- crypto/Kconfig | 1 + 2 files changed, 1 insertion(+), 103 deletions(-) diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 384ccb00f9e1..e0d1c7903b29 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -23,8 +23,6 @@ asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src); asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src); -asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src, - __be64 *iv); static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -214,92 +212,6 @@ static int cbc_decrypt(struct skcipher_request *req) return err; } -static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx) -{ - u8 *ctrblk = walk->iv; - u8 keystream[CAST5_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - __cast5_encrypt(ctx, keystream, ctrblk); - crypto_xor_cpy(dst, keystream, src, nbytes); - - crypto_inc(ctrblk, CAST5_BLOCK_SIZE); -} - -static unsigned int __ctr_crypt(struct skcipher_walk *walk, - struct cast5_ctx *ctx) -{ - const unsigned int bsize = CAST5_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - - /* Process multi-block batch */ - if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { - do { - cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src, - (__be64 *)walk->iv); - - src += CAST5_PARALLEL_BLOCKS; - dst += CAST5_PARALLEL_BLOCKS; - nbytes -= bsize * CAST5_PARALLEL_BLOCKS; - } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - u64 ctrblk; - - if (dst != src) - *dst = *src; - - ctrblk = *(u64 *)walk->iv; - be64_add_cpu((__be64 *)walk->iv, 1); - - __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); - *dst ^= ctrblk; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - return nbytes; -} - -static int ctr_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); - bool fpu_enabled = false; - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); - nbytes = __ctr_crypt(&walk, ctx); - err = skcipher_walk_done(&walk, nbytes); - } - - cast5_fpu_end(fpu_enabled); - - if (walk.nbytes) { - ctr_crypt_final(&walk, ctx); - err = skcipher_walk_done(&walk, 0); - } - - return err; -} - static struct skcipher_alg cast5_algs[] = { { .base.cra_name = "__ecb(cast5)", @@ -328,21 +240,6 @@ static struct skcipher_alg cast5_algs[] = { .setkey = cast5_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(cast5)", - .base.cra_driver_name = "__ctr-cast5-avx", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct cast5_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .ivsize = CAST5_BLOCK_SIZE, - .chunksize = CAST5_BLOCK_SIZE, - .setkey = cast5_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, } }; diff --git a/crypto/Kconfig b/crypto/Kconfig index dd48c3bab3f5..fed73fff5a65 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1372,6 +1372,7 @@ config CRYPTO_CAST5_AVX_X86_64 select CRYPTO_CAST5 select CRYPTO_CAST_COMMON select CRYPTO_SIMD + imply CRYPTO_CTR help The CAST5 encryption algorithm (synonymous with CAST-128) is described in RFC2144. From 7a6623cc6867b5f24f750a7c16b996b0cbbc63b5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:47:57 +0100 Subject: [PATCH 071/154] crypto: x86/cast6 - drop CTR mode implementation CAST6 in CTR mode is never used by the kernel directly, and is highly unlikely to be relied upon by dm-crypt or algif_skcipher. So let's drop the accelerated CTR mode implementation, and instead, rely on the CTR template and the bare cipher. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 28 ------------- arch/x86/crypto/cast6_avx_glue.c | 48 ----------------------- crypto/Kconfig | 1 + 3 files changed, 1 insertion(+), 76 deletions(-) diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 0c1ea836215a..fbddcecc3e3f 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -410,31 +410,3 @@ SYM_FUNC_START(cast6_cbc_dec_8way) FRAME_END ret; SYM_FUNC_END(cast6_cbc_dec_8way) - -SYM_FUNC_START(cast6_ctr_8way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (little endian, 128bit) - */ - FRAME_BEGIN - pushq %r12; - pushq %r15 - - movq %rdi, CTX; - movq %rsi, %r11; - movq %rdx, %r12; - - load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, - RD2, RX, RKR, RKM); - - call __cast6_enc_blk8; - - store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); - - popq %r15; - popq %r12; - FRAME_END - ret; -SYM_FUNC_END(cast6_ctr_8way) diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 5a21d3e9041c..790efcb6df3b 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -23,8 +23,6 @@ asmlinkage void cast6_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void cast6_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void cast6_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void cast6_ctr_8way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -32,19 +30,6 @@ static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, return cast6_setkey(&tfm->base, key, keylen); } -static void cast6_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblk; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - le128_to_be128(&ctrblk, iv); - le128_inc(iv); - - __cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); - u128_xor(dst, src, (u128 *)&ctrblk); -} - static const struct common_glue_ctx cast6_enc = { .num_funcs = 2, .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, @@ -58,19 +43,6 @@ static const struct common_glue_ctx cast6_enc = { } } }; -static const struct common_glue_ctx cast6_ctr = { - .num_funcs = 2, - .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .ctr = cast6_ctr_8way } - }, { - .num_blocks = 1, - .fn_u = { .ctr = cast6_crypt_ctr } - } } -}; - static const struct common_glue_ctx cast6_dec = { .num_funcs = 2, .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, @@ -117,11 +89,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&cast6_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&cast6_ctr, req); -} - static struct skcipher_alg cast6_algs[] = { { .base.cra_name = "__ecb(cast6)", @@ -150,21 +117,6 @@ static struct skcipher_alg cast6_algs[] = { .setkey = cast6_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(cast6)", - .base.cra_driver_name = "__ctr-cast6-avx", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct cast6_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .chunksize = CAST6_BLOCK_SIZE, - .setkey = cast6_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/crypto/Kconfig b/crypto/Kconfig index fed73fff5a65..3f51c5dfc2a9 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1397,6 +1397,7 @@ config CRYPTO_CAST6_AVX_X86_64 select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD imply CRYPTO_XTS + imply CRYPTO_CTR help The CAST6 encryption algorithm (synonymous with CAST-256) is described in RFC2612. From f43dcaf2c97eae986378f12c46b27fe21f8a885b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:47:58 +0100 Subject: [PATCH 072/154] crypto: x86/twofish - drop CTR mode implementation Twofish in CTR mode is never used by the kernel directly, and is highly unlikely to be relied upon by dm-crypt or algif_skcipher. So let's drop the accelerated CTR mode implementation, and instead, rely on the CTR template and the bare cipher. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 27 ------- arch/x86/crypto/twofish_avx_glue.c | 38 ---------- arch/x86/crypto/twofish_glue_3way.c | 78 --------------------- arch/x86/include/asm/crypto/twofish.h | 4 -- crypto/Kconfig | 2 + 5 files changed, 2 insertions(+), 147 deletions(-) diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index 84e61ef03638..37e63b3c664e 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -374,30 +374,3 @@ SYM_FUNC_START(twofish_cbc_dec_8way) FRAME_END ret; SYM_FUNC_END(twofish_cbc_dec_8way) - -SYM_FUNC_START(twofish_ctr_8way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (little endian, 128bit) - */ - FRAME_BEGIN - - pushq %r12; - - movq %rsi, %r11; - movq %rdx, %r12; - - load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, - RD2, RX0, RX1, RY0); - - call __twofish_enc_blk8; - - store_ctr_8way(%r12, %r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); - - popq %r12; - - FRAME_END - ret; -SYM_FUNC_END(twofish_ctr_8way) diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 7b539bbb108f..13f810b61034 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -25,8 +25,6 @@ asmlinkage void twofish_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void twofish_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void twofish_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void twofish_ctr_8way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -55,22 +53,6 @@ static const struct common_glue_ctx twofish_enc = { } } }; -static const struct common_glue_ctx twofish_ctr = { - .num_funcs = 3, - .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ctr = twofish_ctr_8way } - }, { - .num_blocks = 3, - .fn_u = { .ctr = twofish_enc_blk_ctr_3way } - }, { - .num_blocks = 1, - .fn_u = { .ctr = twofish_enc_blk_ctr } - } } -}; - static const struct common_glue_ctx twofish_dec = { .num_funcs = 3, .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, @@ -123,11 +105,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&twofish_ctr, req); -} - static struct skcipher_alg twofish_algs[] = { { .base.cra_name = "__ecb(twofish)", @@ -156,21 +133,6 @@ static struct skcipher_alg twofish_algs[] = { .setkey = twofish_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "__ctr(twofish)", - .base.cra_driver_name = "__ctr-twofish-avx", - .base.cra_priority = 400, - .base.cra_flags = CRYPTO_ALG_INTERNAL, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct twofish_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .chunksize = TF_BLOCK_SIZE, - .setkey = twofish_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 768af6075479..88252370db0a 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -30,12 +30,6 @@ static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) __twofish_enc_blk_3way(ctx, dst, src, false); } -static inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst, - const u8 *src) -{ - __twofish_enc_blk_3way(ctx, dst, src, true); -} - void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s) { u128 ivs[2]; @@ -52,46 +46,6 @@ void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s) } EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); -void twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblk; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - if (dst != src) - *dst = *src; - - le128_to_be128(&ctrblk, iv); - le128_inc(iv); - - twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); - u128_xor(dst, dst, (u128 *)&ctrblk); -} -EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); - -void twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv) -{ - be128 ctrblks[3]; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - - if (dst != src) { - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - } - - le128_to_be128(&ctrblks[0], iv); - le128_inc(iv); - le128_to_be128(&ctrblks[1], iv); - le128_inc(iv); - le128_to_be128(&ctrblks[2], iv); - le128_inc(iv); - - twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); -} -EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way); - static const struct common_glue_ctx twofish_enc = { .num_funcs = 2, .fpu_blocks_limit = -1, @@ -105,19 +59,6 @@ static const struct common_glue_ctx twofish_enc = { } } }; -static const struct common_glue_ctx twofish_ctr = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 3, - .fn_u = { .ctr = twofish_enc_blk_ctr_3way } - }, { - .num_blocks = 1, - .fn_u = { .ctr = twofish_enc_blk_ctr } - } } -}; - static const struct common_glue_ctx twofish_dec = { .num_funcs = 2, .fpu_blocks_limit = -1, @@ -164,11 +105,6 @@ static int cbc_decrypt(struct skcipher_request *req) return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); } -static int ctr_crypt(struct skcipher_request *req) -{ - return glue_ctr_req_128bit(&twofish_ctr, req); -} - static struct skcipher_alg tf_skciphers[] = { { .base.cra_name = "ecb(twofish)", @@ -195,20 +131,6 @@ static struct skcipher_alg tf_skciphers[] = { .setkey = twofish_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "ctr(twofish)", - .base.cra_driver_name = "ctr-twofish-3way", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct twofish_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .chunksize = TF_BLOCK_SIZE, - .setkey = twofish_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index 2c377a8042e1..12df400e6d53 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h @@ -17,9 +17,5 @@ asmlinkage void twofish_dec_blk_3way(const void *ctx, u8 *dst, const u8 *src); /* helpers from twofish_x86_64-3way module */ extern void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src); -extern void twofish_enc_blk_ctr(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); -extern void twofish_enc_blk_ctr_3way(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); #endif /* ASM_X86_TWOFISH_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index 3f51c5dfc2a9..606f94079f05 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1680,6 +1680,7 @@ config CRYPTO_TWOFISH_586 depends on (X86 || UML_X86) && !64BIT select CRYPTO_ALGAPI select CRYPTO_TWOFISH_COMMON + imply CRYPTO_CTR help Twofish cipher algorithm. @@ -1696,6 +1697,7 @@ config CRYPTO_TWOFISH_X86_64 depends on (X86 || UML_X86) && 64BIT select CRYPTO_ALGAPI select CRYPTO_TWOFISH_COMMON + imply CRYPTO_CTR help Twofish cipher algorithm (x86_64). From 89b7ba5c8b9b20df043bc7b1d60065589f4103c3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:47:59 +0100 Subject: [PATCH 073/154] crypto: x86/glue-helper - drop CTR helper routines The glue helper's CTR routines are no longer used, so drop them. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/glue_helper-asm-avx.S | 45 -------------- arch/x86/crypto/glue_helper-asm-avx2.S | 58 ------------------ arch/x86/crypto/glue_helper.c | 72 ----------------------- arch/x86/include/asm/crypto/glue_helper.h | 32 ---------- 4 files changed, 207 deletions(-) diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S index a94511432803..3da385271227 100644 --- a/arch/x86/crypto/glue_helper-asm-avx.S +++ b/arch/x86/crypto/glue_helper-asm-avx.S @@ -34,48 +34,3 @@ vpxor (5*16)(src), x6, x6; \ vpxor (6*16)(src), x7, x7; \ store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); - -#define inc_le128(x, minus_one, tmp) \ - vpcmpeqq minus_one, x, tmp; \ - vpsubq minus_one, x, x; \ - vpslldq $8, tmp, tmp; \ - vpsubq tmp, x, x; - -#define load_ctr_8way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2) \ - vpcmpeqd t0, t0, t0; \ - vpsrldq $8, t0, t0; /* low: -1, high: 0 */ \ - vmovdqa bswap, t1; \ - \ - /* load IV and byteswap */ \ - vmovdqu (iv), x7; \ - vpshufb t1, x7, x0; \ - \ - /* construct IVs */ \ - inc_le128(x7, t0, t2); \ - vpshufb t1, x7, x1; \ - inc_le128(x7, t0, t2); \ - vpshufb t1, x7, x2; \ - inc_le128(x7, t0, t2); \ - vpshufb t1, x7, x3; \ - inc_le128(x7, t0, t2); \ - vpshufb t1, x7, x4; \ - inc_le128(x7, t0, t2); \ - vpshufb t1, x7, x5; \ - inc_le128(x7, t0, t2); \ - vpshufb t1, x7, x6; \ - inc_le128(x7, t0, t2); \ - vmovdqa x7, t2; \ - vpshufb t1, x7, x7; \ - inc_le128(t2, t0, t1); \ - vmovdqu t2, (iv); - -#define store_ctr_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \ - vpxor (0*16)(src), x0, x0; \ - vpxor (1*16)(src), x1, x1; \ - vpxor (2*16)(src), x2, x2; \ - vpxor (3*16)(src), x3, x3; \ - vpxor (4*16)(src), x4, x4; \ - vpxor (5*16)(src), x5, x5; \ - vpxor (6*16)(src), x6, x6; \ - vpxor (7*16)(src), x7, x7; \ - store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); diff --git a/arch/x86/crypto/glue_helper-asm-avx2.S b/arch/x86/crypto/glue_helper-asm-avx2.S index 456bface1e5d..c77e9049431f 100644 --- a/arch/x86/crypto/glue_helper-asm-avx2.S +++ b/arch/x86/crypto/glue_helper-asm-avx2.S @@ -37,61 +37,3 @@ vpxor (5*32+16)(src), x6, x6; \ vpxor (6*32+16)(src), x7, x7; \ store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); - -#define inc_le128(x, minus_one, tmp) \ - vpcmpeqq minus_one, x, tmp; \ - vpsubq minus_one, x, x; \ - vpslldq $8, tmp, tmp; \ - vpsubq tmp, x, x; - -#define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \ - vpcmpeqq minus_one, x, tmp1; \ - vpcmpeqq minus_two, x, tmp2; \ - vpsubq minus_two, x, x; \ - vpor tmp2, tmp1, tmp1; \ - vpslldq $8, tmp1, tmp1; \ - vpsubq tmp1, x, x; - -#define load_ctr_16way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t0x, t1, \ - t1x, t2, t2x, t3, t3x, t4, t5) \ - vpcmpeqd t0, t0, t0; \ - vpsrldq $8, t0, t0; /* ab: -1:0 ; cd: -1:0 */ \ - vpaddq t0, t0, t4; /* ab: -2:0 ; cd: -2:0 */\ - \ - /* load IV and byteswap */ \ - vmovdqu (iv), t2x; \ - vmovdqa t2x, t3x; \ - inc_le128(t2x, t0x, t1x); \ - vbroadcasti128 bswap, t1; \ - vinserti128 $1, t2x, t3, t2; /* ab: le0 ; cd: le1 */ \ - vpshufb t1, t2, x0; \ - \ - /* construct IVs */ \ - add2_le128(t2, t0, t4, t3, t5); /* ab: le2 ; cd: le3 */ \ - vpshufb t1, t2, x1; \ - add2_le128(t2, t0, t4, t3, t5); \ - vpshufb t1, t2, x2; \ - add2_le128(t2, t0, t4, t3, t5); \ - vpshufb t1, t2, x3; \ - add2_le128(t2, t0, t4, t3, t5); \ - vpshufb t1, t2, x4; \ - add2_le128(t2, t0, t4, t3, t5); \ - vpshufb t1, t2, x5; \ - add2_le128(t2, t0, t4, t3, t5); \ - vpshufb t1, t2, x6; \ - add2_le128(t2, t0, t4, t3, t5); \ - vpshufb t1, t2, x7; \ - vextracti128 $1, t2, t2x; \ - inc_le128(t2x, t0x, t3x); \ - vmovdqu t2x, (iv); - -#define store_ctr_16way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \ - vpxor (0*32)(src), x0, x0; \ - vpxor (1*32)(src), x1, x1; \ - vpxor (2*32)(src), x2, x2; \ - vpxor (3*32)(src), x3, x3; \ - vpxor (4*32)(src), x4, x4; \ - vpxor (5*32)(src), x5, x5; \ - vpxor (6*32)(src), x6, x6; \ - vpxor (7*32)(src), x7, x7; \ - store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 786ffda1caf4..895d34150c3f 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -6,8 +6,6 @@ * * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: * Copyright (c) 2006 Herbert Xu - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten */ #include @@ -154,74 +152,4 @@ done: } EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit); -int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req) -{ - void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); - const unsigned int bsize = 128 / 8; - struct skcipher_walk walk; - bool fpu_enabled = false; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes) >= bsize) { - const u128 *src = walk.src.virt.addr; - u128 *dst = walk.dst.virt.addr; - unsigned int func_bytes, num_blocks; - unsigned int i; - le128 ctrblk; - - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - &walk, fpu_enabled, nbytes); - - be128_to_le128(&ctrblk, (be128 *)walk.iv); - - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; - - if (nbytes < func_bytes) - continue; - - /* Process multi-block batch */ - do { - gctx->funcs[i].fn_u.ctr(ctx, (u8 *)dst, - (const u8 *)src, - &ctrblk); - src += num_blocks; - dst += num_blocks; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); - - if (nbytes < bsize) - break; - } - - le128_to_be128((be128 *)walk.iv, &ctrblk); - err = skcipher_walk_done(&walk, nbytes); - } - - glue_fpu_end(fpu_enabled); - - if (nbytes) { - le128 ctrblk; - u128 tmp; - - be128_to_le128(&ctrblk, (be128 *)walk.iv); - memcpy(&tmp, walk.src.virt.addr, nbytes); - gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, (u8 *)&tmp, - (const u8 *)&tmp, - &ctrblk); - memcpy(walk.dst.virt.addr, &tmp, nbytes); - le128_to_be128((be128 *)walk.iv, &ctrblk); - - err = skcipher_walk_done(&walk, 0); - } - - return err; -} -EXPORT_SYMBOL_GPL(glue_ctr_req_128bit); - MODULE_LICENSE("GPL"); diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 62680775d189..23e09efd2aa6 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -9,19 +9,15 @@ #include #include #include -#include typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src); typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src); -typedef void (*common_glue_ctr_func_t)(const void *ctx, u8 *dst, const u8 *src, - le128 *iv); struct common_glue_func_entry { unsigned int num_blocks; /* number of blocks that @fn will process */ union { common_glue_func_t ecb; common_glue_cbc_func_t cbc; - common_glue_ctr_func_t ctr; } fn_u; }; @@ -66,31 +62,6 @@ static inline void glue_fpu_end(bool fpu_enabled) kernel_fpu_end(); } -static inline void le128_to_be128(be128 *dst, const le128 *src) -{ - dst->a = cpu_to_be64(le64_to_cpu(src->a)); - dst->b = cpu_to_be64(le64_to_cpu(src->b)); -} - -static inline void be128_to_le128(le128 *dst, const be128 *src) -{ - dst->a = cpu_to_le64(be64_to_cpu(src->a)); - dst->b = cpu_to_le64(be64_to_cpu(src->b)); -} - -static inline void le128_inc(le128 *i) -{ - u64 a = le64_to_cpu(i->a); - u64 b = le64_to_cpu(i->b); - - b++; - if (!b) - a++; - - i->a = cpu_to_le64(a); - i->b = cpu_to_le64(b); -} - extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req); @@ -100,7 +71,4 @@ extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req); -extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req); - #endif /* _CRYPTO_GLUE_HELPER_H */ From 768db5fee3bb338174cd078878d3c4ff815a7fcf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:48:00 +0100 Subject: [PATCH 074/154] crypto: x86/des - drop CTR mode implementation DES or Triple DES in counter mode is never used in the kernel, so there is no point in keeping an accelerated implementation around. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/des3_ede_glue.c | 104 -------------------------------- crypto/Kconfig | 1 + 2 files changed, 1 insertion(+), 104 deletions(-) diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index 89830e531350..e7cb68a3db3b 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -6,8 +6,6 @@ * * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: * Copyright (c) 2006 Herbert Xu - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten */ #include @@ -253,94 +251,6 @@ static int cbc_decrypt(struct skcipher_request *req) return err; } -static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx, - struct skcipher_walk *walk) -{ - u8 *ctrblk = walk->iv; - u8 keystream[DES3_EDE_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - des3_ede_enc_blk(ctx, keystream, ctrblk); - crypto_xor_cpy(dst, keystream, src, nbytes); - - crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE); -} - -static unsigned int __ctr_crypt(struct des3_ede_x86_ctx *ctx, - struct skcipher_walk *walk) -{ - unsigned int bsize = DES3_EDE_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - __be64 *src = (__be64 *)walk->src.virt.addr; - __be64 *dst = (__be64 *)walk->dst.virt.addr; - u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); - __be64 ctrblocks[3]; - - /* Process four block batch */ - if (nbytes >= bsize * 3) { - do { - /* create ctrblks for parallel encrypt */ - ctrblocks[0] = cpu_to_be64(ctrblk++); - ctrblocks[1] = cpu_to_be64(ctrblk++); - ctrblocks[2] = cpu_to_be64(ctrblk++); - - des3_ede_enc_blk_3way(ctx, (u8 *)ctrblocks, - (u8 *)ctrblocks); - - dst[0] = src[0] ^ ctrblocks[0]; - dst[1] = src[1] ^ ctrblocks[1]; - dst[2] = src[2] ^ ctrblocks[2]; - - src += 3; - dst += 3; - } while ((nbytes -= bsize * 3) >= bsize * 3); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - ctrblocks[0] = cpu_to_be64(ctrblk++); - - des3_ede_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); - - dst[0] = src[0] ^ ctrblocks[0]; - - src += 1; - dst += 1; - } while ((nbytes -= bsize) >= bsize); - -done: - *(__be64 *)walk->iv = cpu_to_be64(ctrblk); - return nbytes; -} - -static int ctr_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) { - nbytes = __ctr_crypt(ctx, &walk); - err = skcipher_walk_done(&walk, nbytes); - } - - if (nbytes) { - ctr_crypt_final(ctx, &walk); - err = skcipher_walk_done(&walk, 0); - } - - return err; -} - static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { @@ -428,20 +338,6 @@ static struct skcipher_alg des3_ede_skciphers[] = { .setkey = des3_ede_x86_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "ctr(des3_ede)", - .base.cra_driver_name = "ctr-des3_ede-asm", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - .chunksize = DES3_EDE_BLOCK_SIZE, - .setkey = des3_ede_x86_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, } }; diff --git a/crypto/Kconfig b/crypto/Kconfig index 606f94079f05..5e820a57d138 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1427,6 +1427,7 @@ config CRYPTO_DES3_EDE_X86_64 depends on X86 && 64BIT select CRYPTO_SKCIPHER select CRYPTO_LIB_DES + imply CRYPTO_CTR help Triple DES EDE (FIPS 46-3) algorithm. From c0a64926c53e05fc6f69c7d632967606defe5f61 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:48:01 +0100 Subject: [PATCH 075/154] crypto: x86/blowfish - drop CTR mode implementation Blowfish in counter mode is never used in the kernel, so there is no point in keeping an accelerated implementation around. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/blowfish_glue.c | 107 -------------------------------- crypto/Kconfig | 1 + 2 files changed, 1 insertion(+), 107 deletions(-) diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index cedfdba69ce3..a880e0b1c255 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -6,8 +6,6 @@ * * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: * Copyright (c) 2006 Herbert Xu - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten */ #include @@ -247,97 +245,6 @@ static int cbc_decrypt(struct skcipher_request *req) return err; } -static void ctr_crypt_final(struct bf_ctx *ctx, struct skcipher_walk *walk) -{ - u8 *ctrblk = walk->iv; - u8 keystream[BF_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - blowfish_enc_blk(ctx, keystream, ctrblk); - crypto_xor_cpy(dst, keystream, src, nbytes); - - crypto_inc(ctrblk, BF_BLOCK_SIZE); -} - -static unsigned int __ctr_crypt(struct bf_ctx *ctx, struct skcipher_walk *walk) -{ - unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); - __be64 ctrblocks[4]; - - /* Process four block batch */ - if (nbytes >= bsize * 4) { - do { - if (dst != src) { - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; - } - - /* create ctrblks for parallel encrypt */ - ctrblocks[0] = cpu_to_be64(ctrblk++); - ctrblocks[1] = cpu_to_be64(ctrblk++); - ctrblocks[2] = cpu_to_be64(ctrblk++); - ctrblocks[3] = cpu_to_be64(ctrblk++); - - blowfish_enc_blk_xor_4way(ctx, (u8 *)dst, - (u8 *)ctrblocks); - - src += 4; - dst += 4; - } while ((nbytes -= bsize * 4) >= bsize * 4); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (dst != src) - *dst = *src; - - ctrblocks[0] = cpu_to_be64(ctrblk++); - - blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks); - - src += 1; - dst += 1; - } while ((nbytes -= bsize) >= bsize); - -done: - *(__be64 *)walk->iv = cpu_to_be64(ctrblk); - return nbytes; -} - -static int ctr_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { - nbytes = __ctr_crypt(ctx, &walk); - err = skcipher_walk_done(&walk, nbytes); - } - - if (nbytes) { - ctr_crypt_final(ctx, &walk); - err = skcipher_walk_done(&walk, 0); - } - - return err; -} - static struct crypto_alg bf_cipher_alg = { .cra_name = "blowfish", .cra_driver_name = "blowfish-asm", @@ -384,20 +291,6 @@ static struct skcipher_alg bf_skcipher_algs[] = { .setkey = blowfish_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, - }, { - .base.cra_name = "ctr(blowfish)", - .base.cra_driver_name = "ctr-blowfish-asm", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct bf_ctx), - .base.cra_module = THIS_MODULE, - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .chunksize = BF_BLOCK_SIZE, - .setkey = blowfish_setkey_skcipher, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, }, }; diff --git a/crypto/Kconfig b/crypto/Kconfig index 5e820a57d138..24c0e001d06d 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1255,6 +1255,7 @@ config CRYPTO_BLOWFISH_X86_64 depends on X86 && 64BIT select CRYPTO_SKCIPHER select CRYPTO_BLOWFISH_COMMON + imply CRYPTO_CTR help Blowfish cipher algorithm (x86_64), by Bruce Schneier. From 827ee47228a6bfa446ddb81999adf400ae901106 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:48:02 +0100 Subject: [PATCH 076/154] crypto: x86 - add some helper macros for ECB and CBC modes The x86 glue helper module is starting to show its age: - It relies heavily on function pointers to invoke asm helper functions that operate on fixed input sizes that are relatively small. This means the performance is severely impacted by retpolines. - It goes to great lengths to amortize the cost of kernel_fpu_begin()/end() over as much work as possible, which is no longer necessary now that FPU save/restore is done lazily, and doing so may cause unbounded scheduling blackouts due to the fact that enabling the FPU in kernel mode disables preemption. - The CBC mode decryption helper makes backward strides through the input, in order to avoid a single block size memcpy() between chunks. Consuming the input in this manner is highly likely to defeat any hardware prefetchers, so it is better to go through the data linearly, and perform the extra memcpy() where needed (which is turned into direct loads and stores by the compiler anyway). Note that benchmarks won't show this effect, given that the memory they use is always cache hot. - It implements blockwise XOR in terms of le128 pointers, which imply an alignment that is not guaranteed by the API, violating the C standard. GCC does not seem to be smart enough to elide the indirect calls when the function pointers are passed as arguments to static inline helper routines modeled after the existing ones. So instead, let's create some CPP macros that encapsulate the core of the ECB and CBC processing, so we can wire them up for existing users of the glue helper module, i.e., Camellia, Serpent, Twofish and CAST6. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/ecb_cbc_helpers.h | 76 +++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 arch/x86/crypto/ecb_cbc_helpers.h diff --git a/arch/x86/crypto/ecb_cbc_helpers.h b/arch/x86/crypto/ecb_cbc_helpers.h new file mode 100644 index 000000000000..eaa15c7b29d6 --- /dev/null +++ b/arch/x86/crypto/ecb_cbc_helpers.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _CRYPTO_ECB_CBC_HELPER_H +#define _CRYPTO_ECB_CBC_HELPER_H + +#include +#include + +/* + * Mode helpers to instantiate parameterized skcipher ECB/CBC modes without + * having to rely on indirect calls and retpolines. + */ + +#define ECB_WALK_START(req, bsize, fpu_blocks) do { \ + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); \ + const int __bsize = (bsize); \ + struct skcipher_walk walk; \ + int err = skcipher_walk_virt(&walk, (req), false); \ + while (walk.nbytes > 0) { \ + unsigned int nbytes = walk.nbytes; \ + bool do_fpu = (fpu_blocks) != -1 && \ + nbytes >= (fpu_blocks) * __bsize; \ + const u8 *src = walk.src.virt.addr; \ + u8 *dst = walk.dst.virt.addr; \ + u8 __maybe_unused buf[(bsize)]; \ + if (do_fpu) kernel_fpu_begin() + +#define CBC_WALK_START(req, bsize, fpu_blocks) \ + ECB_WALK_START(req, bsize, fpu_blocks) + +#define ECB_WALK_ADVANCE(blocks) do { \ + dst += (blocks) * __bsize; \ + src += (blocks) * __bsize; \ + nbytes -= (blocks) * __bsize; \ +} while (0) + +#define ECB_BLOCK(blocks, func) do { \ + while (nbytes >= (blocks) * __bsize) { \ + (func)(ctx, dst, src); \ + ECB_WALK_ADVANCE(blocks); \ + } \ +} while (0) + +#define CBC_ENC_BLOCK(func) do { \ + const u8 *__iv = walk.iv; \ + while (nbytes >= __bsize) { \ + crypto_xor_cpy(dst, src, __iv, __bsize); \ + (func)(ctx, dst, dst); \ + __iv = dst; \ + ECB_WALK_ADVANCE(1); \ + } \ + memcpy(walk.iv, __iv, __bsize); \ +} while (0) + +#define CBC_DEC_BLOCK(blocks, func) do { \ + while (nbytes >= (blocks) * __bsize) { \ + const u8 *__iv = src + ((blocks) - 1) * __bsize; \ + if (dst == src) \ + __iv = memcpy(buf, __iv, __bsize); \ + (func)(ctx, dst, src); \ + crypto_xor(dst, walk.iv, __bsize); \ + memcpy(walk.iv, __iv, __bsize); \ + ECB_WALK_ADVANCE(blocks); \ + } \ +} while (0) + +#define ECB_WALK_END() \ + if (do_fpu) kernel_fpu_end(); \ + err = skcipher_walk_done(&walk, nbytes); \ + } \ + return err; \ +} while (0) + +#define CBC_WALK_END() ECB_WALK_END() + +#endif From 407d409a8102a5ba042215aed7b2ef2d6e6c67a8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:48:03 +0100 Subject: [PATCH 077/154] crypto: x86/camellia - drop dependency on glue helper Replace the glue helper dependency with implementations of ECB and CBC based on the new CPP macros, which avoid the need for indirect calls. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia_aesni_avx2_glue.c | 85 ++++++---------------- arch/x86/crypto/camellia_aesni_avx_glue.c | 73 +++++-------------- arch/x86/crypto/camellia_glue.c | 75 ++++++------------- crypto/Kconfig | 2 - 4 files changed, 67 insertions(+), 168 deletions(-) diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 8f25a2a6222e..ef5c0f094584 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -14,6 +13,8 @@ #include #include +#include "ecb_cbc_helpers.h" + #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32 @@ -23,63 +24,6 @@ asmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src); asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src); -static const struct common_glue_ctx camellia_enc = { - .num_funcs = 4, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .ecb = camellia_ecb_enc_32way } - }, { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = camellia_ecb_enc_16way } - }, { - .num_blocks = 2, - .fn_u = { .ecb = camellia_enc_blk_2way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = camellia_enc_blk } - } } -}; - -static const struct common_glue_ctx camellia_dec = { - .num_funcs = 4, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .ecb = camellia_ecb_dec_32way } - }, { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = camellia_ecb_dec_16way } - }, { - .num_blocks = 2, - .fn_u = { .ecb = camellia_dec_blk_2way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = camellia_dec_blk } - } } -}; - -static const struct common_glue_ctx camellia_dec_cbc = { - .num_funcs = 4, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .cbc = camellia_cbc_dec_32way } - }, { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .cbc = camellia_cbc_dec_16way } - }, { - .num_blocks = 2, - .fn_u = { .cbc = camellia_decrypt_cbc_2way } - }, { - .num_blocks = 1, - .fn_u = { .cbc = camellia_dec_blk } - } } -}; - static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -88,22 +32,39 @@ static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&camellia_enc, req); + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, camellia_ecb_enc_32way); + ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_enc_16way); + ECB_BLOCK(2, camellia_enc_blk_2way); + ECB_BLOCK(1, camellia_enc_blk); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&camellia_dec, req); + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, camellia_ecb_dec_32way); + ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_dec_16way); + ECB_BLOCK(2, camellia_dec_blk_2way); + ECB_BLOCK(1, camellia_dec_blk); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req); + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(camellia_enc_blk); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, camellia_cbc_dec_32way); + CBC_DEC_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_cbc_dec_16way); + CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way); + CBC_DEC_BLOCK(1, camellia_dec_blk); + CBC_WALK_END(); } static struct skcipher_alg camellia_algs[] = { diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 22a89cdfedfb..68fed0a79889 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -14,6 +13,8 @@ #include #include +#include "ecb_cbc_helpers.h" + #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 /* 16-way parallel cipher functions (avx/aes-ni) */ @@ -26,54 +27,6 @@ EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way); asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way); -static const struct common_glue_ctx camellia_enc = { - .num_funcs = 3, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = camellia_ecb_enc_16way } - }, { - .num_blocks = 2, - .fn_u = { .ecb = camellia_enc_blk_2way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = camellia_enc_blk } - } } -}; - -static const struct common_glue_ctx camellia_dec = { - .num_funcs = 3, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = camellia_ecb_dec_16way } - }, { - .num_blocks = 2, - .fn_u = { .ecb = camellia_dec_blk_2way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = camellia_dec_blk } - } } -}; - -static const struct common_glue_ctx camellia_dec_cbc = { - .num_funcs = 3, - .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .cbc = camellia_cbc_dec_16way } - }, { - .num_blocks = 2, - .fn_u = { .cbc = camellia_decrypt_cbc_2way } - }, { - .num_blocks = 1, - .fn_u = { .cbc = camellia_dec_blk } - } } -}; - static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { @@ -82,22 +35,36 @@ static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&camellia_enc, req); + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_enc_16way); + ECB_BLOCK(2, camellia_enc_blk_2way); + ECB_BLOCK(1, camellia_enc_blk); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&camellia_dec, req); + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_dec_16way); + ECB_BLOCK(2, camellia_dec_blk_2way); + ECB_BLOCK(1, camellia_dec_blk); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req); + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(camellia_enc_blk); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_cbc_dec_16way); + CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way); + CBC_DEC_BLOCK(1, camellia_dec_blk); + CBC_WALK_END(); } static struct skcipher_alg camellia_algs[] = { diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index fefeedf2b33d..0bc00ce68484 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -15,7 +15,8 @@ #include #include #include -#include + +#include "ecb_cbc_helpers.h" /* regular block cipher functions */ asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, @@ -1262,75 +1263,47 @@ static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, return camellia_setkey(&tfm->base, key, key_len); } -void camellia_decrypt_cbc_2way(const void *ctx, u8 *d, const u8 *s) +void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src) { - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - u128 iv = *src; + u8 buf[CAMELLIA_BLOCK_SIZE]; + const u8 *iv = src; - camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src); - - u128_xor(&dst[1], &dst[1], &iv); + if (dst == src) + iv = memcpy(buf, iv, sizeof(buf)); + camellia_dec_blk_2way(ctx, dst, src); + crypto_xor(dst + CAMELLIA_BLOCK_SIZE, iv, CAMELLIA_BLOCK_SIZE); } EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way); -static const struct common_glue_ctx camellia_enc = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 2, - .fn_u = { .ecb = camellia_enc_blk_2way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = camellia_enc_blk } - } } -}; - -static const struct common_glue_ctx camellia_dec = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 2, - .fn_u = { .ecb = camellia_dec_blk_2way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = camellia_dec_blk } - } } -}; - -static const struct common_glue_ctx camellia_dec_cbc = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 2, - .fn_u = { .cbc = camellia_decrypt_cbc_2way } - }, { - .num_blocks = 1, - .fn_u = { .cbc = camellia_dec_blk } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&camellia_enc, req); + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + ECB_BLOCK(2, camellia_enc_blk_2way); + ECB_BLOCK(1, camellia_enc_blk); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&camellia_dec, req); + ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + ECB_BLOCK(2, camellia_dec_blk_2way); + ECB_BLOCK(1, camellia_dec_blk); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req); + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(camellia_enc_blk); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); + CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); + CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way); + CBC_DEC_BLOCK(1, camellia_dec_blk); + CBC_WALK_END(); } static struct crypto_alg camellia_cipher_alg = { diff --git a/crypto/Kconfig b/crypto/Kconfig index 24c0e001d06d..f8518ff389bb 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1286,7 +1286,6 @@ config CRYPTO_CAMELLIA_X86_64 depends on X86 && 64BIT depends on CRYPTO select CRYPTO_SKCIPHER - select CRYPTO_GLUE_HELPER_X86 imply CRYPTO_CTR help Camellia cipher algorithm module (x86_64). @@ -1305,7 +1304,6 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64 depends on CRYPTO select CRYPTO_SKCIPHER select CRYPTO_CAMELLIA_X86_64 - select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD imply CRYPTO_XTS help From 9ad58b46f814edd5b8b288b66f94cf57c97eaea3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:48:04 +0100 Subject: [PATCH 078/154] crypto: x86/serpent - drop dependency on glue helper Replace the glue helper dependency with implementations of ECB and CBC based on the new CPP macros, which avoid the need for indirect calls. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/serpent_avx2_glue.c | 73 +++++++------------------- arch/x86/crypto/serpent_avx_glue.c | 61 ++++++---------------- arch/x86/crypto/serpent_sse2_glue.c | 81 +++++++++-------------------- crypto/Kconfig | 3 -- 4 files changed, 61 insertions(+), 157 deletions(-) diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 28e542c6512a..261c9ac2d762 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -12,9 +12,10 @@ #include #include #include -#include #include +#include "ecb_cbc_helpers.h" + #define SERPENT_AVX2_PARALLEL_BLOCKS 16 /* 16-way AVX2 parallel cipher functions */ @@ -28,72 +29,38 @@ static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); } -static const struct common_glue_ctx serpent_enc = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .ecb = serpent_ecb_enc_16way } - }, { - .num_blocks = 8, - .fn_u = { .ecb = serpent_ecb_enc_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __serpent_encrypt } - } } -}; - -static const struct common_glue_ctx serpent_dec = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .ecb = serpent_ecb_dec_16way } - }, { - .num_blocks = 8, - .fn_u = { .ecb = serpent_ecb_dec_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __serpent_decrypt } - } } -}; - -static const struct common_glue_ctx serpent_dec_cbc = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .cbc = serpent_cbc_dec_16way } - }, { - .num_blocks = 8, - .fn_u = { .cbc = serpent_cbc_dec_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .cbc = __serpent_decrypt } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&serpent_enc, req); + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_AVX2_PARALLEL_BLOCKS, serpent_ecb_enc_16way); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_enc_8way_avx); + ECB_BLOCK(1, __serpent_encrypt); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&serpent_dec, req); + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_AVX2_PARALLEL_BLOCKS, serpent_ecb_dec_16way); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_dec_8way_avx); + ECB_BLOCK(1, __serpent_decrypt); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req); + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__serpent_encrypt); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(SERPENT_AVX2_PARALLEL_BLOCKS, serpent_cbc_dec_16way); + CBC_DEC_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_cbc_dec_8way_avx); + CBC_DEC_BLOCK(1, __serpent_decrypt); + CBC_WALK_END(); } static struct skcipher_alg serpent_algs[] = { diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index aa4605baf9d4..5fe01d2a5b1d 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -15,9 +15,10 @@ #include #include #include -#include #include +#include "ecb_cbc_helpers.h" + /* 8-way parallel cipher functions */ asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst, const u8 *src); @@ -37,63 +38,35 @@ static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); } -static const struct common_glue_ctx serpent_enc = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = serpent_ecb_enc_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __serpent_encrypt } - } } -}; - -static const struct common_glue_ctx serpent_dec = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = serpent_ecb_dec_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __serpent_decrypt } - } } -}; - -static const struct common_glue_ctx serpent_dec_cbc = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .cbc = serpent_cbc_dec_8way_avx } - }, { - .num_blocks = 1, - .fn_u = { .cbc = __serpent_decrypt } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&serpent_enc, req); + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_enc_8way_avx); + ECB_BLOCK(1, __serpent_encrypt); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&serpent_dec, req); + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_dec_8way_avx); + ECB_BLOCK(1, __serpent_decrypt); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req); + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__serpent_encrypt); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_cbc_dec_8way_avx); + CBC_DEC_BLOCK(1, __serpent_decrypt); + CBC_WALK_END(); } static struct skcipher_alg serpent_algs[] = { diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 9acb3bf28feb..e28d60949c16 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -21,7 +21,8 @@ #include #include #include -#include + +#include "ecb_cbc_helpers.h" static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -29,80 +30,46 @@ static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); } -static void serpent_decrypt_cbc_xway(const void *ctx, u8 *d, const u8 *s) +static void serpent_decrypt_cbc_xway(const void *ctx, u8 *dst, const u8 *src) { - u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; - unsigned int j; + u8 buf[SERPENT_PARALLEL_BLOCKS - 1][SERPENT_BLOCK_SIZE]; + const u8 *s = src; - for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) - ivs[j] = src[j]; - - serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); - - for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) - u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); + if (dst == src) + s = memcpy(buf, src, sizeof(buf)); + serpent_dec_blk_xway(ctx, dst, src); + crypto_xor(dst + SERPENT_BLOCK_SIZE, s, sizeof(buf)); } -static const struct common_glue_ctx serpent_enc = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = serpent_enc_blk_xway } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __serpent_encrypt } - } } -}; - -static const struct common_glue_ctx serpent_dec = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = serpent_dec_blk_xway } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __serpent_decrypt } - } } -}; - -static const struct common_glue_ctx serpent_dec_cbc = { - .num_funcs = 2, - .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .cbc = serpent_decrypt_cbc_xway } - }, { - .num_blocks = 1, - .fn_u = { .cbc = __serpent_decrypt } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&serpent_enc, req); + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_enc_blk_xway); + ECB_BLOCK(1, __serpent_encrypt); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&serpent_dec, req); + ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_dec_blk_xway); + ECB_BLOCK(1, __serpent_decrypt); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(__serpent_encrypt, - req); + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__serpent_encrypt); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); + CBC_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_decrypt_cbc_xway); + CBC_DEC_BLOCK(1, __serpent_decrypt); + CBC_WALK_END(); } static struct skcipher_alg serpent_algs[] = { diff --git a/crypto/Kconfig b/crypto/Kconfig index f8518ff389bb..29dce7efc443 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1538,7 +1538,6 @@ config CRYPTO_SERPENT_SSE2_X86_64 tristate "Serpent cipher algorithm (x86_64/SSE2)" depends on X86 && 64BIT select CRYPTO_SKCIPHER - select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD imply CRYPTO_CTR @@ -1558,7 +1557,6 @@ config CRYPTO_SERPENT_SSE2_586 tristate "Serpent cipher algorithm (i586/SSE2)" depends on X86 && !64BIT select CRYPTO_SKCIPHER - select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD imply CRYPTO_CTR @@ -1578,7 +1576,6 @@ config CRYPTO_SERPENT_AVX_X86_64 tristate "Serpent cipher algorithm (x86_64/AVX)" depends on X86 && 64BIT select CRYPTO_SKCIPHER - select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD imply CRYPTO_XTS From 674d40abac42d502e226da6045fad61d7206e5fb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:48:05 +0100 Subject: [PATCH 079/154] crypto: x86/cast5 - drop dependency on glue helper Replace the glue helper dependency with implementations of ECB and CBC based on the new CPP macros, which avoid the need for indirect calls. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/cast5_avx_glue.c | 184 +++---------------------------- 1 file changed, 17 insertions(+), 167 deletions(-) diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index e0d1c7903b29..3976a87f92ad 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -6,7 +6,6 @@ * */ -#include #include #include #include @@ -15,6 +14,8 @@ #include #include +#include "ecb_cbc_helpers.h" + #define CAST5_PARALLEL_BLOCKS 16 asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst, @@ -30,186 +31,35 @@ static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, return cast5_setkey(&tfm->base, key, keylen); } -static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk, - unsigned int nbytes) -{ - return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS, - walk, fpu_enabled, nbytes); -} - -static inline void cast5_fpu_end(bool fpu_enabled) -{ - return glue_fpu_end(fpu_enabled); -} - -static int ecb_crypt(struct skcipher_request *req, bool enc) -{ - bool fpu_enabled = false; - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - const unsigned int bsize = CAST5_BLOCK_SIZE; - unsigned int nbytes; - void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src); - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes)) { - u8 *wsrc = walk.src.virt.addr; - u8 *wdst = walk.dst.virt.addr; - - fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); - - /* Process multi-block batch */ - if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { - fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way; - do { - fn(ctx, wdst, wsrc); - - wsrc += bsize * CAST5_PARALLEL_BLOCKS; - wdst += bsize * CAST5_PARALLEL_BLOCKS; - nbytes -= bsize * CAST5_PARALLEL_BLOCKS; - } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - fn = (enc) ? __cast5_encrypt : __cast5_decrypt; - - /* Handle leftovers */ - do { - fn(ctx, wdst, wsrc); - - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - err = skcipher_walk_done(&walk, nbytes); - } - - cast5_fpu_end(fpu_enabled); - return err; -} - static int ecb_encrypt(struct skcipher_request *req) { - return ecb_crypt(req, true); + ECB_WALK_START(req, CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS); + ECB_BLOCK(CAST5_PARALLEL_BLOCKS, cast5_ecb_enc_16way); + ECB_BLOCK(1, __cast5_encrypt); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return ecb_crypt(req, false); + ECB_WALK_START(req, CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS); + ECB_BLOCK(CAST5_PARALLEL_BLOCKS, cast5_ecb_dec_16way); + ECB_BLOCK(1, __cast5_decrypt); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - const unsigned int bsize = CAST5_BLOCK_SIZE; - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes)) { - u64 *src = (u64 *)walk.src.virt.addr; - u64 *dst = (u64 *)walk.dst.virt.addr; - u64 *iv = (u64 *)walk.iv; - - do { - *dst = *src ^ *iv; - __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - src++; - dst++; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u64 *)walk.iv = *iv; - err = skcipher_walk_done(&walk, nbytes); - } - - return err; -} - -static unsigned int __cbc_decrypt(struct cast5_ctx *ctx, - struct skcipher_walk *walk) -{ - const unsigned int bsize = CAST5_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 last_iv; - - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - /* Process multi-block batch */ - if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { - do { - nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1); - src -= CAST5_PARALLEL_BLOCKS - 1; - dst -= CAST5_PARALLEL_BLOCKS - 1; - - cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); - } - - /* Handle leftovers */ - for (;;) { - __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } - -done: - *dst ^= *(u64 *)walk->iv; - *(u64 *)walk->iv = last_iv; - - return nbytes; + CBC_WALK_START(req, CAST5_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__cast5_encrypt); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); - bool fpu_enabled = false; - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes)) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); - nbytes = __cbc_decrypt(ctx, &walk); - err = skcipher_walk_done(&walk, nbytes); - } - - cast5_fpu_end(fpu_enabled); - return err; + CBC_WALK_START(req, CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(CAST5_PARALLEL_BLOCKS, cast5_cbc_dec_16way); + CBC_DEC_BLOCK(1, __cast5_decrypt); + CBC_WALK_END(); } static struct skcipher_alg cast5_algs[] = { From ea55cfc3f920c95ee8d01ddc51e586b09a1194ee Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:48:06 +0100 Subject: [PATCH 080/154] crypto: x86/cast6 - drop dependency on glue helper Replace the glue helper dependency with implementations of ECB and CBC based on the new CPP macros, which avoid the need for indirect calls. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/cast6_avx_glue.c | 61 +++++++++----------------------- crypto/Kconfig | 1 - 2 files changed, 17 insertions(+), 45 deletions(-) diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 790efcb6df3b..7e2aea372349 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -15,7 +15,8 @@ #include #include #include -#include + +#include "ecb_cbc_helpers.h" #define CAST6_PARALLEL_BLOCKS 8 @@ -30,63 +31,35 @@ static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, return cast6_setkey(&tfm->base, key, keylen); } -static const struct common_glue_ctx cast6_enc = { - .num_funcs = 2, - .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .ecb = cast6_ecb_enc_8way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __cast6_encrypt } - } } -}; - -static const struct common_glue_ctx cast6_dec = { - .num_funcs = 2, - .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .ecb = cast6_ecb_dec_8way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = __cast6_decrypt } - } } -}; - -static const struct common_glue_ctx cast6_dec_cbc = { - .num_funcs = 2, - .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .cbc = cast6_cbc_dec_8way } - }, { - .num_blocks = 1, - .fn_u = { .cbc = __cast6_decrypt } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&cast6_enc, req); + ECB_WALK_START(req, CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS); + ECB_BLOCK(CAST6_PARALLEL_BLOCKS, cast6_ecb_enc_8way); + ECB_BLOCK(1, __cast6_encrypt); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&cast6_dec, req); + ECB_WALK_START(req, CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS); + ECB_BLOCK(CAST6_PARALLEL_BLOCKS, cast6_ecb_dec_8way); + ECB_BLOCK(1, __cast6_decrypt); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(__cast6_encrypt, req); + CBC_WALK_START(req, CAST6_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(__cast6_encrypt); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&cast6_dec_cbc, req); + CBC_WALK_START(req, CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(CAST6_PARALLEL_BLOCKS, cast6_cbc_dec_8way); + CBC_DEC_BLOCK(1, __cast6_decrypt); + CBC_WALK_END(); } static struct skcipher_alg cast6_algs[] = { diff --git a/crypto/Kconfig b/crypto/Kconfig index 29dce7efc443..25101558acb5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1393,7 +1393,6 @@ config CRYPTO_CAST6_AVX_X86_64 select CRYPTO_SKCIPHER select CRYPTO_CAST6 select CRYPTO_CAST_COMMON - select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD imply CRYPTO_XTS imply CRYPTO_CTR From 165f357334cc92435aa9b5c9161567e0d0ab8f2a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:48:07 +0100 Subject: [PATCH 081/154] crypto: x86/twofish - drop dependency on glue helper Replace the glue helper dependency with implementations of ECB and CBC based on the new CPP macros, which avoid the need for indirect calls. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/twofish_avx_glue.c | 73 ++++++++------------------ arch/x86/crypto/twofish_glue_3way.c | 80 +++++++++-------------------- crypto/Kconfig | 2 - 3 files changed, 44 insertions(+), 111 deletions(-) diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 13f810b61034..6ce198f808a5 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -15,9 +15,10 @@ #include #include #include -#include #include +#include "ecb_cbc_helpers.h" + #define TWOFISH_PARALLEL_BLOCKS 8 /* 8-way parallel cipher functions */ @@ -37,72 +38,38 @@ static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) __twofish_enc_blk_3way(ctx, dst, src, false); } -static const struct common_glue_ctx twofish_enc = { - .num_funcs = 3, - .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ecb = twofish_ecb_enc_8way } - }, { - .num_blocks = 3, - .fn_u = { .ecb = twofish_enc_blk_3way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = twofish_enc_blk } - } } -}; - -static const struct common_glue_ctx twofish_dec = { - .num_funcs = 3, - .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ecb = twofish_ecb_dec_8way } - }, { - .num_blocks = 3, - .fn_u = { .ecb = twofish_dec_blk_3way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = twofish_dec_blk } - } } -}; - -static const struct common_glue_ctx twofish_dec_cbc = { - .num_funcs = 3, - .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, - - .funcs = { { - .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .cbc = twofish_cbc_dec_8way } - }, { - .num_blocks = 3, - .fn_u = { .cbc = twofish_dec_blk_cbc_3way } - }, { - .num_blocks = 1, - .fn_u = { .cbc = twofish_dec_blk } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&twofish_enc, req); + ECB_WALK_START(req, TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS); + ECB_BLOCK(TWOFISH_PARALLEL_BLOCKS, twofish_ecb_enc_8way); + ECB_BLOCK(3, twofish_enc_blk_3way); + ECB_BLOCK(1, twofish_enc_blk); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&twofish_dec, req); + ECB_WALK_START(req, TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS); + ECB_BLOCK(TWOFISH_PARALLEL_BLOCKS, twofish_ecb_dec_8way); + ECB_BLOCK(3, twofish_dec_blk_3way); + ECB_BLOCK(1, twofish_dec_blk); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req); + CBC_WALK_START(req, TF_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(twofish_enc_blk); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); + CBC_WALK_START(req, TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS); + CBC_DEC_BLOCK(TWOFISH_PARALLEL_BLOCKS, twofish_cbc_dec_8way); + CBC_DEC_BLOCK(3, twofish_dec_blk_cbc_3way); + CBC_DEC_BLOCK(1, twofish_dec_blk); + CBC_WALK_END(); } static struct skcipher_alg twofish_algs[] = { diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 88252370db0a..d1fdefa5195a 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -5,17 +5,16 @@ * Copyright (c) 2011 Jussi Kivilinna */ -#include #include #include -#include -#include #include #include #include #include #include +#include "ecb_cbc_helpers.h" + EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); @@ -30,79 +29,48 @@ static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) __twofish_enc_blk_3way(ctx, dst, src, false); } -void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s) +void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src) { - u128 ivs[2]; - u128 *dst = (u128 *)d; - const u128 *src = (const u128 *)s; + u8 buf[2][TF_BLOCK_SIZE]; + const u8 *s = src; - ivs[0] = src[0]; - ivs[1] = src[1]; + if (dst == src) + s = memcpy(buf, src, sizeof(buf)); + twofish_dec_blk_3way(ctx, dst, src); + crypto_xor(dst + TF_BLOCK_SIZE, s, sizeof(buf)); - twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); - - u128_xor(&dst[1], &dst[1], &ivs[0]); - u128_xor(&dst[2], &dst[2], &ivs[1]); } EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); -static const struct common_glue_ctx twofish_enc = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 3, - .fn_u = { .ecb = twofish_enc_blk_3way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = twofish_enc_blk } - } } -}; - -static const struct common_glue_ctx twofish_dec = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 3, - .fn_u = { .ecb = twofish_dec_blk_3way } - }, { - .num_blocks = 1, - .fn_u = { .ecb = twofish_dec_blk } - } } -}; - -static const struct common_glue_ctx twofish_dec_cbc = { - .num_funcs = 2, - .fpu_blocks_limit = -1, - - .funcs = { { - .num_blocks = 3, - .fn_u = { .cbc = twofish_dec_blk_cbc_3way } - }, { - .num_blocks = 1, - .fn_u = { .cbc = twofish_dec_blk } - } } -}; - static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&twofish_enc, req); + ECB_WALK_START(req, TF_BLOCK_SIZE, -1); + ECB_BLOCK(3, twofish_enc_blk_3way); + ECB_BLOCK(1, twofish_enc_blk); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_req_128bit(&twofish_dec, req); + ECB_WALK_START(req, TF_BLOCK_SIZE, -1); + ECB_BLOCK(3, twofish_dec_blk_3way); + ECB_BLOCK(1, twofish_dec_blk); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req); + CBC_WALK_START(req, TF_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(twofish_enc_blk); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); + CBC_WALK_START(req, TF_BLOCK_SIZE, -1); + CBC_DEC_BLOCK(3, twofish_dec_blk_cbc_3way); + CBC_DEC_BLOCK(1, twofish_dec_blk); + CBC_WALK_END(); } static struct skcipher_alg tf_skciphers[] = { diff --git a/crypto/Kconfig b/crypto/Kconfig index 25101558acb5..b2182658c55e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1711,7 +1711,6 @@ config CRYPTO_TWOFISH_X86_64_3WAY select CRYPTO_SKCIPHER select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 - select CRYPTO_GLUE_HELPER_X86 help Twofish cipher algorithm (x86_64, 3-way parallel). @@ -1730,7 +1729,6 @@ config CRYPTO_TWOFISH_AVX_X86_64 tristate "Twofish cipher algorithm (x86_64/AVX)" depends on X86 && 64BIT select CRYPTO_SKCIPHER - select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 From 64ca771cd6bf48bd01f630ad1440ab151d1d19d5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:48:08 +0100 Subject: [PATCH 082/154] crypto: x86 - remove glue helper module All dependencies on the x86 glue helper module have been replaced by local instantiations of the new ECB/CBC preprocessor helper macros, so the glue helper module can be retired. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 - arch/x86/crypto/glue_helper.c | 155 ---------------------- arch/x86/include/asm/crypto/glue_helper.h | 74 ----------- crypto/Kconfig | 5 - crypto/skcipher.c | 6 - include/crypto/internal/skcipher.h | 1 - 6 files changed, 243 deletions(-) delete mode 100644 arch/x86/crypto/glue_helper.c delete mode 100644 arch/x86/include/asm/crypto/glue_helper.h diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index a31de0c6ccde..b28e36b7c96b 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -4,8 +4,6 @@ OBJECT_FILES_NON_STANDARD := y -obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o - obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c deleted file mode 100644 index 895d34150c3f..000000000000 --- a/arch/x86/crypto/glue_helper.c +++ /dev/null @@ -1,155 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Shared glue code for 128bit block ciphers - * - * Copyright © 2012-2013 Jussi Kivilinna - * - * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: - * Copyright (c) 2006 Herbert Xu - */ - -#include -#include -#include -#include -#include - -int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req) -{ - void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); - const unsigned int bsize = 128 / 8; - struct skcipher_walk walk; - bool fpu_enabled = false; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes)) { - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - unsigned int func_bytes; - unsigned int i; - - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - &walk, fpu_enabled, nbytes); - for (i = 0; i < gctx->num_funcs; i++) { - func_bytes = bsize * gctx->funcs[i].num_blocks; - - if (nbytes < func_bytes) - continue; - - /* Process multi-block batch */ - do { - gctx->funcs[i].fn_u.ecb(ctx, dst, src); - src += func_bytes; - dst += func_bytes; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); - - if (nbytes < bsize) - break; - } - err = skcipher_walk_done(&walk, nbytes); - } - - glue_fpu_end(fpu_enabled); - return err; -} -EXPORT_SYMBOL_GPL(glue_ecb_req_128bit); - -int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, - struct skcipher_request *req) -{ - void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); - const unsigned int bsize = 128 / 8; - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes)) { - const u128 *src = (u128 *)walk.src.virt.addr; - u128 *dst = (u128 *)walk.dst.virt.addr; - u128 *iv = (u128 *)walk.iv; - - do { - u128_xor(dst, src, iv); - fn(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - src++; - dst++; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u128 *)walk.iv = *iv; - err = skcipher_walk_done(&walk, nbytes); - } - return err; -} -EXPORT_SYMBOL_GPL(glue_cbc_encrypt_req_128bit); - -int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req) -{ - void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); - const unsigned int bsize = 128 / 8; - struct skcipher_walk walk; - bool fpu_enabled = false; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes)) { - const u128 *src = walk.src.virt.addr; - u128 *dst = walk.dst.virt.addr; - unsigned int func_bytes, num_blocks; - unsigned int i; - u128 last_iv; - - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - &walk, fpu_enabled, nbytes); - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; - - if (nbytes < func_bytes) - continue; - - /* Process multi-block batch */ - do { - src -= num_blocks - 1; - dst -= num_blocks - 1; - - gctx->funcs[i].fn_u.cbc(ctx, (u8 *)dst, - (const u8 *)src); - - nbytes -= func_bytes; - if (nbytes < bsize) - goto done; - - u128_xor(dst, dst, --src); - dst--; - } while (nbytes >= func_bytes); - } -done: - u128_xor(dst, dst, (u128 *)walk.iv); - *(u128 *)walk.iv = last_iv; - err = skcipher_walk_done(&walk, nbytes); - } - - glue_fpu_end(fpu_enabled); - return err; -} -EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit); - -MODULE_LICENSE("GPL"); diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h deleted file mode 100644 index 23e09efd2aa6..000000000000 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Shared glue code for 128bit block ciphers - */ - -#ifndef _CRYPTO_GLUE_HELPER_H -#define _CRYPTO_GLUE_HELPER_H - -#include -#include -#include - -typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src); -typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src); - -struct common_glue_func_entry { - unsigned int num_blocks; /* number of blocks that @fn will process */ - union { - common_glue_func_t ecb; - common_glue_cbc_func_t cbc; - } fn_u; -}; - -struct common_glue_ctx { - unsigned int num_funcs; - int fpu_blocks_limit; /* -1 means fpu not needed at all */ - - /* - * First funcs entry must have largest num_blocks and last funcs entry - * must have num_blocks == 1! - */ - struct common_glue_func_entry funcs[]; -}; - -static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, - struct skcipher_walk *walk, - bool fpu_enabled, unsigned int nbytes) -{ - if (likely(fpu_blocks_limit < 0)) - return false; - - if (fpu_enabled) - return true; - - /* - * Vector-registers are only used when chunk to be processed is large - * enough, so do not enable FPU until it is necessary. - */ - if (nbytes < bsize * (unsigned int)fpu_blocks_limit) - return false; - - /* prevent sleeping if FPU is in use */ - skcipher_walk_atomise(walk); - - kernel_fpu_begin(); - return true; -} - -static inline void glue_fpu_end(bool fpu_enabled) -{ - if (fpu_enabled) - kernel_fpu_end(); -} - -extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req); - -extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, - struct skcipher_request *req); - -extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req); - -#endif /* _CRYPTO_GLUE_HELPER_H */ diff --git a/crypto/Kconfig b/crypto/Kconfig index b2182658c55e..94f0fde06b94 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -210,11 +210,6 @@ config CRYPTO_SIMD tristate select CRYPTO_CRYPTD -config CRYPTO_GLUE_HELPER_X86 - tristate - depends on X86 - select CRYPTO_SKCIPHER - config CRYPTO_ENGINE tristate diff --git a/crypto/skcipher.c b/crypto/skcipher.c index ff16d05644c7..a15376245416 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -491,12 +491,6 @@ int skcipher_walk_virt(struct skcipher_walk *walk, } EXPORT_SYMBOL_GPL(skcipher_walk_virt); -void skcipher_walk_atomise(struct skcipher_walk *walk) -{ - walk->flags &= ~SKCIPHER_WALK_SLEEP; -} -EXPORT_SYMBOL_GPL(skcipher_walk_atomise); - int skcipher_walk_async(struct skcipher_walk *walk, struct skcipher_request *req) { diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index 9dd6c0c17eb8..a2339f80a615 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -133,7 +133,6 @@ int skcipher_walk_done(struct skcipher_walk *walk, int err); int skcipher_walk_virt(struct skcipher_walk *walk, struct skcipher_request *req, bool atomic); -void skcipher_walk_atomise(struct skcipher_walk *walk); int skcipher_walk_async(struct skcipher_walk *walk, struct skcipher_request *req); int skcipher_walk_aead_encrypt(struct skcipher_walk *walk, From a04ea6f7ffa27d5825b56cb1591ad0992910992c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Jan 2021 17:48:09 +0100 Subject: [PATCH 083/154] crypto: x86 - use local headers for x86 specific shared declarations The Camellia, Serpent and Twofish related header files only contain declarations that are shared between different implementations of the respective algorithms residing under arch/x86/crypto, and none of their contents should be used elsewhere. So move the header files into the same location, and use local #includes instead. Acked-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/{include/asm => }/crypto/camellia.h | 0 arch/x86/crypto/camellia_aesni_avx2_glue.c | 2 +- arch/x86/crypto/camellia_aesni_avx_glue.c | 2 +- arch/x86/crypto/camellia_glue.c | 2 +- arch/x86/{include/asm => }/crypto/serpent-avx.h | 0 arch/x86/{include/asm => }/crypto/serpent-sse2.h | 0 arch/x86/crypto/serpent_avx2_glue.c | 2 +- arch/x86/crypto/serpent_avx_glue.c | 2 +- arch/x86/crypto/serpent_sse2_glue.c | 2 +- arch/x86/{include/asm => }/crypto/twofish.h | 0 arch/x86/crypto/twofish_avx_glue.c | 2 +- arch/x86/crypto/twofish_glue_3way.c | 2 +- 12 files changed, 8 insertions(+), 8 deletions(-) rename arch/x86/{include/asm => }/crypto/camellia.h (100%) rename arch/x86/{include/asm => }/crypto/serpent-avx.h (100%) rename arch/x86/{include/asm => }/crypto/serpent-sse2.h (100%) rename arch/x86/{include/asm => }/crypto/twofish.h (100%) diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/crypto/camellia.h similarity index 100% rename from arch/x86/include/asm/crypto/camellia.h rename to arch/x86/crypto/camellia.h diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index ef5c0f094584..e7e4d64e9577 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -5,7 +5,6 @@ * Copyright © 2013 Jussi Kivilinna */ -#include #include #include #include @@ -13,6 +12,7 @@ #include #include +#include "camellia.h" #include "ecb_cbc_helpers.h" #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 68fed0a79889..c7ccf63e741e 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -5,7 +5,6 @@ * Copyright © 2012-2013 Jussi Kivilinna */ -#include #include #include #include @@ -13,6 +12,7 @@ #include #include +#include "camellia.h" #include "ecb_cbc_helpers.h" #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 0bc00ce68484..66c435ba9d3d 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -14,8 +14,8 @@ #include #include #include -#include +#include "camellia.h" #include "ecb_cbc_helpers.h" /* regular block cipher functions */ diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/crypto/serpent-avx.h similarity index 100% rename from arch/x86/include/asm/crypto/serpent-avx.h rename to arch/x86/crypto/serpent-avx.h diff --git a/arch/x86/include/asm/crypto/serpent-sse2.h b/arch/x86/crypto/serpent-sse2.h similarity index 100% rename from arch/x86/include/asm/crypto/serpent-sse2.h rename to arch/x86/crypto/serpent-sse2.h diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 261c9ac2d762..ccf0b5fa4933 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -12,8 +12,8 @@ #include #include #include -#include +#include "serpent-avx.h" #include "ecb_cbc_helpers.h" #define SERPENT_AVX2_PARALLEL_BLOCKS 16 diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 5fe01d2a5b1d..6c248e1ea4ef 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -15,8 +15,8 @@ #include #include #include -#include +#include "serpent-avx.h" #include "ecb_cbc_helpers.h" /* 8-way parallel cipher functions */ diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index e28d60949c16..d78f37e9b2cf 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -20,8 +20,8 @@ #include #include #include -#include +#include "serpent-sse2.h" #include "ecb_cbc_helpers.h" static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/crypto/twofish.h similarity index 100% rename from arch/x86/include/asm/crypto/twofish.h rename to arch/x86/crypto/twofish.h diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 6ce198f808a5..3eb3440b477a 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -15,8 +15,8 @@ #include #include #include -#include +#include "twofish.h" #include "ecb_cbc_helpers.h" #define TWOFISH_PARALLEL_BLOCKS 8 diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index d1fdefa5195a..03725696397c 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -5,7 +5,6 @@ * Copyright (c) 2011 Jussi Kivilinna */ -#include #include #include #include @@ -13,6 +12,7 @@ #include #include +#include "twofish.h" #include "ecb_cbc_helpers.h" EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); From 2aa3da2d34787fbabd87ebf6468cf36bf8ed8d92 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 6 Jan 2021 12:25:08 +0300 Subject: [PATCH 084/154] crypto: keembay-ocs-hcu - Fix a WARN() message The first argument to WARN() is a condition and the messages is the second argument is the string, so this WARN() will only display the __func__ part of the message. Fixes: ae832e329a8d ("crypto: keembay-ocs-hcu - Add HMAC support") Signed-off-by: Dan Carpenter Acked-by: Daniele Alessandrelli Signed-off-by: Herbert Xu --- drivers/crypto/keembay/keembay-ocs-hcu-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/keembay/keembay-ocs-hcu-core.c b/drivers/crypto/keembay/keembay-ocs-hcu-core.c index d547af047131..c4b97b4160e9 100644 --- a/drivers/crypto/keembay/keembay-ocs-hcu-core.c +++ b/drivers/crypto/keembay/keembay-ocs-hcu-core.c @@ -388,7 +388,7 @@ static int prepare_ipad(struct ahash_request *req) * longer keys are hashed by kmb_ocs_hcu_setkey()). */ if (ctx->key_len > rctx->blk_sz) { - WARN("%s: Invalid key length in tfm context\n", __func__); + WARN(1, "%s: Invalid key length in tfm context\n", __func__); return -EINVAL; } memzero_explicit(&ctx->key[ctx->key_len], From e2811196fbe0d8d21ad5c06488a5e896ceeb97fd Mon Sep 17 00:00:00 2001 From: Daniele Alessandrelli Date: Wed, 6 Jan 2021 15:27:33 +0000 Subject: [PATCH 085/154] crypto: keembay-ocs-hcu - Add dependency on HAS_IOMEM and ARCH_KEEMBAY Add the following additional dependencies for CRYPTO_DEV_KEEMBAY_OCS_HCU: - HAS_IOMEM to prevent build failures - ARCH_KEEMBAY to prevent asking the user about this driver when configuring a kernel without Intel Keem Bay platform support. Signed-off-by: Daniele Alessandrelli Signed-off-by: Herbert Xu --- drivers/crypto/keembay/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/keembay/Kconfig b/drivers/crypto/keembay/Kconfig index e45f1b039380..00cf8f028cb9 100644 --- a/drivers/crypto/keembay/Kconfig +++ b/drivers/crypto/keembay/Kconfig @@ -43,6 +43,8 @@ config CRYPTO_DEV_KEEMBAY_OCS_HCU tristate "Support for Intel Keem Bay OCS HCU HW acceleration" select CRYPTO_HASH select CRYPTO_ENGINE + depends on HAS_IOMEM + depends on ARCH_KEEMBAY || COMPILE_TEST depends on OF || COMPILE_TEST help Support for Intel Keem Bay Offload and Crypto Subsystem (OCS) Hash From 660d2062190db131d2feaf19914e90f868fe285c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 13 Jan 2021 10:11:35 +0100 Subject: [PATCH 086/154] crypto - shash: reduce minimum alignment of shash_desc structure Unlike many other structure types defined in the crypto API, the 'shash_desc' structure is permitted to live on the stack, which implies its contents may not be accessed by DMA masters. (This is due to the fact that the stack may be located in the vmalloc area, which requires a different virtual-to-physical translation than the one implemented by the DMA subsystem) Our definition of CRYPTO_MINALIGN_ATTR is based on ARCH_KMALLOC_MINALIGN, which may take DMA constraints into account on architectures that support non-cache coherent DMA such as ARM and arm64. In this case, the value is chosen to reflect the largest cacheline size in the system, in order to ensure that explicit cache maintenance as required by non-coherent DMA masters does not affect adjacent, unrelated slab allocations. On arm64, this value is currently set at 128 bytes. This means that applying CRYPTO_MINALIGN_ATTR to struct shash_desc is both unnecessary (as it is never used for DMA), and undesirable, given that it wastes stack space (on arm64, performing the alignment costs 112 bytes in the worst case, and the hole between the 'tfm' and '__ctx' members takes up another 120 bytes, resulting in an increased stack footprint of up to 232 bytes.) So instead, let's switch to the minimum SLAB alignment, which does not take DMA constraints into account. Note that this is a no-op for x86. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- include/crypto/hash.h | 8 ++++---- include/linux/crypto.h | 9 ++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/crypto/hash.h b/include/crypto/hash.h index af2ff31ff619..13f8a6a54ca8 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -149,7 +149,7 @@ struct ahash_alg { struct shash_desc { struct crypto_shash *tfm; - void *__ctx[] CRYPTO_MINALIGN_ATTR; + void *__ctx[] __aligned(ARCH_SLAB_MINALIGN); }; #define HASH_MAX_DIGESTSIZE 64 @@ -162,9 +162,9 @@ struct shash_desc { #define HASH_MAX_STATESIZE 512 -#define SHASH_DESC_ON_STACK(shash, ctx) \ - char __##shash##_desc[sizeof(struct shash_desc) + \ - HASH_MAX_DESCSIZE] CRYPTO_MINALIGN_ATTR; \ +#define SHASH_DESC_ON_STACK(shash, ctx) \ + char __##shash##_desc[sizeof(struct shash_desc) + HASH_MAX_DESCSIZE] \ + __aligned(__alignof__(struct shash_desc)); \ struct shash_desc *shash = (struct shash_desc *)__##shash##_desc /** diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 9b55cd6b1f1b..da5e0d74bb2f 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -151,9 +151,12 @@ * The macro CRYPTO_MINALIGN_ATTR (along with the void * type in the actual * declaration) is used to ensure that the crypto_tfm context structure is * aligned correctly for the given architecture so that there are no alignment - * faults for C data types. In particular, this is required on platforms such - * as arm where pointers are 32-bit aligned but there are data types such as - * u64 which require 64-bit alignment. + * faults for C data types. On architectures that support non-cache coherent + * DMA, such as ARM or arm64, it also takes into account the minimal alignment + * that is required to ensure that the context struct member does not share any + * cachelines with the rest of the struct. This is needed to ensure that cache + * maintenance for non-coherent DMA (cache invalidation in particular) does not + * affect data that may be accessed by the CPU concurrently. */ #define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN From 5a17eae414ab81b268052768bf0fb35b4f4edbec Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 14 Jan 2021 17:39:58 +1100 Subject: [PATCH 087/154] crypto: bcm - Fix sparse warnings This patch fixes a number of sparse warnings in the bcm driver. Signed-off-by: Herbert Xu --- drivers/crypto/bcm/cipher.c | 4 ---- drivers/crypto/bcm/spu.c | 20 +++++++++----------- drivers/crypto/bcm/spu2.c | 6 ++---- drivers/crypto/bcm/spu2.h | 8 ++++---- drivers/crypto/bcm/spum.h | 22 +++++++++++----------- drivers/crypto/bcm/util.c | 2 ++ drivers/crypto/bcm/util.h | 24 +++++++++++++++++++----- 7 files changed, 47 insertions(+), 39 deletions(-) diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index 0e5537838ef3..851b149f7170 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -471,10 +471,8 @@ static int handle_skcipher_req(struct iproc_reqctx_s *rctx) static void handle_skcipher_resp(struct iproc_reqctx_s *rctx) { struct spu_hw *spu = &iproc_priv.spu; -#ifdef DEBUG struct crypto_async_request *areq = rctx->parent; struct skcipher_request *req = skcipher_request_cast(areq); -#endif struct iproc_ctx_s *ctx = rctx->ctx; u32 payload_len; @@ -996,13 +994,11 @@ static int ahash_req_done(struct iproc_reqctx_s *rctx) static void handle_ahash_resp(struct iproc_reqctx_s *rctx) { struct iproc_ctx_s *ctx = rctx->ctx; -#ifdef DEBUG struct crypto_async_request *areq = rctx->parent; struct ahash_request *req = ahash_request_cast(areq); struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); unsigned int blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(ahash)); -#endif /* * Save hash to use as input to next op if incremental. Might be copying * too much, but that's easier than figuring out actual digest size here diff --git a/drivers/crypto/bcm/spu.c b/drivers/crypto/bcm/spu.c index fe126f95c702..007abf92cc05 100644 --- a/drivers/crypto/bcm/spu.c +++ b/drivers/crypto/bcm/spu.c @@ -41,7 +41,7 @@ void spum_dump_msg_hdr(u8 *buf, unsigned int buf_len) packet_log("SPU Message header %p len: %u\n", buf, buf_len); /* ========== Decode MH ========== */ - packet_log(" MH 0x%08x\n", be32_to_cpu(*((u32 *)ptr))); + packet_log(" MH 0x%08x\n", be32_to_cpup((__be32 *)ptr)); if (spuh->mh.flags & MH_SCTX_PRES) packet_log(" SCTX present\n"); if (spuh->mh.flags & MH_BDESC_PRES) @@ -273,22 +273,21 @@ void spum_dump_msg_hdr(u8 *buf, unsigned int buf_len) /* ========== Decode BDESC ========== */ if (spuh->mh.flags & MH_BDESC_PRES) { -#ifdef DEBUG struct BDESC_HEADER *bdesc = (struct BDESC_HEADER *)ptr; -#endif - packet_log(" BDESC[0] 0x%08x\n", be32_to_cpu(*((u32 *)ptr))); + + packet_log(" BDESC[0] 0x%08x\n", be32_to_cpup((__be32 *)ptr)); packet_log(" OffsetMAC:%u LengthMAC:%u\n", be16_to_cpu(bdesc->offset_mac), be16_to_cpu(bdesc->length_mac)); ptr += sizeof(u32); - packet_log(" BDESC[1] 0x%08x\n", be32_to_cpu(*((u32 *)ptr))); + packet_log(" BDESC[1] 0x%08x\n", be32_to_cpup((__be32 *)ptr)); packet_log(" OffsetCrypto:%u LengthCrypto:%u\n", be16_to_cpu(bdesc->offset_crypto), be16_to_cpu(bdesc->length_crypto)); ptr += sizeof(u32); - packet_log(" BDESC[2] 0x%08x\n", be32_to_cpu(*((u32 *)ptr))); + packet_log(" BDESC[2] 0x%08x\n", be32_to_cpup((__be32 *)ptr)); packet_log(" OffsetICV:%u OffsetIV:%u\n", be16_to_cpu(bdesc->offset_icv), be16_to_cpu(bdesc->offset_iv)); @@ -297,10 +296,9 @@ void spum_dump_msg_hdr(u8 *buf, unsigned int buf_len) /* ========== Decode BD ========== */ if (spuh->mh.flags & MH_BD_PRES) { -#ifdef DEBUG struct BD_HEADER *bd = (struct BD_HEADER *)ptr; -#endif - packet_log(" BD[0] 0x%08x\n", be32_to_cpu(*((u32 *)ptr))); + + packet_log(" BD[0] 0x%08x\n", be32_to_cpup((__be32 *)ptr)); packet_log(" Size:%ubytes PrevLength:%u\n", be16_to_cpu(bd->size), be16_to_cpu(bd->prev_length)); ptr += 4; @@ -1056,9 +1054,9 @@ void spum_request_pad(u8 *pad_start, /* add the size at the end as required per alg */ if (auth_alg == HASH_ALG_MD5) - *(u64 *)ptr = cpu_to_le64((u64)total_sent * 8); + *(__le64 *)ptr = cpu_to_le64(total_sent * 8ull); else /* SHA1, SHA2-224, SHA2-256 */ - *(u64 *)ptr = cpu_to_be64((u64)total_sent * 8); + *(__be64 *)ptr = cpu_to_be64(total_sent * 8ull); ptr += sizeof(u64); } } diff --git a/drivers/crypto/bcm/spu2.c b/drivers/crypto/bcm/spu2.c index c860ffb0b4c3..2db35b5ccaa2 100644 --- a/drivers/crypto/bcm/spu2.c +++ b/drivers/crypto/bcm/spu2.c @@ -964,7 +964,6 @@ u32 spu2_create_request(u8 *spu_hdr, unsigned int cipher_offset = aead_parms->assoc_size + aead_parms->aad_pad_len + aead_parms->iv_len; -#ifdef DEBUG /* total size of the data following OMD (without STAT word padding) */ unsigned int real_db_size = spu_real_db_size(aead_parms->assoc_size, aead_parms->iv_len, @@ -973,7 +972,6 @@ u32 spu2_create_request(u8 *spu_hdr, aead_parms->aad_pad_len, aead_parms->data_pad_len, hash_parms->pad_len); -#endif unsigned int assoc_size = aead_parms->assoc_size; if (req_opts->is_aead && @@ -1263,9 +1261,9 @@ void spu2_request_pad(u8 *pad_start, u32 gcm_padding, u32 hash_pad_len, /* add the size at the end as required per alg */ if (auth_alg == HASH_ALG_MD5) - *(u64 *)ptr = cpu_to_le64((u64)total_sent * 8); + *(__le64 *)ptr = cpu_to_le64(total_sent * 8ull); else /* SHA1, SHA2-224, SHA2-256 */ - *(u64 *)ptr = cpu_to_be64((u64)total_sent * 8); + *(__be64 *)ptr = cpu_to_be64(total_sent * 8ull); ptr += sizeof(u64); } diff --git a/drivers/crypto/bcm/spu2.h b/drivers/crypto/bcm/spu2.h index 6e666bfb3cfc..a76d4e054466 100644 --- a/drivers/crypto/bcm/spu2.h +++ b/drivers/crypto/bcm/spu2.h @@ -73,10 +73,10 @@ enum spu2_ret_md_opts { /* Fixed Metadata format */ struct SPU2_FMD { - u64 ctrl0; - u64 ctrl1; - u64 ctrl2; - u64 ctrl3; + __le64 ctrl0; + __le64 ctrl1; + __le64 ctrl2; + __le64 ctrl3; }; #define FMD_SIZE sizeof(struct SPU2_FMD) diff --git a/drivers/crypto/bcm/spum.h b/drivers/crypto/bcm/spum.h index 6116ad1dd26e..f062f75808de 100644 --- a/drivers/crypto/bcm/spum.h +++ b/drivers/crypto/bcm/spum.h @@ -69,18 +69,18 @@ /* Buffer Descriptor Header [BDESC]. SPU in big-endian mode. */ struct BDESC_HEADER { - u16 offset_mac; /* word 0 [31-16] */ - u16 length_mac; /* word 0 [15-0] */ - u16 offset_crypto; /* word 1 [31-16] */ - u16 length_crypto; /* word 1 [15-0] */ - u16 offset_icv; /* word 2 [31-16] */ - u16 offset_iv; /* word 2 [15-0] */ + __be16 offset_mac; /* word 0 [31-16] */ + __be16 length_mac; /* word 0 [15-0] */ + __be16 offset_crypto; /* word 1 [31-16] */ + __be16 length_crypto; /* word 1 [15-0] */ + __be16 offset_icv; /* word 2 [31-16] */ + __be16 offset_iv; /* word 2 [15-0] */ }; /* Buffer Data Header [BD]. SPU in big-endian mode. */ struct BD_HEADER { - u16 size; - u16 prev_length; + __be16 size; + __be16 prev_length; }; /* Command Context Header. SPU-M in big endian mode. */ @@ -144,13 +144,13 @@ struct MHEADER { /* Generic Mode Security Context Structure [SCTX] */ struct SCTX { /* word 0: protocol flags */ - u32 proto_flags; + __be32 proto_flags; /* word 1: cipher flags */ - u32 cipher_flags; + __be32 cipher_flags; /* word 2: Extended cipher flags */ - u32 ecf; + __be32 ecf; }; diff --git a/drivers/crypto/bcm/util.c b/drivers/crypto/bcm/util.c index 77aeedb84055..c4669a96eaec 100644 --- a/drivers/crypto/bcm/util.c +++ b/drivers/crypto/bcm/util.c @@ -268,6 +268,7 @@ do_shash_err: return rc; } +#ifdef DEBUG /* Dump len bytes of a scatterlist starting at skip bytes into the sg */ void __dump_sg(struct scatterlist *sg, unsigned int skip, unsigned int len) { @@ -289,6 +290,7 @@ void __dump_sg(struct scatterlist *sg, unsigned int skip, unsigned int len) if (debug_logging_sleep) msleep(debug_logging_sleep); } +#endif /* Returns the name for a given cipher alg/mode */ char *spu_alg_name(enum spu_cipher_alg alg, enum spu_cipher_mode mode) diff --git a/drivers/crypto/bcm/util.h b/drivers/crypto/bcm/util.h index a89b2b9c1f52..61c256384816 100644 --- a/drivers/crypto/bcm/util.h +++ b/drivers/crypto/bcm/util.h @@ -58,12 +58,26 @@ void __dump_sg(struct scatterlist *sg, unsigned int skip, unsigned int len); #else /* !DEBUG_ON */ -#define flow_log(...) do {} while (0) -#define flow_dump(msg, var, var_len) do {} while (0) -#define packet_log(...) do {} while (0) -#define packet_dump(msg, var, var_len) do {} while (0) +static inline void flow_log(const char *format, ...) +{ +} -#define dump_sg(sg, skip, len) do {} while (0) +static inline void flow_dump(const char *msg, const void *var, size_t var_len) +{ +} + +static inline void packet_log(const char *format, ...) +{ +} + +static inline void packet_dump(const char *msg, const void *var, size_t var_len) +{ +} + +static inline void dump_sg(struct scatterlist *sg, unsigned int skip, + unsigned int len) +{ +} #endif /* DEBUG_ON */ From 0df07d8117c3576f1603b05b84089742a118d10a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 14 Jan 2021 19:10:10 +0100 Subject: [PATCH 088/154] crypto: arm64/sha - add missing module aliases The accelerated, instruction based implementations of SHA1, SHA2 and SHA3 are autoloaded based on CPU capabilities, given that the code is modest in size, and widely used, which means that resolving the algo name, loading all compatible modules and picking the one with the highest priority is taken to be suboptimal. However, if these algorithms are requested before this CPU feature based matching and autoloading occurs, these modules are not even considered, and we end up with suboptimal performance. So add the missing module aliases for the various SHA implementations. Cc: Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/sha1-ce-glue.c | 1 + arch/arm64/crypto/sha2-ce-glue.c | 2 ++ arch/arm64/crypto/sha3-ce-glue.c | 4 ++++ arch/arm64/crypto/sha512-ce-glue.c | 2 ++ 4 files changed, 9 insertions(+) diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index c93121bcfdeb..c1362861765f 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -19,6 +19,7 @@ MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("sha1"); struct sha1_ce_state { struct sha1_state sst; diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 31ba3da5e61b..ded3a6488f81 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -19,6 +19,8 @@ MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha256"); struct sha256_ce_state { struct sha256_state sst; diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c index e5a2936f0886..7288d3046354 100644 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ b/arch/arm64/crypto/sha3-ce-glue.c @@ -23,6 +23,10 @@ MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("sha3-224"); +MODULE_ALIAS_CRYPTO("sha3-256"); +MODULE_ALIAS_CRYPTO("sha3-384"); +MODULE_ALIAS_CRYPTO("sha3-512"); asmlinkage void sha3_ce_transform(u64 *st, const u8 *data, int blocks, int md_len); diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index faa83f6cf376..a6b1adf31c56 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c +++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -23,6 +23,8 @@ MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha512"); asmlinkage void sha512_ce_transform(struct sha512_state *sst, u8 const *src, int blocks); From 5e8ce8334734c5f23fe54774e989b395bc6da635 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Fri, 15 Jan 2021 19:22:19 +0530 Subject: [PATCH 089/154] crypto: marvell - add Marvell OcteonTX2 CPT PF driver Adds skeleton for the Marvell OcteonTX2 CPT physical function driver which includes probe, PCI specific initialization and hardware register defines. RVU defines are present in AF driver (drivers/net/ethernet/marvell/octeontx2/af), header files from AF driver are included here to avoid duplication. Signed-off-by: Suheil Chandran Signed-off-by: Lukasz Bartosik Signed-off-by: Srujana Challa Signed-off-by: Herbert Xu --- drivers/crypto/marvell/Kconfig | 10 + drivers/crypto/marvell/Makefile | 1 + drivers/crypto/marvell/octeontx2/Makefile | 6 + .../marvell/octeontx2/otx2_cpt_common.h | 32 ++ .../marvell/octeontx2/otx2_cpt_hw_types.h | 464 ++++++++++++++++++ drivers/crypto/marvell/octeontx2/otx2_cptpf.h | 13 + .../marvell/octeontx2/otx2_cptpf_main.c | 107 ++++ 7 files changed, 633 insertions(+) create mode 100644 drivers/crypto/marvell/octeontx2/Makefile create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cpt_common.h create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptpf.h create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig index 13063384f958..1440ec9e1fb4 100644 --- a/drivers/crypto/marvell/Kconfig +++ b/drivers/crypto/marvell/Kconfig @@ -35,3 +35,13 @@ config CRYPTO_DEV_OCTEONTX_CPT To compile this driver as module, choose M here: the modules will be called octeontx-cpt and octeontx-cptvf + +config CRYPTO_DEV_OCTEONTX2_CPT + tristate "Marvell OcteonTX2 CPT driver" + depends on ARM64 || COMPILE_TEST + depends on PCI_MSI && 64BIT + select OCTEONTX2_MBOX + select CRYPTO_DEV_MARVELL + help + This driver allows you to utilize the Marvell Cryptographic + Accelerator Unit(CPT) found in OcteonTX2 series of processors. diff --git a/drivers/crypto/marvell/Makefile b/drivers/crypto/marvell/Makefile index 6c6a1519b0f1..39db6d9c0aaf 100644 --- a/drivers/crypto/marvell/Makefile +++ b/drivers/crypto/marvell/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += cesa/ obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx/ +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2/ diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile new file mode 100644 index 000000000000..db763ad46a91 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o + +octeontx2-cpt-objs := otx2_cptpf_main.o + +ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h new file mode 100644 index 000000000000..eff4ffa58dc4 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPT_COMMON_H +#define __OTX2_CPT_COMMON_H + +#include +#include +#include +#include +#include +#include "otx2_cpt_hw_types.h" +#include "rvu.h" + +#define OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs) \ + (((blk) << 20) | ((slot) << 12) | (offs)) + +static inline void otx2_cpt_write64(void __iomem *reg_base, u64 blk, u64 slot, + u64 offs, u64 val) +{ + writeq_relaxed(val, reg_base + + OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs)); +} + +static inline u64 otx2_cpt_read64(void __iomem *reg_base, u64 blk, u64 slot, + u64 offs) +{ + return readq_relaxed(reg_base + + OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs)); +} +#endif /* __OTX2_CPT_COMMON_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h new file mode 100644 index 000000000000..ecafc42f37a2 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h @@ -0,0 +1,464 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPT_HW_TYPES_H +#define __OTX2_CPT_HW_TYPES_H + +#include + +/* Device IDs */ +#define OTX2_CPT_PCI_PF_DEVICE_ID 0xA0FD +#define OTX2_CPT_PCI_VF_DEVICE_ID 0xA0FE + +/* Mailbox interrupts offset */ +#define OTX2_CPT_PF_MBOX_INT 6 +#define OTX2_CPT_PF_INT_VEC_E_MBOXX(x, a) ((x) + (a)) + +/* Maximum supported microcode groups */ +#define OTX2_CPT_MAX_ENGINE_GROUPS 8 + +/* CPT instruction size in bytes */ +#define OTX2_CPT_INST_SIZE 64 +/* + * CPT VF MSIX vectors and their offsets + */ +#define OTX2_CPT_VF_MSIX_VECTORS 1 +#define OTX2_CPT_VF_INTR_MBOX_MASK BIT(0) + +/* CPT LF MSIX vectors */ +#define OTX2_CPT_LF_MSIX_VECTORS 2 + +/* OcteonTX2 CPT PF registers */ +#define OTX2_CPT_PF_CONSTANTS (0x0) +#define OTX2_CPT_PF_RESET (0x100) +#define OTX2_CPT_PF_DIAG (0x120) +#define OTX2_CPT_PF_BIST_STATUS (0x160) +#define OTX2_CPT_PF_ECC0_CTL (0x200) +#define OTX2_CPT_PF_ECC0_FLIP (0x210) +#define OTX2_CPT_PF_ECC0_INT (0x220) +#define OTX2_CPT_PF_ECC0_INT_W1S (0x230) +#define OTX2_CPT_PF_ECC0_ENA_W1S (0x240) +#define OTX2_CPT_PF_ECC0_ENA_W1C (0x250) +#define OTX2_CPT_PF_MBOX_INTX(b) (0x400 | (b) << 3) +#define OTX2_CPT_PF_MBOX_INT_W1SX(b) (0x420 | (b) << 3) +#define OTX2_CPT_PF_MBOX_ENA_W1CX(b) (0x440 | (b) << 3) +#define OTX2_CPT_PF_MBOX_ENA_W1SX(b) (0x460 | (b) << 3) +#define OTX2_CPT_PF_EXEC_INT (0x500) +#define OTX2_CPT_PF_EXEC_INT_W1S (0x520) +#define OTX2_CPT_PF_EXEC_ENA_W1C (0x540) +#define OTX2_CPT_PF_EXEC_ENA_W1S (0x560) +#define OTX2_CPT_PF_GX_EN(b) (0x600 | (b) << 3) +#define OTX2_CPT_PF_EXEC_INFO (0x700) +#define OTX2_CPT_PF_EXEC_BUSY (0x800) +#define OTX2_CPT_PF_EXEC_INFO0 (0x900) +#define OTX2_CPT_PF_EXEC_INFO1 (0x910) +#define OTX2_CPT_PF_INST_REQ_PC (0x10000) +#define OTX2_CPT_PF_INST_LATENCY_PC (0x10020) +#define OTX2_CPT_PF_RD_REQ_PC (0x10040) +#define OTX2_CPT_PF_RD_LATENCY_PC (0x10060) +#define OTX2_CPT_PF_RD_UC_PC (0x10080) +#define OTX2_CPT_PF_ACTIVE_CYCLES_PC (0x10100) +#define OTX2_CPT_PF_EXE_CTL (0x4000000) +#define OTX2_CPT_PF_EXE_STATUS (0x4000008) +#define OTX2_CPT_PF_EXE_CLK (0x4000010) +#define OTX2_CPT_PF_EXE_DBG_CTL (0x4000018) +#define OTX2_CPT_PF_EXE_DBG_DATA (0x4000020) +#define OTX2_CPT_PF_EXE_BIST_STATUS (0x4000028) +#define OTX2_CPT_PF_EXE_REQ_TIMER (0x4000030) +#define OTX2_CPT_PF_EXE_MEM_CTL (0x4000038) +#define OTX2_CPT_PF_EXE_PERF_CTL (0x4001000) +#define OTX2_CPT_PF_EXE_DBG_CNTX(b) (0x4001100 | (b) << 3) +#define OTX2_CPT_PF_EXE_PERF_EVENT_CNT (0x4001180) +#define OTX2_CPT_PF_EXE_EPCI_INBX_CNT(b) (0x4001200 | (b) << 3) +#define OTX2_CPT_PF_EXE_EPCI_OUTBX_CNT(b) (0x4001240 | (b) << 3) +#define OTX2_CPT_PF_ENGX_UCODE_BASE(b) (0x4002000 | (b) << 3) +#define OTX2_CPT_PF_QX_CTL(b) (0x8000000 | (b) << 20) +#define OTX2_CPT_PF_QX_GMCTL(b) (0x8000020 | (b) << 20) +#define OTX2_CPT_PF_QX_CTL2(b) (0x8000100 | (b) << 20) +#define OTX2_CPT_PF_VFX_MBOXX(b, c) (0x8001000 | (b) << 20 | \ + (c) << 8) + +/* OcteonTX2 CPT LF registers */ +#define OTX2_CPT_LF_CTL (0x10) +#define OTX2_CPT_LF_DONE_WAIT (0x30) +#define OTX2_CPT_LF_INPROG (0x40) +#define OTX2_CPT_LF_DONE (0x50) +#define OTX2_CPT_LF_DONE_ACK (0x60) +#define OTX2_CPT_LF_DONE_INT_ENA_W1S (0x90) +#define OTX2_CPT_LF_DONE_INT_ENA_W1C (0xa0) +#define OTX2_CPT_LF_MISC_INT (0xb0) +#define OTX2_CPT_LF_MISC_INT_W1S (0xc0) +#define OTX2_CPT_LF_MISC_INT_ENA_W1S (0xd0) +#define OTX2_CPT_LF_MISC_INT_ENA_W1C (0xe0) +#define OTX2_CPT_LF_Q_BASE (0xf0) +#define OTX2_CPT_LF_Q_SIZE (0x100) +#define OTX2_CPT_LF_Q_INST_PTR (0x110) +#define OTX2_CPT_LF_Q_GRP_PTR (0x120) +#define OTX2_CPT_LF_NQX(a) (0x400 | (a) << 3) +#define OTX2_CPT_RVU_FUNC_BLKADDR_SHIFT 20 +/* LMT LF registers */ +#define OTX2_CPT_LMT_LFBASE BIT_ULL(OTX2_CPT_RVU_FUNC_BLKADDR_SHIFT) +#define OTX2_CPT_LMT_LF_LMTLINEX(a) (OTX2_CPT_LMT_LFBASE | 0x000 | \ + (a) << 12) +/* RVU VF registers */ +#define OTX2_RVU_VF_INT (0x20) +#define OTX2_RVU_VF_INT_W1S (0x28) +#define OTX2_RVU_VF_INT_ENA_W1S (0x30) +#define OTX2_RVU_VF_INT_ENA_W1C (0x38) + +/* + * Enumeration otx2_cpt_ucode_error_code_e + * + * Enumerates ucode errors + */ +enum otx2_cpt_ucode_comp_code_e { + OTX2_CPT_UCC_SUCCESS = 0x00, + OTX2_CPT_UCC_INVALID_OPCODE = 0x01, + + /* Scatter gather */ + OTX2_CPT_UCC_SG_WRITE_LENGTH = 0x02, + OTX2_CPT_UCC_SG_LIST = 0x03, + OTX2_CPT_UCC_SG_NOT_SUPPORTED = 0x04, + +}; + +/* + * Enumeration otx2_cpt_comp_e + * + * OcteonTX2 CPT Completion Enumeration + * Enumerates the values of CPT_RES_S[COMPCODE]. + */ +enum otx2_cpt_comp_e { + OTX2_CPT_COMP_E_NOTDONE = 0x00, + OTX2_CPT_COMP_E_GOOD = 0x01, + OTX2_CPT_COMP_E_FAULT = 0x02, + OTX2_CPT_COMP_E_HWERR = 0x04, + OTX2_CPT_COMP_E_INSTERR = 0x05, + OTX2_CPT_COMP_E_LAST_ENTRY = 0x06 +}; + +/* + * Enumeration otx2_cpt_vf_int_vec_e + * + * OcteonTX2 CPT VF MSI-X Vector Enumeration + * Enumerates the MSI-X interrupt vectors. + */ +enum otx2_cpt_vf_int_vec_e { + OTX2_CPT_VF_INT_VEC_E_MBOX = 0x00 +}; + +/* + * Enumeration otx2_cpt_lf_int_vec_e + * + * OcteonTX2 CPT LF MSI-X Vector Enumeration + * Enumerates the MSI-X interrupt vectors. + */ +enum otx2_cpt_lf_int_vec_e { + OTX2_CPT_LF_INT_VEC_E_MISC = 0x00, + OTX2_CPT_LF_INT_VEC_E_DONE = 0x01 +}; + +/* + * Structure otx2_cpt_inst_s + * + * CPT Instruction Structure + * This structure specifies the instruction layout. Instructions are + * stored in memory as little-endian unless CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_inst_s_s + * Word 0 + * doneint:1 Done interrupt. + * 0 = No interrupts related to this instruction. + * 1 = When the instruction completes, CPT()_VQ()_DONE[DONE] will be + * incremented,and based on the rules described there an interrupt may + * occur. + * Word 1 + * res_addr [127: 64] Result IOVA. + * If nonzero, specifies where to write CPT_RES_S. + * If zero, no result structure will be written. + * Address must be 16-byte aligned. + * Bits <63:49> are ignored by hardware; software should use a + * sign-extended bit <48> for forward compatibility. + * Word 2 + * grp:10 [171:162] If [WQ_PTR] is nonzero, the SSO guest-group to use when + * CPT submits work SSO. + * For the SSO to not discard the add-work request, FPA_PF_MAP() must map + * [GRP] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * tt:2 [161:160] If [WQ_PTR] is nonzero, the SSO tag type to use when CPT + * submits work to SSO + * tag:32 [159:128] If [WQ_PTR] is nonzero, the SSO tag to use when CPT + * submits work to SSO. + * Word 3 + * wq_ptr [255:192] If [WQ_PTR] is nonzero, it is a pointer to a + * work-queue entry that CPT submits work to SSO after all context, + * output data, and result write operations are visible to other + * CNXXXX units and the cores. Bits <2:0> must be zero. + * Bits <63:49> are ignored by hardware; software should + * use a sign-extended bit <48> for forward compatibility. + * Internal: + * Bits <63:49>, <2:0> are ignored by hardware, treated as always 0x0. + * Word 4 + * ei0; [319:256] Engine instruction word 0. Passed to the AE/SE. + * Word 5 + * ei1; [383:320] Engine instruction word 1. Passed to the AE/SE. + * Word 6 + * ei2; [447:384] Engine instruction word 1. Passed to the AE/SE. + * Word 7 + * ei3; [511:448] Engine instruction word 1. Passed to the AE/SE. + * + */ +union otx2_cpt_inst_s { + u64 u[8]; + + struct { + /* Word 0 */ + u64 nixtxl:3; + u64 doneint:1; + u64 nixtx_addr:60; + /* Word 1 */ + u64 res_addr; + /* Word 2 */ + u64 tag:32; + u64 tt:2; + u64 grp:10; + u64 reserved_172_175:4; + u64 rvu_pf_func:16; + /* Word 3 */ + u64 qord:1; + u64 reserved_194_193:2; + u64 wq_ptr:61; + /* Word 4 */ + u64 ei0; + /* Word 5 */ + u64 ei1; + /* Word 6 */ + u64 ei2; + /* Word 7 */ + u64 ei3; + } s; +}; + +/* + * Structure otx2_cpt_res_s + * + * CPT Result Structure + * The CPT coprocessor writes the result structure after it completes a + * CPT_INST_S instruction. The result structure is exactly 16 bytes, and + * each instruction completion produces exactly one result structure. + * + * This structure is stored in memory as little-endian unless + * CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_res_s_s + * Word 0 + * doneint:1 [16:16] Done interrupt. This bit is copied from the + * corresponding instruction's CPT_INST_S[DONEINT]. + * compcode:8 [7:0] Indicates completion/error status of the CPT coprocessor + * for the associated instruction, as enumerated by CPT_COMP_E. + * Core software may write the memory location containing [COMPCODE] to + * 0x0 before ringing the doorbell, and then poll for completion by + * checking for a nonzero value. + * Once the core observes a nonzero [COMPCODE] value in this case,the CPT + * coprocessor will have also completed L2/DRAM write operations. + * Word 1 + * reserved + * + */ +union otx2_cpt_res_s { + u64 u[2]; + + struct { + u64 compcode:8; + u64 uc_compcode:8; + u64 doneint:1; + u64 reserved_17_63:47; + u64 reserved_64_127; + } s; +}; + +/* + * Register (RVU_PF_BAR0) cpt#_af_constants1 + * + * CPT AF Constants Register + * This register contains implementation-related parameters of CPT. + */ +union otx2_cptx_af_constants1 { + u64 u; + struct otx2_cptx_af_constants1_s { + u64 se:16; + u64 ie:16; + u64 ae:16; + u64 reserved_48_63:16; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_misc_int + * + * This register contain the per-queue miscellaneous interrupts. + * + */ +union otx2_cptx_lf_misc_int { + u64 u; + struct otx2_cptx_lf_misc_int_s { + u64 reserved_0:1; + u64 nqerr:1; + u64 irde:1; + u64 nwrp:1; + u64 reserved_4:1; + u64 hwerr:1; + u64 fault:1; + u64 reserved_7_63:57; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_misc_int_ena_w1s + * + * This register sets interrupt enable bits. + * + */ +union otx2_cptx_lf_misc_int_ena_w1s { + u64 u; + struct otx2_cptx_lf_misc_int_ena_w1s_s { + u64 reserved_0:1; + u64 nqerr:1; + u64 irde:1; + u64 nwrp:1; + u64 reserved_4:1; + u64 hwerr:1; + u64 fault:1; + u64 reserved_7_63:57; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_ctl + * + * This register configures the queue. + * + * When the queue is not execution-quiescent (see CPT_LF_INPROG[EENA,INFLIGHT]), + * software must only write this register with [ENA]=0. + */ +union otx2_cptx_lf_ctl { + u64 u; + struct otx2_cptx_lf_ctl_s { + u64 ena:1; + u64 fc_ena:1; + u64 fc_up_crossing:1; + u64 reserved_3:1; + u64 fc_hyst_bits:4; + u64 reserved_8_63:56; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_done_wait + * + * This register specifies the per-queue interrupt coalescing settings. + */ +union otx2_cptx_lf_done_wait { + u64 u; + struct otx2_cptx_lf_done_wait_s { + u64 num_wait:20; + u64 reserved_20_31:12; + u64 time_wait:16; + u64 reserved_48_63:16; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_done + * + * This register contain the per-queue instruction done count. + */ +union otx2_cptx_lf_done { + u64 u; + struct otx2_cptx_lf_done_s { + u64 done:20; + u64 reserved_20_63:44; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_inprog + * + * These registers contain the per-queue instruction in flight registers. + * + */ +union otx2_cptx_lf_inprog { + u64 u; + struct otx2_cptx_lf_inprog_s { + u64 inflight:9; + u64 reserved_9_15:7; + u64 eena:1; + u64 grp_drp:1; + u64 reserved_18_30:13; + u64 grb_partial:1; + u64 grb_cnt:8; + u64 gwb_cnt:8; + u64 reserved_48_63:16; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_q_base + * + * CPT initializes these CSR fields to these values on any CPT_LF_Q_BASE write: + * _ CPT_LF_Q_INST_PTR[XQ_XOR]=0. + * _ CPT_LF_Q_INST_PTR[NQ_PTR]=2. + * _ CPT_LF_Q_INST_PTR[DQ_PTR]=2. + * _ CPT_LF_Q_GRP_PTR[XQ_XOR]=0. + * _ CPT_LF_Q_GRP_PTR[NQ_PTR]=1. + * _ CPT_LF_Q_GRP_PTR[DQ_PTR]=1. + */ +union otx2_cptx_lf_q_base { + u64 u; + struct otx2_cptx_lf_q_base_s { + u64 fault:1; + u64 reserved_1_6:6; + u64 addr:46; + u64 reserved_53_63:11; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_q_size + * + * CPT initializes these CSR fields to these values on any CPT_LF_Q_SIZE write: + * _ CPT_LF_Q_INST_PTR[XQ_XOR]=0. + * _ CPT_LF_Q_INST_PTR[NQ_PTR]=2. + * _ CPT_LF_Q_INST_PTR[DQ_PTR]=2. + * _ CPT_LF_Q_GRP_PTR[XQ_XOR]=0. + * _ CPT_LF_Q_GRP_PTR[NQ_PTR]=1. + * _ CPT_LF_Q_GRP_PTR[DQ_PTR]=1. + */ +union otx2_cptx_lf_q_size { + u64 u; + struct otx2_cptx_lf_q_size_s { + u64 size_div40:15; + u64 reserved_15_63:49; + } s; +}; + +/* + * RVU_PF_BAR0 - cpt_af_lf_ctl + * + * This register configures queues. This register should be written only + * when the queue is execution-quiescent (see CPT_LF_INPROG[INFLIGHT]). + */ +union otx2_cptx_af_lf_ctrl { + u64 u; + struct otx2_cptx_af_lf_ctrl_s { + u64 pri:1; + u64 reserved_1_8:8; + u64 pf_func_inst:1; + u64 cont_err:1; + u64 reserved_11_15:5; + u64 nixtx_en:1; + u64 reserved_17_47:31; + u64 grp:8; + u64 reserved_56_63:8; + } s; +}; + +#endif /* __OTX2_CPT_HW_TYPES_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h new file mode 100644 index 000000000000..84cdc8cc2c15 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPTPF_H +#define __OTX2_CPTPF_H + +struct otx2_cptpf_dev { + void __iomem *reg_base; /* CPT PF registers start address */ + struct pci_dev *pdev; /* PCI device handle */ +}; + +#endif /* __OTX2_CPTPF_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c new file mode 100644 index 000000000000..47781966de3e --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include +#include "otx2_cpt_hw_types.h" +#include "otx2_cpt_common.h" +#include "otx2_cptpf.h" +#include "rvu_reg.h" + +#define OTX2_CPT_DRV_NAME "octeontx2-cpt" +#define OTX2_CPT_DRV_STRING "Marvell OcteonTX2 CPT Physical Function Driver" + +static int cpt_is_pf_usable(struct otx2_cptpf_dev *cptpf) +{ + u64 rev; + + rev = otx2_cpt_read64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_BLOCK_ADDRX_DISC(BLKADDR_RVUM)); + rev = (rev >> 12) & 0xFF; + /* + * Check if AF has setup revision for RVUM block, otherwise + * driver probe should be deferred until AF driver comes up + */ + if (!rev) { + dev_warn(&cptpf->pdev->dev, + "AF is not initialized, deferring probe\n"); + return -EPROBE_DEFER; + } + return 0; +} + +static int otx2_cptpf_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct otx2_cptpf_dev *cptpf; + int err; + + cptpf = devm_kzalloc(dev, sizeof(*cptpf), GFP_KERNEL); + if (!cptpf) + return -ENOMEM; + + err = pcim_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto clear_drvdata; + } + + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto clear_drvdata; + } + /* Map PF's configuration registers */ + err = pcim_iomap_regions_request_all(pdev, 1 << PCI_PF_REG_BAR_NUM, + OTX2_CPT_DRV_NAME); + if (err) { + dev_err(dev, "Couldn't get PCI resources 0x%x\n", err); + goto clear_drvdata; + } + pci_set_master(pdev); + pci_set_drvdata(pdev, cptpf); + cptpf->pdev = pdev; + + cptpf->reg_base = pcim_iomap_table(pdev)[PCI_PF_REG_BAR_NUM]; + + /* Check if AF driver is up, otherwise defer probe */ + err = cpt_is_pf_usable(cptpf); + if (err) + goto clear_drvdata; + + return 0; + +clear_drvdata: + pci_set_drvdata(pdev, NULL); + return err; +} + +static void otx2_cptpf_remove(struct pci_dev *pdev) +{ + struct otx2_cptpf_dev *cptpf = pci_get_drvdata(pdev); + + if (!cptpf) + return; + + pci_set_drvdata(pdev, NULL); +} + +/* Supported devices */ +static const struct pci_device_id otx2_cpt_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OTX2_CPT_PCI_PF_DEVICE_ID) }, + { 0, } /* end of table */ +}; + +static struct pci_driver otx2_cpt_pci_driver = { + .name = OTX2_CPT_DRV_NAME, + .id_table = otx2_cpt_id_table, + .probe = otx2_cptpf_probe, + .remove = otx2_cptpf_remove, +}; + +module_pci_driver(otx2_cpt_pci_driver); + +MODULE_AUTHOR("Marvell"); +MODULE_DESCRIPTION(OTX2_CPT_DRV_STRING); +MODULE_LICENSE("GPL v2"); +MODULE_DEVICE_TABLE(pci, otx2_cpt_id_table); From 83ffcf78627f98919ebae3dc6715982cc83176ed Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Fri, 15 Jan 2021 19:22:20 +0530 Subject: [PATCH 090/154] crypto: octeontx2 - add mailbox communication with AF In the resource virtualization unit (RVU) each of the PF and AF (admin function) share a 64KB of reserved memory region for communication. This patch initializes PF <=> AF mailbox IRQs, registers handlers for processing these communication messages. Signed-off-by: Suheil Chandran Signed-off-by: Lukasz Bartosik Signed-off-by: Srujana Challa Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/Makefile | 3 +- .../marvell/octeontx2/otx2_cpt_common.h | 4 + .../marvell/octeontx2/otx2_cpt_mbox_common.c | 37 +++++++ drivers/crypto/marvell/octeontx2/otx2_cptpf.h | 12 +++ .../marvell/octeontx2/otx2_cptpf_main.c | 102 +++++++++++++++++- .../marvell/octeontx2/otx2_cptpf_mbox.c | 80 ++++++++++++++ 6 files changed, 236 insertions(+), 2 deletions(-) create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile index db763ad46a91..8c8262e94f78 100644 --- a/drivers/crypto/marvell/octeontx2/Makefile +++ b/drivers/crypto/marvell/octeontx2/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o -octeontx2-cpt-objs := otx2_cptpf_main.o +octeontx2-cpt-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ + otx2_cpt_mbox_common.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h index eff4ffa58dc4..b677f8c7e724 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -12,6 +12,7 @@ #include #include "otx2_cpt_hw_types.h" #include "rvu.h" +#include "mbox.h" #define OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs) \ (((blk) << 20) | ((slot) << 12) | (offs)) @@ -29,4 +30,7 @@ static inline u64 otx2_cpt_read64(void __iomem *reg_base, u64 blk, u64 slot, return readq_relaxed(reg_base + OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs)); } + +int otx2_cpt_send_ready_msg(struct otx2_mbox *mbox, struct pci_dev *pdev); +int otx2_cpt_send_mbox_msg(struct otx2_mbox *mbox, struct pci_dev *pdev); #endif /* __OTX2_CPT_COMMON_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c new file mode 100644 index 000000000000..a122483b5976 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include "otx2_cpt_common.h" + +int otx2_cpt_send_mbox_msg(struct otx2_mbox *mbox, struct pci_dev *pdev) +{ + int ret; + + otx2_mbox_msg_send(mbox, 0); + ret = otx2_mbox_wait_for_rsp(mbox, 0); + if (ret == -EIO) { + dev_err(&pdev->dev, "RVU MBOX timeout.\n"); + return ret; + } else if (ret) { + dev_err(&pdev->dev, "RVU MBOX error: %d.\n", ret); + return -EFAULT; + } + return ret; +} + +int otx2_cpt_send_ready_msg(struct otx2_mbox *mbox, struct pci_dev *pdev) +{ + struct mbox_msghdr *req; + + req = otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct ready_msg_rsp)); + if (req == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + req->id = MBOX_MSG_READY; + req->sig = OTX2_MBOX_REQ_SIG; + req->pcifunc = 0; + + return otx2_cpt_send_mbox_msg(mbox, pdev); +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h index 84cdc8cc2c15..87fe4c6838e5 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h @@ -5,9 +5,21 @@ #ifndef __OTX2_CPTPF_H #define __OTX2_CPTPF_H +#include "otx2_cpt_common.h" + struct otx2_cptpf_dev { void __iomem *reg_base; /* CPT PF registers start address */ + void __iomem *afpf_mbox_base; /* PF-AF mbox start address */ struct pci_dev *pdev; /* PCI device handle */ + /* AF <=> PF mbox */ + struct otx2_mbox afpf_mbox; + struct work_struct afpf_mbox_work; + struct workqueue_struct *afpf_mbox_wq; + + u8 pf_id; /* RVU PF number */ }; +irqreturn_t otx2_cptpf_afpf_mbox_intr(int irq, void *arg); +void otx2_cptpf_afpf_mbox_handler(struct work_struct *work); + #endif /* __OTX2_CPTPF_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c index 47781966de3e..2f5bf02436da 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -10,6 +10,75 @@ #define OTX2_CPT_DRV_NAME "octeontx2-cpt" #define OTX2_CPT_DRV_STRING "Marvell OcteonTX2 CPT Physical Function Driver" +static void cptpf_disable_afpf_mbox_intr(struct otx2_cptpf_dev *cptpf) +{ + /* Disable AF-PF interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_INT_ENA_W1C, + 0x1ULL); + /* Clear interrupt if any */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_INT, 0x1ULL); +} + +static int cptpf_register_afpf_mbox_intr(struct otx2_cptpf_dev *cptpf) +{ + struct pci_dev *pdev = cptpf->pdev; + struct device *dev = &pdev->dev; + int ret, irq; + + irq = pci_irq_vector(pdev, RVU_PF_INT_VEC_AFPF_MBOX); + /* Register AF-PF mailbox interrupt handler */ + ret = devm_request_irq(dev, irq, otx2_cptpf_afpf_mbox_intr, 0, + "CPTAFPF Mbox", cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for PFAF mbox irq\n"); + return ret; + } + /* Clear interrupt if any, to avoid spurious interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_INT, 0x1ULL); + /* Enable AF-PF interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_INT_ENA_W1S, + 0x1ULL); + + ret = otx2_cpt_send_ready_msg(&cptpf->afpf_mbox, cptpf->pdev); + if (ret) { + dev_warn(dev, + "AF not responding to mailbox, deferring probe\n"); + cptpf_disable_afpf_mbox_intr(cptpf); + return -EPROBE_DEFER; + } + return 0; +} + +static int cptpf_afpf_mbox_init(struct otx2_cptpf_dev *cptpf) +{ + int err; + + cptpf->afpf_mbox_wq = alloc_workqueue("cpt_afpf_mailbox", + WQ_UNBOUND | WQ_HIGHPRI | + WQ_MEM_RECLAIM, 1); + if (!cptpf->afpf_mbox_wq) + return -ENOMEM; + + err = otx2_mbox_init(&cptpf->afpf_mbox, cptpf->afpf_mbox_base, + cptpf->pdev, cptpf->reg_base, MBOX_DIR_PFAF, 1); + if (err) + goto error; + + INIT_WORK(&cptpf->afpf_mbox_work, otx2_cptpf_afpf_mbox_handler); + return 0; + +error: + destroy_workqueue(cptpf->afpf_mbox_wq); + return err; +} + +static void cptpf_afpf_mbox_destroy(struct otx2_cptpf_dev *cptpf) +{ + destroy_workqueue(cptpf->afpf_mbox_wq); + otx2_mbox_destroy(&cptpf->afpf_mbox); +} + static int cpt_is_pf_usable(struct otx2_cptpf_dev *cptpf) { u64 rev; @@ -33,6 +102,7 @@ static int otx2_cptpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct device *dev = &pdev->dev; + resource_size_t offset, size; struct otx2_cptpf_dev *cptpf; int err; @@ -69,8 +139,35 @@ static int otx2_cptpf_probe(struct pci_dev *pdev, if (err) goto clear_drvdata; + offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM); + size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM); + /* Map AF-PF mailbox memory */ + cptpf->afpf_mbox_base = devm_ioremap_wc(dev, offset, size); + if (!cptpf->afpf_mbox_base) { + dev_err(&pdev->dev, "Unable to map BAR4\n"); + err = -ENODEV; + goto clear_drvdata; + } + err = pci_alloc_irq_vectors(pdev, RVU_PF_INT_VEC_CNT, + RVU_PF_INT_VEC_CNT, PCI_IRQ_MSIX); + if (err < 0) { + dev_err(dev, "Request for %d msix vectors failed\n", + RVU_PF_INT_VEC_CNT); + goto clear_drvdata; + } + /* Initialize AF-PF mailbox */ + err = cptpf_afpf_mbox_init(cptpf); + if (err) + goto clear_drvdata; + /* Register mailbox interrupt */ + err = cptpf_register_afpf_mbox_intr(cptpf); + if (err) + goto destroy_afpf_mbox; + return 0; +destroy_afpf_mbox: + cptpf_afpf_mbox_destroy(cptpf); clear_drvdata: pci_set_drvdata(pdev, NULL); return err; @@ -82,7 +179,10 @@ static void otx2_cptpf_remove(struct pci_dev *pdev) if (!cptpf) return; - + /* Disable AF-PF mailbox interrupt */ + cptpf_disable_afpf_mbox_intr(cptpf); + /* Destroy AF-PF mbox */ + cptpf_afpf_mbox_destroy(cptpf); pci_set_drvdata(pdev, NULL); } diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c new file mode 100644 index 000000000000..0a8bd46b5686 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include "otx2_cpt_common.h" +#include "otx2_cptpf.h" +#include "rvu_reg.h" + +irqreturn_t otx2_cptpf_afpf_mbox_intr(int __always_unused irq, void *arg) +{ + struct otx2_cptpf_dev *cptpf = arg; + u64 intr; + + /* Read the interrupt bits */ + intr = otx2_cpt_read64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_INT); + + if (intr & 0x1ULL) { + /* Schedule work queue function to process the MBOX request */ + queue_work(cptpf->afpf_mbox_wq, &cptpf->afpf_mbox_work); + /* Clear and ack the interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_INT, + 0x1ULL); + } + return IRQ_HANDLED; +} + +static void process_afpf_mbox_msg(struct otx2_cptpf_dev *cptpf, + struct mbox_msghdr *msg) +{ + struct device *dev = &cptpf->pdev->dev; + + if (msg->id >= MBOX_MSG_MAX) { + dev_err(dev, "MBOX msg with unknown ID %d\n", msg->id); + return; + } + if (msg->sig != OTX2_MBOX_RSP_SIG) { + dev_err(dev, "MBOX msg with wrong signature %x, ID %d\n", + msg->sig, msg->id); + return; + } + + switch (msg->id) { + case MBOX_MSG_READY: + cptpf->pf_id = (msg->pcifunc >> RVU_PFVF_PF_SHIFT) & + RVU_PFVF_PF_MASK; + break; + default: + dev_err(dev, + "Unsupported msg %d received.\n", msg->id); + break; + } +} + +/* Handle mailbox messages received from AF */ +void otx2_cptpf_afpf_mbox_handler(struct work_struct *work) +{ + struct otx2_cptpf_dev *cptpf; + struct otx2_mbox *afpf_mbox; + struct otx2_mbox_dev *mdev; + struct mbox_hdr *rsp_hdr; + struct mbox_msghdr *msg; + int offset, i; + + cptpf = container_of(work, struct otx2_cptpf_dev, afpf_mbox_work); + afpf_mbox = &cptpf->afpf_mbox; + mdev = &afpf_mbox->dev[0]; + /* Sync mbox data into memory */ + smp_wmb(); + + rsp_hdr = (struct mbox_hdr *)(mdev->mbase + afpf_mbox->rx_start); + offset = ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + + for (i = 0; i < rsp_hdr->num_msgs; i++) { + msg = (struct mbox_msghdr *)(mdev->mbase + afpf_mbox->rx_start + + offset); + process_afpf_mbox_msg(cptpf, msg); + offset = msg->next_msgoff; + mdev->msgs_acked++; + } + otx2_mbox_reset(afpf_mbox, 0); +} From fe16eceab0463c160a333b7df4edd707f3a24d5c Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Fri, 15 Jan 2021 19:22:21 +0530 Subject: [PATCH 091/154] crypto: octeontx2 - enable SR-IOV and mailbox communication with VF Adds 'sriov_configure' to enable/disable virtual functions (VFs). Also Initializes VF<=>PF mailbox IRQs, register handlers for processing these mailbox messages. Admin function (AF) handles resource allocation and configuration for PFs and their VFs. PFs request the AF directly, via mailboxes. Unlike PFs, VFs cannot send a mailbox request directly. A VF sends mailbox messages to its parent PF, with which it shares a mailbox region. The PF then forwards these messages to the AF. After handling the request, the AF sends a response back to the VF, through the PF. This patch adds support for this 'VF <=> PF <=> AF' mailbox communication. Signed-off-by: Suheil Chandran Signed-off-by: Lukasz Bartosik Signed-off-by: Srujana Challa Signed-off-by: Herbert Xu --- .../marvell/octeontx2/otx2_cpt_common.h | 1 + drivers/crypto/marvell/octeontx2/otx2_cptpf.h | 27 ++ .../marvell/octeontx2/otx2_cptpf_main.c | 382 ++++++++++++++++++ .../marvell/octeontx2/otx2_cptpf_mbox.c | 175 +++++++- 4 files changed, 583 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h index b677f8c7e724..277c7c7f95cf 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -14,6 +14,7 @@ #include "rvu.h" #include "mbox.h" +#define OTX2_CPT_MAX_VFS_NUM 128 #define OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs) \ (((blk) << 20) | ((slot) << 12) | (offs)) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h index 87fe4c6838e5..8a9805f89fee 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h @@ -7,19 +7,46 @@ #include "otx2_cpt_common.h" +struct otx2_cptpf_dev; +struct otx2_cptvf_info { + struct otx2_cptpf_dev *cptpf; /* PF pointer this VF belongs to */ + struct work_struct vfpf_mbox_work; + struct pci_dev *vf_dev; + int vf_id; + int intr_idx; +}; + +struct cptpf_flr_work { + struct work_struct work; + struct otx2_cptpf_dev *pf; +}; + struct otx2_cptpf_dev { void __iomem *reg_base; /* CPT PF registers start address */ void __iomem *afpf_mbox_base; /* PF-AF mbox start address */ + void __iomem *vfpf_mbox_base; /* VF-PF mbox start address */ struct pci_dev *pdev; /* PCI device handle */ + struct otx2_cptvf_info vf[OTX2_CPT_MAX_VFS_NUM]; /* AF <=> PF mbox */ struct otx2_mbox afpf_mbox; struct work_struct afpf_mbox_work; struct workqueue_struct *afpf_mbox_wq; + /* VF <=> PF mbox */ + struct otx2_mbox vfpf_mbox; + struct workqueue_struct *vfpf_mbox_wq; + + struct workqueue_struct *flr_wq; + struct cptpf_flr_work *flr_work; + u8 pf_id; /* RVU PF number */ + u8 max_vfs; /* Maximum number of VFs supported by CPT */ + u8 enabled_vfs; /* Number of enabled VFs */ }; irqreturn_t otx2_cptpf_afpf_mbox_intr(int irq, void *arg); void otx2_cptpf_afpf_mbox_handler(struct work_struct *work); +irqreturn_t otx2_cptpf_vfpf_mbox_intr(int irq, void *arg); +void otx2_cptpf_vfpf_mbox_handler(struct work_struct *work); #endif /* __OTX2_CPTPF_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c index 2f5bf02436da..224882454c2f 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -10,6 +10,318 @@ #define OTX2_CPT_DRV_NAME "octeontx2-cpt" #define OTX2_CPT_DRV_STRING "Marvell OcteonTX2 CPT Physical Function Driver" +static void cptpf_enable_vfpf_mbox_intr(struct otx2_cptpf_dev *cptpf, + int num_vfs) +{ + int ena_bits; + + /* Clear any pending interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INTX(0), ~0x0ULL); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INTX(1), ~0x0ULL); + + /* Enable VF interrupts for VFs from 0 to 63 */ + ena_bits = ((num_vfs - 1) % 64); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INT_ENA_W1SX(0), + GENMASK_ULL(ena_bits, 0)); + + if (num_vfs > 64) { + /* Enable VF interrupts for VFs from 64 to 127 */ + ena_bits = num_vfs - 64 - 1; + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INT_ENA_W1SX(1), + GENMASK_ULL(ena_bits, 0)); + } +} + +static void cptpf_disable_vfpf_mbox_intr(struct otx2_cptpf_dev *cptpf, + int num_vfs) +{ + int vector; + + /* Disable VF-PF interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INT_ENA_W1CX(0), ~0ULL); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INT_ENA_W1CX(1), ~0ULL); + /* Clear any pending interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INTX(0), ~0ULL); + + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFPF_MBOX0); + free_irq(vector, cptpf); + + if (num_vfs > 64) { + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INTX(1), ~0ULL); + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFPF_MBOX1); + free_irq(vector, cptpf); + } +} + +static void cptpf_enable_vf_flr_intrs(struct otx2_cptpf_dev *cptpf) +{ + /* Clear interrupt if any */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(0), + ~0x0ULL); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(1), + ~0x0ULL); + + /* Enable VF FLR interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1SX(0), ~0x0ULL); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1SX(1), ~0x0ULL); +} + +static void cptpf_disable_vf_flr_intrs(struct otx2_cptpf_dev *cptpf, + int num_vfs) +{ + int vector; + + /* Disable VF FLR interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1CX(0), ~0x0ULL); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1CX(1), ~0x0ULL); + + /* Clear interrupt if any */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(0), + ~0x0ULL); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(1), + ~0x0ULL); + + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR0); + free_irq(vector, cptpf); + + if (num_vfs > 64) { + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR1); + free_irq(vector, cptpf); + } +} + +static void cptpf_flr_wq_handler(struct work_struct *work) +{ + struct cptpf_flr_work *flr_work; + struct otx2_cptpf_dev *pf; + struct mbox_msghdr *req; + struct otx2_mbox *mbox; + int vf, reg = 0; + + flr_work = container_of(work, struct cptpf_flr_work, work); + pf = flr_work->pf; + mbox = &pf->afpf_mbox; + + vf = flr_work - pf->flr_work; + + req = otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct msg_rsp)); + if (!req) + return; + + req->sig = OTX2_MBOX_REQ_SIG; + req->id = MBOX_MSG_VF_FLR; + req->pcifunc &= RVU_PFVF_FUNC_MASK; + req->pcifunc |= (vf + 1) & RVU_PFVF_FUNC_MASK; + + otx2_cpt_send_mbox_msg(mbox, pf->pdev); + + if (vf >= 64) { + reg = 1; + vf = vf - 64; + } + /* Clear transaction pending register */ + otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFTRPENDX(reg), BIT_ULL(vf)); + otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1SX(reg), BIT_ULL(vf)); +} + +static irqreturn_t cptpf_vf_flr_intr(int __always_unused irq, void *arg) +{ + int reg, dev, vf, start_vf, num_reg = 1; + struct otx2_cptpf_dev *cptpf = arg; + u64 intr; + + if (cptpf->max_vfs > 64) + num_reg = 2; + + for (reg = 0; reg < num_reg; reg++) { + intr = otx2_cpt_read64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INTX(reg)); + if (!intr) + continue; + start_vf = 64 * reg; + for (vf = 0; vf < 64; vf++) { + if (!(intr & BIT_ULL(vf))) + continue; + dev = vf + start_vf; + queue_work(cptpf->flr_wq, &cptpf->flr_work[dev].work); + /* Clear interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INTX(reg), BIT_ULL(vf)); + /* Disable the interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1CX(reg), + BIT_ULL(vf)); + } + } + return IRQ_HANDLED; +} + +static void cptpf_unregister_vfpf_intr(struct otx2_cptpf_dev *cptpf, + int num_vfs) +{ + cptpf_disable_vfpf_mbox_intr(cptpf, num_vfs); + cptpf_disable_vf_flr_intrs(cptpf, num_vfs); +} + +static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs) +{ + struct pci_dev *pdev = cptpf->pdev; + struct device *dev = &pdev->dev; + int ret, vector; + + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX0); + /* Register VF-PF mailbox interrupt handler */ + ret = request_irq(vector, otx2_cptpf_vfpf_mbox_intr, 0, "CPTVFPF Mbox0", + cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for PFVF mbox0 irq\n"); + return ret; + } + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR0); + /* Register VF FLR interrupt handler */ + ret = request_irq(vector, cptpf_vf_flr_intr, 0, "CPTPF FLR0", cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for VFFLR0 irq\n"); + goto free_mbox0_irq; + } + if (num_vfs > 64) { + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX1); + ret = request_irq(vector, otx2_cptpf_vfpf_mbox_intr, 0, + "CPTVFPF Mbox1", cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for PFVF mbox1 irq\n"); + goto free_flr0_irq; + } + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR1); + /* Register VF FLR interrupt handler */ + ret = request_irq(vector, cptpf_vf_flr_intr, 0, "CPTPF FLR1", + cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for VFFLR1 irq\n"); + goto free_mbox1_irq; + } + } + cptpf_enable_vfpf_mbox_intr(cptpf, num_vfs); + cptpf_enable_vf_flr_intrs(cptpf); + + return 0; + +free_mbox1_irq: + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX1); + free_irq(vector, cptpf); +free_flr0_irq: + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR0); + free_irq(vector, cptpf); +free_mbox0_irq: + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX0); + free_irq(vector, cptpf); + return ret; +} + +static void cptpf_flr_wq_destroy(struct otx2_cptpf_dev *pf) +{ + if (!pf->flr_wq) + return; + destroy_workqueue(pf->flr_wq); + pf->flr_wq = NULL; + kfree(pf->flr_work); +} + +static int cptpf_flr_wq_init(struct otx2_cptpf_dev *cptpf, int num_vfs) +{ + int vf; + + cptpf->flr_wq = alloc_ordered_workqueue("cptpf_flr_wq", 0); + if (!cptpf->flr_wq) + return -ENOMEM; + + cptpf->flr_work = kcalloc(num_vfs, sizeof(struct cptpf_flr_work), + GFP_KERNEL); + if (!cptpf->flr_work) + goto destroy_wq; + + for (vf = 0; vf < num_vfs; vf++) { + cptpf->flr_work[vf].pf = cptpf; + INIT_WORK(&cptpf->flr_work[vf].work, cptpf_flr_wq_handler); + } + return 0; + +destroy_wq: + destroy_workqueue(cptpf->flr_wq); + return -ENOMEM; +} + +static int cptpf_vfpf_mbox_init(struct otx2_cptpf_dev *cptpf, int num_vfs) +{ + struct device *dev = &cptpf->pdev->dev; + u64 vfpf_mbox_base; + int err, i; + + cptpf->vfpf_mbox_wq = alloc_workqueue("cpt_vfpf_mailbox", + WQ_UNBOUND | WQ_HIGHPRI | + WQ_MEM_RECLAIM, 1); + if (!cptpf->vfpf_mbox_wq) + return -ENOMEM; + + /* Map VF-PF mailbox memory */ + vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_BAR4_ADDR); + if (!vfpf_mbox_base) { + dev_err(dev, "VF-PF mailbox address not configured\n"); + err = -ENOMEM; + goto free_wqe; + } + cptpf->vfpf_mbox_base = devm_ioremap_wc(dev, vfpf_mbox_base, + MBOX_SIZE * cptpf->max_vfs); + if (!cptpf->vfpf_mbox_base) { + dev_err(dev, "Mapping of VF-PF mailbox address failed\n"); + err = -ENOMEM; + goto free_wqe; + } + err = otx2_mbox_init(&cptpf->vfpf_mbox, cptpf->vfpf_mbox_base, + cptpf->pdev, cptpf->reg_base, MBOX_DIR_PFVF, + num_vfs); + if (err) + goto free_wqe; + + for (i = 0; i < num_vfs; i++) { + cptpf->vf[i].vf_id = i; + cptpf->vf[i].cptpf = cptpf; + cptpf->vf[i].intr_idx = i % 64; + INIT_WORK(&cptpf->vf[i].vfpf_mbox_work, + otx2_cptpf_vfpf_mbox_handler); + } + return 0; + +free_wqe: + destroy_workqueue(cptpf->vfpf_mbox_wq); + return err; +} + +static void cptpf_vfpf_mbox_destroy(struct otx2_cptpf_dev *cptpf) +{ + destroy_workqueue(cptpf->vfpf_mbox_wq); + otx2_mbox_destroy(&cptpf->vfpf_mbox); +} + static void cptpf_disable_afpf_mbox_intr(struct otx2_cptpf_dev *cptpf) { /* Disable AF-PF interrupt */ @@ -98,6 +410,71 @@ static int cpt_is_pf_usable(struct otx2_cptpf_dev *cptpf) return 0; } +static int cptpf_sriov_disable(struct pci_dev *pdev) +{ + struct otx2_cptpf_dev *cptpf = pci_get_drvdata(pdev); + int num_vfs = pci_num_vf(pdev); + + if (!num_vfs) + return 0; + + pci_disable_sriov(pdev); + cptpf_unregister_vfpf_intr(cptpf, num_vfs); + cptpf_flr_wq_destroy(cptpf); + cptpf_vfpf_mbox_destroy(cptpf); + module_put(THIS_MODULE); + cptpf->enabled_vfs = 0; + + return 0; +} + +static int cptpf_sriov_enable(struct pci_dev *pdev, int num_vfs) +{ + struct otx2_cptpf_dev *cptpf = pci_get_drvdata(pdev); + int ret; + + /* Initialize VF<=>PF mailbox */ + ret = cptpf_vfpf_mbox_init(cptpf, num_vfs); + if (ret) + return ret; + + ret = cptpf_flr_wq_init(cptpf, num_vfs); + if (ret) + goto destroy_mbox; + /* Register VF<=>PF mailbox interrupt */ + ret = cptpf_register_vfpf_intr(cptpf, num_vfs); + if (ret) + goto destroy_flr; + + cptpf->enabled_vfs = num_vfs; + ret = pci_enable_sriov(pdev, num_vfs); + if (ret) + goto disable_intr; + + dev_notice(&cptpf->pdev->dev, "VFs enabled: %d\n", num_vfs); + + try_module_get(THIS_MODULE); + return num_vfs; + +disable_intr: + cptpf_unregister_vfpf_intr(cptpf, num_vfs); + cptpf->enabled_vfs = 0; +destroy_flr: + cptpf_flr_wq_destroy(cptpf); +destroy_mbox: + cptpf_vfpf_mbox_destroy(cptpf); + return ret; +} + +static int otx2_cptpf_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + if (num_vfs > 0) { + return cptpf_sriov_enable(pdev, num_vfs); + } else { + return cptpf_sriov_disable(pdev); + } +} + static int otx2_cptpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -164,6 +541,8 @@ static int otx2_cptpf_probe(struct pci_dev *pdev, if (err) goto destroy_afpf_mbox; + cptpf->max_vfs = pci_sriov_get_totalvfs(pdev); + return 0; destroy_afpf_mbox: @@ -179,6 +558,8 @@ static void otx2_cptpf_remove(struct pci_dev *pdev) if (!cptpf) return; + + cptpf_sriov_disable(pdev); /* Disable AF-PF mailbox interrupt */ cptpf_disable_afpf_mbox_intr(cptpf); /* Destroy AF-PF mbox */ @@ -197,6 +578,7 @@ static struct pci_driver otx2_cpt_pci_driver = { .id_table = otx2_cpt_id_table, .probe = otx2_cptpf_probe, .remove = otx2_cptpf_remove, + .sriov_configure = otx2_cptpf_sriov_configure }; module_pci_driver(otx2_cpt_pci_driver); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c index 0a8bd46b5686..1d97f7202130 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c @@ -5,6 +5,127 @@ #include "otx2_cptpf.h" #include "rvu_reg.h" +static int forward_to_af(struct otx2_cptpf_dev *cptpf, + struct otx2_cptvf_info *vf, + struct mbox_msghdr *req, int size) +{ + struct mbox_msghdr *msg; + int ret; + + msg = otx2_mbox_alloc_msg(&cptpf->afpf_mbox, 0, size); + if (msg == NULL) + return -ENOMEM; + + memcpy((uint8_t *)msg + sizeof(struct mbox_msghdr), + (uint8_t *)req + sizeof(struct mbox_msghdr), size); + msg->id = req->id; + msg->pcifunc = req->pcifunc; + msg->sig = req->sig; + msg->ver = req->ver; + + otx2_mbox_msg_send(&cptpf->afpf_mbox, 0); + ret = otx2_mbox_wait_for_rsp(&cptpf->afpf_mbox, 0); + if (ret == -EIO) { + dev_err(&cptpf->pdev->dev, "RVU MBOX timeout.\n"); + return ret; + } else if (ret) { + dev_err(&cptpf->pdev->dev, "RVU MBOX error: %d.\n", ret); + return -EFAULT; + } + return 0; +} + +static int cptpf_handle_vf_req(struct otx2_cptpf_dev *cptpf, + struct otx2_cptvf_info *vf, + struct mbox_msghdr *req, int size) +{ + int err = 0; + + /* Check if msg is valid, if not reply with an invalid msg */ + if (req->sig != OTX2_MBOX_REQ_SIG) + goto inval_msg; + + return forward_to_af(cptpf, vf, req, size); + +inval_msg: + otx2_reply_invalid_msg(&cptpf->vfpf_mbox, vf->vf_id, 0, req->id); + otx2_mbox_msg_send(&cptpf->vfpf_mbox, vf->vf_id); + return err; +} + +irqreturn_t otx2_cptpf_vfpf_mbox_intr(int __always_unused irq, void *arg) +{ + struct otx2_cptpf_dev *cptpf = arg; + struct otx2_cptvf_info *vf; + int i, vf_idx; + u64 intr; + + /* + * Check which VF has raised an interrupt and schedule + * corresponding work queue to process the messages + */ + for (i = 0; i < 2; i++) { + /* Read the interrupt bits */ + intr = otx2_cpt_read64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INTX(i)); + + for (vf_idx = i * 64; vf_idx < cptpf->enabled_vfs; vf_idx++) { + vf = &cptpf->vf[vf_idx]; + if (intr & (1ULL << vf->intr_idx)) { + queue_work(cptpf->vfpf_mbox_wq, + &vf->vfpf_mbox_work); + /* Clear the interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, + 0, RVU_PF_VFPF_MBOX_INTX(i), + BIT_ULL(vf->intr_idx)); + } + } + } + return IRQ_HANDLED; +} + +void otx2_cptpf_vfpf_mbox_handler(struct work_struct *work) +{ + struct otx2_cptpf_dev *cptpf; + struct otx2_cptvf_info *vf; + struct otx2_mbox_dev *mdev; + struct mbox_hdr *req_hdr; + struct mbox_msghdr *msg; + struct otx2_mbox *mbox; + int offset, i, err; + + vf = container_of(work, struct otx2_cptvf_info, vfpf_mbox_work); + cptpf = vf->cptpf; + mbox = &cptpf->vfpf_mbox; + /* sync with mbox memory region */ + smp_rmb(); + mdev = &mbox->dev[vf->vf_id]; + /* Process received mbox messages */ + req_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + offset = mbox->rx_start + ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN); + + for (i = 0; i < req_hdr->num_msgs; i++) { + msg = (struct mbox_msghdr *)(mdev->mbase + offset); + + /* Set which VF sent this message based on mbox IRQ */ + msg->pcifunc = ((u16)cptpf->pf_id << RVU_PFVF_PF_SHIFT) | + ((vf->vf_id + 1) & RVU_PFVF_FUNC_MASK); + + err = cptpf_handle_vf_req(cptpf, vf, msg, + msg->next_msgoff - offset); + /* + * Behave as the AF, drop the msg if there is + * no memory, timeout handling also goes here + */ + if (err == -ENOMEM || err == -EIO) + break; + offset = msg->next_msgoff; + } + /* Send mbox responses to VF */ + if (mdev->num_msgs) + otx2_mbox_msg_send(mbox, vf->vf_id); +} + irqreturn_t otx2_cptpf_afpf_mbox_intr(int __always_unused irq, void *arg) { struct otx2_cptpf_dev *cptpf = arg; @@ -50,6 +171,49 @@ static void process_afpf_mbox_msg(struct otx2_cptpf_dev *cptpf, } } +static void forward_to_vf(struct otx2_cptpf_dev *cptpf, struct mbox_msghdr *msg, + int vf_id, int size) +{ + struct otx2_mbox *vfpf_mbox; + struct mbox_msghdr *fwd; + + if (msg->id >= MBOX_MSG_MAX) { + dev_err(&cptpf->pdev->dev, + "MBOX msg with unknown ID %d\n", msg->id); + return; + } + if (msg->sig != OTX2_MBOX_RSP_SIG) { + dev_err(&cptpf->pdev->dev, + "MBOX msg with wrong signature %x, ID %d\n", + msg->sig, msg->id); + return; + } + vfpf_mbox = &cptpf->vfpf_mbox; + vf_id--; + if (vf_id >= cptpf->enabled_vfs) { + dev_err(&cptpf->pdev->dev, + "MBOX msg to unknown VF: %d >= %d\n", + vf_id, cptpf->enabled_vfs); + return; + } + if (msg->id == MBOX_MSG_VF_FLR) + return; + + fwd = otx2_mbox_alloc_msg(vfpf_mbox, vf_id, size); + if (!fwd) { + dev_err(&cptpf->pdev->dev, + "Forwarding to VF%d failed.\n", vf_id); + return; + } + memcpy((uint8_t *)fwd + sizeof(struct mbox_msghdr), + (uint8_t *)msg + sizeof(struct mbox_msghdr), size); + fwd->id = msg->id; + fwd->pcifunc = msg->pcifunc; + fwd->sig = msg->sig; + fwd->ver = msg->ver; + fwd->rc = msg->rc; +} + /* Handle mailbox messages received from AF */ void otx2_cptpf_afpf_mbox_handler(struct work_struct *work) { @@ -58,7 +222,7 @@ void otx2_cptpf_afpf_mbox_handler(struct work_struct *work) struct otx2_mbox_dev *mdev; struct mbox_hdr *rsp_hdr; struct mbox_msghdr *msg; - int offset, i; + int offset, vf_id, i; cptpf = container_of(work, struct otx2_cptpf_dev, afpf_mbox_work); afpf_mbox = &cptpf->afpf_mbox; @@ -72,7 +236,14 @@ void otx2_cptpf_afpf_mbox_handler(struct work_struct *work) for (i = 0; i < rsp_hdr->num_msgs; i++) { msg = (struct mbox_msghdr *)(mdev->mbase + afpf_mbox->rx_start + offset); - process_afpf_mbox_msg(cptpf, msg); + vf_id = (msg->pcifunc >> RVU_PFVF_FUNC_SHIFT) & + RVU_PFVF_FUNC_MASK; + if (vf_id > 0) + forward_to_vf(cptpf, msg, vf_id, + msg->next_msgoff - offset); + else + process_afpf_mbox_msg(cptpf, msg); + offset = msg->next_msgoff; mdev->msgs_acked++; } From 43ac0b824f1cb7c63c5fe98ea2b80ec480412601 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Fri, 15 Jan 2021 19:22:22 +0530 Subject: [PATCH 092/154] crypto: octeontx2 - load microcode and create engine groups CPT includes microcoded GigaCypher symmetric engines(SEs), IPsec symmetric engines(IEs), and asymmetric engines (AEs). Each engine receives CPT instructions from the engine groups it has subscribed to. This patch loads microcode, configures three engine groups(one for SEs, one for IEs and one for AEs), and configures all engines. Signed-off-by: Suheil Chandran Signed-off-by: Lukasz Bartosik Signed-off-by: Srujana Challa Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/Makefile | 2 +- .../marvell/octeontx2/otx2_cpt_common.h | 42 + .../marvell/octeontx2/otx2_cpt_mbox_common.c | 77 + drivers/crypto/marvell/octeontx2/otx2_cptpf.h | 3 + .../marvell/octeontx2/otx2_cptpf_main.c | 72 + .../marvell/octeontx2/otx2_cptpf_mbox.c | 46 +- .../marvell/octeontx2/otx2_cptpf_ucode.c | 1254 +++++++++++++++++ .../marvell/octeontx2/otx2_cptpf_ucode.h | 161 +++ 8 files changed, 1655 insertions(+), 2 deletions(-) create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile index 8c8262e94f78..3c4155446296 100644 --- a/drivers/crypto/marvell/octeontx2/Makefile +++ b/drivers/crypto/marvell/octeontx2/Makefile @@ -2,6 +2,6 @@ obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o octeontx2-cpt-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ - otx2_cpt_mbox_common.o + otx2_cpt_mbox_common.o otx2_cptpf_ucode.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h index 277c7c7f95cf..ae16dc102459 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -18,6 +18,37 @@ #define OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs) \ (((blk) << 20) | ((slot) << 12) | (offs)) +#define OTX2_CPT_INVALID_CRYPTO_ENG_GRP 0xFF +#define OTX2_CPT_NAME_LENGTH 64 + +#define BAD_OTX2_CPT_ENG_TYPE OTX2_CPT_MAX_ENG_TYPES + +enum otx2_cpt_eng_type { + OTX2_CPT_AE_TYPES = 1, + OTX2_CPT_SE_TYPES = 2, + OTX2_CPT_IE_TYPES = 3, + OTX2_CPT_MAX_ENG_TYPES, +}; + +/* Take mbox id from end of CPT mbox range in AF (range 0xA00 - 0xBFF) */ +#define MBOX_MSG_GET_ENG_GRP_NUM 0xBFF + +/* + * Message request and response to get engine group number + * which has attached a given type of engines (SE, AE, IE) + * This messages are only used between CPT PF <=> CPT VF + */ +struct otx2_cpt_egrp_num_msg { + struct mbox_msghdr hdr; + u8 eng_type; +}; + +struct otx2_cpt_egrp_num_rsp { + struct mbox_msghdr hdr; + u8 eng_type; + u8 eng_grp_num; +}; + static inline void otx2_cpt_write64(void __iomem *reg_base, u64 blk, u64 slot, u64 offs, u64 val) { @@ -34,4 +65,15 @@ static inline u64 otx2_cpt_read64(void __iomem *reg_base, u64 blk, u64 slot, int otx2_cpt_send_ready_msg(struct otx2_mbox *mbox, struct pci_dev *pdev); int otx2_cpt_send_mbox_msg(struct otx2_mbox *mbox, struct pci_dev *pdev); + +int otx2_cpt_send_af_reg_requests(struct otx2_mbox *mbox, + struct pci_dev *pdev); +int otx2_cpt_add_read_af_reg(struct otx2_mbox *mbox, + struct pci_dev *pdev, u64 reg, u64 *val); +int otx2_cpt_add_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, + u64 reg, u64 val); +int otx2_cpt_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, + u64 reg, u64 *val); +int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, + u64 reg, u64 val); #endif /* __OTX2_CPT_COMMON_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c index a122483b5976..ef1291c4881b 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c @@ -35,3 +35,80 @@ int otx2_cpt_send_ready_msg(struct otx2_mbox *mbox, struct pci_dev *pdev) return otx2_cpt_send_mbox_msg(mbox, pdev); } + +int otx2_cpt_send_af_reg_requests(struct otx2_mbox *mbox, struct pci_dev *pdev) +{ + return otx2_cpt_send_mbox_msg(mbox, pdev); +} + +int otx2_cpt_add_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, + u64 reg, u64 *val) +{ + struct cpt_rd_wr_reg_msg *reg_msg; + + reg_msg = (struct cpt_rd_wr_reg_msg *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*reg_msg), + sizeof(*reg_msg)); + if (reg_msg == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + + reg_msg->hdr.id = MBOX_MSG_CPT_RD_WR_REGISTER; + reg_msg->hdr.sig = OTX2_MBOX_REQ_SIG; + reg_msg->hdr.pcifunc = 0; + + reg_msg->is_write = 0; + reg_msg->reg_offset = reg; + reg_msg->ret_val = val; + + return 0; +} + +int otx2_cpt_add_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, + u64 reg, u64 val) +{ + struct cpt_rd_wr_reg_msg *reg_msg; + + reg_msg = (struct cpt_rd_wr_reg_msg *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*reg_msg), + sizeof(*reg_msg)); + if (reg_msg == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + + reg_msg->hdr.id = MBOX_MSG_CPT_RD_WR_REGISTER; + reg_msg->hdr.sig = OTX2_MBOX_REQ_SIG; + reg_msg->hdr.pcifunc = 0; + + reg_msg->is_write = 1; + reg_msg->reg_offset = reg; + reg_msg->val = val; + + return 0; +} + +int otx2_cpt_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, + u64 reg, u64 *val) +{ + int ret; + + ret = otx2_cpt_add_read_af_reg(mbox, pdev, reg, val); + if (ret) + return ret; + + return otx2_cpt_send_mbox_msg(mbox, pdev); +} + +int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, + u64 reg, u64 val) +{ + int ret; + + ret = otx2_cpt_add_write_af_reg(mbox, pdev, reg, val); + if (ret) + return ret; + + return otx2_cpt_send_mbox_msg(mbox, pdev); +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h index 8a9805f89fee..ac9795978286 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h @@ -6,6 +6,7 @@ #define __OTX2_CPTPF_H #include "otx2_cpt_common.h" +#include "otx2_cptpf_ucode.h" struct otx2_cptpf_dev; struct otx2_cptvf_info { @@ -27,6 +28,8 @@ struct otx2_cptpf_dev { void __iomem *vfpf_mbox_base; /* VF-PF mbox start address */ struct pci_dev *pdev; /* PCI device handle */ struct otx2_cptvf_info vf[OTX2_CPT_MAX_VFS_NUM]; + struct otx2_cpt_eng_grps eng_grps;/* Engine groups information */ + /* AF <=> PF mbox */ struct otx2_mbox afpf_mbox; struct work_struct afpf_mbox_work; diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c index 224882454c2f..f0ad45c04df7 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -4,6 +4,7 @@ #include #include "otx2_cpt_hw_types.h" #include "otx2_cpt_common.h" +#include "otx2_cptpf_ucode.h" #include "otx2_cptpf.h" #include "rvu_reg.h" @@ -410,6 +411,59 @@ static int cpt_is_pf_usable(struct otx2_cptpf_dev *cptpf) return 0; } +static int cptpf_device_reset(struct otx2_cptpf_dev *cptpf) +{ + int timeout = 10, ret; + u64 reg = 0; + + ret = otx2_cpt_write_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_BLK_RST, 0x1); + if (ret) + return ret; + + do { + ret = otx2_cpt_read_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_BLK_RST, ®); + if (ret) + return ret; + + if (!((reg >> 63) & 0x1)) + break; + + usleep_range(10000, 20000); + if (timeout-- < 0) + return -EBUSY; + } while (1); + + return ret; +} + +static int cptpf_device_init(struct otx2_cptpf_dev *cptpf) +{ + union otx2_cptx_af_constants1 af_cnsts1 = {0}; + int ret = 0; + + /* Reset the CPT PF device */ + ret = cptpf_device_reset(cptpf); + if (ret) + return ret; + + /* Get number of SE, IE and AE engines */ + ret = otx2_cpt_read_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_CONSTANTS1, &af_cnsts1.u); + if (ret) + return ret; + + cptpf->eng_grps.avail.max_se_cnt = af_cnsts1.s.se; + cptpf->eng_grps.avail.max_ie_cnt = af_cnsts1.s.ie; + cptpf->eng_grps.avail.max_ae_cnt = af_cnsts1.s.ae; + + /* Disable all cores */ + ret = otx2_cpt_disable_all_cores(cptpf); + + return ret; +} + static int cptpf_sriov_disable(struct pci_dev *pdev) { struct otx2_cptpf_dev *cptpf = pci_get_drvdata(pdev); @@ -446,6 +500,10 @@ static int cptpf_sriov_enable(struct pci_dev *pdev, int num_vfs) if (ret) goto destroy_flr; + ret = otx2_cpt_create_eng_grps(cptpf->pdev, &cptpf->eng_grps); + if (ret) + goto disable_intr; + cptpf->enabled_vfs = num_vfs; ret = pci_enable_sriov(pdev, num_vfs); if (ret) @@ -543,8 +601,20 @@ static int otx2_cptpf_probe(struct pci_dev *pdev, cptpf->max_vfs = pci_sriov_get_totalvfs(pdev); + /* Initialize CPT PF device */ + err = cptpf_device_init(cptpf); + if (err) + goto unregister_intr; + + /* Initialize engine groups */ + err = otx2_cpt_init_eng_grps(pdev, &cptpf->eng_grps); + if (err) + goto unregister_intr; + return 0; +unregister_intr: + cptpf_disable_afpf_mbox_intr(cptpf); destroy_afpf_mbox: cptpf_afpf_mbox_destroy(cptpf); clear_drvdata: @@ -560,6 +630,8 @@ static void otx2_cptpf_remove(struct pci_dev *pdev) return; cptpf_sriov_disable(pdev); + /* Cleanup engine groups */ + otx2_cpt_cleanup_eng_grps(pdev, &cptpf->eng_grps); /* Disable AF-PF mailbox interrupt */ cptpf_disable_afpf_mbox_intr(cptpf); /* Destroy AF-PF mbox */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c index 1d97f7202130..08e18fe6817b 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c @@ -35,6 +35,29 @@ static int forward_to_af(struct otx2_cptpf_dev *cptpf, return 0; } +static int handle_msg_get_eng_grp_num(struct otx2_cptpf_dev *cptpf, + struct otx2_cptvf_info *vf, + struct mbox_msghdr *req) +{ + struct otx2_cpt_egrp_num_msg *grp_req; + struct otx2_cpt_egrp_num_rsp *rsp; + + grp_req = (struct otx2_cpt_egrp_num_msg *)req; + rsp = (struct otx2_cpt_egrp_num_rsp *) + otx2_mbox_alloc_msg(&cptpf->vfpf_mbox, vf->vf_id, sizeof(*rsp)); + if (!rsp) + return -ENOMEM; + + rsp->hdr.id = MBOX_MSG_GET_ENG_GRP_NUM; + rsp->hdr.sig = OTX2_MBOX_RSP_SIG; + rsp->hdr.pcifunc = req->pcifunc; + rsp->eng_type = grp_req->eng_type; + rsp->eng_grp_num = otx2_cpt_get_eng_grp(&cptpf->eng_grps, + grp_req->eng_type); + + return 0; +} + static int cptpf_handle_vf_req(struct otx2_cptpf_dev *cptpf, struct otx2_cptvf_info *vf, struct mbox_msghdr *req, int size) @@ -45,7 +68,15 @@ static int cptpf_handle_vf_req(struct otx2_cptpf_dev *cptpf, if (req->sig != OTX2_MBOX_REQ_SIG) goto inval_msg; - return forward_to_af(cptpf, vf, req, size); + switch (req->id) { + case MBOX_MSG_GET_ENG_GRP_NUM: + err = handle_msg_get_eng_grp_num(cptpf, vf, req); + break; + default: + err = forward_to_af(cptpf, vf, req, size); + break; + } + return err; inval_msg: otx2_reply_invalid_msg(&cptpf->vfpf_mbox, vf->vf_id, 0, req->id); @@ -148,6 +179,7 @@ static void process_afpf_mbox_msg(struct otx2_cptpf_dev *cptpf, struct mbox_msghdr *msg) { struct device *dev = &cptpf->pdev->dev; + struct cpt_rd_wr_reg_msg *rsp_rd_wr; if (msg->id >= MBOX_MSG_MAX) { dev_err(dev, "MBOX msg with unknown ID %d\n", msg->id); @@ -164,6 +196,18 @@ static void process_afpf_mbox_msg(struct otx2_cptpf_dev *cptpf, cptpf->pf_id = (msg->pcifunc >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK; break; + case MBOX_MSG_CPT_RD_WR_REGISTER: + rsp_rd_wr = (struct cpt_rd_wr_reg_msg *)msg; + if (msg->rc) { + dev_err(dev, "Reg %llx rd/wr(%d) failed %d\n", + rsp_rd_wr->reg_offset, rsp_rd_wr->is_write, + msg->rc); + return; + } + if (!rsp_rd_wr->is_write) + *rsp_rd_wr->ret_val = rsp_rd_wr->val; + break; + default: dev_err(dev, "Unsupported msg %d received.\n", msg->id); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c new file mode 100644 index 000000000000..64f0e7e2652a --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c @@ -0,0 +1,1254 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include +#include +#include "otx2_cptpf_ucode.h" +#include "otx2_cpt_common.h" +#include "otx2_cptpf.h" +#include "rvu_reg.h" + +#define CSR_DELAY 30 + +#define LOADFVC_RLEN 8 +#define LOADFVC_MAJOR_OP 0x01 +#define LOADFVC_MINOR_OP 0x08 + +struct fw_info_t { + struct list_head ucodes; +}; + +static struct otx2_cpt_bitmap get_cores_bmap(struct device *dev, + struct otx2_cpt_eng_grp_info *eng_grp) +{ + struct otx2_cpt_bitmap bmap = { {0} }; + bool found = false; + int i; + + if (eng_grp->g->engs_num > OTX2_CPT_MAX_ENGINES) { + dev_err(dev, "unsupported number of engines %d on octeontx2\n", + eng_grp->g->engs_num); + return bmap; + } + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + if (eng_grp->engs[i].type) { + bitmap_or(bmap.bits, bmap.bits, + eng_grp->engs[i].bmap, + eng_grp->g->engs_num); + bmap.size = eng_grp->g->engs_num; + found = true; + } + } + + if (!found) + dev_err(dev, "No engines reserved for engine group %d\n", + eng_grp->idx); + return bmap; +} + +static int is_eng_type(int val, int eng_type) +{ + return val & (1 << eng_type); +} + +static int is_2nd_ucode_used(struct otx2_cpt_eng_grp_info *eng_grp) +{ + if (eng_grp->ucode[1].type) + return true; + else + return false; +} + +static void set_ucode_filename(struct otx2_cpt_ucode *ucode, + const char *filename) +{ + strlcpy(ucode->filename, filename, OTX2_CPT_NAME_LENGTH); +} + +static char *get_eng_type_str(int eng_type) +{ + char *str = "unknown"; + + switch (eng_type) { + case OTX2_CPT_SE_TYPES: + str = "SE"; + break; + + case OTX2_CPT_IE_TYPES: + str = "IE"; + break; + + case OTX2_CPT_AE_TYPES: + str = "AE"; + break; + } + return str; +} + +static char *get_ucode_type_str(int ucode_type) +{ + char *str = "unknown"; + + switch (ucode_type) { + case (1 << OTX2_CPT_SE_TYPES): + str = "SE"; + break; + + case (1 << OTX2_CPT_IE_TYPES): + str = "IE"; + break; + + case (1 << OTX2_CPT_AE_TYPES): + str = "AE"; + break; + + case (1 << OTX2_CPT_SE_TYPES | 1 << OTX2_CPT_IE_TYPES): + str = "SE+IPSEC"; + break; + } + return str; +} + +static int get_ucode_type(struct device *dev, + struct otx2_cpt_ucode_hdr *ucode_hdr, + int *ucode_type) +{ + struct otx2_cptpf_dev *cptpf = dev_get_drvdata(dev); + char ver_str_prefix[OTX2_CPT_UCODE_VER_STR_SZ]; + char tmp_ver_str[OTX2_CPT_UCODE_VER_STR_SZ]; + struct pci_dev *pdev = cptpf->pdev; + int i, val = 0; + u8 nn; + + strlcpy(tmp_ver_str, ucode_hdr->ver_str, OTX2_CPT_UCODE_VER_STR_SZ); + for (i = 0; i < strlen(tmp_ver_str); i++) + tmp_ver_str[i] = tolower(tmp_ver_str[i]); + + sprintf(ver_str_prefix, "ocpt-%02d", pdev->revision); + if (!strnstr(tmp_ver_str, ver_str_prefix, OTX2_CPT_UCODE_VER_STR_SZ)) + return -EINVAL; + + nn = ucode_hdr->ver_num.nn; + if (strnstr(tmp_ver_str, "se-", OTX2_CPT_UCODE_VER_STR_SZ) && + (nn == OTX2_CPT_SE_UC_TYPE1 || nn == OTX2_CPT_SE_UC_TYPE2 || + nn == OTX2_CPT_SE_UC_TYPE3)) + val |= 1 << OTX2_CPT_SE_TYPES; + if (strnstr(tmp_ver_str, "ie-", OTX2_CPT_UCODE_VER_STR_SZ) && + (nn == OTX2_CPT_IE_UC_TYPE1 || nn == OTX2_CPT_IE_UC_TYPE2 || + nn == OTX2_CPT_IE_UC_TYPE3)) + val |= 1 << OTX2_CPT_IE_TYPES; + if (strnstr(tmp_ver_str, "ae", OTX2_CPT_UCODE_VER_STR_SZ) && + nn == OTX2_CPT_AE_UC_TYPE) + val |= 1 << OTX2_CPT_AE_TYPES; + + *ucode_type = val; + + if (!val) + return -EINVAL; + + return 0; +} + +static int __write_ucode_base(struct otx2_cptpf_dev *cptpf, int eng, + dma_addr_t dma_addr) +{ + return otx2_cpt_write_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_EXEX_UCODE_BASE(eng), + (u64)dma_addr); +} + +static int cpt_set_ucode_base(struct otx2_cpt_eng_grp_info *eng_grp, void *obj) +{ + struct otx2_cptpf_dev *cptpf = obj; + struct otx2_cpt_engs_rsvd *engs; + dma_addr_t dma_addr; + int i, bit, ret; + + /* Set PF number for microcode fetches */ + ret = otx2_cpt_write_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_PF_FUNC, + cptpf->pf_id << RVU_PFVF_PF_SHIFT); + if (ret) + return ret; + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + + dma_addr = engs->ucode->dma; + + /* + * Set UCODE_BASE only for the cores which are not used, + * other cores should have already valid UCODE_BASE set + */ + for_each_set_bit(bit, engs->bmap, eng_grp->g->engs_num) + if (!eng_grp->g->eng_ref_cnt[bit]) { + ret = __write_ucode_base(cptpf, bit, dma_addr); + if (ret) + return ret; + } + } + return 0; +} + +static int cpt_detach_and_disable_cores(struct otx2_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx2_cptpf_dev *cptpf = obj; + struct otx2_cpt_bitmap bmap; + int i, timeout = 10; + int busy, ret; + u64 reg = 0; + + bmap = get_cores_bmap(&cptpf->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + /* Detach the cores from group */ + for_each_set_bit(i, bmap.bits, bmap.size) { + ret = otx2_cpt_read_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_EXEX_CTL2(i), ®); + if (ret) + return ret; + + if (reg & (1ull << eng_grp->idx)) { + eng_grp->g->eng_ref_cnt[i]--; + reg &= ~(1ull << eng_grp->idx); + + ret = otx2_cpt_write_af_reg(&cptpf->afpf_mbox, + cptpf->pdev, + CPT_AF_EXEX_CTL2(i), reg); + if (ret) + return ret; + } + } + + /* Wait for cores to become idle */ + do { + busy = 0; + usleep_range(10000, 20000); + if (timeout-- < 0) + return -EBUSY; + + for_each_set_bit(i, bmap.bits, bmap.size) { + ret = otx2_cpt_read_af_reg(&cptpf->afpf_mbox, + cptpf->pdev, + CPT_AF_EXEX_STS(i), ®); + if (ret) + return ret; + + if (reg & 0x1) { + busy = 1; + break; + } + } + } while (busy); + + /* Disable the cores only if they are not used anymore */ + for_each_set_bit(i, bmap.bits, bmap.size) { + if (!eng_grp->g->eng_ref_cnt[i]) { + ret = otx2_cpt_write_af_reg(&cptpf->afpf_mbox, + cptpf->pdev, + CPT_AF_EXEX_CTL(i), 0x0); + if (ret) + return ret; + } + } + + return 0; +} + +static int cpt_attach_and_enable_cores(struct otx2_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx2_cptpf_dev *cptpf = obj; + struct otx2_cpt_bitmap bmap; + u64 reg = 0; + int i, ret; + + bmap = get_cores_bmap(&cptpf->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + /* Attach the cores to the group */ + for_each_set_bit(i, bmap.bits, bmap.size) { + ret = otx2_cpt_read_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_EXEX_CTL2(i), ®); + if (ret) + return ret; + + if (!(reg & (1ull << eng_grp->idx))) { + eng_grp->g->eng_ref_cnt[i]++; + reg |= 1ull << eng_grp->idx; + + ret = otx2_cpt_write_af_reg(&cptpf->afpf_mbox, + cptpf->pdev, + CPT_AF_EXEX_CTL2(i), reg); + if (ret) + return ret; + } + } + + /* Enable the cores */ + for_each_set_bit(i, bmap.bits, bmap.size) { + ret = otx2_cpt_add_write_af_reg(&cptpf->afpf_mbox, + cptpf->pdev, + CPT_AF_EXEX_CTL(i), 0x1); + if (ret) + return ret; + } + ret = otx2_cpt_send_af_reg_requests(&cptpf->afpf_mbox, cptpf->pdev); + + return ret; +} + +static int load_fw(struct device *dev, struct fw_info_t *fw_info, + char *filename) +{ + struct otx2_cpt_ucode_hdr *ucode_hdr; + struct otx2_cpt_uc_info_t *uc_info; + int ucode_type, ucode_size; + int ret; + + uc_info = kzalloc(sizeof(*uc_info), GFP_KERNEL); + if (!uc_info) + return -ENOMEM; + + ret = request_firmware(&uc_info->fw, filename, dev); + if (ret) + goto free_uc_info; + + ucode_hdr = (struct otx2_cpt_ucode_hdr *)uc_info->fw->data; + ret = get_ucode_type(dev, ucode_hdr, &ucode_type); + if (ret) + goto release_fw; + + ucode_size = ntohl(ucode_hdr->code_length) * 2; + if (!ucode_size) { + dev_err(dev, "Ucode %s invalid size\n", filename); + ret = -EINVAL; + goto release_fw; + } + + set_ucode_filename(&uc_info->ucode, filename); + memcpy(uc_info->ucode.ver_str, ucode_hdr->ver_str, + OTX2_CPT_UCODE_VER_STR_SZ); + uc_info->ucode.ver_num = ucode_hdr->ver_num; + uc_info->ucode.type = ucode_type; + uc_info->ucode.size = ucode_size; + list_add_tail(&uc_info->list, &fw_info->ucodes); + + return 0; + +release_fw: + release_firmware(uc_info->fw); +free_uc_info: + kfree(uc_info); + return ret; +} + +static void cpt_ucode_release_fw(struct fw_info_t *fw_info) +{ + struct otx2_cpt_uc_info_t *curr, *temp; + + if (!fw_info) + return; + + list_for_each_entry_safe(curr, temp, &fw_info->ucodes, list) { + list_del(&curr->list); + release_firmware(curr->fw); + kfree(curr); + } +} + +static struct otx2_cpt_uc_info_t *get_ucode(struct fw_info_t *fw_info, + int ucode_type) +{ + struct otx2_cpt_uc_info_t *curr; + + list_for_each_entry(curr, &fw_info->ucodes, list) { + if (!is_eng_type(curr->ucode.type, ucode_type)) + continue; + + return curr; + } + return NULL; +} + +static void print_uc_info(struct fw_info_t *fw_info) +{ + struct otx2_cpt_uc_info_t *curr; + + list_for_each_entry(curr, &fw_info->ucodes, list) { + pr_debug("Ucode filename %s\n", curr->ucode.filename); + pr_debug("Ucode version string %s\n", curr->ucode.ver_str); + pr_debug("Ucode version %d.%d.%d.%d\n", + curr->ucode.ver_num.nn, curr->ucode.ver_num.xx, + curr->ucode.ver_num.yy, curr->ucode.ver_num.zz); + pr_debug("Ucode type (%d) %s\n", curr->ucode.type, + get_ucode_type_str(curr->ucode.type)); + pr_debug("Ucode size %d\n", curr->ucode.size); + pr_debug("Ucode ptr %p\n", curr->fw->data); + } +} + +static int cpt_ucode_load_fw(struct pci_dev *pdev, struct fw_info_t *fw_info) +{ + char filename[OTX2_CPT_NAME_LENGTH]; + char eng_type[8] = {0}; + int ret, e, i; + + INIT_LIST_HEAD(&fw_info->ucodes); + + for (e = 1; e < OTX2_CPT_MAX_ENG_TYPES; e++) { + strcpy(eng_type, get_eng_type_str(e)); + for (i = 0; i < strlen(eng_type); i++) + eng_type[i] = tolower(eng_type[i]); + + snprintf(filename, sizeof(filename), "mrvl/cpt%02d/%s.out", + pdev->revision, eng_type); + /* Request firmware for each engine type */ + ret = load_fw(&pdev->dev, fw_info, filename); + if (ret) + goto release_fw; + } + print_uc_info(fw_info); + return 0; + +release_fw: + cpt_ucode_release_fw(fw_info); + return ret; +} + +static struct otx2_cpt_engs_rsvd *find_engines_by_type( + struct otx2_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + int i; + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + if (eng_grp->engs[i].type == eng_type) + return &eng_grp->engs[i]; + } + return NULL; +} + +static int eng_grp_has_eng_type(struct otx2_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + struct otx2_cpt_engs_rsvd *engs; + + engs = find_engines_by_type(eng_grp, eng_type); + + return (engs != NULL ? 1 : 0); +} + +static int update_engines_avail_count(struct device *dev, + struct otx2_cpt_engs_available *avail, + struct otx2_cpt_engs_rsvd *engs, int val) +{ + switch (engs->type) { + case OTX2_CPT_SE_TYPES: + avail->se_cnt += val; + break; + + case OTX2_CPT_IE_TYPES: + avail->ie_cnt += val; + break; + + case OTX2_CPT_AE_TYPES: + avail->ae_cnt += val; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + return 0; +} + +static int update_engines_offset(struct device *dev, + struct otx2_cpt_engs_available *avail, + struct otx2_cpt_engs_rsvd *engs) +{ + switch (engs->type) { + case OTX2_CPT_SE_TYPES: + engs->offset = 0; + break; + + case OTX2_CPT_IE_TYPES: + engs->offset = avail->max_se_cnt; + break; + + case OTX2_CPT_AE_TYPES: + engs->offset = avail->max_se_cnt + avail->max_ie_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + return 0; +} + +static int release_engines(struct device *dev, + struct otx2_cpt_eng_grp_info *grp) +{ + int i, ret = 0; + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) + continue; + + if (grp->engs[i].count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, + &grp->engs[i], + grp->engs[i].count); + if (ret) + return ret; + } + + grp->engs[i].type = 0; + grp->engs[i].count = 0; + grp->engs[i].offset = 0; + grp->engs[i].ucode = NULL; + bitmap_zero(grp->engs[i].bmap, grp->g->engs_num); + } + return 0; +} + +static int do_reserve_engines(struct device *dev, + struct otx2_cpt_eng_grp_info *grp, + struct otx2_cpt_engines *req_engs) +{ + struct otx2_cpt_engs_rsvd *engs = NULL; + int i, ret; + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) { + engs = &grp->engs[i]; + break; + } + } + + if (!engs) + return -ENOMEM; + + engs->type = req_engs->type; + engs->count = req_engs->count; + + ret = update_engines_offset(dev, &grp->g->avail, engs); + if (ret) + return ret; + + if (engs->count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, engs, + -engs->count); + if (ret) + return ret; + } + + return 0; +} + +static int check_engines_availability(struct device *dev, + struct otx2_cpt_eng_grp_info *grp, + struct otx2_cpt_engines *req_eng) +{ + int avail_cnt = 0; + + switch (req_eng->type) { + case OTX2_CPT_SE_TYPES: + avail_cnt = grp->g->avail.se_cnt; + break; + + case OTX2_CPT_IE_TYPES: + avail_cnt = grp->g->avail.ie_cnt; + break; + + case OTX2_CPT_AE_TYPES: + avail_cnt = grp->g->avail.ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", req_eng->type); + return -EINVAL; + } + + if (avail_cnt < req_eng->count) { + dev_err(dev, + "Error available %s engines %d < than requested %d\n", + get_eng_type_str(req_eng->type), + avail_cnt, req_eng->count); + return -EBUSY; + } + return 0; +} + +static int reserve_engines(struct device *dev, + struct otx2_cpt_eng_grp_info *grp, + struct otx2_cpt_engines *req_engs, int ucodes_cnt) +{ + int i, ret = 0; + + /* Validate if a number of requested engines are available */ + for (i = 0; i < ucodes_cnt; i++) { + ret = check_engines_availability(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + + /* Reserve requested engines for this engine group */ + for (i = 0; i < ucodes_cnt; i++) { + ret = do_reserve_engines(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + return 0; +} + +static void ucode_unload(struct device *dev, struct otx2_cpt_ucode *ucode) +{ + if (ucode->va) { + dma_free_coherent(dev, ucode->size, ucode->va, ucode->dma); + ucode->va = NULL; + ucode->dma = 0; + ucode->size = 0; + } + + memset(&ucode->ver_str, 0, OTX2_CPT_UCODE_VER_STR_SZ); + memset(&ucode->ver_num, 0, sizeof(struct otx2_cpt_ucode_ver_num)); + set_ucode_filename(ucode, ""); + ucode->type = 0; +} + +static int copy_ucode_to_dma_mem(struct device *dev, + struct otx2_cpt_ucode *ucode, + const u8 *ucode_data) +{ + u32 i; + + /* Allocate DMAable space */ + ucode->va = dma_alloc_coherent(dev, ucode->size, &ucode->dma, + GFP_KERNEL); + if (!ucode->va) + return -ENOMEM; + + memcpy(ucode->va, ucode_data + sizeof(struct otx2_cpt_ucode_hdr), + ucode->size); + + /* Byte swap 64-bit */ + for (i = 0; i < (ucode->size / 8); i++) + cpu_to_be64s(&((u64 *)ucode->va)[i]); + /* Ucode needs 16-bit swap */ + for (i = 0; i < (ucode->size / 2); i++) + cpu_to_be16s(&((u16 *)ucode->va)[i]); + return 0; +} + +static int enable_eng_grp(struct otx2_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int ret; + + /* Point microcode to each core of the group */ + ret = cpt_set_ucode_base(eng_grp, obj); + if (ret) + return ret; + + /* Attach the cores to the group and enable them */ + ret = cpt_attach_and_enable_cores(eng_grp, obj); + + return ret; +} + +static int disable_eng_grp(struct device *dev, + struct otx2_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int i, ret; + + /* Disable all engines used by this group */ + ret = cpt_detach_and_disable_cores(eng_grp, obj); + if (ret) + return ret; + + /* Unload ucode used by this engine group */ + ucode_unload(dev, &eng_grp->ucode[0]); + ucode_unload(dev, &eng_grp->ucode[1]); + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + eng_grp->engs[i].ucode = &eng_grp->ucode[0]; + } + + /* Clear UCODE_BASE register for each engine used by this group */ + ret = cpt_set_ucode_base(eng_grp, obj); + + return ret; +} + +static void setup_eng_grp_mirroring(struct otx2_cpt_eng_grp_info *dst_grp, + struct otx2_cpt_eng_grp_info *src_grp) +{ + /* Setup fields for engine group which is mirrored */ + src_grp->mirror.is_ena = false; + src_grp->mirror.idx = 0; + src_grp->mirror.ref_count++; + + /* Setup fields for mirroring engine group */ + dst_grp->mirror.is_ena = true; + dst_grp->mirror.idx = src_grp->idx; + dst_grp->mirror.ref_count = 0; +} + +static void remove_eng_grp_mirroring(struct otx2_cpt_eng_grp_info *dst_grp) +{ + struct otx2_cpt_eng_grp_info *src_grp; + + if (!dst_grp->mirror.is_ena) + return; + + src_grp = &dst_grp->g->grp[dst_grp->mirror.idx]; + + src_grp->mirror.ref_count--; + dst_grp->mirror.is_ena = false; + dst_grp->mirror.idx = 0; + dst_grp->mirror.ref_count = 0; +} + +static void update_requested_engs(struct otx2_cpt_eng_grp_info *mirror_eng_grp, + struct otx2_cpt_engines *engs, int engs_cnt) +{ + struct otx2_cpt_engs_rsvd *mirrored_engs; + int i; + + for (i = 0; i < engs_cnt; i++) { + mirrored_engs = find_engines_by_type(mirror_eng_grp, + engs[i].type); + if (!mirrored_engs) + continue; + + /* + * If mirrored group has this type of engines attached then + * there are 3 scenarios possible: + * 1) mirrored_engs.count == engs[i].count then all engines + * from mirrored engine group will be shared with this engine + * group + * 2) mirrored_engs.count > engs[i].count then only a subset of + * engines from mirrored engine group will be shared with this + * engine group + * 3) mirrored_engs.count < engs[i].count then all engines + * from mirrored engine group will be shared with this group + * and additional engines will be reserved for exclusively use + * by this engine group + */ + engs[i].count -= mirrored_engs->count; + } +} + +static struct otx2_cpt_eng_grp_info *find_mirrored_eng_grp( + struct otx2_cpt_eng_grp_info *grp) +{ + struct otx2_cpt_eng_grps *eng_grps = grp->g; + int i; + + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + continue; + if (eng_grps->grp[i].ucode[0].type && + eng_grps->grp[i].ucode[1].type) + continue; + if (grp->idx == i) + continue; + if (!strncasecmp(eng_grps->grp[i].ucode[0].ver_str, + grp->ucode[0].ver_str, + OTX2_CPT_UCODE_VER_STR_SZ)) + return &eng_grps->grp[i]; + } + + return NULL; +} + +static struct otx2_cpt_eng_grp_info *find_unused_eng_grp( + struct otx2_cpt_eng_grps *eng_grps) +{ + int i; + + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + return &eng_grps->grp[i]; + } + return NULL; +} + +static int eng_grp_update_masks(struct device *dev, + struct otx2_cpt_eng_grp_info *eng_grp) +{ + struct otx2_cpt_engs_rsvd *engs, *mirrored_engs; + struct otx2_cpt_bitmap tmp_bmap = { {0} }; + int i, j, cnt, max_cnt; + int bit; + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + if (engs->count <= 0) + continue; + + switch (engs->type) { + case OTX2_CPT_SE_TYPES: + max_cnt = eng_grp->g->avail.max_se_cnt; + break; + + case OTX2_CPT_IE_TYPES: + max_cnt = eng_grp->g->avail.max_ie_cnt; + break; + + case OTX2_CPT_AE_TYPES: + max_cnt = eng_grp->g->avail.max_ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + + cnt = engs->count; + WARN_ON(engs->offset + max_cnt > OTX2_CPT_MAX_ENGINES); + bitmap_zero(tmp_bmap.bits, eng_grp->g->engs_num); + for (j = engs->offset; j < engs->offset + max_cnt; j++) { + if (!eng_grp->g->eng_ref_cnt[j]) { + bitmap_set(tmp_bmap.bits, j, 1); + cnt--; + if (!cnt) + break; + } + } + + if (cnt) + return -ENOSPC; + + bitmap_copy(engs->bmap, tmp_bmap.bits, eng_grp->g->engs_num); + } + + if (!eng_grp->mirror.is_ena) + return 0; + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + + mirrored_engs = find_engines_by_type( + &eng_grp->g->grp[eng_grp->mirror.idx], + engs->type); + WARN_ON(!mirrored_engs && engs->count <= 0); + if (!mirrored_engs) + continue; + + bitmap_copy(tmp_bmap.bits, mirrored_engs->bmap, + eng_grp->g->engs_num); + if (engs->count < 0) { + bit = find_first_bit(mirrored_engs->bmap, + eng_grp->g->engs_num); + bitmap_clear(tmp_bmap.bits, bit, -engs->count); + } + bitmap_or(engs->bmap, engs->bmap, tmp_bmap.bits, + eng_grp->g->engs_num); + } + return 0; +} + +static int delete_engine_group(struct device *dev, + struct otx2_cpt_eng_grp_info *eng_grp) +{ + int ret; + + if (!eng_grp->is_enabled) + return 0; + + if (eng_grp->mirror.ref_count) + return -EINVAL; + + /* Removing engine group mirroring if enabled */ + remove_eng_grp_mirroring(eng_grp); + + /* Disable engine group */ + ret = disable_eng_grp(dev, eng_grp, eng_grp->g->obj); + if (ret) + return ret; + + /* Release all engines held by this engine group */ + ret = release_engines(dev, eng_grp); + if (ret) + return ret; + + eng_grp->is_enabled = false; + + return 0; +} + +static void update_ucode_ptrs(struct otx2_cpt_eng_grp_info *eng_grp) +{ + struct otx2_cpt_ucode *ucode; + + if (eng_grp->mirror.is_ena) + ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0]; + else + ucode = &eng_grp->ucode[0]; + WARN_ON(!eng_grp->engs[0].type); + eng_grp->engs[0].ucode = ucode; + + if (eng_grp->engs[1].type) { + if (is_2nd_ucode_used(eng_grp)) + eng_grp->engs[1].ucode = &eng_grp->ucode[1]; + else + eng_grp->engs[1].ucode = ucode; + } +} + +static int create_engine_group(struct device *dev, + struct otx2_cpt_eng_grps *eng_grps, + struct otx2_cpt_engines *engs, int ucodes_cnt, + void *ucode_data[], int is_print) +{ + struct otx2_cpt_eng_grp_info *mirrored_eng_grp; + struct otx2_cpt_eng_grp_info *eng_grp; + struct otx2_cpt_uc_info_t *uc_info; + int i, ret = 0; + + /* Find engine group which is not used */ + eng_grp = find_unused_eng_grp(eng_grps); + if (!eng_grp) { + dev_err(dev, "Error all engine groups are being used\n"); + return -ENOSPC; + } + /* Load ucode */ + for (i = 0; i < ucodes_cnt; i++) { + uc_info = (struct otx2_cpt_uc_info_t *) ucode_data[i]; + eng_grp->ucode[i] = uc_info->ucode; + ret = copy_ucode_to_dma_mem(dev, &eng_grp->ucode[i], + uc_info->fw->data); + if (ret) + goto unload_ucode; + } + + /* Check if this group mirrors another existing engine group */ + mirrored_eng_grp = find_mirrored_eng_grp(eng_grp); + if (mirrored_eng_grp) { + /* Setup mirroring */ + setup_eng_grp_mirroring(eng_grp, mirrored_eng_grp); + + /* + * Update count of requested engines because some + * of them might be shared with mirrored group + */ + update_requested_engs(mirrored_eng_grp, engs, ucodes_cnt); + } + ret = reserve_engines(dev, eng_grp, engs, ucodes_cnt); + if (ret) + goto unload_ucode; + + /* Update ucode pointers used by engines */ + update_ucode_ptrs(eng_grp); + + /* Update engine masks used by this group */ + ret = eng_grp_update_masks(dev, eng_grp); + if (ret) + goto release_engs; + + /* Enable engine group */ + ret = enable_eng_grp(eng_grp, eng_grps->obj); + if (ret) + goto release_engs; + + /* + * If this engine group mirrors another engine group + * then we need to unload ucode as we will use ucode + * from mirrored engine group + */ + if (eng_grp->mirror.is_ena) + ucode_unload(dev, &eng_grp->ucode[0]); + + eng_grp->is_enabled = true; + + if (!is_print) + return 0; + + if (mirrored_eng_grp) + dev_info(dev, + "Engine_group%d: reuse microcode %s from group %d\n", + eng_grp->idx, mirrored_eng_grp->ucode[0].ver_str, + mirrored_eng_grp->idx); + else + dev_info(dev, "Engine_group%d: microcode loaded %s\n", + eng_grp->idx, eng_grp->ucode[0].ver_str); + if (is_2nd_ucode_used(eng_grp)) + dev_info(dev, "Engine_group%d: microcode loaded %s\n", + eng_grp->idx, eng_grp->ucode[1].ver_str); + + return 0; + +release_engs: + release_engines(dev, eng_grp); +unload_ucode: + ucode_unload(dev, &eng_grp->ucode[0]); + ucode_unload(dev, &eng_grp->ucode[1]); + return ret; +} + +static void delete_engine_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps) +{ + int i; + + /* First delete all mirroring engine groups */ + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) + if (eng_grps->grp[i].mirror.is_ena) + delete_engine_group(&pdev->dev, &eng_grps->grp[i]); + + /* Delete remaining engine groups */ + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) + delete_engine_group(&pdev->dev, &eng_grps->grp[i]); +} + +int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type) +{ + + int eng_grp_num = OTX2_CPT_INVALID_CRYPTO_ENG_GRP; + struct otx2_cpt_eng_grp_info *grp; + int i; + + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + if (!grp->is_enabled) + continue; + + if (eng_type == OTX2_CPT_SE_TYPES) { + if (eng_grp_has_eng_type(grp, eng_type) && + !eng_grp_has_eng_type(grp, OTX2_CPT_IE_TYPES)) { + eng_grp_num = i; + break; + } + } else { + if (eng_grp_has_eng_type(grp, eng_type)) { + eng_grp_num = i; + break; + } + } + } + return eng_grp_num; +} + +int otx2_cpt_create_eng_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps) +{ + struct otx2_cpt_uc_info_t *uc_info[OTX2_CPT_MAX_ETYPES_PER_GRP] = { }; + struct otx2_cpt_engines engs[OTX2_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + struct fw_info_t fw_info; + int ret; + + /* + * We don't create engine groups if it was already + * made (when user enabled VFs for the first time) + */ + if (eng_grps->is_grps_created) + return 0; + + ret = cpt_ucode_load_fw(pdev, &fw_info); + if (ret) + return ret; + + /* + * Create engine group with SE engines for kernel + * crypto functionality (symmetric crypto) + */ + uc_info[0] = get_ucode(&fw_info, OTX2_CPT_SE_TYPES); + if (uc_info[0] == NULL) { + dev_err(&pdev->dev, "Unable to find firmware for SE\n"); + ret = -EINVAL; + goto release_fw; + } + engs[0].type = OTX2_CPT_SE_TYPES; + engs[0].count = eng_grps->avail.max_se_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) uc_info, 1); + if (ret) + goto release_fw; + + /* + * Create engine group with SE+IE engines for IPSec. + * All SE engines will be shared with engine group 0. + */ + uc_info[0] = get_ucode(&fw_info, OTX2_CPT_SE_TYPES); + uc_info[1] = get_ucode(&fw_info, OTX2_CPT_IE_TYPES); + + if (uc_info[1] == NULL) { + dev_err(&pdev->dev, "Unable to find firmware for IE"); + ret = -EINVAL; + goto delete_eng_grp; + } + engs[0].type = OTX2_CPT_SE_TYPES; + engs[0].count = eng_grps->avail.max_se_cnt; + engs[1].type = OTX2_CPT_IE_TYPES; + engs[1].count = eng_grps->avail.max_ie_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 2, + (void **) uc_info, 1); + if (ret) + goto delete_eng_grp; + + /* + * Create engine group with AE engines for asymmetric + * crypto functionality. + */ + uc_info[0] = get_ucode(&fw_info, OTX2_CPT_AE_TYPES); + if (uc_info[0] == NULL) { + dev_err(&pdev->dev, "Unable to find firmware for AE"); + ret = -EINVAL; + goto delete_eng_grp; + } + engs[0].type = OTX2_CPT_AE_TYPES; + engs[0].count = eng_grps->avail.max_ae_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) uc_info, 1); + if (ret) + goto delete_eng_grp; + + eng_grps->is_grps_created = true; + + cpt_ucode_release_fw(&fw_info); + return 0; + +delete_eng_grp: + delete_engine_grps(pdev, eng_grps); +release_fw: + cpt_ucode_release_fw(&fw_info); + return ret; +} + +int otx2_cpt_disable_all_cores(struct otx2_cptpf_dev *cptpf) +{ + int i, ret, busy, total_cores; + int timeout = 10; + u64 reg = 0; + + total_cores = cptpf->eng_grps.avail.max_se_cnt + + cptpf->eng_grps.avail.max_ie_cnt + + cptpf->eng_grps.avail.max_ae_cnt; + + /* Disengage the cores from groups */ + for (i = 0; i < total_cores; i++) { + ret = otx2_cpt_add_write_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_EXEX_CTL2(i), 0x0); + if (ret) + return ret; + + cptpf->eng_grps.eng_ref_cnt[i] = 0; + } + ret = otx2_cpt_send_af_reg_requests(&cptpf->afpf_mbox, cptpf->pdev); + if (ret) + return ret; + + /* Wait for cores to become idle */ + do { + busy = 0; + usleep_range(10000, 20000); + if (timeout-- < 0) + return -EBUSY; + + for (i = 0; i < total_cores; i++) { + ret = otx2_cpt_read_af_reg(&cptpf->afpf_mbox, + cptpf->pdev, + CPT_AF_EXEX_STS(i), ®); + if (ret) + return ret; + + if (reg & 0x1) { + busy = 1; + break; + } + } + } while (busy); + + /* Disable the cores */ + for (i = 0; i < total_cores; i++) { + ret = otx2_cpt_add_write_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_EXEX_CTL(i), 0x0); + if (ret) + return ret; + } + return otx2_cpt_send_af_reg_requests(&cptpf->afpf_mbox, cptpf->pdev); +} + +void otx2_cpt_cleanup_eng_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps) +{ + struct otx2_cpt_eng_grp_info *grp; + int i, j; + + delete_engine_grps(pdev, eng_grps); + /* Release memory */ + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + for (j = 0; j < OTX2_CPT_MAX_ETYPES_PER_GRP; j++) { + kfree(grp->engs[j].bmap); + grp->engs[j].bmap = NULL; + } + } +} + +int otx2_cpt_init_eng_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps) +{ + struct otx2_cpt_eng_grp_info *grp; + int i, j, ret; + + eng_grps->obj = pci_get_drvdata(pdev); + eng_grps->avail.se_cnt = eng_grps->avail.max_se_cnt; + eng_grps->avail.ie_cnt = eng_grps->avail.max_ie_cnt; + eng_grps->avail.ae_cnt = eng_grps->avail.max_ae_cnt; + + eng_grps->engs_num = eng_grps->avail.max_se_cnt + + eng_grps->avail.max_ie_cnt + + eng_grps->avail.max_ae_cnt; + if (eng_grps->engs_num > OTX2_CPT_MAX_ENGINES) { + dev_err(&pdev->dev, + "Number of engines %d > than max supported %d\n", + eng_grps->engs_num, OTX2_CPT_MAX_ENGINES); + ret = -EINVAL; + goto cleanup_eng_grps; + } + + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + grp->g = eng_grps; + grp->idx = i; + + for (j = 0; j < OTX2_CPT_MAX_ETYPES_PER_GRP; j++) { + grp->engs[j].bmap = + kcalloc(BITS_TO_LONGS(eng_grps->engs_num), + sizeof(long), GFP_KERNEL); + if (!grp->engs[j].bmap) { + ret = -ENOMEM; + goto cleanup_eng_grps; + } + } + } + return 0; + +cleanup_eng_grps: + otx2_cpt_cleanup_eng_grps(pdev, eng_grps); + return ret; +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h new file mode 100644 index 000000000000..96556399a58c --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPTPF_UCODE_H +#define __OTX2_CPTPF_UCODE_H + +#include +#include +#include +#include "otx2_cpt_hw_types.h" +#include "otx2_cpt_common.h" + +/* + * On OcteonTX2 platform IPSec ucode can use both IE and SE engines therefore + * IE and SE engines can be attached to the same engine group. + */ +#define OTX2_CPT_MAX_ETYPES_PER_GRP 2 + +/* CPT ucode signature size */ +#define OTX2_CPT_UCODE_SIGN_LEN 256 + +/* Microcode version string length */ +#define OTX2_CPT_UCODE_VER_STR_SZ 44 + +/* Maximum number of supported engines/cores on OcteonTX2 platform */ +#define OTX2_CPT_MAX_ENGINES 128 + +#define OTX2_CPT_ENGS_BITMASK_LEN BITS_TO_LONGS(OTX2_CPT_MAX_ENGINES) + +/* Microcode types */ +enum otx2_cpt_ucode_type { + OTX2_CPT_AE_UC_TYPE = 1, /* AE-MAIN */ + OTX2_CPT_SE_UC_TYPE1 = 20,/* SE-MAIN - combination of 21 and 22 */ + OTX2_CPT_SE_UC_TYPE2 = 21,/* Fast Path IPSec + AirCrypto */ + OTX2_CPT_SE_UC_TYPE3 = 22,/* + * Hash + HMAC + FlexiCrypto + RNG + + * Full Feature IPSec + AirCrypto + Kasumi + */ + OTX2_CPT_IE_UC_TYPE1 = 30, /* IE-MAIN - combination of 31 and 32 */ + OTX2_CPT_IE_UC_TYPE2 = 31, /* Fast Path IPSec */ + OTX2_CPT_IE_UC_TYPE3 = 32, /* + * Hash + HMAC + FlexiCrypto + RNG + + * Full Future IPSec + */ +}; + +struct otx2_cpt_bitmap { + unsigned long bits[OTX2_CPT_ENGS_BITMASK_LEN]; + int size; +}; + +struct otx2_cpt_engines { + int type; + int count; +}; + +/* Microcode version number */ +struct otx2_cpt_ucode_ver_num { + u8 nn; + u8 xx; + u8 yy; + u8 zz; +}; + +struct otx2_cpt_ucode_hdr { + struct otx2_cpt_ucode_ver_num ver_num; + u8 ver_str[OTX2_CPT_UCODE_VER_STR_SZ]; + __be32 code_length; + u32 padding[3]; +}; + +struct otx2_cpt_ucode { + u8 ver_str[OTX2_CPT_UCODE_VER_STR_SZ];/* + * ucode version in readable + * format + */ + struct otx2_cpt_ucode_ver_num ver_num;/* ucode version number */ + char filename[OTX2_CPT_NAME_LENGTH];/* ucode filename */ + dma_addr_t dma; /* phys address of ucode image */ + void *va; /* virt address of ucode image */ + u32 size; /* ucode image size */ + int type; /* ucode image type SE, IE, AE or SE+IE */ +}; + +struct otx2_cpt_uc_info_t { + struct list_head list; + struct otx2_cpt_ucode ucode;/* microcode information */ + const struct firmware *fw; +}; + +/* Maximum and current number of engines available for all engine groups */ +struct otx2_cpt_engs_available { + int max_se_cnt; + int max_ie_cnt; + int max_ae_cnt; + int se_cnt; + int ie_cnt; + int ae_cnt; +}; + +/* Engines reserved to an engine group */ +struct otx2_cpt_engs_rsvd { + int type; /* engine type */ + int count; /* number of engines attached */ + int offset; /* constant offset of engine type in the bitmap */ + unsigned long *bmap; /* attached engines bitmap */ + struct otx2_cpt_ucode *ucode; /* ucode used by these engines */ +}; + +struct otx2_cpt_mirror_info { + int is_ena; /* + * is mirroring enabled, it is set only for engine + * group which mirrors another engine group + */ + int idx; /* + * index of engine group which is mirrored by this + * group, set only for engine group which mirrors + * another group + */ + int ref_count; /* + * number of times this engine group is mirrored by + * other groups, this is set only for engine group + * which is mirrored by other group(s) + */ +}; + +struct otx2_cpt_eng_grp_info { + struct otx2_cpt_eng_grps *g; /* pointer to engine_groups structure */ + /* engines attached */ + struct otx2_cpt_engs_rsvd engs[OTX2_CPT_MAX_ETYPES_PER_GRP]; + /* ucodes information */ + struct otx2_cpt_ucode ucode[OTX2_CPT_MAX_ETYPES_PER_GRP]; + /* engine group mirroring information */ + struct otx2_cpt_mirror_info mirror; + int idx; /* engine group index */ + bool is_enabled; /* + * is engine group enabled, engine group is enabled + * when it has engines attached and ucode loaded + */ +}; + +struct otx2_cpt_eng_grps { + struct otx2_cpt_eng_grp_info grp[OTX2_CPT_MAX_ENGINE_GROUPS]; + struct otx2_cpt_engs_available avail; + void *obj; /* device specific data */ + int engs_num; /* total number of engines supported */ + u8 eng_ref_cnt[OTX2_CPT_MAX_ENGINES];/* engines reference count */ + bool is_grps_created; /* Is the engine groups are already created */ +}; +struct otx2_cptpf_dev; +int otx2_cpt_init_eng_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps); +void otx2_cpt_cleanup_eng_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps); +int otx2_cpt_create_eng_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps); +int otx2_cpt_disable_all_cores(struct otx2_cptpf_dev *cptpf); +int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type); + +#endif /* __OTX2_CPTPF_UCODE_H */ From 64506017030dd44f0fc91c5110840ac7996213dd Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Fri, 15 Jan 2021 19:22:23 +0530 Subject: [PATCH 093/154] crypto: octeontx2 - add LF framework CPT RVU Local Functions(LFs) needs to be attached to the PF/VF to submit the instructions to CPT. This patch adds the interface to initialize and attach the LFs. It also adds interface to register the LF's interrupts. Signed-off-by: Suheil Chandran Signed-off-by: Lukasz Bartosik Signed-off-by: Srujana Challa Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/Makefile | 2 +- .../marvell/octeontx2/otx2_cpt_common.h | 4 + .../marvell/octeontx2/otx2_cpt_mbox_common.c | 56 +++ drivers/crypto/marvell/octeontx2/otx2_cptlf.c | 429 ++++++++++++++++++ drivers/crypto/marvell/octeontx2/otx2_cptlf.h | 283 ++++++++++++ drivers/crypto/marvell/octeontx2/otx2_cptpf.h | 2 + .../marvell/octeontx2/otx2_cptpf_mbox.c | 8 + 7 files changed, 783 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptlf.c create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptlf.h diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile index 3c4155446296..e47a55961bb8 100644 --- a/drivers/crypto/marvell/octeontx2/Makefile +++ b/drivers/crypto/marvell/octeontx2/Makefile @@ -2,6 +2,6 @@ obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o octeontx2-cpt-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ - otx2_cpt_mbox_common.o otx2_cptpf_ucode.o + otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h index ae16dc102459..d5576f5d3b90 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -76,4 +76,8 @@ int otx2_cpt_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, u64 reg, u64 *val); int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, u64 reg, u64 val); +struct otx2_cptlfs_info; +int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs); +int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs); + #endif /* __OTX2_CPT_COMMON_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c index ef1291c4881b..0933031ac827 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c @@ -2,6 +2,7 @@ /* Copyright (C) 2020 Marvell. */ #include "otx2_cpt_common.h" +#include "otx2_cptlf.h" int otx2_cpt_send_mbox_msg(struct otx2_mbox *mbox, struct pci_dev *pdev) { @@ -112,3 +113,58 @@ int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, return otx2_cpt_send_mbox_msg(mbox, pdev); } + +int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs) +{ + struct otx2_mbox *mbox = lfs->mbox; + struct rsrc_attach *req; + int ret; + + req = (struct rsrc_attach *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct msg_rsp)); + if (req == NULL) { + dev_err(&lfs->pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + + req->hdr.id = MBOX_MSG_ATTACH_RESOURCES; + req->hdr.sig = OTX2_MBOX_REQ_SIG; + req->hdr.pcifunc = 0; + req->cptlfs = lfs->lfs_num; + ret = otx2_cpt_send_mbox_msg(mbox, lfs->pdev); + if (ret) + return ret; + + if (!lfs->are_lfs_attached) + ret = -EINVAL; + + return ret; +} + +int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs) +{ + struct otx2_mbox *mbox = lfs->mbox; + struct rsrc_detach *req; + int ret; + + req = (struct rsrc_detach *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct msg_rsp)); + if (req == NULL) { + dev_err(&lfs->pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + + req->hdr.id = MBOX_MSG_DETACH_RESOURCES; + req->hdr.sig = OTX2_MBOX_REQ_SIG; + req->hdr.pcifunc = 0; + ret = otx2_cpt_send_mbox_msg(mbox, lfs->pdev); + if (ret) + return ret; + + if (lfs->are_lfs_attached) + ret = -EINVAL; + + return ret; +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c new file mode 100644 index 000000000000..e27ea8909368 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include "otx2_cpt_common.h" +#include "otx2_cptlf.h" +#include "rvu_reg.h" + +#define CPT_TIMER_HOLD 0x03F +#define CPT_COUNT_HOLD 32 + +static void cptlf_do_set_done_time_wait(struct otx2_cptlf_info *lf, + int time_wait) +{ + union otx2_cptx_lf_done_wait done_wait; + + done_wait.u = otx2_cpt_read64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_DONE_WAIT); + done_wait.s.time_wait = time_wait; + otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_DONE_WAIT, done_wait.u); +} + +static void cptlf_do_set_done_num_wait(struct otx2_cptlf_info *lf, int num_wait) +{ + union otx2_cptx_lf_done_wait done_wait; + + done_wait.u = otx2_cpt_read64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_DONE_WAIT); + done_wait.s.num_wait = num_wait; + otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_DONE_WAIT, done_wait.u); +} + +static void cptlf_set_done_time_wait(struct otx2_cptlfs_info *lfs, + int time_wait) +{ + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) + cptlf_do_set_done_time_wait(&lfs->lf[slot], time_wait); +} + +static void cptlf_set_done_num_wait(struct otx2_cptlfs_info *lfs, int num_wait) +{ + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) + cptlf_do_set_done_num_wait(&lfs->lf[slot], num_wait); +} + +static int cptlf_set_pri(struct otx2_cptlf_info *lf, int pri) +{ + struct otx2_cptlfs_info *lfs = lf->lfs; + union otx2_cptx_af_lf_ctrl lf_ctrl; + int ret; + + ret = otx2_cpt_read_af_reg(lfs->mbox, lfs->pdev, + CPT_AF_LFX_CTL(lf->slot), + &lf_ctrl.u); + if (ret) + return ret; + + lf_ctrl.s.pri = pri ? 1 : 0; + + ret = otx2_cpt_write_af_reg(lfs->mbox, lfs->pdev, + CPT_AF_LFX_CTL(lf->slot), + lf_ctrl.u); + return ret; +} + +static int cptlf_set_eng_grps_mask(struct otx2_cptlf_info *lf, + int eng_grps_mask) +{ + struct otx2_cptlfs_info *lfs = lf->lfs; + union otx2_cptx_af_lf_ctrl lf_ctrl; + int ret; + + ret = otx2_cpt_read_af_reg(lfs->mbox, lfs->pdev, + CPT_AF_LFX_CTL(lf->slot), + &lf_ctrl.u); + if (ret) + return ret; + + lf_ctrl.s.grp = eng_grps_mask; + + ret = otx2_cpt_write_af_reg(lfs->mbox, lfs->pdev, + CPT_AF_LFX_CTL(lf->slot), + lf_ctrl.u); + return ret; +} + +static int cptlf_set_grp_and_pri(struct otx2_cptlfs_info *lfs, + int eng_grp_mask, int pri) +{ + int slot, ret = 0; + + for (slot = 0; slot < lfs->lfs_num; slot++) { + ret = cptlf_set_pri(&lfs->lf[slot], pri); + if (ret) + return ret; + + ret = cptlf_set_eng_grps_mask(&lfs->lf[slot], eng_grp_mask); + if (ret) + return ret; + } + return ret; +} + +static void cptlf_hw_init(struct otx2_cptlfs_info *lfs) +{ + /* Disable instruction queues */ + otx2_cptlf_disable_iqueues(lfs); + + /* Set instruction queues base addresses */ + otx2_cptlf_set_iqueues_base_addr(lfs); + + /* Set instruction queues sizes */ + otx2_cptlf_set_iqueues_size(lfs); + + /* Set done interrupts time wait */ + cptlf_set_done_time_wait(lfs, CPT_TIMER_HOLD); + + /* Set done interrupts num wait */ + cptlf_set_done_num_wait(lfs, CPT_COUNT_HOLD); + + /* Enable instruction queues */ + otx2_cptlf_enable_iqueues(lfs); +} + +static void cptlf_hw_cleanup(struct otx2_cptlfs_info *lfs) +{ + /* Disable instruction queues */ + otx2_cptlf_disable_iqueues(lfs); +} + +static void cptlf_set_misc_intrs(struct otx2_cptlfs_info *lfs, u8 enable) +{ + union otx2_cptx_lf_misc_int_ena_w1s irq_misc = { .u = 0x0 }; + u64 reg = enable ? OTX2_CPT_LF_MISC_INT_ENA_W1S : + OTX2_CPT_LF_MISC_INT_ENA_W1C; + int slot; + + irq_misc.s.fault = 0x1; + irq_misc.s.hwerr = 0x1; + irq_misc.s.irde = 0x1; + irq_misc.s.nqerr = 0x1; + irq_misc.s.nwrp = 0x1; + + for (slot = 0; slot < lfs->lfs_num; slot++) + otx2_cpt_write64(lfs->reg_base, BLKADDR_CPT0, slot, reg, + irq_misc.u); +} + +static void cptlf_enable_intrs(struct otx2_cptlfs_info *lfs) +{ + int slot; + + /* Enable done interrupts */ + for (slot = 0; slot < lfs->lfs_num; slot++) + otx2_cpt_write64(lfs->reg_base, BLKADDR_CPT0, slot, + OTX2_CPT_LF_DONE_INT_ENA_W1S, 0x1); + /* Enable Misc interrupts */ + cptlf_set_misc_intrs(lfs, true); +} + +static void cptlf_disable_intrs(struct otx2_cptlfs_info *lfs) +{ + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) + otx2_cpt_write64(lfs->reg_base, BLKADDR_CPT0, slot, + OTX2_CPT_LF_DONE_INT_ENA_W1C, 0x1); + cptlf_set_misc_intrs(lfs, false); +} + +static inline int cptlf_read_done_cnt(struct otx2_cptlf_info *lf) +{ + union otx2_cptx_lf_done irq_cnt; + + irq_cnt.u = otx2_cpt_read64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_DONE); + return irq_cnt.s.done; +} + +static irqreturn_t cptlf_misc_intr_handler(int __always_unused irq, void *arg) +{ + union otx2_cptx_lf_misc_int irq_misc, irq_misc_ack; + struct otx2_cptlf_info *lf = arg; + struct device *dev; + + dev = &lf->lfs->pdev->dev; + irq_misc.u = otx2_cpt_read64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_MISC_INT); + irq_misc_ack.u = 0x0; + + if (irq_misc.s.fault) { + dev_err(dev, "Memory error detected while executing CPT_INST_S, LF %d.\n", + lf->slot); + irq_misc_ack.s.fault = 0x1; + + } else if (irq_misc.s.hwerr) { + dev_err(dev, "HW error from an engine executing CPT_INST_S, LF %d.", + lf->slot); + irq_misc_ack.s.hwerr = 0x1; + + } else if (irq_misc.s.nwrp) { + dev_err(dev, "SMMU fault while writing CPT_RES_S to CPT_INST_S[RES_ADDR], LF %d.\n", + lf->slot); + irq_misc_ack.s.nwrp = 0x1; + + } else if (irq_misc.s.irde) { + dev_err(dev, "Memory error when accessing instruction memory queue CPT_LF_Q_BASE[ADDR].\n"); + irq_misc_ack.s.irde = 0x1; + + } else if (irq_misc.s.nqerr) { + dev_err(dev, "Error enqueuing an instruction received at CPT_LF_NQ.\n"); + irq_misc_ack.s.nqerr = 0x1; + + } else { + dev_err(dev, "Unhandled interrupt in CPT LF %d\n", lf->slot); + return IRQ_NONE; + } + + /* Acknowledge interrupts */ + otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_MISC_INT, irq_misc_ack.u); + + return IRQ_HANDLED; +} + +static irqreturn_t cptlf_done_intr_handler(int irq, void *arg) +{ + union otx2_cptx_lf_done_wait done_wait; + struct otx2_cptlf_info *lf = arg; + int irq_cnt; + + /* Read the number of completed requests */ + irq_cnt = cptlf_read_done_cnt(lf); + if (irq_cnt) { + done_wait.u = otx2_cpt_read64(lf->lfs->reg_base, BLKADDR_CPT0, + lf->slot, OTX2_CPT_LF_DONE_WAIT); + /* Acknowledge the number of completed requests */ + otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_DONE_ACK, irq_cnt); + + otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_DONE_WAIT, done_wait.u); + if (unlikely(!lf->wqe)) { + dev_err(&lf->lfs->pdev->dev, "No work for LF %d\n", + lf->slot); + return IRQ_NONE; + } + + /* Schedule processing of completed requests */ + tasklet_hi_schedule(&lf->wqe->work); + } + return IRQ_HANDLED; +} + +void otx2_cptlf_unregister_interrupts(struct otx2_cptlfs_info *lfs) +{ + int i, offs, vector; + + for (i = 0; i < lfs->lfs_num; i++) { + for (offs = 0; offs < OTX2_CPT_LF_MSIX_VECTORS; offs++) { + if (!lfs->lf[i].is_irq_reg[offs]) + continue; + + vector = pci_irq_vector(lfs->pdev, + lfs->lf[i].msix_offset + offs); + free_irq(vector, &lfs->lf[i]); + lfs->lf[i].is_irq_reg[offs] = false; + } + } + cptlf_disable_intrs(lfs); +} + +static int cptlf_do_register_interrrupts(struct otx2_cptlfs_info *lfs, + int lf_num, int irq_offset, + irq_handler_t handler) +{ + int ret, vector; + + vector = pci_irq_vector(lfs->pdev, lfs->lf[lf_num].msix_offset + + irq_offset); + ret = request_irq(vector, handler, 0, + lfs->lf[lf_num].irq_name[irq_offset], + &lfs->lf[lf_num]); + if (ret) + return ret; + + lfs->lf[lf_num].is_irq_reg[irq_offset] = true; + + return ret; +} + +int otx2_cptlf_register_interrupts(struct otx2_cptlfs_info *lfs) +{ + int irq_offs, ret, i; + + for (i = 0; i < lfs->lfs_num; i++) { + irq_offs = OTX2_CPT_LF_INT_VEC_E_MISC; + snprintf(lfs->lf[i].irq_name[irq_offs], 32, "CPTLF Misc%d", i); + ret = cptlf_do_register_interrrupts(lfs, i, irq_offs, + cptlf_misc_intr_handler); + if (ret) + goto free_irq; + + irq_offs = OTX2_CPT_LF_INT_VEC_E_DONE; + snprintf(lfs->lf[i].irq_name[irq_offs], 32, "OTX2_CPTLF Done%d", + i); + ret = cptlf_do_register_interrrupts(lfs, i, irq_offs, + cptlf_done_intr_handler); + if (ret) + goto free_irq; + } + cptlf_enable_intrs(lfs); + return 0; + +free_irq: + otx2_cptlf_unregister_interrupts(lfs); + return ret; +} + +void otx2_cptlf_free_irqs_affinity(struct otx2_cptlfs_info *lfs) +{ + int slot, offs; + + for (slot = 0; slot < lfs->lfs_num; slot++) { + for (offs = 0; offs < OTX2_CPT_LF_MSIX_VECTORS; offs++) + irq_set_affinity_hint(pci_irq_vector(lfs->pdev, + lfs->lf[slot].msix_offset + + offs), NULL); + if (lfs->lf[slot].affinity_mask) + free_cpumask_var(lfs->lf[slot].affinity_mask); + } +} + +int otx2_cptlf_set_irqs_affinity(struct otx2_cptlfs_info *lfs) +{ + struct otx2_cptlf_info *lf = lfs->lf; + int slot, offs, ret; + + for (slot = 0; slot < lfs->lfs_num; slot++) { + if (!zalloc_cpumask_var(&lf[slot].affinity_mask, GFP_KERNEL)) { + dev_err(&lfs->pdev->dev, + "cpumask allocation failed for LF %d", slot); + ret = -ENOMEM; + goto free_affinity_mask; + } + + cpumask_set_cpu(cpumask_local_spread(slot, + dev_to_node(&lfs->pdev->dev)), + lf[slot].affinity_mask); + + for (offs = 0; offs < OTX2_CPT_LF_MSIX_VECTORS; offs++) { + ret = irq_set_affinity_hint(pci_irq_vector(lfs->pdev, + lf[slot].msix_offset + offs), + lf[slot].affinity_mask); + if (ret) + goto free_affinity_mask; + } + } + return 0; + +free_affinity_mask: + otx2_cptlf_free_irqs_affinity(lfs); + return ret; +} + +int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_mask, int pri, + int lfs_num) +{ + int slot, ret; + + if (!lfs->pdev || !lfs->reg_base) + return -EINVAL; + + lfs->lfs_num = lfs_num; + for (slot = 0; slot < lfs->lfs_num; slot++) { + lfs->lf[slot].lfs = lfs; + lfs->lf[slot].slot = slot; + lfs->lf[slot].lmtline = lfs->reg_base + + OTX2_CPT_RVU_FUNC_ADDR_S(BLKADDR_LMT, slot, + OTX2_CPT_LMT_LF_LMTLINEX(0)); + lfs->lf[slot].ioreg = lfs->reg_base + + OTX2_CPT_RVU_FUNC_ADDR_S(BLKADDR_CPT0, slot, + OTX2_CPT_LF_NQX(0)); + } + /* Send request to attach LFs */ + ret = otx2_cpt_attach_rscrs_msg(lfs); + if (ret) + goto clear_lfs_num; + + ret = otx2_cpt_alloc_instruction_queues(lfs); + if (ret) { + dev_err(&lfs->pdev->dev, + "Allocating instruction queues failed\n"); + goto detach_rsrcs; + } + cptlf_hw_init(lfs); + /* + * Allow each LF to execute requests destined to any of 8 engine + * groups and set queue priority of each LF to high + */ + ret = cptlf_set_grp_and_pri(lfs, eng_grp_mask, pri); + if (ret) + goto free_iq; + + return 0; + +free_iq: + otx2_cpt_free_instruction_queues(lfs); + cptlf_hw_cleanup(lfs); +detach_rsrcs: + otx2_cpt_detach_rsrcs_msg(lfs); +clear_lfs_num: + lfs->lfs_num = 0; + return ret; +} + +void otx2_cptlf_shutdown(struct otx2_cptlfs_info *lfs) +{ + lfs->lfs_num = 0; + /* Cleanup LFs hardware side */ + cptlf_hw_cleanup(lfs); + /* Send request to detach LFs */ + otx2_cpt_detach_rsrcs_msg(lfs); +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h new file mode 100644 index 000000000000..37ae0d2dcf55 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h @@ -0,0 +1,283 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ +#ifndef __OTX2_CPTLF_H +#define __OTX2_CPTLF_H + +#include +#include +#include "otx2_cpt_common.h" + +/* + * CPT instruction and pending queues user requested length in CPT_INST_S msgs + */ +#define OTX2_CPT_USER_REQUESTED_QLEN_MSGS 8200 + +/* + * CPT instruction queue size passed to HW is in units of 40*CPT_INST_S + * messages. + */ +#define OTX2_CPT_SIZE_DIV40 (OTX2_CPT_USER_REQUESTED_QLEN_MSGS/40) + +/* + * CPT instruction and pending queues length in CPT_INST_S messages + */ +#define OTX2_CPT_INST_QLEN_MSGS ((OTX2_CPT_SIZE_DIV40 - 1) * 40) + +/* CPT instruction queue length in bytes */ +#define OTX2_CPT_INST_QLEN_BYTES (OTX2_CPT_SIZE_DIV40 * 40 * \ + OTX2_CPT_INST_SIZE) + +/* CPT instruction group queue length in bytes */ +#define OTX2_CPT_INST_GRP_QLEN_BYTES (OTX2_CPT_SIZE_DIV40 * 16) + +/* CPT FC length in bytes */ +#define OTX2_CPT_Q_FC_LEN 128 + +/* CPT instruction queue alignment */ +#define OTX2_CPT_INST_Q_ALIGNMENT 128 + +/* Mask which selects all engine groups */ +#define OTX2_CPT_ALL_ENG_GRPS_MASK 0xFF + +/* Maximum LFs supported in OcteonTX2 for CPT */ +#define OTX2_CPT_MAX_LFS_NUM 64 + +/* Queue priority */ +#define OTX2_CPT_QUEUE_HI_PRIO 0x1 +#define OTX2_CPT_QUEUE_LOW_PRIO 0x0 + +enum otx2_cptlf_state { + OTX2_CPTLF_IN_RESET, + OTX2_CPTLF_STARTED, +}; + +struct otx2_cpt_inst_queue { + u8 *vaddr; + u8 *real_vaddr; + dma_addr_t dma_addr; + dma_addr_t real_dma_addr; + u32 size; +}; + +struct otx2_cptlfs_info; +struct otx2_cptlf_wqe { + struct tasklet_struct work; + struct otx2_cptlfs_info *lfs; + u8 lf_num; +}; + +struct otx2_cptlf_info { + struct otx2_cptlfs_info *lfs; /* Ptr to cptlfs_info struct */ + void __iomem *lmtline; /* Address of LMTLINE */ + void __iomem *ioreg; /* LMTLINE send register */ + int msix_offset; /* MSI-X interrupts offset */ + cpumask_var_t affinity_mask; /* IRQs affinity mask */ + u8 irq_name[OTX2_CPT_LF_MSIX_VECTORS][32];/* Interrupts name */ + u8 is_irq_reg[OTX2_CPT_LF_MSIX_VECTORS]; /* Is interrupt registered */ + u8 slot; /* Slot number of this LF */ + + struct otx2_cpt_inst_queue iqueue;/* Instruction queue */ + struct otx2_cptlf_wqe *wqe; /* Tasklet work info */ +}; + +struct otx2_cptlfs_info { + /* Registers start address of VF/PF LFs are attached to */ + void __iomem *reg_base; + struct pci_dev *pdev; /* Device LFs are attached to */ + struct otx2_cptlf_info lf[OTX2_CPT_MAX_LFS_NUM]; + struct otx2_mbox *mbox; + u8 are_lfs_attached; /* Whether CPT LFs are attached */ + u8 lfs_num; /* Number of CPT LFs */ + atomic_t state; /* LF's state. started/reset */ +}; + +static inline void otx2_cpt_free_instruction_queues( + struct otx2_cptlfs_info *lfs) +{ + struct otx2_cpt_inst_queue *iq; + int i; + + for (i = 0; i < lfs->lfs_num; i++) { + iq = &lfs->lf[i].iqueue; + if (iq->real_vaddr) + dma_free_coherent(&lfs->pdev->dev, + iq->size, + iq->real_vaddr, + iq->real_dma_addr); + iq->real_vaddr = NULL; + iq->vaddr = NULL; + } +} + +static inline int otx2_cpt_alloc_instruction_queues( + struct otx2_cptlfs_info *lfs) +{ + struct otx2_cpt_inst_queue *iq; + int ret = 0, i; + + if (!lfs->lfs_num) + return -EINVAL; + + for (i = 0; i < lfs->lfs_num; i++) { + iq = &lfs->lf[i].iqueue; + iq->size = OTX2_CPT_INST_QLEN_BYTES + + OTX2_CPT_Q_FC_LEN + + OTX2_CPT_INST_GRP_QLEN_BYTES + + OTX2_CPT_INST_Q_ALIGNMENT; + iq->real_vaddr = dma_alloc_coherent(&lfs->pdev->dev, iq->size, + &iq->real_dma_addr, GFP_KERNEL); + if (!iq->real_vaddr) { + ret = -ENOMEM; + goto error; + } + iq->vaddr = iq->real_vaddr + OTX2_CPT_INST_GRP_QLEN_BYTES; + iq->dma_addr = iq->real_dma_addr + OTX2_CPT_INST_GRP_QLEN_BYTES; + + /* Align pointers */ + iq->vaddr = PTR_ALIGN(iq->vaddr, OTX2_CPT_INST_Q_ALIGNMENT); + iq->dma_addr = PTR_ALIGN(iq->dma_addr, + OTX2_CPT_INST_Q_ALIGNMENT); + } + return 0; + +error: + otx2_cpt_free_instruction_queues(lfs); + return ret; +} + +static inline void otx2_cptlf_set_iqueues_base_addr( + struct otx2_cptlfs_info *lfs) +{ + union otx2_cptx_lf_q_base lf_q_base; + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) { + lf_q_base.u = lfs->lf[slot].iqueue.dma_addr; + otx2_cpt_write64(lfs->reg_base, BLKADDR_CPT0, slot, + OTX2_CPT_LF_Q_BASE, lf_q_base.u); + } +} + +static inline void otx2_cptlf_do_set_iqueue_size(struct otx2_cptlf_info *lf) +{ + union otx2_cptx_lf_q_size lf_q_size = { .u = 0x0 }; + + lf_q_size.s.size_div40 = OTX2_CPT_SIZE_DIV40; + otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_Q_SIZE, lf_q_size.u); +} + +static inline void otx2_cptlf_set_iqueues_size(struct otx2_cptlfs_info *lfs) +{ + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) + otx2_cptlf_do_set_iqueue_size(&lfs->lf[slot]); +} + +static inline void otx2_cptlf_do_disable_iqueue(struct otx2_cptlf_info *lf) +{ + union otx2_cptx_lf_ctl lf_ctl = { .u = 0x0 }; + union otx2_cptx_lf_inprog lf_inprog; + int timeout = 20; + + /* Disable instructions enqueuing */ + otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_CTL, lf_ctl.u); + + /* Wait for instruction queue to become empty */ + do { + lf_inprog.u = otx2_cpt_read64(lf->lfs->reg_base, BLKADDR_CPT0, + lf->slot, OTX2_CPT_LF_INPROG); + if (!lf_inprog.s.inflight) + break; + + usleep_range(10000, 20000); + if (timeout-- < 0) { + dev_err(&lf->lfs->pdev->dev, + "Error LF %d is still busy.\n", lf->slot); + break; + } + + } while (1); + + /* + * Disable executions in the LF's queue, + * the queue should be empty at this point + */ + lf_inprog.s.eena = 0x0; + otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_INPROG, lf_inprog.u); +} + +static inline void otx2_cptlf_disable_iqueues(struct otx2_cptlfs_info *lfs) +{ + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) + otx2_cptlf_do_disable_iqueue(&lfs->lf[slot]); +} + +static inline void otx2_cptlf_set_iqueue_enq(struct otx2_cptlf_info *lf, + bool enable) +{ + union otx2_cptx_lf_ctl lf_ctl; + + lf_ctl.u = otx2_cpt_read64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_CTL); + + /* Set iqueue's enqueuing */ + lf_ctl.s.ena = enable ? 0x1 : 0x0; + otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_CTL, lf_ctl.u); +} + +static inline void otx2_cptlf_enable_iqueue_enq(struct otx2_cptlf_info *lf) +{ + otx2_cptlf_set_iqueue_enq(lf, true); +} + +static inline void otx2_cptlf_set_iqueue_exec(struct otx2_cptlf_info *lf, + bool enable) +{ + union otx2_cptx_lf_inprog lf_inprog; + + lf_inprog.u = otx2_cpt_read64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_INPROG); + + /* Set iqueue's execution */ + lf_inprog.s.eena = enable ? 0x1 : 0x0; + otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot, + OTX2_CPT_LF_INPROG, lf_inprog.u); +} + +static inline void otx2_cptlf_enable_iqueue_exec(struct otx2_cptlf_info *lf) +{ + otx2_cptlf_set_iqueue_exec(lf, true); +} + +static inline void otx2_cptlf_disable_iqueue_exec(struct otx2_cptlf_info *lf) +{ + otx2_cptlf_set_iqueue_exec(lf, false); +} + +static inline void otx2_cptlf_enable_iqueues(struct otx2_cptlfs_info *lfs) +{ + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) { + otx2_cptlf_enable_iqueue_exec(&lfs->lf[slot]); + otx2_cptlf_enable_iqueue_enq(&lfs->lf[slot]); + } +} + +int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_msk, int pri, + int lfs_num); +void otx2_cptlf_shutdown(struct otx2_cptlfs_info *lfs); +int otx2_cptlf_register_interrupts(struct otx2_cptlfs_info *lfs); +void otx2_cptlf_unregister_interrupts(struct otx2_cptlfs_info *lfs); +void otx2_cptlf_free_irqs_affinity(struct otx2_cptlfs_info *lfs); +int otx2_cptlf_set_irqs_affinity(struct otx2_cptlfs_info *lfs); + +#endif /* __OTX2_CPTLF_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h index ac9795978286..f441c2dbe7bf 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h @@ -7,6 +7,7 @@ #include "otx2_cpt_common.h" #include "otx2_cptpf_ucode.h" +#include "otx2_cptlf.h" struct otx2_cptpf_dev; struct otx2_cptvf_info { @@ -29,6 +30,7 @@ struct otx2_cptpf_dev { struct pci_dev *pdev; /* PCI device handle */ struct otx2_cptvf_info vf[OTX2_CPT_MAX_VFS_NUM]; struct otx2_cpt_eng_grps eng_grps;/* Engine groups information */ + struct otx2_cptlfs_info lfs; /* CPT LFs attached to this PF */ /* AF <=> PF mbox */ struct otx2_mbox afpf_mbox; diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c index 08e18fe6817b..2270967eaf65 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c @@ -207,6 +207,14 @@ static void process_afpf_mbox_msg(struct otx2_cptpf_dev *cptpf, if (!rsp_rd_wr->is_write) *rsp_rd_wr->ret_val = rsp_rd_wr->val; break; + case MBOX_MSG_ATTACH_RESOURCES: + if (!msg->rc) + cptpf->lfs.are_lfs_attached = 1; + break; + case MBOX_MSG_DETACH_RESOURCES: + if (!msg->rc) + cptpf->lfs.are_lfs_attached = 0; + break; default: dev_err(dev, From 78506c2a1eac97504ff56de1c587bac403ca8dca Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Fri, 15 Jan 2021 19:22:24 +0530 Subject: [PATCH 094/154] crypto: octeontx2 - add support to get engine capabilities Adds support to get engine capabilities and adds a new mailbox to share capabilities with VF driver. Signed-off-by: Suheil Chandran Signed-off-by: Srujana Challa Signed-off-by: Herbert Xu --- .../marvell/octeontx2/otx2_cpt_common.h | 36 ++++ .../marvell/octeontx2/otx2_cpt_reqmgr.h | 51 ++++++ drivers/crypto/marvell/octeontx2/otx2_cptlf.h | 62 +++++++ drivers/crypto/marvell/octeontx2/otx2_cptpf.h | 3 + .../marvell/octeontx2/otx2_cptpf_main.c | 5 + .../marvell/octeontx2/otx2_cptpf_mbox.c | 31 ++++ .../marvell/octeontx2/otx2_cptpf_ucode.c | 161 ++++++++++++++++++ .../marvell/octeontx2/otx2_cptpf_ucode.h | 1 + 8 files changed, 350 insertions(+) create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h index d5576f5d3b90..705a0503b962 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -20,6 +20,7 @@ #define OTX2_CPT_INVALID_CRYPTO_ENG_GRP 0xFF #define OTX2_CPT_NAME_LENGTH 64 +#define OTX2_CPT_DMA_MINALIGN 128 #define BAD_OTX2_CPT_ENG_TYPE OTX2_CPT_MAX_ENG_TYPES @@ -32,6 +33,7 @@ enum otx2_cpt_eng_type { /* Take mbox id from end of CPT mbox range in AF (range 0xA00 - 0xBFF) */ #define MBOX_MSG_GET_ENG_GRP_NUM 0xBFF +#define MBOX_MSG_GET_CAPS 0xBFD /* * Message request and response to get engine group number @@ -49,6 +51,40 @@ struct otx2_cpt_egrp_num_rsp { u8 eng_grp_num; }; +/* CPT HW capabilities */ +union otx2_cpt_eng_caps { + u64 u; + struct { + u64 reserved_0_4:5; + u64 mul:1; + u64 sha1_sha2:1; + u64 chacha20:1; + u64 zuc_snow3g:1; + u64 sha3:1; + u64 aes:1; + u64 kasumi:1; + u64 des:1; + u64 crc:1; + u64 reserved_14_63:50; + }; +}; + +/* + * Message request and response to get HW capabilities for each + * engine type (SE, IE, AE). + * This messages are only used between CPT PF <=> CPT VF + */ +struct otx2_cpt_caps_msg { + struct mbox_msghdr hdr; +}; + +struct otx2_cpt_caps_rsp { + struct mbox_msghdr hdr; + u16 cpt_pf_drv_version; + u8 cpt_revision; + union otx2_cpt_eng_caps eng_caps[OTX2_CPT_MAX_ENG_TYPES]; +}; + static inline void otx2_cpt_write64(void __iomem *reg_base, u64 blk, u64 slot, u64 offs, u64 val) { diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h new file mode 100644 index 000000000000..9184f91c68c1 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPT_REQMGR_H +#define __OTX2_CPT_REQMGR_H + +#include "otx2_cpt_common.h" + +/* Completion code size and initial value */ +#define OTX2_CPT_COMPLETION_CODE_SIZE 8 +#define OTX2_CPT_COMPLETION_CODE_INIT OTX2_CPT_COMP_E_NOTDONE + +union otx2_cpt_opcode { + u16 flags; + struct { + u8 major; + u8 minor; + } s; +}; + +/* + * CPT_INST_S software command definitions + * Words EI (0-3) + */ +union otx2_cpt_iq_cmd_word0 { + u64 u; + struct { + __be16 opcode; + __be16 param1; + __be16 param2; + __be16 dlen; + } s; +}; + +union otx2_cpt_iq_cmd_word3 { + u64 u; + struct { + u64 cptr:61; + u64 grp:3; + } s; +}; + +struct otx2_cpt_iq_command { + union otx2_cpt_iq_cmd_word0 cmd; + u64 dptr; + u64 rptr; + union otx2_cpt_iq_cmd_word3 cptr; +}; + +#endif /* __OTX2_CPT_REQMGR_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h index 37ae0d2dcf55..efb4f395a54a 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h @@ -4,9 +4,11 @@ #ifndef __OTX2_CPTLF_H #define __OTX2_CPTLF_H +#include #include #include #include "otx2_cpt_common.h" +#include "otx2_cpt_reqmgr.h" /* * CPT instruction and pending queues user requested length in CPT_INST_S msgs @@ -272,6 +274,66 @@ static inline void otx2_cptlf_enable_iqueues(struct otx2_cptlfs_info *lfs) } } +static inline void otx2_cpt_fill_inst(union otx2_cpt_inst_s *cptinst, + struct otx2_cpt_iq_command *iq_cmd, + u64 comp_baddr) +{ + cptinst->u[0] = 0x0; + cptinst->s.doneint = true; + cptinst->s.res_addr = comp_baddr; + cptinst->u[2] = 0x0; + cptinst->u[3] = 0x0; + cptinst->s.ei0 = iq_cmd->cmd.u; + cptinst->s.ei1 = iq_cmd->dptr; + cptinst->s.ei2 = iq_cmd->rptr; + cptinst->s.ei3 = iq_cmd->cptr.u; +} + +/* + * On OcteonTX2 platform the parameter insts_num is used as a count of + * instructions to be enqueued. The valid values for insts_num are: + * 1 - 1 CPT instruction will be enqueued during LMTST operation + * 2 - 2 CPT instructions will be enqueued during LMTST operation + */ +static inline void otx2_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, + u32 insts_num, struct otx2_cptlf_info *lf) +{ + void __iomem *lmtline = lf->lmtline; + long ret; + + /* + * Make sure memory areas pointed in CPT_INST_S + * are flushed before the instruction is sent to CPT + */ + dma_wmb(); + + do { + /* Copy CPT command to LMTLINE */ + memcpy_toio(lmtline, cptinst, insts_num * OTX2_CPT_INST_SIZE); + + /* + * LDEOR initiates atomic transfer to I/O device + * The following will cause the LMTST to fail (the LDEOR + * returns zero): + * - No stores have been performed to the LMTLINE since it was + * last invalidated. + * - The bytes which have been stored to LMTLINE since it was + * last invalidated form a pattern that is non-contiguous, does + * not start at byte 0, or does not end on a 8-byte boundary. + * (i.e.comprises a formation of other than 1–16 8-byte + * words.) + * + * These rules are designed such that an operating system + * context switch or hypervisor guest switch need have no + * knowledge of the LMTST operations; the switch code does not + * need to store to LMTCANCEL. Also note as LMTLINE data cannot + * be read, there is no information leakage between processes. + */ + ret = otx2_lmt_flush(lf->ioreg); + + } while (!ret); +} + int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_msk, int pri, int lfs_num); void otx2_cptlf_shutdown(struct otx2_cptlfs_info *lfs); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h index f441c2dbe7bf..ede230dc205f 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h @@ -31,6 +31,9 @@ struct otx2_cptpf_dev { struct otx2_cptvf_info vf[OTX2_CPT_MAX_VFS_NUM]; struct otx2_cpt_eng_grps eng_grps;/* Engine groups information */ struct otx2_cptlfs_info lfs; /* CPT LFs attached to this PF */ + /* HW capabilities for each engine type */ + union otx2_cpt_eng_caps eng_caps[OTX2_CPT_MAX_ENG_TYPES]; + bool is_eng_caps_discovered; /* AF <=> PF mbox */ struct otx2_mbox afpf_mbox; diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c index f0ad45c04df7..502bcf21ea89 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -500,6 +500,11 @@ static int cptpf_sriov_enable(struct pci_dev *pdev, int num_vfs) if (ret) goto destroy_flr; + /* Get CPT HW capabilities using LOAD_FVC operation. */ + ret = otx2_cpt_discover_eng_capabilities(cptpf); + if (ret) + goto disable_intr; + ret = otx2_cpt_create_eng_grps(cptpf->pdev, &cptpf->eng_grps); if (ret) goto disable_intr; diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c index 2270967eaf65..0834dc3b3e59 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c @@ -5,6 +5,12 @@ #include "otx2_cptpf.h" #include "rvu_reg.h" +/* + * CPT PF driver version, It will be incremented by 1 for every feature + * addition in CPT mailbox messages. + */ +#define OTX2_CPT_PF_DRV_VERSION 0x1 + static int forward_to_af(struct otx2_cptpf_dev *cptpf, struct otx2_cptvf_info *vf, struct mbox_msghdr *req, int size) @@ -35,6 +41,28 @@ static int forward_to_af(struct otx2_cptpf_dev *cptpf, return 0; } +static int handle_msg_get_caps(struct otx2_cptpf_dev *cptpf, + struct otx2_cptvf_info *vf, + struct mbox_msghdr *req) +{ + struct otx2_cpt_caps_rsp *rsp; + + rsp = (struct otx2_cpt_caps_rsp *) + otx2_mbox_alloc_msg(&cptpf->vfpf_mbox, vf->vf_id, + sizeof(*rsp)); + if (!rsp) + return -ENOMEM; + + rsp->hdr.id = MBOX_MSG_GET_CAPS; + rsp->hdr.sig = OTX2_MBOX_RSP_SIG; + rsp->hdr.pcifunc = req->pcifunc; + rsp->cpt_pf_drv_version = OTX2_CPT_PF_DRV_VERSION; + rsp->cpt_revision = cptpf->pdev->revision; + memcpy(&rsp->eng_caps, &cptpf->eng_caps, sizeof(rsp->eng_caps)); + + return 0; +} + static int handle_msg_get_eng_grp_num(struct otx2_cptpf_dev *cptpf, struct otx2_cptvf_info *vf, struct mbox_msghdr *req) @@ -72,6 +100,9 @@ static int cptpf_handle_vf_req(struct otx2_cptpf_dev *cptpf, case MBOX_MSG_GET_ENG_GRP_NUM: err = handle_msg_get_eng_grp_num(cptpf, vf, req); break; + case MBOX_MSG_GET_CAPS: + err = handle_msg_get_caps(cptpf, vf, req); + break; default: err = forward_to_af(cptpf, vf, req, size); break; diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c index 64f0e7e2652a..1dc3ba298139 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c @@ -6,6 +6,8 @@ #include "otx2_cptpf_ucode.h" #include "otx2_cpt_common.h" #include "otx2_cptpf.h" +#include "otx2_cptlf.h" +#include "otx2_cpt_reqmgr.h" #include "rvu_reg.h" #define CSR_DELAY 30 @@ -1252,3 +1254,162 @@ cleanup_eng_grps: otx2_cpt_cleanup_eng_grps(pdev, eng_grps); return ret; } + +static int create_eng_caps_discovery_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps) +{ + struct otx2_cpt_uc_info_t *uc_info[OTX2_CPT_MAX_ETYPES_PER_GRP] = { }; + struct otx2_cpt_engines engs[OTX2_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + struct fw_info_t fw_info; + int ret; + + ret = cpt_ucode_load_fw(pdev, &fw_info); + if (ret) + return ret; + + uc_info[0] = get_ucode(&fw_info, OTX2_CPT_SE_TYPES); + if (uc_info[0] == NULL) { + dev_err(&pdev->dev, "Unable to find firmware for AE\n"); + ret = -EINVAL; + goto release_fw; + } + engs[0].type = OTX2_CPT_AE_TYPES; + engs[0].count = 2; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) uc_info, 0); + if (ret) + goto release_fw; + + uc_info[0] = get_ucode(&fw_info, OTX2_CPT_SE_TYPES); + if (uc_info[0] == NULL) { + dev_err(&pdev->dev, "Unable to find firmware for SE\n"); + ret = -EINVAL; + goto delete_eng_grp; + } + engs[0].type = OTX2_CPT_SE_TYPES; + engs[0].count = 2; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) uc_info, 0); + if (ret) + goto delete_eng_grp; + + uc_info[0] = get_ucode(&fw_info, OTX2_CPT_IE_TYPES); + if (uc_info[0] == NULL) { + dev_err(&pdev->dev, "Unable to find firmware for IE\n"); + ret = -EINVAL; + goto delete_eng_grp; + } + engs[0].type = OTX2_CPT_IE_TYPES; + engs[0].count = 2; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) uc_info, 0); + if (ret) + goto delete_eng_grp; + + cpt_ucode_release_fw(&fw_info); + return 0; + +delete_eng_grp: + delete_engine_grps(pdev, eng_grps); +release_fw: + cpt_ucode_release_fw(&fw_info); + return ret; +} + +/* + * Get CPT HW capabilities using LOAD_FVC operation. + */ +int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) +{ + struct otx2_cptlfs_info *lfs = &cptpf->lfs; + struct otx2_cpt_iq_command iq_cmd; + union otx2_cpt_opcode opcode; + union otx2_cpt_res_s *result; + union otx2_cpt_inst_s inst; + dma_addr_t rptr_baddr; + struct pci_dev *pdev; + u32 len, compl_rlen; + int ret, etype; + void *rptr; + + /* + * We don't get capabilities if it was already done + * (when user enabled VFs for the first time) + */ + if (cptpf->is_eng_caps_discovered) + return 0; + + pdev = cptpf->pdev; + /* + * Create engine groups for each type to submit LOAD_FVC op and + * get engine's capabilities. + */ + ret = create_eng_caps_discovery_grps(pdev, &cptpf->eng_grps); + if (ret) + goto delete_grps; + + lfs->pdev = pdev; + lfs->reg_base = cptpf->reg_base; + lfs->mbox = &cptpf->afpf_mbox; + ret = otx2_cptlf_init(&cptpf->lfs, OTX2_CPT_ALL_ENG_GRPS_MASK, + OTX2_CPT_QUEUE_HI_PRIO, 1); + if (ret) + goto delete_grps; + + compl_rlen = ALIGN(sizeof(union otx2_cpt_res_s), OTX2_CPT_DMA_MINALIGN); + len = compl_rlen + LOADFVC_RLEN; + + result = kzalloc(len, GFP_KERNEL); + if (!result) { + ret = -ENOMEM; + goto lf_cleanup; + } + rptr_baddr = dma_map_single(&pdev->dev, (void *)result, len, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&pdev->dev, rptr_baddr)) { + dev_err(&pdev->dev, "DMA mapping failed\n"); + ret = -EFAULT; + goto free_result; + } + rptr = (u8 *)result + compl_rlen; + + /* Fill in the command */ + opcode.s.major = LOADFVC_MAJOR_OP; + opcode.s.minor = LOADFVC_MINOR_OP; + + iq_cmd.cmd.u = 0; + iq_cmd.cmd.s.opcode = cpu_to_be16(opcode.flags); + + /* 64-bit swap for microcode data reads, not needed for addresses */ + cpu_to_be64s(&iq_cmd.cmd.u); + iq_cmd.dptr = 0; + iq_cmd.rptr = rptr_baddr + compl_rlen; + iq_cmd.cptr.u = 0; + + for (etype = 1; etype < OTX2_CPT_MAX_ENG_TYPES; etype++) { + result->s.compcode = OTX2_CPT_COMPLETION_CODE_INIT; + iq_cmd.cptr.s.grp = otx2_cpt_get_eng_grp(&cptpf->eng_grps, + etype); + otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr); + otx2_cpt_send_cmd(&inst, 1, &cptpf->lfs.lf[0]); + + while (result->s.compcode == OTX2_CPT_COMPLETION_CODE_INIT) + cpu_relax(); + + cptpf->eng_caps[etype].u = be64_to_cpup(rptr); + } + dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL); + cptpf->is_eng_caps_discovered = true; + +free_result: + kfree(result); +lf_cleanup: + otx2_cptlf_shutdown(&cptpf->lfs); +delete_grps: + delete_engine_grps(pdev, &cptpf->eng_grps); + + return ret; +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h index 96556399a58c..6b0d432de0af 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h @@ -157,5 +157,6 @@ int otx2_cpt_create_eng_grps(struct pci_dev *pdev, struct otx2_cpt_eng_grps *eng_grps); int otx2_cpt_disable_all_cores(struct otx2_cptpf_dev *cptpf); int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type); +int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf); #endif /* __OTX2_CPTPF_UCODE_H */ From 19d8e8c7be1567b92e99f7201b8e9b286d04dc0f Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Fri, 15 Jan 2021 19:22:25 +0530 Subject: [PATCH 095/154] crypto: octeontx2 - add virtual function driver support Add support for the Marvell OcteonTX2 CPT virtual function driver. This patch includes probe, PCI specific initialization and interrupt handling. Signed-off-by: Suheil Chandran Signed-off-by: Lukasz Bartosik Signed-off-by: Srujana Challa Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/Makefile | 4 +- .../marvell/octeontx2/otx2_cpt_common.h | 1 + .../marvell/octeontx2/otx2_cpt_mbox_common.c | 32 +++ drivers/crypto/marvell/octeontx2/otx2_cptvf.h | 28 +++ .../marvell/octeontx2/otx2_cptvf_main.c | 196 ++++++++++++++++++ .../marvell/octeontx2/otx2_cptvf_mbox.c | 113 ++++++++++ 6 files changed, 373 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptvf.h create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile index e47a55961bb8..ef6fb2ab3571 100644 --- a/drivers/crypto/marvell/octeontx2/Makefile +++ b/drivers/crypto/marvell/octeontx2/Makefile @@ -1,7 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o octeontx2-cptvf.o octeontx2-cpt-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o +octeontx2-cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \ + otx2_cpt_mbox_common.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h index 705a0503b962..ca220178e518 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -115,5 +115,6 @@ int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, struct otx2_cptlfs_info; int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs); int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs); +int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs); #endif /* __OTX2_CPT_COMMON_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c index 0933031ac827..51cb6404ded7 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c @@ -168,3 +168,35 @@ int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs) return ret; } + +int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs) +{ + struct otx2_mbox *mbox = lfs->mbox; + struct pci_dev *pdev = lfs->pdev; + struct mbox_msghdr *req; + int ret, i; + + req = otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct msix_offset_rsp)); + if (req == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + + req->id = MBOX_MSG_MSIX_OFFSET; + req->sig = OTX2_MBOX_REQ_SIG; + req->pcifunc = 0; + ret = otx2_cpt_send_mbox_msg(mbox, pdev); + if (ret) + return ret; + + for (i = 0; i < lfs->lfs_num; i++) { + if (lfs->lf[i].msix_offset == MSIX_VECTOR_INVALID) { + dev_err(&pdev->dev, + "Invalid msix offset %d for LF %d\n", + lfs->lf[i].msix_offset, i); + return -EINVAL; + } + } + return ret; +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf.h b/drivers/crypto/marvell/octeontx2/otx2_cptvf.h new file mode 100644 index 000000000000..4b01eb9d9f70 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPTVF_H +#define __OTX2_CPTVF_H + +#include "mbox.h" +#include "otx2_cptlf.h" + +struct otx2_cptvf_dev { + void __iomem *reg_base; /* Register start address */ + void __iomem *pfvf_mbox_base; /* PF-VF mbox start address */ + struct pci_dev *pdev; /* PCI device handle */ + struct otx2_cptlfs_info lfs; /* CPT LFs attached to this VF */ + u8 vf_id; /* Virtual function index */ + + /* PF <=> VF mbox */ + struct otx2_mbox pfvf_mbox; + struct work_struct pfvf_mbox_work; + struct workqueue_struct *pfvf_mbox_wq; +}; + +irqreturn_t otx2_cptvf_pfvf_mbox_intr(int irq, void *arg); +void otx2_cptvf_pfvf_mbox_handler(struct work_struct *work); +int otx2_cptvf_send_eng_grp_num_msg(struct otx2_cptvf_dev *cptvf, int eng_type); + +#endif /* __OTX2_CPTVF_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c new file mode 100644 index 000000000000..6d84fafa2915 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include "otx2_cpt_common.h" +#include "otx2_cptvf.h" +#include + +#define OTX2_CPTVF_DRV_NAME "octeontx2-cptvf" + +static void cptvf_enable_pfvf_mbox_intrs(struct otx2_cptvf_dev *cptvf) +{ + /* Clear interrupt if any */ + otx2_cpt_write64(cptvf->reg_base, BLKADDR_RVUM, 0, OTX2_RVU_VF_INT, + 0x1ULL); + + /* Enable PF-VF interrupt */ + otx2_cpt_write64(cptvf->reg_base, BLKADDR_RVUM, 0, + OTX2_RVU_VF_INT_ENA_W1S, 0x1ULL); +} + +static void cptvf_disable_pfvf_mbox_intrs(struct otx2_cptvf_dev *cptvf) +{ + /* Disable PF-VF interrupt */ + otx2_cpt_write64(cptvf->reg_base, BLKADDR_RVUM, 0, + OTX2_RVU_VF_INT_ENA_W1C, 0x1ULL); + + /* Clear interrupt if any */ + otx2_cpt_write64(cptvf->reg_base, BLKADDR_RVUM, 0, OTX2_RVU_VF_INT, + 0x1ULL); +} + +static int cptvf_register_interrupts(struct otx2_cptvf_dev *cptvf) +{ + int ret, irq; + u32 num_vec; + + num_vec = pci_msix_vec_count(cptvf->pdev); + if (num_vec <= 0) + return -EINVAL; + + /* Enable MSI-X */ + ret = pci_alloc_irq_vectors(cptvf->pdev, num_vec, num_vec, + PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&cptvf->pdev->dev, + "Request for %d msix vectors failed\n", num_vec); + return ret; + } + irq = pci_irq_vector(cptvf->pdev, OTX2_CPT_VF_INT_VEC_E_MBOX); + /* Register VF<=>PF mailbox interrupt handler */ + ret = devm_request_irq(&cptvf->pdev->dev, irq, + otx2_cptvf_pfvf_mbox_intr, 0, + "CPTPFVF Mbox", cptvf); + if (ret) + return ret; + /* Enable PF-VF mailbox interrupts */ + cptvf_enable_pfvf_mbox_intrs(cptvf); + + ret = otx2_cpt_send_ready_msg(&cptvf->pfvf_mbox, cptvf->pdev); + if (ret) { + dev_warn(&cptvf->pdev->dev, + "PF not responding to mailbox, deferring probe\n"); + cptvf_disable_pfvf_mbox_intrs(cptvf); + return -EPROBE_DEFER; + } + return 0; +} + +static int cptvf_pfvf_mbox_init(struct otx2_cptvf_dev *cptvf) +{ + int ret; + + cptvf->pfvf_mbox_wq = alloc_workqueue("cpt_pfvf_mailbox", + WQ_UNBOUND | WQ_HIGHPRI | + WQ_MEM_RECLAIM, 1); + if (!cptvf->pfvf_mbox_wq) + return -ENOMEM; + + ret = otx2_mbox_init(&cptvf->pfvf_mbox, cptvf->pfvf_mbox_base, + cptvf->pdev, cptvf->reg_base, MBOX_DIR_VFPF, 1); + if (ret) + goto free_wqe; + + INIT_WORK(&cptvf->pfvf_mbox_work, otx2_cptvf_pfvf_mbox_handler); + return 0; + +free_wqe: + destroy_workqueue(cptvf->pfvf_mbox_wq); + return ret; +} + +static void cptvf_pfvf_mbox_destroy(struct otx2_cptvf_dev *cptvf) +{ + destroy_workqueue(cptvf->pfvf_mbox_wq); + otx2_mbox_destroy(&cptvf->pfvf_mbox); +} + +static int otx2_cptvf_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + resource_size_t offset, size; + struct otx2_cptvf_dev *cptvf; + int ret; + + cptvf = devm_kzalloc(dev, sizeof(*cptvf), GFP_KERNEL); + if (!cptvf) + return -ENOMEM; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(dev, "Failed to enable PCI device\n"); + goto clear_drvdata; + } + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto clear_drvdata; + } + /* Map VF's configuration registers */ + ret = pcim_iomap_regions_request_all(pdev, 1 << PCI_PF_REG_BAR_NUM, + OTX2_CPTVF_DRV_NAME); + if (ret) { + dev_err(dev, "Couldn't get PCI resources 0x%x\n", ret); + goto clear_drvdata; + } + pci_set_master(pdev); + pci_set_drvdata(pdev, cptvf); + cptvf->pdev = pdev; + + cptvf->reg_base = pcim_iomap_table(pdev)[PCI_PF_REG_BAR_NUM]; + + offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM); + size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM); + /* Map PF-VF mailbox memory */ + cptvf->pfvf_mbox_base = devm_ioremap_wc(dev, offset, size); + if (!cptvf->pfvf_mbox_base) { + dev_err(&pdev->dev, "Unable to map BAR4\n"); + ret = -ENODEV; + goto clear_drvdata; + } + /* Initialize PF<=>VF mailbox */ + ret = cptvf_pfvf_mbox_init(cptvf); + if (ret) + goto clear_drvdata; + + /* Register interrupts */ + ret = cptvf_register_interrupts(cptvf); + if (ret) + goto destroy_pfvf_mbox; + + return 0; + +destroy_pfvf_mbox: + cptvf_pfvf_mbox_destroy(cptvf); +clear_drvdata: + pci_set_drvdata(pdev, NULL); + + return ret; +} + +static void otx2_cptvf_remove(struct pci_dev *pdev) +{ + struct otx2_cptvf_dev *cptvf = pci_get_drvdata(pdev); + + if (!cptvf) { + dev_err(&pdev->dev, "Invalid CPT VF device.\n"); + return; + } + /* Disable PF-VF mailbox interrupt */ + cptvf_disable_pfvf_mbox_intrs(cptvf); + /* Destroy PF-VF mbox */ + cptvf_pfvf_mbox_destroy(cptvf); + pci_set_drvdata(pdev, NULL); +} + +/* Supported devices */ +static const struct pci_device_id otx2_cptvf_id_table[] = { + {PCI_VDEVICE(CAVIUM, OTX2_CPT_PCI_VF_DEVICE_ID), 0}, + { 0, } /* end of table */ +}; + +static struct pci_driver otx2_cptvf_pci_driver = { + .name = OTX2_CPTVF_DRV_NAME, + .id_table = otx2_cptvf_id_table, + .probe = otx2_cptvf_probe, + .remove = otx2_cptvf_remove, +}; + +module_pci_driver(otx2_cptvf_pci_driver); + +MODULE_AUTHOR("Marvell"); +MODULE_DESCRIPTION("Marvell OcteonTX2 CPT Virtual Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_DEVICE_TABLE(pci, otx2_cptvf_id_table); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c new file mode 100644 index 000000000000..417099a86742 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include "otx2_cpt_common.h" +#include "otx2_cptvf.h" +#include + +irqreturn_t otx2_cptvf_pfvf_mbox_intr(int __always_unused irq, void *arg) +{ + struct otx2_cptvf_dev *cptvf = arg; + u64 intr; + + /* Read the interrupt bits */ + intr = otx2_cpt_read64(cptvf->reg_base, BLKADDR_RVUM, 0, + OTX2_RVU_VF_INT); + + if (intr & 0x1ULL) { + /* Schedule work queue function to process the MBOX request */ + queue_work(cptvf->pfvf_mbox_wq, &cptvf->pfvf_mbox_work); + /* Clear and ack the interrupt */ + otx2_cpt_write64(cptvf->reg_base, BLKADDR_RVUM, 0, + OTX2_RVU_VF_INT, 0x1ULL); + } + return IRQ_HANDLED; +} + +static void process_pfvf_mbox_mbox_msg(struct otx2_cptvf_dev *cptvf, + struct mbox_msghdr *msg) +{ + struct otx2_cptlfs_info *lfs = &cptvf->lfs; + struct cpt_rd_wr_reg_msg *rsp_reg; + struct msix_offset_rsp *rsp_msix; + int i; + + if (msg->id >= MBOX_MSG_MAX) { + dev_err(&cptvf->pdev->dev, + "MBOX msg with unknown ID %d\n", msg->id); + return; + } + if (msg->sig != OTX2_MBOX_RSP_SIG) { + dev_err(&cptvf->pdev->dev, + "MBOX msg with wrong signature %x, ID %d\n", + msg->sig, msg->id); + return; + } + switch (msg->id) { + case MBOX_MSG_READY: + cptvf->vf_id = ((msg->pcifunc >> RVU_PFVF_FUNC_SHIFT) + & RVU_PFVF_FUNC_MASK) - 1; + break; + case MBOX_MSG_ATTACH_RESOURCES: + /* Check if resources were successfully attached */ + if (!msg->rc) + lfs->are_lfs_attached = 1; + break; + case MBOX_MSG_DETACH_RESOURCES: + /* Check if resources were successfully detached */ + if (!msg->rc) + lfs->are_lfs_attached = 0; + break; + case MBOX_MSG_MSIX_OFFSET: + rsp_msix = (struct msix_offset_rsp *) msg; + for (i = 0; i < rsp_msix->cptlfs; i++) + lfs->lf[i].msix_offset = rsp_msix->cptlf_msixoff[i]; + break; + case MBOX_MSG_CPT_RD_WR_REGISTER: + rsp_reg = (struct cpt_rd_wr_reg_msg *) msg; + if (msg->rc) { + dev_err(&cptvf->pdev->dev, + "Reg %llx rd/wr(%d) failed %d\n", + rsp_reg->reg_offset, rsp_reg->is_write, + msg->rc); + return; + } + if (!rsp_reg->is_write) + *rsp_reg->ret_val = rsp_reg->val; + break; + default: + dev_err(&cptvf->pdev->dev, "Unsupported msg %d received.\n", + msg->id); + break; + } +} + +void otx2_cptvf_pfvf_mbox_handler(struct work_struct *work) +{ + struct otx2_cptvf_dev *cptvf; + struct otx2_mbox *pfvf_mbox; + struct otx2_mbox_dev *mdev; + struct mbox_hdr *rsp_hdr; + struct mbox_msghdr *msg; + int offset, i; + + /* sync with mbox memory region */ + smp_rmb(); + + cptvf = container_of(work, struct otx2_cptvf_dev, pfvf_mbox_work); + pfvf_mbox = &cptvf->pfvf_mbox; + mdev = &pfvf_mbox->dev[0]; + rsp_hdr = (struct mbox_hdr *)(mdev->mbase + pfvf_mbox->rx_start); + if (rsp_hdr->num_msgs == 0) + return; + offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + + for (i = 0; i < rsp_hdr->num_msgs; i++) { + msg = (struct mbox_msghdr *)(mdev->mbase + pfvf_mbox->rx_start + + offset); + process_pfvf_mbox_mbox_msg(cptvf, msg); + offset = msg->next_msgoff; + mdev->msgs_acked++; + } + otx2_mbox_reset(pfvf_mbox, 0); +} From 8ec8015a316816b07538635fe9c04c35ad63acfc Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Fri, 15 Jan 2021 19:22:26 +0530 Subject: [PATCH 096/154] crypto: octeontx2 - add support to process the crypto request Attach LFs to CPT VF to process the crypto requests and register LF interrupts. Signed-off-by: Suheil Chandran Signed-off-by: Lukasz Bartosik Signed-off-by: Srujana Challa Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/Makefile | 2 +- .../marvell/octeontx2/otx2_cpt_common.h | 17 + .../marvell/octeontx2/otx2_cpt_reqmgr.h | 145 +++++ drivers/crypto/marvell/octeontx2/otx2_cptlf.h | 8 + drivers/crypto/marvell/octeontx2/otx2_cptpf.h | 1 + .../marvell/octeontx2/otx2_cptpf_main.c | 47 ++ .../marvell/octeontx2/otx2_cptpf_mbox.c | 22 + drivers/crypto/marvell/octeontx2/otx2_cptvf.h | 1 + .../marvell/octeontx2/otx2_cptvf_main.c | 204 +++++++ .../marvell/octeontx2/otx2_cptvf_mbox.c | 54 ++ .../marvell/octeontx2/otx2_cptvf_reqmgr.c | 534 ++++++++++++++++++ 11 files changed, 1034 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile index ef6fb2ab3571..41c0a5832b3f 100644 --- a/drivers/crypto/marvell/octeontx2/Makefile +++ b/drivers/crypto/marvell/octeontx2/Makefile @@ -4,6 +4,6 @@ obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o octeontx2-cptvf.o octeontx2-cpt-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o octeontx2-cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \ - otx2_cpt_mbox_common.o + otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h index ca220178e518..3518fac29834 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -17,6 +17,9 @@ #define OTX2_CPT_MAX_VFS_NUM 128 #define OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs) \ (((blk) << 20) | ((slot) << 12) | (offs)) +#define OTX2_CPT_RVU_PFFUNC(pf, func) \ + ((((pf) & RVU_PFVF_PF_MASK) << RVU_PFVF_PF_SHIFT) | \ + (((func) & RVU_PFVF_FUNC_MASK) << RVU_PFVF_FUNC_SHIFT)) #define OTX2_CPT_INVALID_CRYPTO_ENG_GRP 0xFF #define OTX2_CPT_NAME_LENGTH 64 @@ -34,6 +37,7 @@ enum otx2_cpt_eng_type { /* Take mbox id from end of CPT mbox range in AF (range 0xA00 - 0xBFF) */ #define MBOX_MSG_GET_ENG_GRP_NUM 0xBFF #define MBOX_MSG_GET_CAPS 0xBFD +#define MBOX_MSG_GET_KVF_LIMITS 0xBFC /* * Message request and response to get engine group number @@ -51,6 +55,19 @@ struct otx2_cpt_egrp_num_rsp { u8 eng_grp_num; }; +/* + * Message request and response to get kernel crypto limits + * This messages are only used between CPT PF <-> CPT VF + */ +struct otx2_cpt_kvf_limits_msg { + struct mbox_msghdr hdr; +}; + +struct otx2_cpt_kvf_limits_rsp { + struct mbox_msghdr hdr; + u8 kvf_limits; +}; + /* CPT HW capabilities */ union otx2_cpt_eng_caps { u64 u; diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h index 9184f91c68c1..597a998c6df6 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h @@ -10,6 +10,22 @@ /* Completion code size and initial value */ #define OTX2_CPT_COMPLETION_CODE_SIZE 8 #define OTX2_CPT_COMPLETION_CODE_INIT OTX2_CPT_COMP_E_NOTDONE +/* + * Maximum total number of SG buffers is 100, we divide it equally + * between input and output + */ +#define OTX2_CPT_MAX_SG_IN_CNT 50 +#define OTX2_CPT_MAX_SG_OUT_CNT 50 + +/* DMA mode direct or SG */ +#define OTX2_CPT_DMA_MODE_DIRECT 0 +#define OTX2_CPT_DMA_MODE_SG 1 + +/* Context source CPTR or DPTR */ +#define OTX2_CPT_FROM_CPTR 0 +#define OTX2_CPT_FROM_DPTR 1 + +#define OTX2_CPT_MAX_REQ_SIZE 65535 union otx2_cpt_opcode { u16 flags; @@ -19,6 +35,13 @@ union otx2_cpt_opcode { } s; }; +struct otx2_cptvf_request { + u32 param1; + u32 param2; + u16 dlen; + union otx2_cpt_opcode opcode; +}; + /* * CPT_INST_S software command definitions * Words EI (0-3) @@ -48,4 +71,126 @@ struct otx2_cpt_iq_command { union otx2_cpt_iq_cmd_word3 cptr; }; +struct otx2_cpt_pending_entry { + void *completion_addr; /* Completion address */ + void *info; + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + u8 resume_sender; /* Notify sender to resume sending requests */ + u8 busy; /* Entry status (free/busy) */ +}; + +struct otx2_cpt_pending_queue { + struct otx2_cpt_pending_entry *head; /* Head of the queue */ + u32 front; /* Process work from here */ + u32 rear; /* Append new work here */ + u32 pending_count; /* Pending requests count */ + u32 qlen; /* Queue length */ + spinlock_t lock; /* Queue lock */ +}; + +struct otx2_cpt_buf_ptr { + u8 *vptr; + dma_addr_t dma_addr; + u16 size; +}; + +union otx2_cpt_ctrl_info { + u32 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved_6_31:26; + u32 grp:3; /* Group bits */ + u32 dma_mode:2; /* DMA mode */ + u32 se_req:1; /* To SE core */ +#else + u32 se_req:1; /* To SE core */ + u32 dma_mode:2; /* DMA mode */ + u32 grp:3; /* Group bits */ + u32 reserved_6_31:26; +#endif + } s; +}; + +struct otx2_cpt_req_info { + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + struct otx2_cptvf_request req;/* Request information (core specific) */ + union otx2_cpt_ctrl_info ctrl;/* User control information */ + struct otx2_cpt_buf_ptr in[OTX2_CPT_MAX_SG_IN_CNT]; + struct otx2_cpt_buf_ptr out[OTX2_CPT_MAX_SG_OUT_CNT]; + u8 *iv_out; /* IV to send back */ + u16 rlen; /* Output length */ + u8 in_cnt; /* Number of input buffers */ + u8 out_cnt; /* Number of output buffers */ + u8 req_type; /* Type of request */ + u8 is_enc; /* Is a request an encryption request */ + u8 is_trunc_hmac;/* Is truncated hmac used */ +}; + +struct otx2_cpt_inst_info { + struct otx2_cpt_pending_entry *pentry; + struct otx2_cpt_req_info *req; + struct pci_dev *pdev; + void *completion_addr; + u8 *out_buffer; + u8 *in_buffer; + dma_addr_t dptr_baddr; + dma_addr_t rptr_baddr; + dma_addr_t comp_baddr; + unsigned long time_in; + u32 dlen; + u32 dma_len; + u8 extra_time; +}; + +struct otx2_cpt_sglist_component { + __be16 len0; + __be16 len1; + __be16 len2; + __be16 len3; + __be64 ptr0; + __be64 ptr1; + __be64 ptr2; + __be64 ptr3; +}; + +static inline void otx2_cpt_info_destroy(struct pci_dev *pdev, + struct otx2_cpt_inst_info *info) +{ + struct otx2_cpt_req_info *req; + int i; + + if (info->dptr_baddr) + dma_unmap_single(&pdev->dev, info->dptr_baddr, + info->dma_len, DMA_BIDIRECTIONAL); + + if (info->req) { + req = info->req; + for (i = 0; i < req->out_cnt; i++) { + if (req->out[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->out[i].dma_addr, + req->out[i].size, + DMA_BIDIRECTIONAL); + } + + for (i = 0; i < req->in_cnt; i++) { + if (req->in[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->in[i].dma_addr, + req->in[i].size, + DMA_BIDIRECTIONAL); + } + } + kfree(info); +} + +struct otx2_cptlf_wqe; +int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req, + int cpu_num); +void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe); + #endif /* __OTX2_CPT_REQMGR_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h index efb4f395a54a..314e97354100 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h @@ -80,6 +80,7 @@ struct otx2_cptlf_info { u8 slot; /* Slot number of this LF */ struct otx2_cpt_inst_queue iqueue;/* Instruction queue */ + struct otx2_cpt_pending_queue pqueue; /* Pending queue */ struct otx2_cptlf_wqe *wqe; /* Tasklet work info */ }; @@ -91,6 +92,8 @@ struct otx2_cptlfs_info { struct otx2_mbox *mbox; u8 are_lfs_attached; /* Whether CPT LFs are attached */ u8 lfs_num; /* Number of CPT LFs */ + u8 kcrypto_eng_grp_num; /* Kernel crypto engine group number */ + u8 kvf_limits; /* Kernel crypto limits */ atomic_t state; /* LF's state. started/reset */ }; @@ -334,6 +337,11 @@ static inline void otx2_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, } while (!ret); } +static inline bool otx2_cptlf_started(struct otx2_cptlfs_info *lfs) +{ + return atomic_read(&lfs->state) == OTX2_CPTLF_STARTED; +} + int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_msk, int pri, int lfs_num); void otx2_cptlf_shutdown(struct otx2_cptlfs_info *lfs); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h index ede230dc205f..8c899ad531a5 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h @@ -50,6 +50,7 @@ struct otx2_cptpf_dev { u8 pf_id; /* RVU PF number */ u8 max_vfs; /* Maximum number of VFs supported by CPT */ u8 enabled_vfs; /* Number of enabled VFs */ + u8 kvf_limits; /* Kernel crypto limits */ }; irqreturn_t otx2_cptpf_afpf_mbox_intr(int irq, void *arg); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c index 502bcf21ea89..5277e04badd9 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -392,6 +392,46 @@ static void cptpf_afpf_mbox_destroy(struct otx2_cptpf_dev *cptpf) otx2_mbox_destroy(&cptpf->afpf_mbox); } +static ssize_t kvf_limits_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct otx2_cptpf_dev *cptpf = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", cptpf->kvf_limits); +} + +static ssize_t kvf_limits_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx2_cptpf_dev *cptpf = dev_get_drvdata(dev); + int lfs_num; + + if (kstrtoint(buf, 0, &lfs_num)) { + dev_err(dev, "lfs count %d must be in range [1 - %d]\n", + lfs_num, num_online_cpus()); + return -EINVAL; + } + if (lfs_num < 1 || lfs_num > num_online_cpus()) { + dev_err(dev, "lfs count %d must be in range [1 - %d]\n", + lfs_num, num_online_cpus()); + return -EINVAL; + } + cptpf->kvf_limits = lfs_num; + + return count; +} + +static DEVICE_ATTR_RW(kvf_limits); +static struct attribute *cptpf_attrs[] = { + &dev_attr_kvf_limits.attr, + NULL +}; + +static const struct attribute_group cptpf_sysfs_group = { + .attrs = cptpf_attrs, +}; + static int cpt_is_pf_usable(struct otx2_cptpf_dev *cptpf) { u64 rev; @@ -616,8 +656,13 @@ static int otx2_cptpf_probe(struct pci_dev *pdev, if (err) goto unregister_intr; + err = sysfs_create_group(&dev->kobj, &cptpf_sysfs_group); + if (err) + goto cleanup_eng_grps; return 0; +cleanup_eng_grps: + otx2_cpt_cleanup_eng_grps(pdev, &cptpf->eng_grps); unregister_intr: cptpf_disable_afpf_mbox_intr(cptpf); destroy_afpf_mbox: @@ -635,6 +680,8 @@ static void otx2_cptpf_remove(struct pci_dev *pdev) return; cptpf_sriov_disable(pdev); + /* Delete sysfs entry created for kernel VF limits */ + sysfs_remove_group(&pdev->dev.kobj, &cptpf_sysfs_group); /* Cleanup engine groups */ otx2_cpt_cleanup_eng_grps(pdev, &cptpf->eng_grps); /* Disable AF-PF mailbox interrupt */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c index 0834dc3b3e59..186f1c1190c1 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c @@ -86,6 +86,25 @@ static int handle_msg_get_eng_grp_num(struct otx2_cptpf_dev *cptpf, return 0; } +static int handle_msg_kvf_limits(struct otx2_cptpf_dev *cptpf, + struct otx2_cptvf_info *vf, + struct mbox_msghdr *req) +{ + struct otx2_cpt_kvf_limits_rsp *rsp; + + rsp = (struct otx2_cpt_kvf_limits_rsp *) + otx2_mbox_alloc_msg(&cptpf->vfpf_mbox, vf->vf_id, sizeof(*rsp)); + if (!rsp) + return -ENOMEM; + + rsp->hdr.id = MBOX_MSG_GET_KVF_LIMITS; + rsp->hdr.sig = OTX2_MBOX_RSP_SIG; + rsp->hdr.pcifunc = req->pcifunc; + rsp->kvf_limits = cptpf->kvf_limits; + + return 0; +} + static int cptpf_handle_vf_req(struct otx2_cptpf_dev *cptpf, struct otx2_cptvf_info *vf, struct mbox_msghdr *req, int size) @@ -103,6 +122,9 @@ static int cptpf_handle_vf_req(struct otx2_cptpf_dev *cptpf, case MBOX_MSG_GET_CAPS: err = handle_msg_get_caps(cptpf, vf, req); break; + case MBOX_MSG_GET_KVF_LIMITS: + err = handle_msg_kvf_limits(cptpf, vf, req); + break; default: err = forward_to_af(cptpf, vf, req, size); break; diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf.h b/drivers/crypto/marvell/octeontx2/otx2_cptvf.h index 4b01eb9d9f70..4f0a169fddbd 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf.h @@ -24,5 +24,6 @@ struct otx2_cptvf_dev { irqreturn_t otx2_cptvf_pfvf_mbox_intr(int irq, void *arg); void otx2_cptvf_pfvf_mbox_handler(struct work_struct *work); int otx2_cptvf_send_eng_grp_num_msg(struct otx2_cptvf_dev *cptvf, int eng_type); +int otx2_cptvf_send_kvf_limits_msg(struct otx2_cptvf_dev *cptvf); #endif /* __OTX2_CPTVF_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c index 6d84fafa2915..97d57bcb2b94 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c @@ -3,6 +3,7 @@ #include "otx2_cpt_common.h" #include "otx2_cptvf.h" +#include "otx2_cptlf.h" #include #define OTX2_CPTVF_DRV_NAME "octeontx2-cptvf" @@ -95,6 +96,201 @@ static void cptvf_pfvf_mbox_destroy(struct otx2_cptvf_dev *cptvf) otx2_mbox_destroy(&cptvf->pfvf_mbox); } +static void cptlf_work_handler(unsigned long data) +{ + otx2_cpt_post_process((struct otx2_cptlf_wqe *) data); +} + +static void cleanup_tasklet_work(struct otx2_cptlfs_info *lfs) +{ + int i; + + for (i = 0; i < lfs->lfs_num; i++) { + if (!lfs->lf[i].wqe) + continue; + + tasklet_kill(&lfs->lf[i].wqe->work); + kfree(lfs->lf[i].wqe); + lfs->lf[i].wqe = NULL; + } +} + +static int init_tasklet_work(struct otx2_cptlfs_info *lfs) +{ + struct otx2_cptlf_wqe *wqe; + int i, ret = 0; + + for (i = 0; i < lfs->lfs_num; i++) { + wqe = kzalloc(sizeof(struct otx2_cptlf_wqe), GFP_KERNEL); + if (!wqe) { + ret = -ENOMEM; + goto cleanup_tasklet; + } + + tasklet_init(&wqe->work, cptlf_work_handler, (u64) wqe); + wqe->lfs = lfs; + wqe->lf_num = i; + lfs->lf[i].wqe = wqe; + } + return 0; + +cleanup_tasklet: + cleanup_tasklet_work(lfs); + return ret; +} + +static void free_pending_queues(struct otx2_cptlfs_info *lfs) +{ + int i; + + for (i = 0; i < lfs->lfs_num; i++) { + kfree(lfs->lf[i].pqueue.head); + lfs->lf[i].pqueue.head = NULL; + } +} + +static int alloc_pending_queues(struct otx2_cptlfs_info *lfs) +{ + int size, ret, i; + + if (!lfs->lfs_num) + return -EINVAL; + + for (i = 0; i < lfs->lfs_num; i++) { + lfs->lf[i].pqueue.qlen = OTX2_CPT_INST_QLEN_MSGS; + size = lfs->lf[i].pqueue.qlen * + sizeof(struct otx2_cpt_pending_entry); + + lfs->lf[i].pqueue.head = kzalloc(size, GFP_KERNEL); + if (!lfs->lf[i].pqueue.head) { + ret = -ENOMEM; + goto error; + } + + /* Initialize spin lock */ + spin_lock_init(&lfs->lf[i].pqueue.lock); + } + return 0; + +error: + free_pending_queues(lfs); + return ret; +} + +static void lf_sw_cleanup(struct otx2_cptlfs_info *lfs) +{ + cleanup_tasklet_work(lfs); + free_pending_queues(lfs); +} + +static int lf_sw_init(struct otx2_cptlfs_info *lfs) +{ + int ret; + + ret = alloc_pending_queues(lfs); + if (ret) { + dev_err(&lfs->pdev->dev, + "Allocating pending queues failed\n"); + return ret; + } + ret = init_tasklet_work(lfs); + if (ret) { + dev_err(&lfs->pdev->dev, + "Tasklet work init failed\n"); + goto pending_queues_free; + } + return 0; + +pending_queues_free: + free_pending_queues(lfs); + return ret; +} + +static void cptvf_lf_shutdown(struct otx2_cptlfs_info *lfs) +{ + atomic_set(&lfs->state, OTX2_CPTLF_IN_RESET); + + /* Remove interrupts affinity */ + otx2_cptlf_free_irqs_affinity(lfs); + /* Disable instruction queue */ + otx2_cptlf_disable_iqueues(lfs); + /* Unregister LFs interrupts */ + otx2_cptlf_unregister_interrupts(lfs); + /* Cleanup LFs software side */ + lf_sw_cleanup(lfs); + /* Send request to detach LFs */ + otx2_cpt_detach_rsrcs_msg(lfs); +} + +static int cptvf_lf_init(struct otx2_cptvf_dev *cptvf) +{ + struct otx2_cptlfs_info *lfs = &cptvf->lfs; + struct device *dev = &cptvf->pdev->dev; + int ret, lfs_num; + u8 eng_grp_msk; + + /* Get engine group number for symmetric crypto */ + cptvf->lfs.kcrypto_eng_grp_num = OTX2_CPT_INVALID_CRYPTO_ENG_GRP; + ret = otx2_cptvf_send_eng_grp_num_msg(cptvf, OTX2_CPT_SE_TYPES); + if (ret) + return ret; + + if (cptvf->lfs.kcrypto_eng_grp_num == OTX2_CPT_INVALID_CRYPTO_ENG_GRP) { + dev_err(dev, "Engine group for kernel crypto not available\n"); + ret = -ENOENT; + return ret; + } + eng_grp_msk = 1 << cptvf->lfs.kcrypto_eng_grp_num; + + ret = otx2_cptvf_send_kvf_limits_msg(cptvf); + if (ret) + return ret; + + lfs->reg_base = cptvf->reg_base; + lfs->pdev = cptvf->pdev; + lfs->mbox = &cptvf->pfvf_mbox; + + lfs_num = cptvf->lfs.kvf_limits ? cptvf->lfs.kvf_limits : + num_online_cpus(); + ret = otx2_cptlf_init(lfs, eng_grp_msk, OTX2_CPT_QUEUE_HI_PRIO, + lfs_num); + if (ret) + return ret; + + /* Get msix offsets for attached LFs */ + ret = otx2_cpt_msix_offset_msg(lfs); + if (ret) + goto cleanup_lf; + + /* Initialize LFs software side */ + ret = lf_sw_init(lfs); + if (ret) + goto cleanup_lf; + + /* Register LFs interrupts */ + ret = otx2_cptlf_register_interrupts(lfs); + if (ret) + goto cleanup_lf_sw; + + /* Set interrupts affinity */ + ret = otx2_cptlf_set_irqs_affinity(lfs); + if (ret) + goto unregister_intr; + + atomic_set(&lfs->state, OTX2_CPTLF_STARTED); + + return 0; + +unregister_intr: + otx2_cptlf_unregister_interrupts(lfs); +cleanup_lf_sw: + lf_sw_cleanup(lfs); +cleanup_lf: + otx2_cptlf_shutdown(lfs); + + return ret; +} + static int otx2_cptvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -150,8 +346,15 @@ static int otx2_cptvf_probe(struct pci_dev *pdev, if (ret) goto destroy_pfvf_mbox; + /* Initialize CPT LFs */ + ret = cptvf_lf_init(cptvf); + if (ret) + goto unregister_interrupts; + return 0; +unregister_interrupts: + cptvf_disable_pfvf_mbox_intrs(cptvf); destroy_pfvf_mbox: cptvf_pfvf_mbox_destroy(cptvf); clear_drvdata: @@ -168,6 +371,7 @@ static void otx2_cptvf_remove(struct pci_dev *pdev) dev_err(&pdev->dev, "Invalid CPT VF device.\n"); return; } + cptvf_lf_shutdown(&cptvf->lfs); /* Disable PF-VF mailbox interrupt */ cptvf_disable_pfvf_mbox_intrs(cptvf); /* Destroy PF-VF mbox */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c index 417099a86742..5d73b711cba6 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c @@ -28,6 +28,8 @@ static void process_pfvf_mbox_mbox_msg(struct otx2_cptvf_dev *cptvf, struct mbox_msghdr *msg) { struct otx2_cptlfs_info *lfs = &cptvf->lfs; + struct otx2_cpt_kvf_limits_rsp *rsp_limits; + struct otx2_cpt_egrp_num_rsp *rsp_grp; struct cpt_rd_wr_reg_msg *rsp_reg; struct msix_offset_rsp *rsp_msix; int i; @@ -75,6 +77,14 @@ static void process_pfvf_mbox_mbox_msg(struct otx2_cptvf_dev *cptvf, if (!rsp_reg->is_write) *rsp_reg->ret_val = rsp_reg->val; break; + case MBOX_MSG_GET_ENG_GRP_NUM: + rsp_grp = (struct otx2_cpt_egrp_num_rsp *) msg; + cptvf->lfs.kcrypto_eng_grp_num = rsp_grp->eng_grp_num; + break; + case MBOX_MSG_GET_KVF_LIMITS: + rsp_limits = (struct otx2_cpt_kvf_limits_rsp *) msg; + cptvf->lfs.kvf_limits = rsp_limits->kvf_limits; + break; default: dev_err(&cptvf->pdev->dev, "Unsupported msg %d received.\n", msg->id); @@ -111,3 +121,47 @@ void otx2_cptvf_pfvf_mbox_handler(struct work_struct *work) } otx2_mbox_reset(pfvf_mbox, 0); } + +int otx2_cptvf_send_eng_grp_num_msg(struct otx2_cptvf_dev *cptvf, int eng_type) +{ + struct otx2_mbox *mbox = &cptvf->pfvf_mbox; + struct pci_dev *pdev = cptvf->pdev; + struct otx2_cpt_egrp_num_msg *req; + + req = (struct otx2_cpt_egrp_num_msg *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct otx2_cpt_egrp_num_rsp)); + if (req == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + req->hdr.id = MBOX_MSG_GET_ENG_GRP_NUM; + req->hdr.sig = OTX2_MBOX_REQ_SIG; + req->hdr.pcifunc = OTX2_CPT_RVU_PFFUNC(cptvf->vf_id, 0); + req->eng_type = eng_type; + + return otx2_cpt_send_mbox_msg(mbox, pdev); +} + +int otx2_cptvf_send_kvf_limits_msg(struct otx2_cptvf_dev *cptvf) +{ + struct otx2_mbox *mbox = &cptvf->pfvf_mbox; + struct pci_dev *pdev = cptvf->pdev; + struct mbox_msghdr *req; + int ret; + + req = (struct mbox_msghdr *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct otx2_cpt_kvf_limits_rsp)); + if (req == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + req->id = MBOX_MSG_GET_KVF_LIMITS; + req->sig = OTX2_MBOX_REQ_SIG; + req->pcifunc = OTX2_CPT_RVU_PFFUNC(cptvf->vf_id, 0); + + ret = otx2_cpt_send_mbox_msg(mbox, pdev); + + return ret; +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c new file mode 100644 index 000000000000..b3b371ce06f9 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c @@ -0,0 +1,534 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include "otx2_cptvf.h" +#include "otx2_cpt_common.h" + +/* SG list header size in bytes */ +#define SG_LIST_HDR_SIZE 8 + +/* Default timeout when waiting for free pending entry in us */ +#define CPT_PENTRY_TIMEOUT 1000 +#define CPT_PENTRY_STEP 50 + +/* Default threshold for stopping and resuming sender requests */ +#define CPT_IQ_STOP_MARGIN 128 +#define CPT_IQ_RESUME_MARGIN 512 + +/* Default command timeout in seconds */ +#define CPT_COMMAND_TIMEOUT 4 +#define CPT_TIME_IN_RESET_COUNT 5 + +static void otx2_cpt_dump_sg_list(struct pci_dev *pdev, + struct otx2_cpt_req_info *req) +{ + int i; + + pr_debug("Gather list size %d\n", req->in_cnt); + for (i = 0; i < req->in_cnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->in[i].size, req->in[i].vptr, + (void *) req->in[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", + req->in[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->in[i].vptr, req->in[i].size, false); + } + pr_debug("Scatter list size %d\n", req->out_cnt); + for (i = 0; i < req->out_cnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->out[i].size, req->out[i].vptr, + (void *) req->out[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", req->out[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->out[i].vptr, req->out[i].size, false); + } +} + +static inline struct otx2_cpt_pending_entry *get_free_pending_entry( + struct otx2_cpt_pending_queue *q, + int qlen) +{ + struct otx2_cpt_pending_entry *ent = NULL; + + ent = &q->head[q->rear]; + if (unlikely(ent->busy)) + return NULL; + + q->rear++; + if (unlikely(q->rear == qlen)) + q->rear = 0; + + return ent; +} + +static inline u32 modulo_inc(u32 index, u32 length, u32 inc) +{ + if (WARN_ON(inc > length)) + inc = length; + + index += inc; + if (unlikely(index >= length)) + index -= length; + + return index; +} + +static inline void free_pentry(struct otx2_cpt_pending_entry *pentry) +{ + pentry->completion_addr = NULL; + pentry->info = NULL; + pentry->callback = NULL; + pentry->areq = NULL; + pentry->resume_sender = false; + pentry->busy = false; +} + +static inline int setup_sgio_components(struct pci_dev *pdev, + struct otx2_cpt_buf_ptr *list, + int buf_count, u8 *buffer) +{ + struct otx2_cpt_sglist_component *sg_ptr = NULL; + int ret = 0, i, j; + int components; + + if (unlikely(!list)) { + dev_err(&pdev->dev, "Input list pointer is NULL\n"); + return -EFAULT; + } + + for (i = 0; i < buf_count; i++) { + if (unlikely(!list[i].vptr)) + continue; + list[i].dma_addr = dma_map_single(&pdev->dev, list[i].vptr, + list[i].size, + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, list[i].dma_addr))) { + dev_err(&pdev->dev, "Dma mapping failed\n"); + ret = -EIO; + goto sg_cleanup; + } + } + components = buf_count / 4; + sg_ptr = (struct otx2_cpt_sglist_component *)buffer; + for (i = 0; i < components; i++) { + sg_ptr->len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->len3 = cpu_to_be16(list[i * 4 + 3].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr); + sg_ptr++; + } + components = buf_count % 4; + + switch (components) { + case 3: + sg_ptr->len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + fallthrough; + case 2: + sg_ptr->len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + fallthrough; + case 1: + sg_ptr->len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + break; + default: + break; + } + return ret; + +sg_cleanup: + for (j = 0; j < i; j++) { + if (list[j].dma_addr) { + dma_unmap_single(&pdev->dev, list[j].dma_addr, + list[j].size, DMA_BIDIRECTIONAL); + } + + list[j].dma_addr = 0; + } + return ret; +} + +static inline struct otx2_cpt_inst_info *info_create(struct pci_dev *pdev, + struct otx2_cpt_req_info *req, + gfp_t gfp) +{ + int align = OTX2_CPT_DMA_MINALIGN; + struct otx2_cpt_inst_info *info; + u32 dlen, align_dlen, info_len; + u16 g_sz_bytes, s_sz_bytes; + u32 total_mem_len; + + if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT || + req->out_cnt > OTX2_CPT_MAX_SG_OUT_CNT)) { + dev_err(&pdev->dev, "Error too many sg components\n"); + return NULL; + } + + g_sz_bytes = ((req->in_cnt + 3) / 4) * + sizeof(struct otx2_cpt_sglist_component); + s_sz_bytes = ((req->out_cnt + 3) / 4) * + sizeof(struct otx2_cpt_sglist_component); + + dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE; + align_dlen = ALIGN(dlen, align); + info_len = ALIGN(sizeof(*info), align); + total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s); + + info = kzalloc(total_mem_len, gfp); + if (unlikely(!info)) + return NULL; + + info->dlen = dlen; + info->in_buffer = (u8 *)info + info_len; + + ((u16 *)info->in_buffer)[0] = req->out_cnt; + ((u16 *)info->in_buffer)[1] = req->in_cnt; + ((u16 *)info->in_buffer)[2] = 0; + ((u16 *)info->in_buffer)[3] = 0; + cpu_to_be64s((u64 *)info->in_buffer); + + /* Setup gather (input) components */ + if (setup_sgio_components(pdev, req->in, req->in_cnt, + &info->in_buffer[8])) { + dev_err(&pdev->dev, "Failed to setup gather list\n"); + goto destroy_info; + } + + if (setup_sgio_components(pdev, req->out, req->out_cnt, + &info->in_buffer[8 + g_sz_bytes])) { + dev_err(&pdev->dev, "Failed to setup scatter list\n"); + goto destroy_info; + } + + info->dma_len = total_mem_len - info_len; + info->dptr_baddr = dma_map_single(&pdev->dev, info->in_buffer, + info->dma_len, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) { + dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n"); + goto destroy_info; + } + /* + * Get buffer for union otx2_cpt_res_s response + * structure and its physical address + */ + info->completion_addr = info->in_buffer + align_dlen; + info->comp_baddr = info->dptr_baddr + align_dlen; + + return info; + +destroy_info: + otx2_cpt_info_destroy(pdev, info); + return NULL; +} + +static int process_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req, + struct otx2_cpt_pending_queue *pqueue, + struct otx2_cptlf_info *lf) +{ + struct otx2_cptvf_request *cpt_req = &req->req; + struct otx2_cpt_pending_entry *pentry = NULL; + union otx2_cpt_ctrl_info *ctrl = &req->ctrl; + struct otx2_cpt_inst_info *info = NULL; + union otx2_cpt_res_s *result = NULL; + struct otx2_cpt_iq_command iq_cmd; + union otx2_cpt_inst_s cptinst; + int retry, ret = 0; + u8 resume_sender; + gfp_t gfp; + + gfp = (req->areq->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : + GFP_ATOMIC; + if (unlikely(!otx2_cptlf_started(lf->lfs))) + return -ENODEV; + + info = info_create(pdev, req, gfp); + if (unlikely(!info)) { + dev_err(&pdev->dev, "Setting up cpt inst info failed"); + return -ENOMEM; + } + cpt_req->dlen = info->dlen; + + result = info->completion_addr; + result->s.compcode = OTX2_CPT_COMPLETION_CODE_INIT; + + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + retry = CPT_PENTRY_TIMEOUT / CPT_PENTRY_STEP; + while (unlikely(!pentry) && retry--) { + spin_unlock_bh(&pqueue->lock); + udelay(CPT_PENTRY_STEP); + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + } + + if (unlikely(!pentry)) { + ret = -ENOSPC; + goto destroy_info; + } + + /* + * Check if we are close to filling in entire pending queue, + * if so then tell the sender to stop/sleep by returning -EBUSY + * We do it only for context which can sleep (GFP_KERNEL) + */ + if (gfp == GFP_KERNEL && + pqueue->pending_count > (pqueue->qlen - CPT_IQ_STOP_MARGIN)) { + pentry->resume_sender = true; + } else + pentry->resume_sender = false; + resume_sender = pentry->resume_sender; + pqueue->pending_count++; + + pentry->completion_addr = info->completion_addr; + pentry->info = info; + pentry->callback = req->callback; + pentry->areq = req->areq; + pentry->busy = true; + info->pentry = pentry; + info->time_in = jiffies; + info->req = req; + + /* Fill in the command */ + iq_cmd.cmd.u = 0; + iq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags); + iq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1); + iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2); + iq_cmd.cmd.s.dlen = cpu_to_be16(cpt_req->dlen); + + /* 64-bit swap for microcode data reads, not needed for addresses*/ + cpu_to_be64s(&iq_cmd.cmd.u); + iq_cmd.dptr = info->dptr_baddr; + iq_cmd.rptr = 0; + iq_cmd.cptr.u = 0; + iq_cmd.cptr.s.grp = ctrl->s.grp; + + /* Fill in the CPT_INST_S type command for HW interpretation */ + otx2_cpt_fill_inst(&cptinst, &iq_cmd, info->comp_baddr); + + /* Print debug info if enabled */ + otx2_cpt_dump_sg_list(pdev, req); + pr_debug("Cpt_inst_s hexdump (%d bytes)\n", OTX2_CPT_INST_SIZE); + print_hex_dump_debug("", 0, 16, 1, &cptinst, OTX2_CPT_INST_SIZE, false); + pr_debug("Dptr hexdump (%d bytes)\n", cpt_req->dlen); + print_hex_dump_debug("", 0, 16, 1, info->in_buffer, + cpt_req->dlen, false); + + /* Send CPT command */ + otx2_cpt_send_cmd(&cptinst, 1, lf); + + /* + * We allocate and prepare pending queue entry in critical section + * together with submitting CPT instruction to CPT instruction queue + * to make sure that order of CPT requests is the same in both + * pending and instruction queues + */ + spin_unlock_bh(&pqueue->lock); + + ret = resume_sender ? -EBUSY : -EINPROGRESS; + return ret; + +destroy_info: + spin_unlock_bh(&pqueue->lock); + otx2_cpt_info_destroy(pdev, info); + return ret; +} + +int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req, + int cpu_num) +{ + struct otx2_cptvf_dev *cptvf = pci_get_drvdata(pdev); + struct otx2_cptlfs_info *lfs = &cptvf->lfs; + + return process_request(lfs->pdev, req, &lfs->lf[cpu_num].pqueue, + &lfs->lf[cpu_num]); +} + +static int cpt_process_ccode(struct pci_dev *pdev, + union otx2_cpt_res_s *cpt_status, + struct otx2_cpt_inst_info *info, + u32 *res_code) +{ + u8 uc_ccode = cpt_status->s.uc_compcode; + u8 ccode = cpt_status->s.compcode; + + switch (ccode) { + case OTX2_CPT_COMP_E_FAULT: + dev_err(&pdev->dev, + "Request failed with DMA fault\n"); + otx2_cpt_dump_sg_list(pdev, info->req); + break; + + case OTX2_CPT_COMP_E_HWERR: + dev_err(&pdev->dev, + "Request failed with hardware error\n"); + otx2_cpt_dump_sg_list(pdev, info->req); + break; + + case OTX2_CPT_COMP_E_INSTERR: + dev_err(&pdev->dev, + "Request failed with instruction error\n"); + otx2_cpt_dump_sg_list(pdev, info->req); + break; + + case OTX2_CPT_COMP_E_NOTDONE: + /* check for timeout */ + if (time_after_eq(jiffies, info->time_in + + CPT_COMMAND_TIMEOUT * HZ)) + dev_warn(&pdev->dev, + "Request timed out 0x%p", info->req); + else if (info->extra_time < CPT_TIME_IN_RESET_COUNT) { + info->time_in = jiffies; + info->extra_time++; + } + return 1; + + case OTX2_CPT_COMP_E_GOOD: + /* + * Check microcode completion code, it is only valid + * when completion code is CPT_COMP_E::GOOD + */ + if (uc_ccode != OTX2_CPT_UCC_SUCCESS) { + /* + * If requested hmac is truncated and ucode returns + * s/g write length error then we report success + * because ucode writes as many bytes of calculated + * hmac as available in gather buffer and reports + * s/g write length error if number of bytes in gather + * buffer is less than full hmac size. + */ + if (info->req->is_trunc_hmac && + uc_ccode == OTX2_CPT_UCC_SG_WRITE_LENGTH) { + *res_code = 0; + break; + } + + dev_err(&pdev->dev, + "Request failed with software error code 0x%x\n", + cpt_status->s.uc_compcode); + otx2_cpt_dump_sg_list(pdev, info->req); + break; + } + /* Request has been processed with success */ + *res_code = 0; + break; + + default: + dev_err(&pdev->dev, + "Request returned invalid status %d\n", ccode); + break; + } + return 0; +} + +static inline void process_pending_queue(struct pci_dev *pdev, + struct otx2_cpt_pending_queue *pqueue) +{ + struct otx2_cpt_pending_entry *resume_pentry = NULL; + void (*callback)(int status, void *arg, void *req); + struct otx2_cpt_pending_entry *pentry = NULL; + union otx2_cpt_res_s *cpt_status = NULL; + struct otx2_cpt_inst_info *info = NULL; + struct otx2_cpt_req_info *req = NULL; + struct crypto_async_request *areq; + u32 res_code, resume_index; + + while (1) { + spin_lock_bh(&pqueue->lock); + pentry = &pqueue->head[pqueue->front]; + + if (WARN_ON(!pentry)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + res_code = -EINVAL; + if (unlikely(!pentry->busy)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + if (unlikely(!pentry->callback)) { + dev_err(&pdev->dev, "Callback NULL\n"); + goto process_pentry; + } + + info = pentry->info; + if (unlikely(!info)) { + dev_err(&pdev->dev, "Pending entry post arg NULL\n"); + goto process_pentry; + } + + req = info->req; + if (unlikely(!req)) { + dev_err(&pdev->dev, "Request NULL\n"); + goto process_pentry; + } + + cpt_status = pentry->completion_addr; + if (unlikely(!cpt_status)) { + dev_err(&pdev->dev, "Completion address NULL\n"); + goto process_pentry; + } + + if (cpt_process_ccode(pdev, cpt_status, info, &res_code)) { + spin_unlock_bh(&pqueue->lock); + return; + } + info->pdev = pdev; + +process_pentry: + /* + * Check if we should inform sending side to resume + * We do it CPT_IQ_RESUME_MARGIN elements in advance before + * pending queue becomes empty + */ + resume_index = modulo_inc(pqueue->front, pqueue->qlen, + CPT_IQ_RESUME_MARGIN); + resume_pentry = &pqueue->head[resume_index]; + if (resume_pentry && + resume_pentry->resume_sender) { + resume_pentry->resume_sender = false; + callback = resume_pentry->callback; + areq = resume_pentry->areq; + + if (callback) { + spin_unlock_bh(&pqueue->lock); + + /* + * EINPROGRESS is an indication for sending + * side that it can resume sending requests + */ + callback(-EINPROGRESS, areq, info); + spin_lock_bh(&pqueue->lock); + } + } + + callback = pentry->callback; + areq = pentry->areq; + free_pentry(pentry); + + pqueue->pending_count--; + pqueue->front = modulo_inc(pqueue->front, pqueue->qlen, 1); + spin_unlock_bh(&pqueue->lock); + + /* + * Call callback after current pending entry has been + * processed, we don't do it if the callback pointer is + * invalid. + */ + if (callback) + callback(res_code, areq, info); + } +} + +void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe) +{ + process_pending_queue(wqe->lfs->pdev, + &wqe->lfs->lf[wqe->lf_num].pqueue); +} From 6f03f0e8b6c8a82d8e740ff3a87ed407ad423243 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Fri, 15 Jan 2021 19:22:27 +0530 Subject: [PATCH 097/154] crypto: octeontx2 - register with linux crypto framework CPT offload module utilises the linux crypto framework to offload crypto processing. This patch registers supported algorithms by calling registration functions provided by the kernel crypto API. The module currently supports: - AES block cipher in CBC,ECB and XTS mode. - 3DES block cipher in CBC and ECB mode. - AEAD algorithms. authenc(hmac(sha1),cbc(aes)), authenc(hmac(sha256),cbc(aes)), authenc(hmac(sha384),cbc(aes)), authenc(hmac(sha512),cbc(aes)), authenc(hmac(sha1),ecb(cipher_null)), authenc(hmac(sha256),ecb(cipher_null)), authenc(hmac(sha384),ecb(cipher_null)), authenc(hmac(sha512),ecb(cipher_null)), rfc4106(gcm(aes)). Signed-off-by: Suheil Chandran Signed-off-by: Lukasz Bartosik Signed-off-by: Srujana Challa Signed-off-by: Herbert Xu --- drivers/crypto/marvell/Kconfig | 4 + drivers/crypto/marvell/octeontx2/Makefile | 3 +- .../marvell/octeontx2/otx2_cpt_reqmgr.h | 1 + .../marvell/octeontx2/otx2_cptvf_algs.c | 1758 +++++++++++++++++ .../marvell/octeontx2/otx2_cptvf_algs.h | 178 ++ .../marvell/octeontx2/otx2_cptvf_main.c | 12 +- .../marvell/octeontx2/otx2_cptvf_reqmgr.c | 7 + 7 files changed, 1961 insertions(+), 2 deletions(-) create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.h diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig index 1440ec9e1fb4..2efbd79180ce 100644 --- a/drivers/crypto/marvell/Kconfig +++ b/drivers/crypto/marvell/Kconfig @@ -40,8 +40,12 @@ config CRYPTO_DEV_OCTEONTX2_CPT tristate "Marvell OcteonTX2 CPT driver" depends on ARM64 || COMPILE_TEST depends on PCI_MSI && 64BIT + depends on CRYPTO_LIB_AES select OCTEONTX2_MBOX select CRYPTO_DEV_MARVELL + select CRYPTO_SKCIPHER + select CRYPTO_HASH + select CRYPTO_AEAD help This driver allows you to utilize the Marvell Cryptographic Accelerator Unit(CPT) found in OcteonTX2 series of processors. diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile index 41c0a5832b3f..b9c6201019e0 100644 --- a/drivers/crypto/marvell/octeontx2/Makefile +++ b/drivers/crypto/marvell/octeontx2/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o octeontx2-cptvf.o octeontx2-cpt-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o octeontx2-cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \ - otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o + otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o \ + otx2_cptvf_algs.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h index 597a998c6df6..dbb1ee746f4c 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h @@ -192,5 +192,6 @@ struct otx2_cptlf_wqe; int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req, int cpu_num); void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe); +int otx2_cpt_get_kcrypto_eng_grp_num(struct pci_dev *pdev); #endif /* __OTX2_CPT_REQMGR_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c new file mode 100644 index 000000000000..a72723455df7 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c @@ -0,0 +1,1758 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "otx2_cptvf.h" +#include "otx2_cptvf_algs.h" +#include "otx2_cpt_reqmgr.h" + +/* Size of salt in AES GCM mode */ +#define AES_GCM_SALT_SIZE 4 +/* Size of IV in AES GCM mode */ +#define AES_GCM_IV_SIZE 8 +/* Size of ICV (Integrity Check Value) in AES GCM mode */ +#define AES_GCM_ICV_SIZE 16 +/* Offset of IV in AES GCM mode */ +#define AES_GCM_IV_OFFSET 8 +#define CONTROL_WORD_LEN 8 +#define KEY2_OFFSET 48 +#define DMA_MODE_FLAG(dma_mode) \ + (((dma_mode) == OTX2_CPT_DMA_MODE_SG) ? (1 << 7) : 0) + +/* Truncated SHA digest size */ +#define SHA1_TRUNC_DIGEST_SIZE 12 +#define SHA256_TRUNC_DIGEST_SIZE 16 +#define SHA384_TRUNC_DIGEST_SIZE 24 +#define SHA512_TRUNC_DIGEST_SIZE 32 + +static DEFINE_MUTEX(mutex); +static int is_crypto_registered; + +struct cpt_device_desc { + struct pci_dev *dev; + int num_queues; +}; + +struct cpt_device_table { + atomic_t count; + struct cpt_device_desc desc[OTX2_CPT_MAX_LFS_NUM]; +}; + +static struct cpt_device_table se_devices = { + .count = ATOMIC_INIT(0) +}; + +static inline int get_se_device(struct pci_dev **pdev, int *cpu_num) +{ + int count; + + count = atomic_read(&se_devices.count); + if (count < 1) + return -ENODEV; + + *cpu_num = get_cpu(); + /* + * On OcteonTX2 platform CPT instruction queue is bound to each + * local function LF, in turn LFs can be attached to PF + * or VF therefore we always use first device. We get maximum + * performance if one CPT queue is available for each cpu + * otherwise CPT queues need to be shared between cpus. + */ + if (*cpu_num >= se_devices.desc[0].num_queues) + *cpu_num %= se_devices.desc[0].num_queues; + *pdev = se_devices.desc[0].dev; + + put_cpu(); + + return 0; +} + +static inline int validate_hmac_cipher_null(struct otx2_cpt_req_info *cpt_req) +{ + struct otx2_cpt_req_ctx *rctx; + struct aead_request *req; + struct crypto_aead *tfm; + + req = container_of(cpt_req->areq, struct aead_request, base); + tfm = crypto_aead_reqtfm(req); + rctx = aead_request_ctx(req); + if (memcmp(rctx->fctx.hmac.s.hmac_calc, + rctx->fctx.hmac.s.hmac_recv, + crypto_aead_authsize(tfm)) != 0) + return -EBADMSG; + + return 0; +} + +static void otx2_cpt_aead_callback(int status, void *arg1, void *arg2) +{ + struct otx2_cpt_inst_info *inst_info = arg2; + struct crypto_async_request *areq = arg1; + struct otx2_cpt_req_info *cpt_req; + struct pci_dev *pdev; + + if (inst_info) { + cpt_req = inst_info->req; + if (!status) { + /* + * When selected cipher is NULL we need to manually + * verify whether calculated hmac value matches + * received hmac value + */ + if (cpt_req->req_type == + OTX2_CPT_AEAD_ENC_DEC_NULL_REQ && + !cpt_req->is_enc) + status = validate_hmac_cipher_null(cpt_req); + } + pdev = inst_info->pdev; + otx2_cpt_info_destroy(pdev, inst_info); + } + if (areq) + areq->complete(areq, status); +} + +static void output_iv_copyback(struct crypto_async_request *areq) +{ + struct otx2_cpt_req_info *req_info; + struct otx2_cpt_req_ctx *rctx; + struct skcipher_request *sreq; + struct crypto_skcipher *stfm; + struct otx2_cpt_enc_ctx *ctx; + u32 start, ivsize; + + sreq = container_of(areq, struct skcipher_request, base); + stfm = crypto_skcipher_reqtfm(sreq); + ctx = crypto_skcipher_ctx(stfm); + if (ctx->cipher_type == OTX2_CPT_AES_CBC || + ctx->cipher_type == OTX2_CPT_DES3_CBC) { + rctx = skcipher_request_ctx(sreq); + req_info = &rctx->cpt_req; + ivsize = crypto_skcipher_ivsize(stfm); + start = sreq->cryptlen - ivsize; + + if (req_info->is_enc) { + scatterwalk_map_and_copy(sreq->iv, sreq->dst, start, + ivsize, 0); + } else { + if (sreq->src != sreq->dst) { + scatterwalk_map_and_copy(sreq->iv, sreq->src, + start, ivsize, 0); + } else { + memcpy(sreq->iv, req_info->iv_out, ivsize); + kfree(req_info->iv_out); + } + } + } +} + +static void otx2_cpt_skcipher_callback(int status, void *arg1, void *arg2) +{ + struct otx2_cpt_inst_info *inst_info = arg2; + struct crypto_async_request *areq = arg1; + struct pci_dev *pdev; + + if (areq) { + if (!status) + output_iv_copyback(areq); + if (inst_info) { + pdev = inst_info->pdev; + otx2_cpt_info_destroy(pdev, inst_info); + } + areq->complete(areq, status); + } +} + +static inline void update_input_data(struct otx2_cpt_req_info *req_info, + struct scatterlist *inp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->req.dlen += nbytes; + + while (nbytes) { + u32 len = (nbytes < inp_sg->length) ? nbytes : inp_sg->length; + u8 *ptr = sg_virt(inp_sg); + + req_info->in[*argcnt].vptr = (void *)ptr; + req_info->in[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + inp_sg = sg_next(inp_sg); + } +} + +static inline void update_output_data(struct otx2_cpt_req_info *req_info, + struct scatterlist *outp_sg, + u32 offset, u32 nbytes, u32 *argcnt) +{ + u32 len, sg_len; + u8 *ptr; + + req_info->rlen += nbytes; + + while (nbytes) { + sg_len = outp_sg->length - offset; + len = (nbytes < sg_len) ? nbytes : sg_len; + ptr = sg_virt(outp_sg); + + req_info->out[*argcnt].vptr = (void *) (ptr + offset); + req_info->out[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + offset = 0; + outp_sg = sg_next(outp_sg); + } +} + +static inline int create_ctx_hdr(struct skcipher_request *req, u32 enc, + u32 *argcnt) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + struct otx2_cpt_fc_ctx *fctx = &rctx->fctx; + int ivsize = crypto_skcipher_ivsize(stfm); + u32 start = req->cryptlen - ivsize; + gfp_t flags; + + flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + req_info->ctrl.s.dma_mode = OTX2_CPT_DMA_MODE_SG; + req_info->ctrl.s.se_req = 1; + + req_info->req.opcode.s.major = OTX2_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX2_CPT_DMA_MODE_SG); + if (enc) { + req_info->req.opcode.s.minor = 2; + } else { + req_info->req.opcode.s.minor = 3; + if ((ctx->cipher_type == OTX2_CPT_AES_CBC || + ctx->cipher_type == OTX2_CPT_DES3_CBC) && + req->src == req->dst) { + req_info->iv_out = kmalloc(ivsize, flags); + if (!req_info->iv_out) + return -ENOMEM; + + scatterwalk_map_and_copy(req_info->iv_out, req->src, + start, ivsize, 0); + } + } + /* Encryption data length */ + req_info->req.param1 = req->cryptlen; + /* Authentication data length */ + req_info->req.param2 = 0; + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.iv_source = OTX2_CPT_FROM_CPTR; + + if (ctx->cipher_type == OTX2_CPT_AES_XTS) + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2); + else + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len); + + memcpy(fctx->enc.encr_iv, req->iv, crypto_skcipher_ivsize(stfm)); + + cpu_to_be64s(&fctx->enc.enc_ctrl.u); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx2_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx2_cpt_fc_ctx); + + ++(*argcnt); + + return 0; +} + +static inline int create_input_list(struct skcipher_request *req, u32 enc, + u32 enc_iv_len) +{ + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + int ret; + + ret = create_ctx_hdr(req, enc, &argcnt); + if (ret) + return ret; + + update_input_data(req_info, req->src, req->cryptlen, &argcnt); + req_info->in_cnt = argcnt; + + return 0; +} + +static inline void create_output_list(struct skcipher_request *req, + u32 enc_iv_len) +{ + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + + /* + * OUTPUT Buffer Processing + * AES encryption/decryption output would be + * received in the following format + * + * ------IV--------|------ENCRYPTED/DECRYPTED DATA-----| + * [ 16 Bytes/ [ Request Enc/Dec/ DATA Len AES CBC ] + */ + update_output_data(req_info, req->dst, 0, req->cryptlen, &argcnt); + req_info->out_cnt = argcnt; +} + +static int skcipher_do_fallback(struct skcipher_request *req, bool is_enc) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); + int ret; + + if (ctx->fbk_cipher) { + skcipher_request_set_tfm(&rctx->sk_fbk_req, ctx->fbk_cipher); + skcipher_request_set_callback(&rctx->sk_fbk_req, + req->base.flags, + req->base.complete, + req->base.data); + skcipher_request_set_crypt(&rctx->sk_fbk_req, req->src, + req->dst, req->cryptlen, req->iv); + ret = is_enc ? crypto_skcipher_encrypt(&rctx->sk_fbk_req) : + crypto_skcipher_decrypt(&rctx->sk_fbk_req); + } else { + ret = -EINVAL; + } + return ret; +} + +static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + u32 enc_iv_len = crypto_skcipher_ivsize(stfm); + struct pci_dev *pdev; + int status, cpu_num; + + if (req->cryptlen == 0) + return 0; + + if (!IS_ALIGNED(req->cryptlen, ctx->enc_align_len)) + return -EINVAL; + + if (req->cryptlen > OTX2_CPT_MAX_REQ_SIZE) + return skcipher_do_fallback(req, enc); + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.u = 0; + + status = create_input_list(req, enc, enc_iv_len); + if (status) + return status; + create_output_list(req, enc_iv_len); + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->callback = otx2_cpt_skcipher_callback; + req_info->areq = &req->base; + req_info->req_type = OTX2_CPT_ENC_DEC_REQ; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + req_info->ctrl.s.grp = otx2_cpt_get_kcrypto_eng_grp_num(pdev); + + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + status = otx2_cpt_do_request(pdev, req_info, cpu_num); + + return status; +} + +static int otx2_cpt_skcipher_encrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, true); +} + +static int otx2_cpt_skcipher_decrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, false); +} + +static int otx2_cpt_skcipher_xts_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + const u8 *key2 = key + (keylen / 2); + const u8 *key1 = key; + int ret; + + ret = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen); + if (ret) + return ret; + ctx->key_len = keylen; + ctx->enc_align_len = 1; + memcpy(ctx->enc_key, key1, keylen / 2); + memcpy(ctx->enc_key + KEY2_OFFSET, key2, keylen / 2); + ctx->cipher_type = OTX2_CPT_AES_XTS; + switch (ctx->key_len) { + case 2 * AES_KEYSIZE_128: + ctx->key_type = OTX2_CPT_AES_128_BIT; + break; + case 2 * AES_KEYSIZE_192: + ctx->key_type = OTX2_CPT_AES_192_BIT; + break; + case 2 * AES_KEYSIZE_256: + ctx->key_type = OTX2_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + return crypto_skcipher_setkey(ctx->fbk_cipher, key, keylen); +} + +static int cpt_des_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (keylen != DES3_EDE_KEY_SIZE) + return -EINVAL; + + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + ctx->enc_align_len = 8; + + memcpy(ctx->enc_key, key, keylen); + + return crypto_skcipher_setkey(ctx->fbk_cipher, key, keylen); +} + +static int cpt_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + switch (keylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX2_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX2_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX2_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + if (cipher_type == OTX2_CPT_AES_CBC || cipher_type == OTX2_CPT_AES_ECB) + ctx->enc_align_len = 16; + else + ctx->enc_align_len = 1; + + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + + memcpy(ctx->enc_key, key, keylen); + + return crypto_skcipher_setkey(ctx->fbk_cipher, key, keylen); +} + +static int otx2_cpt_skcipher_cbc_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX2_CPT_AES_CBC); +} + +static int otx2_cpt_skcipher_ecb_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX2_CPT_AES_ECB); +} + +static int otx2_cpt_skcipher_cbc_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_des_setkey(tfm, key, keylen, OTX2_CPT_DES3_CBC); +} + +static int otx2_cpt_skcipher_ecb_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_des_setkey(tfm, key, keylen, OTX2_CPT_DES3_ECB); +} + +static int cpt_skcipher_fallback_init(struct otx2_cpt_enc_ctx *ctx, + struct crypto_alg *alg) +{ + if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { + ctx->fbk_cipher = + crypto_alloc_skcipher(alg->cra_name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fbk_cipher)) { + pr_err("%s() failed to allocate fallback for %s\n", + __func__, alg->cra_name); + return PTR_ERR(ctx->fbk_cipher); + } + } + return 0; +} + +static int otx2_cpt_enc_dec_init(struct crypto_skcipher *stfm) +{ + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); + struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm); + struct crypto_alg *alg = tfm->__crt_alg; + + memset(ctx, 0, sizeof(*ctx)); + /* + * Additional memory for skcipher_request is + * allocated since the cryptd daemon uses + * this memory for request_ctx information + */ + crypto_skcipher_set_reqsize(stfm, sizeof(struct otx2_cpt_req_ctx) + + sizeof(struct skcipher_request)); + + return cpt_skcipher_fallback_init(ctx, alg); +} + +static void otx2_cpt_skcipher_exit(struct crypto_skcipher *tfm) +{ + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (ctx->fbk_cipher) { + crypto_free_skcipher(ctx->fbk_cipher); + ctx->fbk_cipher = NULL; + } +} + +static int cpt_aead_fallback_init(struct otx2_cpt_aead_ctx *ctx, + struct crypto_alg *alg) +{ + if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { + ctx->fbk_cipher = + crypto_alloc_aead(alg->cra_name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fbk_cipher)) { + pr_err("%s() failed to allocate fallback for %s\n", + __func__, alg->cra_name); + return PTR_ERR(ctx->fbk_cipher); + } + } + return 0; +} + +static int cpt_aead_init(struct crypto_aead *atfm, u8 cipher_type, u8 mac_type) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(atfm); + struct crypto_tfm *tfm = crypto_aead_tfm(atfm); + struct crypto_alg *alg = tfm->__crt_alg; + + ctx->cipher_type = cipher_type; + ctx->mac_type = mac_type; + + /* + * When selected cipher is NULL we use HMAC opcode instead of + * FLEXICRYPTO opcode therefore we don't need to use HASH algorithms + * for calculating ipad and opad + */ + if (ctx->cipher_type != OTX2_CPT_CIPHER_NULL) { + switch (ctx->mac_type) { + case OTX2_CPT_SHA1: + ctx->hashalg = crypto_alloc_shash("sha1", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX2_CPT_SHA256: + ctx->hashalg = crypto_alloc_shash("sha256", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX2_CPT_SHA384: + ctx->hashalg = crypto_alloc_shash("sha384", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX2_CPT_SHA512: + ctx->hashalg = crypto_alloc_shash("sha512", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + } + } + switch (ctx->cipher_type) { + case OTX2_CPT_AES_CBC: + case OTX2_CPT_AES_ECB: + ctx->enc_align_len = 16; + break; + case OTX2_CPT_DES3_CBC: + case OTX2_CPT_DES3_ECB: + ctx->enc_align_len = 8; + break; + case OTX2_CPT_AES_GCM: + case OTX2_CPT_CIPHER_NULL: + ctx->enc_align_len = 1; + break; + } + crypto_aead_set_reqsize(atfm, sizeof(struct otx2_cpt_req_ctx)); + + return cpt_aead_fallback_init(ctx, alg); +} + +static int otx2_cpt_aead_cbc_aes_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_AES_CBC, OTX2_CPT_SHA1); +} + +static int otx2_cpt_aead_cbc_aes_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_AES_CBC, OTX2_CPT_SHA256); +} + +static int otx2_cpt_aead_cbc_aes_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_AES_CBC, OTX2_CPT_SHA384); +} + +static int otx2_cpt_aead_cbc_aes_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_AES_CBC, OTX2_CPT_SHA512); +} + +static int otx2_cpt_aead_ecb_null_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_CIPHER_NULL, OTX2_CPT_SHA1); +} + +static int otx2_cpt_aead_ecb_null_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_CIPHER_NULL, OTX2_CPT_SHA256); +} + +static int otx2_cpt_aead_ecb_null_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_CIPHER_NULL, OTX2_CPT_SHA384); +} + +static int otx2_cpt_aead_ecb_null_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_CIPHER_NULL, OTX2_CPT_SHA512); +} + +static int otx2_cpt_aead_gcm_aes_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_AES_GCM, OTX2_CPT_MAC_NULL); +} + +static void otx2_cpt_aead_exit(struct crypto_aead *tfm) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + kfree(ctx->ipad); + kfree(ctx->opad); + if (ctx->hashalg) + crypto_free_shash(ctx->hashalg); + kfree(ctx->sdesc); + + if (ctx->fbk_cipher) { + crypto_free_aead(ctx->fbk_cipher); + ctx->fbk_cipher = NULL; + } +} + +static int otx2_cpt_aead_gcm_set_authsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + if (crypto_rfc4106_check_authsize(authsize)) + return -EINVAL; + + tfm->authsize = authsize; + /* Set authsize for fallback case */ + if (ctx->fbk_cipher) + ctx->fbk_cipher->authsize = authsize; + + return 0; +} + +static int otx2_cpt_aead_set_authsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + tfm->authsize = authsize; + + return 0; +} + +static int otx2_cpt_aead_null_set_authsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + ctx->is_trunc_hmac = true; + tfm->authsize = authsize; + + return 0; +} + +static struct otx2_cpt_sdesc *alloc_sdesc(struct crypto_shash *alg) +{ + struct otx2_cpt_sdesc *sdesc; + int size; + + size = sizeof(struct shash_desc) + crypto_shash_descsize(alg); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) + return NULL; + + sdesc->shash.tfm = alg; + + return sdesc; +} + +static inline void swap_data32(void *buf, u32 len) +{ + cpu_to_be32_array(buf, buf, len / 4); +} + +static inline void swap_data64(void *buf, u32 len) +{ + u64 *src = buf; + int i = 0; + + for (i = 0 ; i < len / 8; i++, src++) + cpu_to_be64s(src); +} + +static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad) +{ + struct sha512_state *sha512; + struct sha256_state *sha256; + struct sha1_state *sha1; + + switch (mac_type) { + case OTX2_CPT_SHA1: + sha1 = (struct sha1_state *) in_pad; + swap_data32(sha1->state, SHA1_DIGEST_SIZE); + memcpy(out_pad, &sha1->state, SHA1_DIGEST_SIZE); + break; + + case OTX2_CPT_SHA256: + sha256 = (struct sha256_state *) in_pad; + swap_data32(sha256->state, SHA256_DIGEST_SIZE); + memcpy(out_pad, &sha256->state, SHA256_DIGEST_SIZE); + break; + + case OTX2_CPT_SHA384: + case OTX2_CPT_SHA512: + sha512 = (struct sha512_state *) in_pad; + swap_data64(sha512->state, SHA512_DIGEST_SIZE); + memcpy(out_pad, &sha512->state, SHA512_DIGEST_SIZE); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int aead_hmac_init(struct crypto_aead *cipher) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + int state_size = crypto_shash_statesize(ctx->hashalg); + int ds = crypto_shash_digestsize(ctx->hashalg); + int bs = crypto_shash_blocksize(ctx->hashalg); + int authkeylen = ctx->auth_key_len; + u8 *ipad = NULL, *opad = NULL; + int ret = 0, icount = 0; + + ctx->sdesc = alloc_sdesc(ctx->hashalg); + if (!ctx->sdesc) + return -ENOMEM; + + ctx->ipad = kzalloc(bs, GFP_KERNEL); + if (!ctx->ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + ctx->opad = kzalloc(bs, GFP_KERNEL); + if (!ctx->opad) { + ret = -ENOMEM; + goto calc_fail; + } + + ipad = kzalloc(state_size, GFP_KERNEL); + if (!ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + opad = kzalloc(state_size, GFP_KERNEL); + if (!opad) { + ret = -ENOMEM; + goto calc_fail; + } + + if (authkeylen > bs) { + ret = crypto_shash_digest(&ctx->sdesc->shash, ctx->key, + authkeylen, ipad); + if (ret) + goto calc_fail; + + authkeylen = ds; + } else { + memcpy(ipad, ctx->key, authkeylen); + } + + memset(ipad + authkeylen, 0, bs - authkeylen); + memcpy(opad, ipad, bs); + + for (icount = 0; icount < bs; icount++) { + ipad[icount] ^= 0x36; + opad[icount] ^= 0x5c; + } + + /* + * Partial Hash calculated from the software + * algorithm is retrieved for IPAD & OPAD + */ + + /* IPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, ipad, bs); + crypto_shash_export(&ctx->sdesc->shash, ipad); + ret = copy_pad(ctx->mac_type, ctx->ipad, ipad); + if (ret) + goto calc_fail; + + /* OPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, opad, bs); + crypto_shash_export(&ctx->sdesc->shash, opad); + ret = copy_pad(ctx->mac_type, ctx->opad, opad); + if (ret) + goto calc_fail; + + kfree(ipad); + kfree(opad); + + return 0; + +calc_fail: + kfree(ctx->ipad); + ctx->ipad = NULL; + kfree(ctx->opad); + ctx->opad = NULL; + kfree(ipad); + kfree(opad); + kfree(ctx->sdesc); + ctx->sdesc = NULL; + + return ret; +} + +static int otx2_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct crypto_authenc_key_param *param; + int enckeylen = 0, authkeylen = 0; + struct rtattr *rta = (void *)key; + int status; + + if (!RTA_OK(rta, keylen)) + return -EINVAL; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + return -EINVAL; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + return -EINVAL; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (keylen < enckeylen) + return -EINVAL; + + if (keylen > OTX2_CPT_MAX_KEY_SIZE) + return -EINVAL; + + authkeylen = keylen - enckeylen; + memcpy(ctx->key, key, keylen); + + switch (enckeylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX2_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX2_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX2_CPT_AES_256_BIT; + break; + default: + /* Invalid key length */ + return -EINVAL; + } + + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = authkeylen; + + status = aead_hmac_init(cipher); + if (status) + return status; + + return 0; +} + +static int otx2_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct crypto_authenc_key_param *param; + struct rtattr *rta = (void *)key; + int enckeylen = 0; + + if (!RTA_OK(rta, keylen)) + return -EINVAL; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + return -EINVAL; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + return -EINVAL; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (enckeylen != 0) + return -EINVAL; + + if (keylen > OTX2_CPT_MAX_KEY_SIZE) + return -EINVAL; + + memcpy(ctx->key, key, keylen); + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = keylen; + + return 0; +} + +static int otx2_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + + /* + * For aes gcm we expect to get encryption key (16, 24, 32 bytes) + * and salt (4 bytes) + */ + switch (keylen) { + case AES_KEYSIZE_128 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX2_CPT_AES_128_BIT; + ctx->enc_key_len = AES_KEYSIZE_128; + break; + case AES_KEYSIZE_192 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX2_CPT_AES_192_BIT; + ctx->enc_key_len = AES_KEYSIZE_192; + break; + case AES_KEYSIZE_256 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX2_CPT_AES_256_BIT; + ctx->enc_key_len = AES_KEYSIZE_256; + break; + default: + /* Invalid key and salt length */ + return -EINVAL; + } + + /* Store encryption key and salt */ + memcpy(ctx->key, key, keylen); + + return crypto_aead_setkey(ctx->fbk_cipher, key, keylen); +} + +static inline int create_aead_ctx_hdr(struct aead_request *req, u32 enc, + u32 *argcnt) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + struct otx2_cpt_fc_ctx *fctx = &rctx->fctx; + int mac_len = crypto_aead_authsize(tfm); + int ds; + + rctx->ctrl_word.e.enc_data_offset = req->assoclen; + + switch (ctx->cipher_type) { + case OTX2_CPT_AES_CBC: + if (req->assoclen > 248 || !IS_ALIGNED(req->assoclen, 8)) + return -EINVAL; + + fctx->enc.enc_ctrl.e.iv_source = OTX2_CPT_FROM_CPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key + ctx->auth_key_len, + ctx->enc_key_len); + /* Copy IV to context */ + memcpy(fctx->enc.encr_iv, req->iv, crypto_aead_ivsize(tfm)); + + ds = crypto_shash_digestsize(ctx->hashalg); + if (ctx->mac_type == OTX2_CPT_SHA384) + ds = SHA512_DIGEST_SIZE; + if (ctx->ipad) + memcpy(fctx->hmac.e.ipad, ctx->ipad, ds); + if (ctx->opad) + memcpy(fctx->hmac.e.opad, ctx->opad, ds); + break; + + case OTX2_CPT_AES_GCM: + if (crypto_ipsec_check_assoclen(req->assoclen)) + return -EINVAL; + + fctx->enc.enc_ctrl.e.iv_source = OTX2_CPT_FROM_DPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key, ctx->enc_key_len); + /* Copy salt to context */ + memcpy(fctx->enc.encr_iv, ctx->key + ctx->enc_key_len, + AES_GCM_SALT_SIZE); + + rctx->ctrl_word.e.iv_offset = req->assoclen - AES_GCM_IV_OFFSET; + break; + + default: + /* Unknown cipher type */ + return -EINVAL; + } + cpu_to_be64s(&rctx->ctrl_word.flags); + + req_info->ctrl.s.dma_mode = OTX2_CPT_DMA_MODE_SG; + req_info->ctrl.s.se_req = 1; + req_info->req.opcode.s.major = OTX2_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX2_CPT_DMA_MODE_SG); + if (enc) { + req_info->req.opcode.s.minor = 2; + req_info->req.param1 = req->cryptlen; + req_info->req.param2 = req->cryptlen + req->assoclen; + } else { + req_info->req.opcode.s.minor = 3; + req_info->req.param1 = req->cryptlen - mac_len; + req_info->req.param2 = req->cryptlen + req->assoclen - mac_len; + } + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.mac_type = ctx->mac_type; + fctx->enc.enc_ctrl.e.mac_len = mac_len; + cpu_to_be64s(&fctx->enc.enc_ctrl.u); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx2_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx2_cpt_fc_ctx); + ++(*argcnt); + + return 0; +} + +static inline void create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, + u32 enc) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + + req_info->ctrl.s.dma_mode = OTX2_CPT_DMA_MODE_SG; + req_info->ctrl.s.se_req = 1; + req_info->req.opcode.s.major = OTX2_CPT_MAJOR_OP_HMAC | + DMA_MODE_FLAG(OTX2_CPT_DMA_MODE_SG); + req_info->is_trunc_hmac = ctx->is_trunc_hmac; + + req_info->req.opcode.s.minor = 0; + req_info->req.param1 = ctx->auth_key_len; + req_info->req.param2 = ctx->mac_type << 8; + + /* Add authentication key */ + req_info->in[*argcnt].vptr = ctx->key; + req_info->in[*argcnt].size = round_up(ctx->auth_key_len, 8); + req_info->req.dlen += round_up(ctx->auth_key_len, 8); + ++(*argcnt); +} + +static inline int create_aead_input_list(struct aead_request *req, u32 enc) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen = req->cryptlen + req->assoclen; + u32 status, argcnt = 0; + + status = create_aead_ctx_hdr(req, enc, &argcnt); + if (status) + return status; + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->in_cnt = argcnt; + + return 0; +} + +static inline void create_aead_output_list(struct aead_request *req, u32 enc, + u32 mac_len) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0, outputlen = 0; + + if (enc) + outputlen = req->cryptlen + req->assoclen + mac_len; + else + outputlen = req->cryptlen + req->assoclen - mac_len; + + update_output_data(req_info, req->dst, 0, outputlen, &argcnt); + req_info->out_cnt = argcnt; +} + +static inline void create_aead_null_input_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen, argcnt = 0; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + create_hmac_ctx_hdr(req, &argcnt, enc); + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->in_cnt = argcnt; +} + +static inline int create_aead_null_output_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + struct scatterlist *dst; + u8 *ptr = NULL; + int argcnt = 0, status, offset; + u32 inputlen; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + /* + * If source and destination are different + * then copy payload to destination + */ + if (req->src != req->dst) { + + ptr = kmalloc(inputlen, (req_info->areq->flags & + CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC); + if (!ptr) + return -ENOMEM; + + status = sg_copy_to_buffer(req->src, sg_nents(req->src), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error_free; + } + status = sg_copy_from_buffer(req->dst, sg_nents(req->dst), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error_free; + } + kfree(ptr); + } + + if (enc) { + /* + * In an encryption scenario hmac needs + * to be appended after payload + */ + dst = req->dst; + offset = inputlen; + while (offset >= dst->length) { + offset -= dst->length; + dst = sg_next(dst); + if (!dst) + return -ENOENT; + } + + update_output_data(req_info, dst, offset, mac_len, &argcnt); + } else { + /* + * In a decryption scenario calculated hmac for received + * payload needs to be compare with hmac received + */ + status = sg_copy_buffer(req->src, sg_nents(req->src), + rctx->fctx.hmac.s.hmac_recv, mac_len, + inputlen, true); + if (status != mac_len) + return -EINVAL; + + req_info->out[argcnt].vptr = rctx->fctx.hmac.s.hmac_calc; + req_info->out[argcnt].size = mac_len; + argcnt++; + } + + req_info->out_cnt = argcnt; + return 0; + +error_free: + kfree(ptr); + return status; +} + +static int aead_do_fallback(struct aead_request *req, bool is_enc) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(aead); + int ret; + + if (ctx->fbk_cipher) { + /* Store the cipher tfm and then use the fallback tfm */ + aead_request_set_tfm(&rctx->fbk_req, ctx->fbk_cipher); + aead_request_set_callback(&rctx->fbk_req, req->base.flags, + req->base.complete, req->base.data); + aead_request_set_crypt(&rctx->fbk_req, req->src, + req->dst, req->cryptlen, req->iv); + ret = is_enc ? crypto_aead_encrypt(&rctx->fbk_req) : + crypto_aead_decrypt(&rctx->fbk_req); + } else { + ret = -EINVAL; + } + + return ret; +} + +static int cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct pci_dev *pdev; + int status, cpu_num; + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.u = 0; + + req_info->callback = otx2_cpt_aead_callback; + req_info->areq = &req->base; + req_info->req_type = reg_type; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + + switch (reg_type) { + case OTX2_CPT_AEAD_ENC_DEC_REQ: + status = create_aead_input_list(req, enc); + if (status) + return status; + create_aead_output_list(req, enc, crypto_aead_authsize(tfm)); + break; + + case OTX2_CPT_AEAD_ENC_DEC_NULL_REQ: + create_aead_null_input_list(req, enc, + crypto_aead_authsize(tfm)); + status = create_aead_null_output_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + break; + + default: + return -EINVAL; + } + if (!IS_ALIGNED(req_info->req.param1, ctx->enc_align_len)) + return -EINVAL; + + if (!req_info->req.param2 || + (req_info->req.param1 > OTX2_CPT_MAX_REQ_SIZE) || + (req_info->req.param2 > OTX2_CPT_MAX_REQ_SIZE)) + return aead_do_fallback(req, enc); + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->ctrl.s.grp = otx2_cpt_get_kcrypto_eng_grp_num(pdev); + + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + return otx2_cpt_do_request(pdev, req_info, cpu_num); +} + +static int otx2_cpt_aead_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX2_CPT_AEAD_ENC_DEC_REQ, true); +} + +static int otx2_cpt_aead_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX2_CPT_AEAD_ENC_DEC_REQ, false); +} + +static int otx2_cpt_aead_null_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX2_CPT_AEAD_ENC_DEC_NULL_REQ, true); +} + +static int otx2_cpt_aead_null_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX2_CPT_AEAD_ENC_DEC_NULL_REQ, false); +} + +static struct skcipher_alg otx2_cpt_skciphers[] = { { + .base.cra_name = "xts(aes)", + .base.cra_driver_name = "cpt_xts_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx2_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx2_cpt_enc_dec_init, + .exit = otx2_cpt_skcipher_exit, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = otx2_cpt_skcipher_xts_setkey, + .encrypt = otx2_cpt_skcipher_encrypt, + .decrypt = otx2_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cpt_cbc_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx2_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx2_cpt_enc_dec_init, + .exit = otx2_cpt_skcipher_exit, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx2_cpt_skcipher_cbc_aes_setkey, + .encrypt = otx2_cpt_skcipher_encrypt, + .decrypt = otx2_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "cpt_ecb_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx2_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx2_cpt_enc_dec_init, + .exit = otx2_cpt_skcipher_exit, + .ivsize = 0, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx2_cpt_skcipher_ecb_aes_setkey, + .encrypt = otx2_cpt_skcipher_encrypt, + .decrypt = otx2_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cpt_cbc_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx2_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx2_cpt_enc_dec_init, + .exit = otx2_cpt_skcipher_exit, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = otx2_cpt_skcipher_cbc_des3_setkey, + .encrypt = otx2_cpt_skcipher_encrypt, + .decrypt = otx2_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "cpt_ecb_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx2_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx2_cpt_enc_dec_init, + .exit = otx2_cpt_skcipher_exit, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = 0, + .setkey = otx2_cpt_skcipher_ecb_des3_setkey, + .encrypt = otx2_cpt_skcipher_encrypt, + .decrypt = otx2_cpt_skcipher_decrypt, +} }; + +static struct aead_alg otx2_cpt_aeads[] = { { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha1_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_cbc_aes_sha1_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx2_cpt_aead_set_authsize, + .encrypt = otx2_cpt_aead_encrypt, + .decrypt = otx2_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha256_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_cbc_aes_sha256_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx2_cpt_aead_set_authsize, + .encrypt = otx2_cpt_aead_encrypt, + .decrypt = otx2_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha384_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_cbc_aes_sha384_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx2_cpt_aead_set_authsize, + .encrypt = otx2_cpt_aead_encrypt, + .decrypt = otx2_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha512_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_cbc_aes_sha512_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx2_cpt_aead_set_authsize, + .encrypt = otx2_cpt_aead_encrypt, + .decrypt = otx2_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha1),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha1_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_ecb_null_sha1_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx2_cpt_aead_null_set_authsize, + .encrypt = otx2_cpt_aead_null_encrypt, + .decrypt = otx2_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha256),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha256_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_ecb_null_sha256_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx2_cpt_aead_null_set_authsize, + .encrypt = otx2_cpt_aead_null_encrypt, + .decrypt = otx2_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha384_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_ecb_null_sha384_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx2_cpt_aead_null_set_authsize, + .encrypt = otx2_cpt_aead_null_encrypt, + .decrypt = otx2_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha512_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_ecb_null_sha512_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx2_cpt_aead_null_set_authsize, + .encrypt = otx2_cpt_aead_null_encrypt, + .decrypt = otx2_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "cpt_rfc4106_gcm_aes", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_gcm_aes_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_gcm_aes_setkey, + .setauthsize = otx2_cpt_aead_gcm_set_authsize, + .encrypt = otx2_cpt_aead_encrypt, + .decrypt = otx2_cpt_aead_decrypt, + .ivsize = AES_GCM_IV_SIZE, + .maxauthsize = AES_GCM_ICV_SIZE, +} }; + +static inline int cpt_register_algs(void) +{ + int i, err = 0; + + if (!IS_ENABLED(CONFIG_DM_CRYPT)) { + for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++) + otx2_cpt_skciphers[i].base.cra_flags &= + ~CRYPTO_ALG_DEAD; + + err = crypto_register_skciphers(otx2_cpt_skciphers, + ARRAY_SIZE(otx2_cpt_skciphers)); + if (err) + return err; + } + + for (i = 0; i < ARRAY_SIZE(otx2_cpt_aeads); i++) + otx2_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = crypto_register_aeads(otx2_cpt_aeads, + ARRAY_SIZE(otx2_cpt_aeads)); + if (err) { + crypto_unregister_skciphers(otx2_cpt_skciphers, + ARRAY_SIZE(otx2_cpt_skciphers)); + return err; + } + + return 0; +} + +static inline void cpt_unregister_algs(void) +{ + crypto_unregister_skciphers(otx2_cpt_skciphers, + ARRAY_SIZE(otx2_cpt_skciphers)); + crypto_unregister_aeads(otx2_cpt_aeads, ARRAY_SIZE(otx2_cpt_aeads)); +} + +static int compare_func(const void *lptr, const void *rptr) +{ + const struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr; + const struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr; + + if (ldesc->dev->devfn < rdesc->dev->devfn) + return -1; + if (ldesc->dev->devfn > rdesc->dev->devfn) + return 1; + return 0; +} + +static void swap_func(void *lptr, void *rptr, int size) +{ + struct cpt_device_desc *ldesc = lptr; + struct cpt_device_desc *rdesc = rptr; + struct cpt_device_desc desc; + + desc = *ldesc; + *ldesc = *rdesc; + *rdesc = desc; +} + +int otx2_cpt_crypto_init(struct pci_dev *pdev, struct module *mod, + int num_queues, int num_devices) +{ + int ret = 0; + int count; + + mutex_lock(&mutex); + count = atomic_read(&se_devices.count); + if (count >= OTX2_CPT_MAX_LFS_NUM) { + dev_err(&pdev->dev, "No space to add a new device\n"); + ret = -ENOSPC; + goto unlock; + } + se_devices.desc[count].num_queues = num_queues; + se_devices.desc[count++].dev = pdev; + atomic_inc(&se_devices.count); + + if (atomic_read(&se_devices.count) == num_devices && + is_crypto_registered == false) { + if (cpt_register_algs()) { + dev_err(&pdev->dev, + "Error in registering crypto algorithms\n"); + ret = -EINVAL; + goto unlock; + } + try_module_get(mod); + is_crypto_registered = true; + } + sort(se_devices.desc, count, sizeof(struct cpt_device_desc), + compare_func, swap_func); + +unlock: + mutex_unlock(&mutex); + return ret; +} + +void otx2_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod) +{ + struct cpt_device_table *dev_tbl; + bool dev_found = false; + int i, j, count; + + mutex_lock(&mutex); + + dev_tbl = &se_devices; + count = atomic_read(&dev_tbl->count); + for (i = 0; i < count; i++) { + if (pdev == dev_tbl->desc[i].dev) { + for (j = i; j < count-1; j++) + dev_tbl->desc[j] = dev_tbl->desc[j+1]; + dev_found = true; + break; + } + } + + if (!dev_found) { + dev_err(&pdev->dev, "%s device not found\n", __func__); + goto unlock; + } + if (atomic_dec_and_test(&se_devices.count)) { + cpt_unregister_algs(); + module_put(mod); + is_crypto_registered = false; + } + +unlock: + mutex_unlock(&mutex); +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.h b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.h new file mode 100644 index 000000000000..f04184bd1744 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.h @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPT_ALGS_H +#define __OTX2_CPT_ALGS_H + +#include +#include +#include +#include "otx2_cpt_common.h" + +#define OTX2_CPT_MAX_ENC_KEY_SIZE 32 +#define OTX2_CPT_MAX_HASH_KEY_SIZE 64 +#define OTX2_CPT_MAX_KEY_SIZE (OTX2_CPT_MAX_ENC_KEY_SIZE + \ + OTX2_CPT_MAX_HASH_KEY_SIZE) +enum otx2_cpt_request_type { + OTX2_CPT_ENC_DEC_REQ = 0x1, + OTX2_CPT_AEAD_ENC_DEC_REQ = 0x2, + OTX2_CPT_AEAD_ENC_DEC_NULL_REQ = 0x3, + OTX2_CPT_PASSTHROUGH_REQ = 0x4 +}; + +enum otx2_cpt_major_opcodes { + OTX2_CPT_MAJOR_OP_MISC = 0x01, + OTX2_CPT_MAJOR_OP_FC = 0x33, + OTX2_CPT_MAJOR_OP_HMAC = 0x35, +}; + +enum otx2_cpt_cipher_type { + OTX2_CPT_CIPHER_NULL = 0x0, + OTX2_CPT_DES3_CBC = 0x1, + OTX2_CPT_DES3_ECB = 0x2, + OTX2_CPT_AES_CBC = 0x3, + OTX2_CPT_AES_ECB = 0x4, + OTX2_CPT_AES_CFB = 0x5, + OTX2_CPT_AES_CTR = 0x6, + OTX2_CPT_AES_GCM = 0x7, + OTX2_CPT_AES_XTS = 0x8 +}; + +enum otx2_cpt_mac_type { + OTX2_CPT_MAC_NULL = 0x0, + OTX2_CPT_MD5 = 0x1, + OTX2_CPT_SHA1 = 0x2, + OTX2_CPT_SHA224 = 0x3, + OTX2_CPT_SHA256 = 0x4, + OTX2_CPT_SHA384 = 0x5, + OTX2_CPT_SHA512 = 0x6, + OTX2_CPT_GMAC = 0x7 +}; + +enum otx2_cpt_aes_key_len { + OTX2_CPT_AES_128_BIT = 0x1, + OTX2_CPT_AES_192_BIT = 0x2, + OTX2_CPT_AES_256_BIT = 0x3 +}; + +union otx2_cpt_encr_ctrl { + u64 u; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 enc_cipher:4; + u64 reserved_59:1; + u64 aes_key:2; + u64 iv_source:1; + u64 mac_type:4; + u64 reserved_49_51:3; + u64 auth_input_type:1; + u64 mac_len:8; + u64 reserved_32_39:8; + u64 encr_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 encr_offset:16; + u64 reserved_32_39:8; + u64 mac_len:8; + u64 auth_input_type:1; + u64 reserved_49_51:3; + u64 mac_type:4; + u64 iv_source:1; + u64 aes_key:2; + u64 reserved_59:1; + u64 enc_cipher:4; +#endif + } e; +}; + +struct otx2_cpt_cipher { + const char *name; + u8 value; +}; + +struct otx2_cpt_fc_enc_ctx { + union otx2_cpt_encr_ctrl enc_ctrl; + u8 encr_key[32]; + u8 encr_iv[16]; +}; + +union otx2_cpt_fc_hmac_ctx { + struct { + u8 ipad[64]; + u8 opad[64]; + } e; + struct { + u8 hmac_calc[64]; /* HMAC calculated */ + u8 hmac_recv[64]; /* HMAC received */ + } s; +}; + +struct otx2_cpt_fc_ctx { + struct otx2_cpt_fc_enc_ctx enc; + union otx2_cpt_fc_hmac_ctx hmac; +}; + +struct otx2_cpt_enc_ctx { + u32 key_len; + u8 enc_key[OTX2_CPT_MAX_KEY_SIZE]; + u8 cipher_type; + u8 key_type; + u8 enc_align_len; + struct crypto_skcipher *fbk_cipher; +}; + +union otx2_cpt_offset_ctrl { + u64 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved:32; + u64 enc_data_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 enc_data_offset:16; + u64 reserved:32; +#endif + } e; +}; + +struct otx2_cpt_req_ctx { + struct otx2_cpt_req_info cpt_req; + union otx2_cpt_offset_ctrl ctrl_word; + struct otx2_cpt_fc_ctx fctx; + union { + struct skcipher_request sk_fbk_req; + struct aead_request fbk_req; + }; +}; + +struct otx2_cpt_sdesc { + struct shash_desc shash; +}; + +struct otx2_cpt_aead_ctx { + u8 key[OTX2_CPT_MAX_KEY_SIZE]; + struct crypto_shash *hashalg; + struct otx2_cpt_sdesc *sdesc; + struct crypto_aead *fbk_cipher; + u8 *ipad; + u8 *opad; + u32 enc_key_len; + u32 auth_key_len; + u8 cipher_type; + u8 mac_type; + u8 key_type; + u8 is_trunc_hmac; + u8 enc_align_len; +}; +int otx2_cpt_crypto_init(struct pci_dev *pdev, struct module *mod, + int num_queues, int num_devices); +void otx2_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod); + +#endif /* __OTX2_CPT_ALGS_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c index 97d57bcb2b94..9663be38ee40 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c @@ -4,6 +4,7 @@ #include "otx2_cpt_common.h" #include "otx2_cptvf.h" #include "otx2_cptlf.h" +#include "otx2_cptvf_algs.h" #include #define OTX2_CPTVF_DRV_NAME "octeontx2-cptvf" @@ -214,6 +215,8 @@ static void cptvf_lf_shutdown(struct otx2_cptlfs_info *lfs) otx2_cptlf_free_irqs_affinity(lfs); /* Disable instruction queue */ otx2_cptlf_disable_iqueues(lfs); + /* Unregister crypto algorithms */ + otx2_cpt_crypto_exit(lfs->pdev, THIS_MODULE); /* Unregister LFs interrupts */ otx2_cptlf_unregister_interrupts(lfs); /* Cleanup LFs software side */ @@ -278,9 +281,16 @@ static int cptvf_lf_init(struct otx2_cptvf_dev *cptvf) goto unregister_intr; atomic_set(&lfs->state, OTX2_CPTLF_STARTED); - + /* Register crypto algorithms */ + ret = otx2_cpt_crypto_init(lfs->pdev, THIS_MODULE, lfs_num, 1); + if (ret) { + dev_err(&lfs->pdev->dev, "algorithms registration failed\n"); + goto disable_irqs; + } return 0; +disable_irqs: + otx2_cptlf_free_irqs_affinity(lfs); unregister_intr: otx2_cptlf_unregister_interrupts(lfs); cleanup_lf_sw: diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c index b3b371ce06f9..d5c1c1b7c7e4 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c @@ -532,3 +532,10 @@ void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe) process_pending_queue(wqe->lfs->pdev, &wqe->lfs->lf[wqe->lf_num].pqueue); } + +int otx2_cpt_get_kcrypto_eng_grp_num(struct pci_dev *pdev) +{ + struct otx2_cptvf_dev *cptvf = pci_get_drvdata(pdev); + + return cptvf->lfs.kcrypto_eng_grp_num; +} From ac88c322d0f2917d41d13553c69e9d7f043c8b6f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 15 Jan 2021 20:30:12 +0100 Subject: [PATCH 098/154] crypto: lib/chacha20poly1305 - define empty module exit function With no mod_exit function, users are unable to unload the module after use. I'm not aware of any reason why module unloading should be prohibited for this one, so this commit simply adds an empty exit function. Reported-and-tested-by: John Donnelly Acked-by: Ard Biesheuvel Signed-off-by: Jason A. Donenfeld Signed-off-by: Herbert Xu --- lib/crypto/chacha20poly1305.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c index 5850f3b87359..c2fcdb98cc02 100644 --- a/lib/crypto/chacha20poly1305.c +++ b/lib/crypto/chacha20poly1305.c @@ -362,7 +362,12 @@ static int __init mod_init(void) return 0; } +static void __exit mod_exit(void) +{ +} + module_init(mod_init); +module_exit(mod_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction"); MODULE_AUTHOR("Jason A. Donenfeld "); From 29dfe4d94f5c99d164c307b8320e45cb85fb4b21 Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Fri, 15 Jan 2021 22:46:05 +0200 Subject: [PATCH 099/154] crypto: keembay - use 64-bit arithmetic for computing bit_len src_size and aad_size are defined as u32, so the following expressions are currently being evaluated using 32-bit arithmetic: bit_len = src_size * 8; ... bit_len = aad_size * 8; However, bit_len is used afterwards in a context that expects a valid 64-bit value (the lower and upper 32-bit words of bit_len are extracted and written to hw). In order to make sure the correct bit length is generated and the 32-bit multiplication does not wrap around, cast src_size and aad_size to u64. Signed-off-by: Ovidiu Panait Acked-by: Daniele Alessandrelli Signed-off-by: Herbert Xu --- drivers/crypto/keembay/ocs-aes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/keembay/ocs-aes.c b/drivers/crypto/keembay/ocs-aes.c index cc286adb1c4a..b85c89477afa 100644 --- a/drivers/crypto/keembay/ocs-aes.c +++ b/drivers/crypto/keembay/ocs-aes.c @@ -958,14 +958,14 @@ int ocs_aes_gcm_op(struct ocs_aes_dev *aes_dev, ocs_aes_write_last_data_blk_len(aes_dev, src_size); /* Write ciphertext bit length */ - bit_len = src_size * 8; + bit_len = (u64)src_size * 8; val = bit_len & 0xFFFFFFFF; iowrite32(val, aes_dev->base_reg + AES_MULTIPURPOSE2_0_OFFSET); val = bit_len >> 32; iowrite32(val, aes_dev->base_reg + AES_MULTIPURPOSE2_1_OFFSET); /* Write aad bit length */ - bit_len = aad_size * 8; + bit_len = (u64)aad_size * 8; val = bit_len & 0xFFFFFFFF; iowrite32(val, aes_dev->base_reg + AES_MULTIPURPOSE2_2_OFFSET); val = bit_len >> 32; From 64a49b85953cafeaba2b4c2c13d089b3ed41cca6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 16 Jan 2021 17:48:09 +0100 Subject: [PATCH 100/154] crypto: aesni - replace CTR function pointer with static call Indirect calls are very expensive on x86, so use a static call to set the system-wide AES-NI/CTR asm helper. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index a548fdbc3073..d96685457196 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -34,6 +34,7 @@ #include #include #include +#include #define AESNI_ALIGN 16 @@ -107,10 +108,9 @@ asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, #ifdef CONFIG_X86_64 -static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); +DEFINE_STATIC_CALL(aesni_ctr_enc_tfm, aesni_ctr_enc); /* Scatter / Gather routines, with args similar to above */ asmlinkage void aesni_gcm_init(void *ctx, @@ -520,8 +520,10 @@ static int ctr_crypt(struct skcipher_request *req) kernel_fpu_begin(); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK, walk.iv); + static_call(aesni_ctr_enc_tfm)(ctx, walk.dst.virt.addr, + walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, + walk.iv); nbytes &= AES_BLOCK_SIZE - 1; err = skcipher_walk_done(&walk, nbytes); } @@ -1160,10 +1162,9 @@ static int __init aesni_init(void) } else { pr_info("SSE version of gcm_enc/dec engaged.\n"); } - aesni_ctr_enc_tfm = aesni_ctr_enc; if (boot_cpu_has(X86_FEATURE_AVX)) { /* optimize performance of ctr mode encryption transform */ - aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm; + static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm); pr_info("AES CTR mode by8 optimization enabled\n"); } #endif From 65d1e3c415f6e380f6168faf333a59ec235eac5d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 16 Jan 2021 17:48:10 +0100 Subject: [PATCH 101/154] crypto: aesni - release FPU during skcipher walk API calls Taking ownership of the FPU in kernel mode disables preemption, and this may result in excessive scheduling blackouts if the size of the data being processed on the FPU is unbounded. Given that taking and releasing the FPU is cheap these days on x86, we can limit the impact of this issue easily for skcipher implementations, by moving the FPU begin/end calls inside the skcipher walk processing loop. Considering that skcipher walks operate on at most one page at a time, doing so fully mitigates this issue. This also permits the skcipher walk logic to use non-atomic kmalloc() calls etc so we can change the 'atomic' bool argument in the calls to skcipher_walk_virt() to false as well. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 73 +++++++++++++----------------- 1 file changed, 32 insertions(+), 41 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index d96685457196..2144e54a6c89 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -283,16 +283,16 @@ static int ecb_encrypt(struct skcipher_request *req) unsigned int nbytes; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = skcipher_walk_done(&walk, nbytes); } - kernel_fpu_end(); return err; } @@ -305,16 +305,16 @@ static int ecb_decrypt(struct skcipher_request *req) unsigned int nbytes; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = skcipher_walk_done(&walk, nbytes); } - kernel_fpu_end(); return err; } @@ -327,16 +327,16 @@ static int cbc_encrypt(struct skcipher_request *req) unsigned int nbytes; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = skcipher_walk_done(&walk, nbytes); } - kernel_fpu_end(); return err; } @@ -349,16 +349,16 @@ static int cbc_decrypt(struct skcipher_request *req) unsigned int nbytes; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = skcipher_walk_done(&walk, nbytes); } - kernel_fpu_end(); return err; } @@ -476,21 +476,6 @@ static int cts_cbc_decrypt(struct skcipher_request *req) } #ifdef CONFIG_X86_64 -static void ctr_crypt_final(struct crypto_aes_ctx *ctx, - struct skcipher_walk *walk) -{ - u8 *ctrblk = walk->iv; - u8 keystream[AES_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - aesni_enc(ctx, keystream, ctrblk); - crypto_xor_cpy(dst, keystream, src, nbytes); - - crypto_inc(ctrblk, AES_BLOCK_SIZE); -} - static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv) { @@ -512,27 +497,33 @@ static int ctr_crypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + u8 keystream[AES_BLOCK_SIZE]; struct skcipher_walk walk; unsigned int nbytes; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_fpu_begin(); - while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - static_call(aesni_ctr_enc_tfm)(ctx, walk.dst.virt.addr, - walk.src.virt.addr, - nbytes & AES_BLOCK_MASK, - walk.iv); - nbytes &= AES_BLOCK_SIZE - 1; + while ((nbytes = walk.nbytes) > 0) { + kernel_fpu_begin(); + if (nbytes & AES_BLOCK_MASK) + static_call(aesni_ctr_enc_tfm)(ctx, walk.dst.virt.addr, + walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, + walk.iv); + nbytes &= ~AES_BLOCK_MASK; + + if (walk.nbytes == walk.total && nbytes > 0) { + aesni_enc(ctx, keystream, walk.iv); + crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes - nbytes, + walk.src.virt.addr + walk.nbytes - nbytes, + keystream, nbytes); + crypto_inc(walk.iv, AES_BLOCK_SIZE); + nbytes = 0; + } + kernel_fpu_end(); err = skcipher_walk_done(&walk, nbytes); } - if (walk.nbytes) { - ctr_crypt_final(ctx, &walk); - err = skcipher_walk_done(&walk, 0); - } - kernel_fpu_end(); - return err; } From 09228c03775447a6e3b30e06abd3219f79bb32a1 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Mon, 18 Jan 2021 16:15:40 +0800 Subject: [PATCH 102/154] crypto: hisilicon/hpre - delete ECC 1bit error reported threshold Delete 'HPRE_RAS_ECC1BIT_TH' register setting of hpre, since register 'QM_RAS_CE_THRESHOLD' of qm has done this work. Signed-off-by: Hui Tang Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index ad8b691887a6..bf1fa08984a4 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -36,7 +36,6 @@ #define HPRE_INT_STATUS 0x301800 #define HPRE_CORE_INT_ENABLE 0 #define HPRE_CORE_INT_DISABLE 0x003fffff -#define HPRE_RAS_ECC_1BIT_TH 0x30140c #define HPRE_RDCHN_INI_ST 0x301a00 #define HPRE_CLSTR_BASE 0x302000 #define HPRE_CORE_EN_OFFSET 0x04 @@ -312,7 +311,6 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) writel(HPRE_QM_VFG_AX_MASK, HPRE_ADDR(qm, HPRE_VFG_AXCACHE)); writel(0x0, HPRE_ADDR(qm, HPRE_BD_ENDIAN)); writel(0x0, HPRE_ADDR(qm, HPRE_INT_MASK)); - writel(0x0, HPRE_ADDR(qm, HPRE_RAS_ECC_1BIT_TH)); writel(0x0, HPRE_ADDR(qm, HPRE_POISON_BYPASS)); writel(0x0, HPRE_ADDR(qm, HPRE_COMM_CNT_CLR_CE)); writel(0x0, HPRE_ADDR(qm, HPRE_ECC_BYPASS)); From ed278023708b68f08b2688beaef6d078f3339377 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Mon, 18 Jan 2021 16:17:25 +0800 Subject: [PATCH 103/154] crypto: hisilicon/hpre - add two RAS correctable errors processing 1.One CE error is detecting timeout of generating a random number. 2.Another is detecting timeout of SVA prefetching address. Signed-off-by: Hui Tang Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index bf1fa08984a4..d46086e1b922 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -45,7 +45,7 @@ #define HPRE_CORE_IS_SCHD_OFFSET 0x90 #define HPRE_RAS_CE_ENB 0x301410 -#define HPRE_HAC_RAS_CE_ENABLE 0x1 +#define HPRE_HAC_RAS_CE_ENABLE (BIT(0) | BIT(22) | BIT(23)) #define HPRE_RAS_NFE_ENB 0x301414 #define HPRE_HAC_RAS_NFE_ENABLE 0x3ffffe #define HPRE_RAS_FE_ENB 0x301418 @@ -129,7 +129,11 @@ static const struct hpre_hw_error hpre_hw_errors[] = { { .int_msk = BIT(9), .msg = "cluster4_shb_timeout_int_set" }, { .int_msk = GENMASK(15, 10), .msg = "ooo_rdrsp_err_int_set" }, { .int_msk = GENMASK(21, 16), .msg = "ooo_wrrsp_err_int_set" }, - { /* sentinel */ } + { .int_msk = BIT(22), .msg = "pt_rng_timeout_int_set"}, + { .int_msk = BIT(23), .msg = "sva_fsm_timeout_int_set"}, + { + /* sentinel */ + } }; static const u64 hpre_cluster_offsets[] = { From bc005983e88ac45a284f70dd6ce5707a0c9dddc4 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Mon, 18 Jan 2021 16:18:19 +0800 Subject: [PATCH 104/154] crypto: hisilicon/hpre - add ecc algorithm inqury for uacce device Uacce SysFS support more algorithms inqury such as 'ecdh/ecdsa/sm2/x25519/x448' Signed-off-by: Hui Tang Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index d46086e1b922..3b3481e7527c 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -778,7 +778,10 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) return -EINVAL; } - qm->algs = "rsa\ndh\n"; + if (pdev->revision >= QM_HW_V3) + qm->algs = "rsa\ndh\necdh\nx25519\nx448\necdsa\nsm2\n"; + else + qm->algs = "rsa\ndh\n"; qm->mode = uacce_mode; qm->pdev = pdev; qm->ver = pdev->revision; From 416b846757bcea20006a9197e67ba3a8b5b2a680 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 20 Jan 2021 18:57:24 +0000 Subject: [PATCH 105/154] crypto: talitos - Work around SEC6 ERRATA (AES-CTR mode data size error) Talitos Security Engine AESU considers any input data size that is not a multiple of 16 bytes to be an error. This is not a problem in general, except for Counter mode that is a stream cipher and can have an input of any size. Test Manager for ctr(aes) fails on 4th test vector which has a length of 499 while all previous vectors which have a 16 bytes multiple length succeed. As suggested by Freescale, round up the input data length to the nearest 16 bytes. Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes") Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 28 ++++++++++++++++------------ drivers/crypto/talitos.h | 1 + 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 4fd85f31630a..b656983c1ef4 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1093,11 +1093,12 @@ static void ipsec_esp_decrypt_hwauth_done(struct device *dev, */ static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count, unsigned int offset, int datalen, int elen, - struct talitos_ptr *link_tbl_ptr) + struct talitos_ptr *link_tbl_ptr, int align) { int n_sg = elen ? sg_count + 1 : sg_count; int count = 0; int cryptlen = datalen + elen; + int padding = ALIGN(cryptlen, align) - cryptlen; while (cryptlen && sg && n_sg--) { unsigned int len = sg_dma_len(sg); @@ -1121,7 +1122,7 @@ static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count, offset += datalen; } to_talitos_ptr(link_tbl_ptr + count, - sg_dma_address(sg) + offset, len, 0); + sg_dma_address(sg) + offset, sg_next(sg) ? len : len + padding, 0); to_talitos_ptr_ext_set(link_tbl_ptr + count, 0, 0); count++; cryptlen -= len; @@ -1144,10 +1145,11 @@ static int talitos_sg_map_ext(struct device *dev, struct scatterlist *src, unsigned int len, struct talitos_edesc *edesc, struct talitos_ptr *ptr, int sg_count, unsigned int offset, int tbl_off, int elen, - bool force) + bool force, int align) { struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); + int aligned_len = ALIGN(len, align); if (!src) { to_talitos_ptr(ptr, 0, 0, is_sec1); @@ -1155,22 +1157,22 @@ static int talitos_sg_map_ext(struct device *dev, struct scatterlist *src, } to_talitos_ptr_ext_set(ptr, elen, is_sec1); if (sg_count == 1 && !force) { - to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, is_sec1); + to_talitos_ptr(ptr, sg_dma_address(src) + offset, aligned_len, is_sec1); return sg_count; } if (is_sec1) { - to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, len, is_sec1); + to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, aligned_len, is_sec1); return sg_count; } sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len, elen, - &edesc->link_tbl[tbl_off]); + &edesc->link_tbl[tbl_off], align); if (sg_count == 1 && !force) { /* Only one segment now, so no link tbl needed*/ copy_talitos_ptr(ptr, &edesc->link_tbl[tbl_off], is_sec1); return sg_count; } to_talitos_ptr(ptr, edesc->dma_link_tbl + - tbl_off * sizeof(struct talitos_ptr), len, is_sec1); + tbl_off * sizeof(struct talitos_ptr), aligned_len, is_sec1); to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, is_sec1); return sg_count; @@ -1182,7 +1184,7 @@ static int talitos_sg_map(struct device *dev, struct scatterlist *src, unsigned int offset, int tbl_off) { return talitos_sg_map_ext(dev, src, len, edesc, ptr, sg_count, offset, - tbl_off, 0, false); + tbl_off, 0, false, 1); } /* @@ -1251,7 +1253,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, ret = talitos_sg_map_ext(dev, areq->src, cryptlen, edesc, &desc->ptr[4], sg_count, areq->assoclen, tbl_off, elen, - false); + false, 1); if (ret > 1) { tbl_off += ret; @@ -1271,7 +1273,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, elen = 0; ret = talitos_sg_map_ext(dev, areq->dst, cryptlen, edesc, &desc->ptr[5], sg_count, areq->assoclen, tbl_off, elen, - is_ipsec_esp && !encrypt); + is_ipsec_esp && !encrypt, 1); tbl_off += ret; if (!encrypt && is_ipsec_esp) { @@ -1577,6 +1579,8 @@ static int common_nonsnoop(struct talitos_edesc *edesc, bool sync_needed = false; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); + bool is_ctr = (desc->hdr & DESC_HDR_SEL0_MASK) == DESC_HDR_SEL0_AESU && + (desc->hdr & DESC_HDR_MODE0_AESU_MASK) == DESC_HDR_MODE0_AESU_CTR; /* first DWORD empty */ @@ -1597,8 +1601,8 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* * cipher in */ - sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc, - &desc->ptr[3], sg_count, 0, 0); + sg_count = talitos_sg_map_ext(dev, areq->src, cryptlen, edesc, &desc->ptr[3], + sg_count, 0, 0, 0, false, is_ctr ? 16 : 1); if (sg_count > 1) sync_needed = true; diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index 1469b956948a..32825119e880 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -344,6 +344,7 @@ static inline bool has_ftr_sec1(struct talitos_private *priv) /* primary execution unit mode (MODE0) and derivatives */ #define DESC_HDR_MODE0_ENCRYPT cpu_to_be32(0x00100000) +#define DESC_HDR_MODE0_AESU_MASK cpu_to_be32(0x00600000) #define DESC_HDR_MODE0_AESU_CBC cpu_to_be32(0x00200000) #define DESC_HDR_MODE0_AESU_CTR cpu_to_be32(0x00600000) #define DESC_HDR_MODE0_DEU_CBC cpu_to_be32(0x00400000) From 43a942d27eaaf33bca560121cbe42f3637e92880 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 20 Jan 2021 18:57:25 +0000 Subject: [PATCH 106/154] crypto: talitos - Fix ctr(aes) on SEC1 While ctr(aes) requires the use of a special descriptor on SEC2 (see commit 70d355ccea89 ("crypto: talitos - fix ctr-aes-talitos")), that special descriptor doesn't work on SEC1, see commit e738c5f15562 ("powerpc/8xx: Add DT node for using the SEC engine of the MPC885"). However, the common nonsnoop descriptor works properly on SEC1 for ctr(aes). Add a second template for ctr(aes) that will be registered only on SEC1. Fixes: 70d355ccea89 ("crypto: talitos - fix ctr-aes-talitos") Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index b656983c1ef4..25c9f825b8b5 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -2765,6 +2765,22 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_CTR, }, + { .type = CRYPTO_ALG_TYPE_SKCIPHER, + .alg.skcipher = { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-talitos", + .base.cra_blocksize = 1, + .base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = skcipher_aes_setkey, + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CTR, + }, { .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "ecb(des)", @@ -3182,6 +3198,12 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, t_alg->algt.alg.skcipher.setkey ?: skcipher_setkey; t_alg->algt.alg.skcipher.encrypt = skcipher_encrypt; t_alg->algt.alg.skcipher.decrypt = skcipher_decrypt; + if (!strcmp(alg->cra_name, "ctr(aes)") && !has_ftr_sec1(priv) && + DESC_TYPE(t_alg->algt.desc_hdr_template) != + DESC_TYPE(DESC_HDR_TYPE_AESU_CTR_NONSNOOP)) { + devm_kfree(dev, t_alg); + return ERR_PTR(-ENOTSUPP); + } break; case CRYPTO_ALG_TYPE_AEAD: alg = &t_alg->algt.alg.aead.base; From c114cf7f86242bbd6841de4c49923100ad41b6d5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 Jan 2021 16:16:46 +1100 Subject: [PATCH 107/154] crypto: marvell/cesa - Fix use of sg_pcopy on iomem pointer The cesa driver mixes use of iomem pointers and normal kernel pointers. Sometimes it uses memcpy_toio/memcpy_fromio on both while other times it would use straight memcpy on both, through the sg_pcopy_* helpers. This patch fixes this by adding a new field sram_pool to the engine for the normal pointer case which then allows us to use the right interface depending on the value of engine->pool. Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa/cesa.c | 10 ++--- drivers/crypto/marvell/cesa/cesa.h | 31 ++++++++++++++- drivers/crypto/marvell/cesa/cipher.c | 34 ++++++++++------ drivers/crypto/marvell/cesa/hash.c | 59 +++++++++++++++++++--------- drivers/crypto/marvell/cesa/tdma.c | 50 +++++++++++++++++++++++ 5 files changed, 148 insertions(+), 36 deletions(-) diff --git a/drivers/crypto/marvell/cesa/cesa.c b/drivers/crypto/marvell/cesa/cesa.c index 06211858bf2e..f14aac532f53 100644 --- a/drivers/crypto/marvell/cesa/cesa.c +++ b/drivers/crypto/marvell/cesa/cesa.c @@ -381,10 +381,10 @@ static int mv_cesa_get_sram(struct platform_device *pdev, int idx) engine->pool = of_gen_pool_get(cesa->dev->of_node, "marvell,crypto-srams", idx); if (engine->pool) { - engine->sram = gen_pool_dma_alloc(engine->pool, - cesa->sram_size, - &engine->sram_dma); - if (engine->sram) + engine->sram_pool = gen_pool_dma_alloc(engine->pool, + cesa->sram_size, + &engine->sram_dma); + if (engine->sram_pool) return 0; engine->pool = NULL; @@ -422,7 +422,7 @@ static void mv_cesa_put_sram(struct platform_device *pdev, int idx) struct mv_cesa_engine *engine = &cesa->engines[idx]; if (engine->pool) - gen_pool_free(engine->pool, (unsigned long)engine->sram, + gen_pool_free(engine->pool, (unsigned long)engine->sram_pool, cesa->sram_size); else dma_unmap_resource(cesa->dev, engine->sram_dma, diff --git a/drivers/crypto/marvell/cesa/cesa.h b/drivers/crypto/marvell/cesa/cesa.h index fabfaaccca87..5c5eff0adbcc 100644 --- a/drivers/crypto/marvell/cesa/cesa.h +++ b/drivers/crypto/marvell/cesa/cesa.h @@ -428,6 +428,7 @@ struct mv_cesa_dev { * @id: engine id * @regs: engine registers * @sram: SRAM memory region + * @sram_pool: SRAM memory region from pool * @sram_dma: DMA address of the SRAM memory region * @lock: engine lock * @req: current crypto request @@ -448,7 +449,10 @@ struct mv_cesa_dev { struct mv_cesa_engine { int id; void __iomem *regs; - void __iomem *sram; + union { + void __iomem *sram; + void *sram_pool; + }; dma_addr_t sram_dma; spinlock_t lock; struct crypto_async_request *req; @@ -867,6 +871,31 @@ int mv_cesa_dma_add_op_transfers(struct mv_cesa_tdma_chain *chain, struct mv_cesa_sg_dma_iter *sgiter, gfp_t gfp_flags); +size_t mv_cesa_sg_copy(struct mv_cesa_engine *engine, + struct scatterlist *sgl, unsigned int nents, + unsigned int sram_off, size_t buflen, off_t skip, + bool to_sram); + +static inline size_t mv_cesa_sg_copy_to_sram(struct mv_cesa_engine *engine, + struct scatterlist *sgl, + unsigned int nents, + unsigned int sram_off, + size_t buflen, off_t skip) +{ + return mv_cesa_sg_copy(engine, sgl, nents, sram_off, buflen, skip, + true); +} + +static inline size_t mv_cesa_sg_copy_from_sram(struct mv_cesa_engine *engine, + struct scatterlist *sgl, + unsigned int nents, + unsigned int sram_off, + size_t buflen, off_t skip) +{ + return mv_cesa_sg_copy(engine, sgl, nents, sram_off, buflen, skip, + false); +} + /* Algorithm definitions */ extern struct ahash_alg mv_md5_alg; diff --git a/drivers/crypto/marvell/cesa/cipher.c b/drivers/crypto/marvell/cesa/cipher.c index b4a6ff9dd6d5..b739d3b873dc 100644 --- a/drivers/crypto/marvell/cesa/cipher.c +++ b/drivers/crypto/marvell/cesa/cipher.c @@ -89,22 +89,29 @@ static void mv_cesa_skcipher_std_step(struct skcipher_request *req) CESA_SA_SRAM_PAYLOAD_SIZE); mv_cesa_adjust_op(engine, &sreq->op); - memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); + if (engine->pool) + memcpy(engine->sram_pool, &sreq->op, sizeof(sreq->op)); + else + memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); - len = sg_pcopy_to_buffer(req->src, creq->src_nents, - engine->sram + CESA_SA_DATA_SRAM_OFFSET, - len, sreq->offset); + len = mv_cesa_sg_copy_to_sram(engine, req->src, creq->src_nents, + CESA_SA_DATA_SRAM_OFFSET, len, + sreq->offset); sreq->size = len; mv_cesa_set_crypt_op_len(&sreq->op, len); /* FIXME: only update enc_len field */ if (!sreq->skip_ctx) { - memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); + if (engine->pool) + memcpy(engine->sram_pool, &sreq->op, sizeof(sreq->op)); + else + memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); sreq->skip_ctx = true; - } else { + } else if (engine->pool) + memcpy(engine->sram_pool, &sreq->op, sizeof(sreq->op.desc)); + else memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op.desc)); - } mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); @@ -121,9 +128,9 @@ static int mv_cesa_skcipher_std_process(struct skcipher_request *req, struct mv_cesa_engine *engine = creq->base.engine; size_t len; - len = sg_pcopy_from_buffer(req->dst, creq->dst_nents, - engine->sram + CESA_SA_DATA_SRAM_OFFSET, - sreq->size, sreq->offset); + len = mv_cesa_sg_copy_from_sram(engine, req->dst, creq->dst_nents, + CESA_SA_DATA_SRAM_OFFSET, sreq->size, + sreq->offset); sreq->offset += len; if (sreq->offset < req->cryptlen) @@ -214,11 +221,14 @@ mv_cesa_skcipher_complete(struct crypto_async_request *req) basereq = &creq->base; memcpy(skreq->iv, basereq->chain.last->op->ctx.skcipher.iv, ivsize); - } else { + } else if (engine->pool) + memcpy(skreq->iv, + engine->sram_pool + CESA_SA_CRYPT_IV_SRAM_OFFSET, + ivsize); + else memcpy_fromio(skreq->iv, engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, ivsize); - } } static const struct mv_cesa_req_ops mv_cesa_skcipher_req_ops = { diff --git a/drivers/crypto/marvell/cesa/hash.c b/drivers/crypto/marvell/cesa/hash.c index 8cf9fd518d86..c72b0672fc71 100644 --- a/drivers/crypto/marvell/cesa/hash.c +++ b/drivers/crypto/marvell/cesa/hash.c @@ -168,7 +168,12 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) int i; mv_cesa_adjust_op(engine, &creq->op_tmpl); - memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); + if (engine->pool) + memcpy(engine->sram_pool, &creq->op_tmpl, + sizeof(creq->op_tmpl)); + else + memcpy_toio(engine->sram, &creq->op_tmpl, + sizeof(creq->op_tmpl)); if (!sreq->offset) { digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); @@ -177,9 +182,14 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) engine->regs + CESA_IVDIG(i)); } - if (creq->cache_ptr) - memcpy_toio(engine->sram + CESA_SA_DATA_SRAM_OFFSET, - creq->cache, creq->cache_ptr); + if (creq->cache_ptr) { + if (engine->pool) + memcpy(engine->sram_pool + CESA_SA_DATA_SRAM_OFFSET, + creq->cache, creq->cache_ptr); + else + memcpy_toio(engine->sram + CESA_SA_DATA_SRAM_OFFSET, + creq->cache, creq->cache_ptr); + } len = min_t(size_t, req->nbytes + creq->cache_ptr - sreq->offset, CESA_SA_SRAM_PAYLOAD_SIZE); @@ -190,12 +200,10 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) } if (len - creq->cache_ptr) - sreq->offset += sg_pcopy_to_buffer(req->src, creq->src_nents, - engine->sram + - CESA_SA_DATA_SRAM_OFFSET + - creq->cache_ptr, - len - creq->cache_ptr, - sreq->offset); + sreq->offset += mv_cesa_sg_copy_to_sram( + engine, req->src, creq->src_nents, + CESA_SA_DATA_SRAM_OFFSET + creq->cache_ptr, + len - creq->cache_ptr, sreq->offset); op = &creq->op_tmpl; @@ -220,16 +228,28 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) if (len + trailerlen > CESA_SA_SRAM_PAYLOAD_SIZE) { len &= CESA_HASH_BLOCK_SIZE_MSK; new_cache_ptr = 64 - trailerlen; - memcpy_fromio(creq->cache, - engine->sram + - CESA_SA_DATA_SRAM_OFFSET + len, - new_cache_ptr); + if (engine->pool) + memcpy(creq->cache, + engine->sram_pool + + CESA_SA_DATA_SRAM_OFFSET + len, + new_cache_ptr); + else + memcpy_fromio(creq->cache, + engine->sram + + CESA_SA_DATA_SRAM_OFFSET + + len, + new_cache_ptr); } else { i = mv_cesa_ahash_pad_req(creq, creq->cache); len += i; - memcpy_toio(engine->sram + len + - CESA_SA_DATA_SRAM_OFFSET, - creq->cache, i); + if (engine->pool) + memcpy(engine->sram_pool + len + + CESA_SA_DATA_SRAM_OFFSET, + creq->cache, i); + else + memcpy_toio(engine->sram + len + + CESA_SA_DATA_SRAM_OFFSET, + creq->cache, i); } if (frag_mode == CESA_SA_DESC_CFG_LAST_FRAG) @@ -243,7 +263,10 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) mv_cesa_update_op_cfg(op, frag_mode, CESA_SA_DESC_CFG_FRAG_MSK); /* FIXME: only update enc_len field */ - memcpy_toio(engine->sram, op, sizeof(*op)); + if (engine->pool) + memcpy(engine->sram_pool, op, sizeof(*op)); + else + memcpy_toio(engine->sram, op, sizeof(*op)); if (frag_mode == CESA_SA_DESC_CFG_FIRST_FRAG) mv_cesa_update_op_cfg(op, CESA_SA_DESC_CFG_MID_FRAG, diff --git a/drivers/crypto/marvell/cesa/tdma.c b/drivers/crypto/marvell/cesa/tdma.c index 0e0d63359798..f0b5537038c2 100644 --- a/drivers/crypto/marvell/cesa/tdma.c +++ b/drivers/crypto/marvell/cesa/tdma.c @@ -350,3 +350,53 @@ int mv_cesa_dma_add_op_transfers(struct mv_cesa_tdma_chain *chain, return 0; } + +size_t mv_cesa_sg_copy(struct mv_cesa_engine *engine, + struct scatterlist *sgl, unsigned int nents, + unsigned int sram_off, size_t buflen, off_t skip, + bool to_sram) +{ + unsigned int sg_flags = SG_MITER_ATOMIC; + struct sg_mapping_iter miter; + unsigned int offset = 0; + + if (to_sram) + sg_flags |= SG_MITER_FROM_SG; + else + sg_flags |= SG_MITER_TO_SG; + + sg_miter_start(&miter, sgl, nents, sg_flags); + + if (!sg_miter_skip(&miter, skip)) + return 0; + + while ((offset < buflen) && sg_miter_next(&miter)) { + unsigned int len; + + len = min(miter.length, buflen - offset); + + if (to_sram) { + if (engine->pool) + memcpy(engine->sram_pool + sram_off + offset, + miter.addr, len); + else + memcpy_toio(engine->sram + sram_off + offset, + miter.addr, len); + } else { + if (engine->pool) + memcpy(miter.addr, + engine->sram_pool + sram_off + offset, + len); + else + memcpy_fromio(miter.addr, + engine->sram + sram_off + offset, + len); + } + + offset += len; + } + + sg_miter_stop(&miter); + + return offset; +} From b21b9a5e0aef025aafd2c57622a5f0cb9562c886 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 21 Jan 2021 14:07:29 +0100 Subject: [PATCH 108/154] crypto: rmd128 - remove RIPE-MD 128 hash algorithm RIPE-MD 128 is never referenced anywhere in the kernel, and unlikely to be depended upon by userspace via AF_ALG. So let's remove it. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/Kconfig | 13 -- crypto/Makefile | 1 - crypto/ripemd.h | 3 - crypto/rmd128.c | 323 ----------------------------------------------- crypto/tcrypt.c | 18 +-- crypto/testmgr.c | 12 -- crypto/testmgr.h | 137 -------------------- 7 files changed, 1 insertion(+), 506 deletions(-) delete mode 100644 crypto/rmd128.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 94f0fde06b94..a14da8290abb 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -817,19 +817,6 @@ config CRYPTO_MICHAEL_MIC should not be used for other purposes because of the weakness of the algorithm. -config CRYPTO_RMD128 - tristate "RIPEMD-128 digest algorithm" - select CRYPTO_HASH - help - RIPEMD-128 (ISO/IEC 10118-3:2004). - - RIPEMD-128 is a 128-bit cryptographic hash function. It should only - be used as a secure replacement for RIPEMD. For other use cases, - RIPEMD-160 should be used. - - Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel. - See - config CRYPTO_RMD160 tristate "RIPEMD-160 digest algorithm" select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index b279483fba50..c4d8f86a106c 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -67,7 +67,6 @@ obj-$(CONFIG_CRYPTO_XCBC) += xcbc.o obj-$(CONFIG_CRYPTO_NULL2) += crypto_null.o obj-$(CONFIG_CRYPTO_MD4) += md4.o obj-$(CONFIG_CRYPTO_MD5) += md5.o -obj-$(CONFIG_CRYPTO_RMD128) += rmd128.o obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o obj-$(CONFIG_CRYPTO_RMD256) += rmd256.o obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o diff --git a/crypto/ripemd.h b/crypto/ripemd.h index 93edbf52197d..0f66e3c86a2b 100644 --- a/crypto/ripemd.h +++ b/crypto/ripemd.h @@ -6,9 +6,6 @@ #ifndef _CRYPTO_RMD_H #define _CRYPTO_RMD_H -#define RMD128_DIGEST_SIZE 16 -#define RMD128_BLOCK_SIZE 64 - #define RMD160_DIGEST_SIZE 20 #define RMD160_BLOCK_SIZE 64 diff --git a/crypto/rmd128.c b/crypto/rmd128.c deleted file mode 100644 index 29308fb97e7e..000000000000 --- a/crypto/rmd128.c +++ /dev/null @@ -1,323 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Cryptographic API. - * - * RIPEMD-128 - RACE Integrity Primitives Evaluation Message Digest. - * - * Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC - * - * Copyright (c) 2008 Adrian-Ken Rueegsegger - */ -#include -#include -#include -#include -#include -#include - -#include "ripemd.h" - -struct rmd128_ctx { - u64 byte_count; - u32 state[4]; - __le32 buffer[16]; -}; - -#define K1 RMD_K1 -#define K2 RMD_K2 -#define K3 RMD_K3 -#define K4 RMD_K4 -#define KK1 RMD_K6 -#define KK2 RMD_K7 -#define KK3 RMD_K8 -#define KK4 RMD_K1 - -#define F1(x, y, z) (x ^ y ^ z) /* XOR */ -#define F2(x, y, z) (z ^ (x & (y ^ z))) /* x ? y : z */ -#define F3(x, y, z) ((x | ~y) ^ z) -#define F4(x, y, z) (y ^ (z & (x ^ y))) /* z ? x : y */ - -#define ROUND(a, b, c, d, f, k, x, s) { \ - (a) += f((b), (c), (d)) + le32_to_cpup(&(x)) + (k); \ - (a) = rol32((a), (s)); \ -} - -static void rmd128_transform(u32 *state, const __le32 *in) -{ - u32 aa, bb, cc, dd, aaa, bbb, ccc, ddd; - - /* Initialize left lane */ - aa = state[0]; - bb = state[1]; - cc = state[2]; - dd = state[3]; - - /* Initialize right lane */ - aaa = state[0]; - bbb = state[1]; - ccc = state[2]; - ddd = state[3]; - - /* round 1: left lane */ - ROUND(aa, bb, cc, dd, F1, K1, in[0], 11); - ROUND(dd, aa, bb, cc, F1, K1, in[1], 14); - ROUND(cc, dd, aa, bb, F1, K1, in[2], 15); - ROUND(bb, cc, dd, aa, F1, K1, in[3], 12); - ROUND(aa, bb, cc, dd, F1, K1, in[4], 5); - ROUND(dd, aa, bb, cc, F1, K1, in[5], 8); - ROUND(cc, dd, aa, bb, F1, K1, in[6], 7); - ROUND(bb, cc, dd, aa, F1, K1, in[7], 9); - ROUND(aa, bb, cc, dd, F1, K1, in[8], 11); - ROUND(dd, aa, bb, cc, F1, K1, in[9], 13); - ROUND(cc, dd, aa, bb, F1, K1, in[10], 14); - ROUND(bb, cc, dd, aa, F1, K1, in[11], 15); - ROUND(aa, bb, cc, dd, F1, K1, in[12], 6); - ROUND(dd, aa, bb, cc, F1, K1, in[13], 7); - ROUND(cc, dd, aa, bb, F1, K1, in[14], 9); - ROUND(bb, cc, dd, aa, F1, K1, in[15], 8); - - /* round 2: left lane */ - ROUND(aa, bb, cc, dd, F2, K2, in[7], 7); - ROUND(dd, aa, bb, cc, F2, K2, in[4], 6); - ROUND(cc, dd, aa, bb, F2, K2, in[13], 8); - ROUND(bb, cc, dd, aa, F2, K2, in[1], 13); - ROUND(aa, bb, cc, dd, F2, K2, in[10], 11); - ROUND(dd, aa, bb, cc, F2, K2, in[6], 9); - ROUND(cc, dd, aa, bb, F2, K2, in[15], 7); - ROUND(bb, cc, dd, aa, F2, K2, in[3], 15); - ROUND(aa, bb, cc, dd, F2, K2, in[12], 7); - ROUND(dd, aa, bb, cc, F2, K2, in[0], 12); - ROUND(cc, dd, aa, bb, F2, K2, in[9], 15); - ROUND(bb, cc, dd, aa, F2, K2, in[5], 9); - ROUND(aa, bb, cc, dd, F2, K2, in[2], 11); - ROUND(dd, aa, bb, cc, F2, K2, in[14], 7); - ROUND(cc, dd, aa, bb, F2, K2, in[11], 13); - ROUND(bb, cc, dd, aa, F2, K2, in[8], 12); - - /* round 3: left lane */ - ROUND(aa, bb, cc, dd, F3, K3, in[3], 11); - ROUND(dd, aa, bb, cc, F3, K3, in[10], 13); - ROUND(cc, dd, aa, bb, F3, K3, in[14], 6); - ROUND(bb, cc, dd, aa, F3, K3, in[4], 7); - ROUND(aa, bb, cc, dd, F3, K3, in[9], 14); - ROUND(dd, aa, bb, cc, F3, K3, in[15], 9); - ROUND(cc, dd, aa, bb, F3, K3, in[8], 13); - ROUND(bb, cc, dd, aa, F3, K3, in[1], 15); - ROUND(aa, bb, cc, dd, F3, K3, in[2], 14); - ROUND(dd, aa, bb, cc, F3, K3, in[7], 8); - ROUND(cc, dd, aa, bb, F3, K3, in[0], 13); - ROUND(bb, cc, dd, aa, F3, K3, in[6], 6); - ROUND(aa, bb, cc, dd, F3, K3, in[13], 5); - ROUND(dd, aa, bb, cc, F3, K3, in[11], 12); - ROUND(cc, dd, aa, bb, F3, K3, in[5], 7); - ROUND(bb, cc, dd, aa, F3, K3, in[12], 5); - - /* round 4: left lane */ - ROUND(aa, bb, cc, dd, F4, K4, in[1], 11); - ROUND(dd, aa, bb, cc, F4, K4, in[9], 12); - ROUND(cc, dd, aa, bb, F4, K4, in[11], 14); - ROUND(bb, cc, dd, aa, F4, K4, in[10], 15); - ROUND(aa, bb, cc, dd, F4, K4, in[0], 14); - ROUND(dd, aa, bb, cc, F4, K4, in[8], 15); - ROUND(cc, dd, aa, bb, F4, K4, in[12], 9); - ROUND(bb, cc, dd, aa, F4, K4, in[4], 8); - ROUND(aa, bb, cc, dd, F4, K4, in[13], 9); - ROUND(dd, aa, bb, cc, F4, K4, in[3], 14); - ROUND(cc, dd, aa, bb, F4, K4, in[7], 5); - ROUND(bb, cc, dd, aa, F4, K4, in[15], 6); - ROUND(aa, bb, cc, dd, F4, K4, in[14], 8); - ROUND(dd, aa, bb, cc, F4, K4, in[5], 6); - ROUND(cc, dd, aa, bb, F4, K4, in[6], 5); - ROUND(bb, cc, dd, aa, F4, K4, in[2], 12); - - /* round 1: right lane */ - ROUND(aaa, bbb, ccc, ddd, F4, KK1, in[5], 8); - ROUND(ddd, aaa, bbb, ccc, F4, KK1, in[14], 9); - ROUND(ccc, ddd, aaa, bbb, F4, KK1, in[7], 9); - ROUND(bbb, ccc, ddd, aaa, F4, KK1, in[0], 11); - ROUND(aaa, bbb, ccc, ddd, F4, KK1, in[9], 13); - ROUND(ddd, aaa, bbb, ccc, F4, KK1, in[2], 15); - ROUND(ccc, ddd, aaa, bbb, F4, KK1, in[11], 15); - ROUND(bbb, ccc, ddd, aaa, F4, KK1, in[4], 5); - ROUND(aaa, bbb, ccc, ddd, F4, KK1, in[13], 7); - ROUND(ddd, aaa, bbb, ccc, F4, KK1, in[6], 7); - ROUND(ccc, ddd, aaa, bbb, F4, KK1, in[15], 8); - ROUND(bbb, ccc, ddd, aaa, F4, KK1, in[8], 11); - ROUND(aaa, bbb, ccc, ddd, F4, KK1, in[1], 14); - ROUND(ddd, aaa, bbb, ccc, F4, KK1, in[10], 14); - ROUND(ccc, ddd, aaa, bbb, F4, KK1, in[3], 12); - ROUND(bbb, ccc, ddd, aaa, F4, KK1, in[12], 6); - - /* round 2: right lane */ - ROUND(aaa, bbb, ccc, ddd, F3, KK2, in[6], 9); - ROUND(ddd, aaa, bbb, ccc, F3, KK2, in[11], 13); - ROUND(ccc, ddd, aaa, bbb, F3, KK2, in[3], 15); - ROUND(bbb, ccc, ddd, aaa, F3, KK2, in[7], 7); - ROUND(aaa, bbb, ccc, ddd, F3, KK2, in[0], 12); - ROUND(ddd, aaa, bbb, ccc, F3, KK2, in[13], 8); - ROUND(ccc, ddd, aaa, bbb, F3, KK2, in[5], 9); - ROUND(bbb, ccc, ddd, aaa, F3, KK2, in[10], 11); - ROUND(aaa, bbb, ccc, ddd, F3, KK2, in[14], 7); - ROUND(ddd, aaa, bbb, ccc, F3, KK2, in[15], 7); - ROUND(ccc, ddd, aaa, bbb, F3, KK2, in[8], 12); - ROUND(bbb, ccc, ddd, aaa, F3, KK2, in[12], 7); - ROUND(aaa, bbb, ccc, ddd, F3, KK2, in[4], 6); - ROUND(ddd, aaa, bbb, ccc, F3, KK2, in[9], 15); - ROUND(ccc, ddd, aaa, bbb, F3, KK2, in[1], 13); - ROUND(bbb, ccc, ddd, aaa, F3, KK2, in[2], 11); - - /* round 3: right lane */ - ROUND(aaa, bbb, ccc, ddd, F2, KK3, in[15], 9); - ROUND(ddd, aaa, bbb, ccc, F2, KK3, in[5], 7); - ROUND(ccc, ddd, aaa, bbb, F2, KK3, in[1], 15); - ROUND(bbb, ccc, ddd, aaa, F2, KK3, in[3], 11); - ROUND(aaa, bbb, ccc, ddd, F2, KK3, in[7], 8); - ROUND(ddd, aaa, bbb, ccc, F2, KK3, in[14], 6); - ROUND(ccc, ddd, aaa, bbb, F2, KK3, in[6], 6); - ROUND(bbb, ccc, ddd, aaa, F2, KK3, in[9], 14); - ROUND(aaa, bbb, ccc, ddd, F2, KK3, in[11], 12); - ROUND(ddd, aaa, bbb, ccc, F2, KK3, in[8], 13); - ROUND(ccc, ddd, aaa, bbb, F2, KK3, in[12], 5); - ROUND(bbb, ccc, ddd, aaa, F2, KK3, in[2], 14); - ROUND(aaa, bbb, ccc, ddd, F2, KK3, in[10], 13); - ROUND(ddd, aaa, bbb, ccc, F2, KK3, in[0], 13); - ROUND(ccc, ddd, aaa, bbb, F2, KK3, in[4], 7); - ROUND(bbb, ccc, ddd, aaa, F2, KK3, in[13], 5); - - /* round 4: right lane */ - ROUND(aaa, bbb, ccc, ddd, F1, KK4, in[8], 15); - ROUND(ddd, aaa, bbb, ccc, F1, KK4, in[6], 5); - ROUND(ccc, ddd, aaa, bbb, F1, KK4, in[4], 8); - ROUND(bbb, ccc, ddd, aaa, F1, KK4, in[1], 11); - ROUND(aaa, bbb, ccc, ddd, F1, KK4, in[3], 14); - ROUND(ddd, aaa, bbb, ccc, F1, KK4, in[11], 14); - ROUND(ccc, ddd, aaa, bbb, F1, KK4, in[15], 6); - ROUND(bbb, ccc, ddd, aaa, F1, KK4, in[0], 14); - ROUND(aaa, bbb, ccc, ddd, F1, KK4, in[5], 6); - ROUND(ddd, aaa, bbb, ccc, F1, KK4, in[12], 9); - ROUND(ccc, ddd, aaa, bbb, F1, KK4, in[2], 12); - ROUND(bbb, ccc, ddd, aaa, F1, KK4, in[13], 9); - ROUND(aaa, bbb, ccc, ddd, F1, KK4, in[9], 12); - ROUND(ddd, aaa, bbb, ccc, F1, KK4, in[7], 5); - ROUND(ccc, ddd, aaa, bbb, F1, KK4, in[10], 15); - ROUND(bbb, ccc, ddd, aaa, F1, KK4, in[14], 8); - - /* combine results */ - ddd += cc + state[1]; /* final result for state[0] */ - state[1] = state[2] + dd + aaa; - state[2] = state[3] + aa + bbb; - state[3] = state[0] + bb + ccc; - state[0] = ddd; -} - -static int rmd128_init(struct shash_desc *desc) -{ - struct rmd128_ctx *rctx = shash_desc_ctx(desc); - - rctx->byte_count = 0; - - rctx->state[0] = RMD_H0; - rctx->state[1] = RMD_H1; - rctx->state[2] = RMD_H2; - rctx->state[3] = RMD_H3; - - memset(rctx->buffer, 0, sizeof(rctx->buffer)); - - return 0; -} - -static int rmd128_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct rmd128_ctx *rctx = shash_desc_ctx(desc); - const u32 avail = sizeof(rctx->buffer) - (rctx->byte_count & 0x3f); - - rctx->byte_count += len; - - /* Enough space in buffer? If so copy and we're done */ - if (avail > len) { - memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail), - data, len); - goto out; - } - - memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail), - data, avail); - - rmd128_transform(rctx->state, rctx->buffer); - data += avail; - len -= avail; - - while (len >= sizeof(rctx->buffer)) { - memcpy(rctx->buffer, data, sizeof(rctx->buffer)); - rmd128_transform(rctx->state, rctx->buffer); - data += sizeof(rctx->buffer); - len -= sizeof(rctx->buffer); - } - - memcpy(rctx->buffer, data, len); - -out: - return 0; -} - -/* Add padding and return the message digest. */ -static int rmd128_final(struct shash_desc *desc, u8 *out) -{ - struct rmd128_ctx *rctx = shash_desc_ctx(desc); - u32 i, index, padlen; - __le64 bits; - __le32 *dst = (__le32 *)out; - static const u8 padding[64] = { 0x80, }; - - bits = cpu_to_le64(rctx->byte_count << 3); - - /* Pad out to 56 mod 64 */ - index = rctx->byte_count & 0x3f; - padlen = (index < 56) ? (56 - index) : ((64+56) - index); - rmd128_update(desc, padding, padlen); - - /* Append length */ - rmd128_update(desc, (const u8 *)&bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i < 4; i++) - dst[i] = cpu_to_le32p(&rctx->state[i]); - - /* Wipe context */ - memset(rctx, 0, sizeof(*rctx)); - - return 0; -} - -static struct shash_alg alg = { - .digestsize = RMD128_DIGEST_SIZE, - .init = rmd128_init, - .update = rmd128_update, - .final = rmd128_final, - .descsize = sizeof(struct rmd128_ctx), - .base = { - .cra_name = "rmd128", - .cra_driver_name = "rmd128-generic", - .cra_blocksize = RMD128_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init rmd128_mod_init(void) -{ - return crypto_register_shash(&alg); -} - -static void __exit rmd128_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -subsys_initcall(rmd128_mod_init); -module_exit(rmd128_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Adrian-Ken Rueegsegger "); -MODULE_DESCRIPTION("RIPEMD-128 Message Digest"); -MODULE_ALIAS_CRYPTO("rmd128"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index a4a11d2b57bd..bc9e2910f5c3 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -71,7 +71,7 @@ static const char *check[] = { "blowfish", "twofish", "serpent", "sha384", "sha512", "md4", "aes", "cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea", "khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt", - "camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320", + "camellia", "seed", "salsa20", "rmd160", "rmd256", "rmd320", "lzo", "lzo-rle", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", "streebog256", "streebog512", NULL @@ -1867,10 +1867,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("cts(cbc(aes))"); break; - case 39: - ret += tcrypt_test("rmd128"); - break; - case 40: ret += tcrypt_test("rmd160"); break; @@ -1955,10 +1951,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("xcbc(aes)"); break; - case 107: - ret += tcrypt_test("hmac(rmd128)"); - break; - case 108: ret += tcrypt_test("hmac(rmd160)"); break; @@ -2409,10 +2401,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_hash_speed("sha224", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; fallthrough; - case 314: - test_hash_speed("rmd128", sec, generic_hash_speed_template); - if (mode > 300 && mode < 400) break; - fallthrough; case 315: test_hash_speed("rmd160", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; @@ -2533,10 +2521,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_ahash_speed("sha224", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; fallthrough; - case 414: - test_ahash_speed("rmd128", sec, generic_hash_speed_template); - if (mode > 400 && mode < 500) break; - fallthrough; case 415: test_ahash_speed("rmd160", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index a896d77e9611..f8a5cec614d6 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4957,12 +4957,6 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(hmac_md5_tv_template) } - }, { - .alg = "hmac(rmd128)", - .test = alg_test_hash, - .suite = { - .hash = __VECS(hmac_rmd128_tv_template) - } }, { .alg = "hmac(rmd160)", .test = alg_test_hash, @@ -5275,12 +5269,6 @@ static const struct alg_test_desc alg_test_descs[] = { .aad_iv = 1, } } - }, { - .alg = "rmd128", - .test = alg_test_hash, - .suite = { - .hash = __VECS(rmd128_tv_template) - } }, { .alg = "rmd160", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 8c83811c0e35..05807872846c 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -3140,66 +3140,6 @@ static const struct hash_testvec md5_tv_template[] = { }; -/* - * RIPEMD-128 test vectors from ISO/IEC 10118-3:2004(E) - */ -static const struct hash_testvec rmd128_tv_template[] = { - { - .digest = "\xcd\xf2\x62\x13\xa1\x50\xdc\x3e" - "\xcb\x61\x0f\x18\xf6\xb3\x8b\x46", - }, { - .plaintext = "a", - .psize = 1, - .digest = "\x86\xbe\x7a\xfa\x33\x9d\x0f\xc7" - "\xcf\xc7\x85\xe7\x2f\x57\x8d\x33", - }, { - .plaintext = "abc", - .psize = 3, - .digest = "\xc1\x4a\x12\x19\x9c\x66\xe4\xba" - "\x84\x63\x6b\x0f\x69\x14\x4c\x77", - }, { - .plaintext = "message digest", - .psize = 14, - .digest = "\x9e\x32\x7b\x3d\x6e\x52\x30\x62" - "\xaf\xc1\x13\x2d\x7d\xf9\xd1\xb8", - }, { - .plaintext = "abcdefghijklmnopqrstuvwxyz", - .psize = 26, - .digest = "\xfd\x2a\xa6\x07\xf7\x1d\xc8\xf5" - "\x10\x71\x49\x22\xb3\x71\x83\x4e", - }, { - .plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde" - "fghijklmnopqrstuvwxyz0123456789", - .psize = 62, - .digest = "\xd1\xe9\x59\xeb\x17\x9c\x91\x1f" - "\xae\xa4\x62\x4c\x60\xc5\xc7\x02", - }, { - .plaintext = "1234567890123456789012345678901234567890" - "1234567890123456789012345678901234567890", - .psize = 80, - .digest = "\x3f\x45\xef\x19\x47\x32\xc2\xdb" - "\xb2\xc4\xa2\xc7\x69\x79\x5f\xa3", - }, { - .plaintext = "abcdbcdecdefdefgefghfghighij" - "hijkijkljklmklmnlmnomnopnopq", - .psize = 56, - .digest = "\xa1\xaa\x06\x89\xd0\xfa\xfa\x2d" - "\xdc\x22\xe8\x8b\x49\x13\x3a\x06", - }, { - .plaintext = "abcdefghbcdefghicdefghijdefghijkefghijklfghi" - "jklmghijklmnhijklmnoijklmnopjklmnopqklmnopqr" - "lmnopqrsmnopqrstnopqrstu", - .psize = 112, - .digest = "\xd4\xec\xc9\x13\xe1\xdf\x77\x6b" - "\xf4\x8d\xe9\xd5\x5b\x1f\x25\x46", - }, { - .plaintext = "abcdbcdecdefdefgefghfghighijhijk", - .psize = 32, - .digest = "\x13\xfc\x13\xe8\xef\xff\x34\x7d" - "\xe1\x93\xff\x46\xdb\xac\xcf\xd4", - } -}; - /* * RIPEMD-160 test vectors from ISO/IEC 10118-3:2004(E) */ @@ -5452,83 +5392,6 @@ static const struct hash_testvec hmac_md5_tv_template[] = }, }; -/* - * HMAC-RIPEMD128 test vectors from RFC2286 - */ -static const struct hash_testvec hmac_rmd128_tv_template[] = { - { - .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", - .ksize = 16, - .plaintext = "Hi There", - .psize = 8, - .digest = "\xfb\xf6\x1f\x94\x92\xaa\x4b\xbf" - "\x81\xc1\x72\xe8\x4e\x07\x34\xdb", - }, { - .key = "Jefe", - .ksize = 4, - .plaintext = "what do ya want for nothing?", - .psize = 28, - .digest = "\x87\x5f\x82\x88\x62\xb6\xb3\x34" - "\xb4\x27\xc5\x5f\x9f\x7f\xf0\x9b", - }, { - .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa", - .ksize = 16, - .plaintext = "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" - "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" - "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" - "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd", - .psize = 50, - .digest = "\x09\xf0\xb2\x84\x6d\x2f\x54\x3d" - "\xa3\x63\xcb\xec\x8d\x62\xa3\x8d", - }, { - .key = "\x01\x02\x03\x04\x05\x06\x07\x08" - "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" - "\x11\x12\x13\x14\x15\x16\x17\x18\x19", - .ksize = 25, - .plaintext = "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" - "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" - "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" - "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd", - .psize = 50, - .digest = "\xbd\xbb\xd7\xcf\x03\xe4\x4b\x5a" - "\xa6\x0a\xf8\x15\xbe\x4d\x22\x94", - }, { - .key = "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c", - .ksize = 16, - .plaintext = "Test With Truncation", - .psize = 20, - .digest = "\xe7\x98\x08\xf2\x4b\x25\xfd\x03" - "\x1c\x15\x5f\x0d\x55\x1d\x9a\x3a", - }, { - .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" - "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" - "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" - "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" - "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" - "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" - "\xaa\xaa", - .ksize = 80, - .plaintext = "Test Using Larger Than Block-Size Key - Hash Key First", - .psize = 54, - .digest = "\xdc\x73\x29\x28\xde\x98\x10\x4a" - "\x1f\x59\xd3\x73\xc1\x50\xac\xbb", - }, { - .key = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" - "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" - "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" - "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" - "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" - "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" - "\xaa\xaa", - .ksize = 80, - .plaintext = "Test Using Larger Than Block-Size Key and Larger Than One " - "Block-Size Data", - .psize = 73, - .digest = "\x5c\x6b\xec\x96\x79\x3e\x16\xd4" - "\x06\x90\xc2\x37\x63\x5f\x30\xc5", - }, -}; - /* * HMAC-RIPEMD160 test vectors from RFC2286 */ From c15d4167f0b0465b71c0619dc30b122f1b0e5b7a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 21 Jan 2021 14:07:30 +0100 Subject: [PATCH 109/154] crypto: rmd256 - remove RIPE-MD 256 hash algorithm RIPE-MD 256 is never referenced anywhere in the kernel, and unlikely to be depended upon by userspace via AF_ALG. So let's remove it Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/Kconfig | 12 -- crypto/Makefile | 1 - crypto/ripemd.h | 3 - crypto/rmd256.c | 342 ----------------------------------------------- crypto/tcrypt.c | 14 +- crypto/testmgr.c | 6 - crypto/testmgr.h | 64 --------- 7 files changed, 1 insertion(+), 441 deletions(-) delete mode 100644 crypto/rmd256.c diff --git a/crypto/Kconfig b/crypto/Kconfig index a14da8290abb..8e93dce161b0 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -834,18 +834,6 @@ config CRYPTO_RMD160 Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel. See -config CRYPTO_RMD256 - tristate "RIPEMD-256 digest algorithm" - select CRYPTO_HASH - help - RIPEMD-256 is an optional extension of RIPEMD-128 with a - 256 bit hash. It is intended for applications that require - longer hash-results, without needing a larger security level - (than RIPEMD-128). - - Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel. - See - config CRYPTO_RMD320 tristate "RIPEMD-320 digest algorithm" select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index c4d8f86a106c..946e821f1874 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -68,7 +68,6 @@ obj-$(CONFIG_CRYPTO_NULL2) += crypto_null.o obj-$(CONFIG_CRYPTO_MD4) += md4.o obj-$(CONFIG_CRYPTO_MD5) += md5.o obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o -obj-$(CONFIG_CRYPTO_RMD256) += rmd256.o obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o diff --git a/crypto/ripemd.h b/crypto/ripemd.h index 0f66e3c86a2b..a19c3c27a466 100644 --- a/crypto/ripemd.h +++ b/crypto/ripemd.h @@ -9,9 +9,6 @@ #define RMD160_DIGEST_SIZE 20 #define RMD160_BLOCK_SIZE 64 -#define RMD256_DIGEST_SIZE 32 -#define RMD256_BLOCK_SIZE 64 - #define RMD320_DIGEST_SIZE 40 #define RMD320_BLOCK_SIZE 64 diff --git a/crypto/rmd256.c b/crypto/rmd256.c deleted file mode 100644 index 3c730e9de5fd..000000000000 --- a/crypto/rmd256.c +++ /dev/null @@ -1,342 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Cryptographic API. - * - * RIPEMD-256 - RACE Integrity Primitives Evaluation Message Digest. - * - * Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC - * - * Copyright (c) 2008 Adrian-Ken Rueegsegger - */ -#include -#include -#include -#include -#include -#include - -#include "ripemd.h" - -struct rmd256_ctx { - u64 byte_count; - u32 state[8]; - __le32 buffer[16]; -}; - -#define K1 RMD_K1 -#define K2 RMD_K2 -#define K3 RMD_K3 -#define K4 RMD_K4 -#define KK1 RMD_K6 -#define KK2 RMD_K7 -#define KK3 RMD_K8 -#define KK4 RMD_K1 - -#define F1(x, y, z) (x ^ y ^ z) /* XOR */ -#define F2(x, y, z) (z ^ (x & (y ^ z))) /* x ? y : z */ -#define F3(x, y, z) ((x | ~y) ^ z) -#define F4(x, y, z) (y ^ (z & (x ^ y))) /* z ? x : y */ - -#define ROUND(a, b, c, d, f, k, x, s) { \ - (a) += f((b), (c), (d)) + le32_to_cpup(&(x)) + (k); \ - (a) = rol32((a), (s)); \ -} - -static void rmd256_transform(u32 *state, const __le32 *in) -{ - u32 aa, bb, cc, dd, aaa, bbb, ccc, ddd; - - /* Initialize left lane */ - aa = state[0]; - bb = state[1]; - cc = state[2]; - dd = state[3]; - - /* Initialize right lane */ - aaa = state[4]; - bbb = state[5]; - ccc = state[6]; - ddd = state[7]; - - /* round 1: left lane */ - ROUND(aa, bb, cc, dd, F1, K1, in[0], 11); - ROUND(dd, aa, bb, cc, F1, K1, in[1], 14); - ROUND(cc, dd, aa, bb, F1, K1, in[2], 15); - ROUND(bb, cc, dd, aa, F1, K1, in[3], 12); - ROUND(aa, bb, cc, dd, F1, K1, in[4], 5); - ROUND(dd, aa, bb, cc, F1, K1, in[5], 8); - ROUND(cc, dd, aa, bb, F1, K1, in[6], 7); - ROUND(bb, cc, dd, aa, F1, K1, in[7], 9); - ROUND(aa, bb, cc, dd, F1, K1, in[8], 11); - ROUND(dd, aa, bb, cc, F1, K1, in[9], 13); - ROUND(cc, dd, aa, bb, F1, K1, in[10], 14); - ROUND(bb, cc, dd, aa, F1, K1, in[11], 15); - ROUND(aa, bb, cc, dd, F1, K1, in[12], 6); - ROUND(dd, aa, bb, cc, F1, K1, in[13], 7); - ROUND(cc, dd, aa, bb, F1, K1, in[14], 9); - ROUND(bb, cc, dd, aa, F1, K1, in[15], 8); - - /* round 1: right lane */ - ROUND(aaa, bbb, ccc, ddd, F4, KK1, in[5], 8); - ROUND(ddd, aaa, bbb, ccc, F4, KK1, in[14], 9); - ROUND(ccc, ddd, aaa, bbb, F4, KK1, in[7], 9); - ROUND(bbb, ccc, ddd, aaa, F4, KK1, in[0], 11); - ROUND(aaa, bbb, ccc, ddd, F4, KK1, in[9], 13); - ROUND(ddd, aaa, bbb, ccc, F4, KK1, in[2], 15); - ROUND(ccc, ddd, aaa, bbb, F4, KK1, in[11], 15); - ROUND(bbb, ccc, ddd, aaa, F4, KK1, in[4], 5); - ROUND(aaa, bbb, ccc, ddd, F4, KK1, in[13], 7); - ROUND(ddd, aaa, bbb, ccc, F4, KK1, in[6], 7); - ROUND(ccc, ddd, aaa, bbb, F4, KK1, in[15], 8); - ROUND(bbb, ccc, ddd, aaa, F4, KK1, in[8], 11); - ROUND(aaa, bbb, ccc, ddd, F4, KK1, in[1], 14); - ROUND(ddd, aaa, bbb, ccc, F4, KK1, in[10], 14); - ROUND(ccc, ddd, aaa, bbb, F4, KK1, in[3], 12); - ROUND(bbb, ccc, ddd, aaa, F4, KK1, in[12], 6); - - /* Swap contents of "a" registers */ - swap(aa, aaa); - - /* round 2: left lane */ - ROUND(aa, bb, cc, dd, F2, K2, in[7], 7); - ROUND(dd, aa, bb, cc, F2, K2, in[4], 6); - ROUND(cc, dd, aa, bb, F2, K2, in[13], 8); - ROUND(bb, cc, dd, aa, F2, K2, in[1], 13); - ROUND(aa, bb, cc, dd, F2, K2, in[10], 11); - ROUND(dd, aa, bb, cc, F2, K2, in[6], 9); - ROUND(cc, dd, aa, bb, F2, K2, in[15], 7); - ROUND(bb, cc, dd, aa, F2, K2, in[3], 15); - ROUND(aa, bb, cc, dd, F2, K2, in[12], 7); - ROUND(dd, aa, bb, cc, F2, K2, in[0], 12); - ROUND(cc, dd, aa, bb, F2, K2, in[9], 15); - ROUND(bb, cc, dd, aa, F2, K2, in[5], 9); - ROUND(aa, bb, cc, dd, F2, K2, in[2], 11); - ROUND(dd, aa, bb, cc, F2, K2, in[14], 7); - ROUND(cc, dd, aa, bb, F2, K2, in[11], 13); - ROUND(bb, cc, dd, aa, F2, K2, in[8], 12); - - /* round 2: right lane */ - ROUND(aaa, bbb, ccc, ddd, F3, KK2, in[6], 9); - ROUND(ddd, aaa, bbb, ccc, F3, KK2, in[11], 13); - ROUND(ccc, ddd, aaa, bbb, F3, KK2, in[3], 15); - ROUND(bbb, ccc, ddd, aaa, F3, KK2, in[7], 7); - ROUND(aaa, bbb, ccc, ddd, F3, KK2, in[0], 12); - ROUND(ddd, aaa, bbb, ccc, F3, KK2, in[13], 8); - ROUND(ccc, ddd, aaa, bbb, F3, KK2, in[5], 9); - ROUND(bbb, ccc, ddd, aaa, F3, KK2, in[10], 11); - ROUND(aaa, bbb, ccc, ddd, F3, KK2, in[14], 7); - ROUND(ddd, aaa, bbb, ccc, F3, KK2, in[15], 7); - ROUND(ccc, ddd, aaa, bbb, F3, KK2, in[8], 12); - ROUND(bbb, ccc, ddd, aaa, F3, KK2, in[12], 7); - ROUND(aaa, bbb, ccc, ddd, F3, KK2, in[4], 6); - ROUND(ddd, aaa, bbb, ccc, F3, KK2, in[9], 15); - ROUND(ccc, ddd, aaa, bbb, F3, KK2, in[1], 13); - ROUND(bbb, ccc, ddd, aaa, F3, KK2, in[2], 11); - - /* Swap contents of "b" registers */ - swap(bb, bbb); - - /* round 3: left lane */ - ROUND(aa, bb, cc, dd, F3, K3, in[3], 11); - ROUND(dd, aa, bb, cc, F3, K3, in[10], 13); - ROUND(cc, dd, aa, bb, F3, K3, in[14], 6); - ROUND(bb, cc, dd, aa, F3, K3, in[4], 7); - ROUND(aa, bb, cc, dd, F3, K3, in[9], 14); - ROUND(dd, aa, bb, cc, F3, K3, in[15], 9); - ROUND(cc, dd, aa, bb, F3, K3, in[8], 13); - ROUND(bb, cc, dd, aa, F3, K3, in[1], 15); - ROUND(aa, bb, cc, dd, F3, K3, in[2], 14); - ROUND(dd, aa, bb, cc, F3, K3, in[7], 8); - ROUND(cc, dd, aa, bb, F3, K3, in[0], 13); - ROUND(bb, cc, dd, aa, F3, K3, in[6], 6); - ROUND(aa, bb, cc, dd, F3, K3, in[13], 5); - ROUND(dd, aa, bb, cc, F3, K3, in[11], 12); - ROUND(cc, dd, aa, bb, F3, K3, in[5], 7); - ROUND(bb, cc, dd, aa, F3, K3, in[12], 5); - - /* round 3: right lane */ - ROUND(aaa, bbb, ccc, ddd, F2, KK3, in[15], 9); - ROUND(ddd, aaa, bbb, ccc, F2, KK3, in[5], 7); - ROUND(ccc, ddd, aaa, bbb, F2, KK3, in[1], 15); - ROUND(bbb, ccc, ddd, aaa, F2, KK3, in[3], 11); - ROUND(aaa, bbb, ccc, ddd, F2, KK3, in[7], 8); - ROUND(ddd, aaa, bbb, ccc, F2, KK3, in[14], 6); - ROUND(ccc, ddd, aaa, bbb, F2, KK3, in[6], 6); - ROUND(bbb, ccc, ddd, aaa, F2, KK3, in[9], 14); - ROUND(aaa, bbb, ccc, ddd, F2, KK3, in[11], 12); - ROUND(ddd, aaa, bbb, ccc, F2, KK3, in[8], 13); - ROUND(ccc, ddd, aaa, bbb, F2, KK3, in[12], 5); - ROUND(bbb, ccc, ddd, aaa, F2, KK3, in[2], 14); - ROUND(aaa, bbb, ccc, ddd, F2, KK3, in[10], 13); - ROUND(ddd, aaa, bbb, ccc, F2, KK3, in[0], 13); - ROUND(ccc, ddd, aaa, bbb, F2, KK3, in[4], 7); - ROUND(bbb, ccc, ddd, aaa, F2, KK3, in[13], 5); - - /* Swap contents of "c" registers */ - swap(cc, ccc); - - /* round 4: left lane */ - ROUND(aa, bb, cc, dd, F4, K4, in[1], 11); - ROUND(dd, aa, bb, cc, F4, K4, in[9], 12); - ROUND(cc, dd, aa, bb, F4, K4, in[11], 14); - ROUND(bb, cc, dd, aa, F4, K4, in[10], 15); - ROUND(aa, bb, cc, dd, F4, K4, in[0], 14); - ROUND(dd, aa, bb, cc, F4, K4, in[8], 15); - ROUND(cc, dd, aa, bb, F4, K4, in[12], 9); - ROUND(bb, cc, dd, aa, F4, K4, in[4], 8); - ROUND(aa, bb, cc, dd, F4, K4, in[13], 9); - ROUND(dd, aa, bb, cc, F4, K4, in[3], 14); - ROUND(cc, dd, aa, bb, F4, K4, in[7], 5); - ROUND(bb, cc, dd, aa, F4, K4, in[15], 6); - ROUND(aa, bb, cc, dd, F4, K4, in[14], 8); - ROUND(dd, aa, bb, cc, F4, K4, in[5], 6); - ROUND(cc, dd, aa, bb, F4, K4, in[6], 5); - ROUND(bb, cc, dd, aa, F4, K4, in[2], 12); - - /* round 4: right lane */ - ROUND(aaa, bbb, ccc, ddd, F1, KK4, in[8], 15); - ROUND(ddd, aaa, bbb, ccc, F1, KK4, in[6], 5); - ROUND(ccc, ddd, aaa, bbb, F1, KK4, in[4], 8); - ROUND(bbb, ccc, ddd, aaa, F1, KK4, in[1], 11); - ROUND(aaa, bbb, ccc, ddd, F1, KK4, in[3], 14); - ROUND(ddd, aaa, bbb, ccc, F1, KK4, in[11], 14); - ROUND(ccc, ddd, aaa, bbb, F1, KK4, in[15], 6); - ROUND(bbb, ccc, ddd, aaa, F1, KK4, in[0], 14); - ROUND(aaa, bbb, ccc, ddd, F1, KK4, in[5], 6); - ROUND(ddd, aaa, bbb, ccc, F1, KK4, in[12], 9); - ROUND(ccc, ddd, aaa, bbb, F1, KK4, in[2], 12); - ROUND(bbb, ccc, ddd, aaa, F1, KK4, in[13], 9); - ROUND(aaa, bbb, ccc, ddd, F1, KK4, in[9], 12); - ROUND(ddd, aaa, bbb, ccc, F1, KK4, in[7], 5); - ROUND(ccc, ddd, aaa, bbb, F1, KK4, in[10], 15); - ROUND(bbb, ccc, ddd, aaa, F1, KK4, in[14], 8); - - /* Swap contents of "d" registers */ - swap(dd, ddd); - - /* combine results */ - state[0] += aa; - state[1] += bb; - state[2] += cc; - state[3] += dd; - state[4] += aaa; - state[5] += bbb; - state[6] += ccc; - state[7] += ddd; -} - -static int rmd256_init(struct shash_desc *desc) -{ - struct rmd256_ctx *rctx = shash_desc_ctx(desc); - - rctx->byte_count = 0; - - rctx->state[0] = RMD_H0; - rctx->state[1] = RMD_H1; - rctx->state[2] = RMD_H2; - rctx->state[3] = RMD_H3; - rctx->state[4] = RMD_H5; - rctx->state[5] = RMD_H6; - rctx->state[6] = RMD_H7; - rctx->state[7] = RMD_H8; - - memset(rctx->buffer, 0, sizeof(rctx->buffer)); - - return 0; -} - -static int rmd256_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct rmd256_ctx *rctx = shash_desc_ctx(desc); - const u32 avail = sizeof(rctx->buffer) - (rctx->byte_count & 0x3f); - - rctx->byte_count += len; - - /* Enough space in buffer? If so copy and we're done */ - if (avail > len) { - memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail), - data, len); - goto out; - } - - memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail), - data, avail); - - rmd256_transform(rctx->state, rctx->buffer); - data += avail; - len -= avail; - - while (len >= sizeof(rctx->buffer)) { - memcpy(rctx->buffer, data, sizeof(rctx->buffer)); - rmd256_transform(rctx->state, rctx->buffer); - data += sizeof(rctx->buffer); - len -= sizeof(rctx->buffer); - } - - memcpy(rctx->buffer, data, len); - -out: - return 0; -} - -/* Add padding and return the message digest. */ -static int rmd256_final(struct shash_desc *desc, u8 *out) -{ - struct rmd256_ctx *rctx = shash_desc_ctx(desc); - u32 i, index, padlen; - __le64 bits; - __le32 *dst = (__le32 *)out; - static const u8 padding[64] = { 0x80, }; - - bits = cpu_to_le64(rctx->byte_count << 3); - - /* Pad out to 56 mod 64 */ - index = rctx->byte_count & 0x3f; - padlen = (index < 56) ? (56 - index) : ((64+56) - index); - rmd256_update(desc, padding, padlen); - - /* Append length */ - rmd256_update(desc, (const u8 *)&bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i < 8; i++) - dst[i] = cpu_to_le32p(&rctx->state[i]); - - /* Wipe context */ - memset(rctx, 0, sizeof(*rctx)); - - return 0; -} - -static struct shash_alg alg = { - .digestsize = RMD256_DIGEST_SIZE, - .init = rmd256_init, - .update = rmd256_update, - .final = rmd256_final, - .descsize = sizeof(struct rmd256_ctx), - .base = { - .cra_name = "rmd256", - .cra_driver_name = "rmd256-generic", - .cra_blocksize = RMD256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init rmd256_mod_init(void) -{ - return crypto_register_shash(&alg); -} - -static void __exit rmd256_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -subsys_initcall(rmd256_mod_init); -module_exit(rmd256_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Adrian-Ken Rueegsegger "); -MODULE_DESCRIPTION("RIPEMD-256 Message Digest"); -MODULE_ALIAS_CRYPTO("rmd256"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index bc9e2910f5c3..3fb842cb2c67 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -71,7 +71,7 @@ static const char *check[] = { "blowfish", "twofish", "serpent", "sha384", "sha512", "md4", "aes", "cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea", "khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt", - "camellia", "seed", "salsa20", "rmd160", "rmd256", "rmd320", + "camellia", "seed", "salsa20", "rmd160", "rmd320", "lzo", "lzo-rle", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", "streebog256", "streebog512", NULL @@ -1871,10 +1871,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("rmd160"); break; - case 41: - ret += tcrypt_test("rmd256"); - break; - case 42: ret += tcrypt_test("rmd320"); break; @@ -2405,10 +2401,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_hash_speed("rmd160", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; fallthrough; - case 316: - test_hash_speed("rmd256", sec, generic_hash_speed_template); - if (mode > 300 && mode < 400) break; - fallthrough; case 317: test_hash_speed("rmd320", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; @@ -2525,10 +2517,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_ahash_speed("rmd160", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; fallthrough; - case 416: - test_ahash_speed("rmd256", sec, generic_hash_speed_template); - if (mode > 400 && mode < 500) break; - fallthrough; case 417: test_ahash_speed("rmd320", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index f8a5cec614d6..c35de56fc25a 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -5275,12 +5275,6 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(rmd160_tv_template) } - }, { - .alg = "rmd256", - .test = alg_test_hash, - .suite = { - .hash = __VECS(rmd256_tv_template) - } }, { .alg = "rmd320", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 05807872846c..86abd1f79aab 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -3200,70 +3200,6 @@ static const struct hash_testvec rmd160_tv_template[] = { } }; -/* - * RIPEMD-256 test vectors - */ -static const struct hash_testvec rmd256_tv_template[] = { - { - .digest = "\x02\xba\x4c\x4e\x5f\x8e\xcd\x18" - "\x77\xfc\x52\xd6\x4d\x30\xe3\x7a" - "\x2d\x97\x74\xfb\x1e\x5d\x02\x63" - "\x80\xae\x01\x68\xe3\xc5\x52\x2d", - }, { - .plaintext = "a", - .psize = 1, - .digest = "\xf9\x33\x3e\x45\xd8\x57\xf5\xd9" - "\x0a\x91\xba\xb7\x0a\x1e\xba\x0c" - "\xfb\x1b\xe4\xb0\x78\x3c\x9a\xcf" - "\xcd\x88\x3a\x91\x34\x69\x29\x25", - }, { - .plaintext = "abc", - .psize = 3, - .digest = "\xaf\xbd\x6e\x22\x8b\x9d\x8c\xbb" - "\xce\xf5\xca\x2d\x03\xe6\xdb\xa1" - "\x0a\xc0\xbc\x7d\xcb\xe4\x68\x0e" - "\x1e\x42\xd2\xe9\x75\x45\x9b\x65", - }, { - .plaintext = "message digest", - .psize = 14, - .digest = "\x87\xe9\x71\x75\x9a\x1c\xe4\x7a" - "\x51\x4d\x5c\x91\x4c\x39\x2c\x90" - "\x18\xc7\xc4\x6b\xc1\x44\x65\x55" - "\x4a\xfc\xdf\x54\xa5\x07\x0c\x0e", - }, { - .plaintext = "abcdefghijklmnopqrstuvwxyz", - .psize = 26, - .digest = "\x64\x9d\x30\x34\x75\x1e\xa2\x16" - "\x77\x6b\xf9\xa1\x8a\xcc\x81\xbc" - "\x78\x96\x11\x8a\x51\x97\x96\x87" - "\x82\xdd\x1f\xd9\x7d\x8d\x51\x33", - }, { - .plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde" - "fghijklmnopqrstuvwxyz0123456789", - .psize = 62, - .digest = "\x57\x40\xa4\x08\xac\x16\xb7\x20" - "\xb8\x44\x24\xae\x93\x1c\xbb\x1f" - "\xe3\x63\xd1\xd0\xbf\x40\x17\xf1" - "\xa8\x9f\x7e\xa6\xde\x77\xa0\xb8", - }, { - .plaintext = "1234567890123456789012345678901234567890" - "1234567890123456789012345678901234567890", - .psize = 80, - .digest = "\x06\xfd\xcc\x7a\x40\x95\x48\xaa" - "\xf9\x13\x68\xc0\x6a\x62\x75\xb5" - "\x53\xe3\xf0\x99\xbf\x0e\xa4\xed" - "\xfd\x67\x78\xdf\x89\xa8\x90\xdd", - }, { - .plaintext = "abcdbcdecdefdefgefghfghighij" - "hijkijkljklmklmnlmnomnopnopq", - .psize = 56, - .digest = "\x38\x43\x04\x55\x83\xaa\xc6\xc8" - "\xc8\xd9\x12\x85\x73\xe7\xa9\x80" - "\x9a\xfb\x2a\x0f\x34\xcc\xc3\x6e" - "\xa9\xe7\x2f\x16\xf6\x36\x8e\x3f", - } -}; - /* * RIPEMD-320 test vectors */ From 93f64202926f606d67b1095b59137f903c6ab304 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 21 Jan 2021 14:07:31 +0100 Subject: [PATCH 110/154] crypto: rmd320 - remove RIPE-MD 320 hash algorithm RIPE-MD 320 is never referenced anywhere in the kernel, and unlikely to be depended upon by userspace via AF_ALG. So let's remove it Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/Kconfig | 12 -- crypto/ripemd.h | 8 - crypto/rmd320.c | 391 ----------------------------------------------- crypto/tcrypt.c | 14 +- crypto/testmgr.c | 6 - crypto/testmgr.h | 64 -------- 6 files changed, 1 insertion(+), 494 deletions(-) delete mode 100644 crypto/rmd320.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 8e93dce161b0..a32e25cca2b4 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -834,18 +834,6 @@ config CRYPTO_RMD160 Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel. See -config CRYPTO_RMD320 - tristate "RIPEMD-320 digest algorithm" - select CRYPTO_HASH - help - RIPEMD-320 is an optional extension of RIPEMD-160 with a - 320 bit hash. It is intended for applications that require - longer hash-results, without needing a larger security level - (than RIPEMD-160). - - Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel. - See - config CRYPTO_SHA1 tristate "SHA1 digest algorithm" select CRYPTO_HASH diff --git a/crypto/ripemd.h b/crypto/ripemd.h index a19c3c27a466..b977785e2a62 100644 --- a/crypto/ripemd.h +++ b/crypto/ripemd.h @@ -9,20 +9,12 @@ #define RMD160_DIGEST_SIZE 20 #define RMD160_BLOCK_SIZE 64 -#define RMD320_DIGEST_SIZE 40 -#define RMD320_BLOCK_SIZE 64 - /* initial values */ #define RMD_H0 0x67452301UL #define RMD_H1 0xefcdab89UL #define RMD_H2 0x98badcfeUL #define RMD_H3 0x10325476UL #define RMD_H4 0xc3d2e1f0UL -#define RMD_H5 0x76543210UL -#define RMD_H6 0xfedcba98UL -#define RMD_H7 0x89abcdefUL -#define RMD_H8 0x01234567UL -#define RMD_H9 0x3c2d1e0fUL /* constants */ #define RMD_K1 0x00000000UL diff --git a/crypto/rmd320.c b/crypto/rmd320.c deleted file mode 100644 index c919ad6c4705..000000000000 --- a/crypto/rmd320.c +++ /dev/null @@ -1,391 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Cryptographic API. - * - * RIPEMD-320 - RACE Integrity Primitives Evaluation Message Digest. - * - * Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC - * - * Copyright (c) 2008 Adrian-Ken Rueegsegger - */ -#include -#include -#include -#include -#include -#include - -#include "ripemd.h" - -struct rmd320_ctx { - u64 byte_count; - u32 state[10]; - __le32 buffer[16]; -}; - -#define K1 RMD_K1 -#define K2 RMD_K2 -#define K3 RMD_K3 -#define K4 RMD_K4 -#define K5 RMD_K5 -#define KK1 RMD_K6 -#define KK2 RMD_K7 -#define KK3 RMD_K8 -#define KK4 RMD_K9 -#define KK5 RMD_K1 - -#define F1(x, y, z) (x ^ y ^ z) /* XOR */ -#define F2(x, y, z) (z ^ (x & (y ^ z))) /* x ? y : z */ -#define F3(x, y, z) ((x | ~y) ^ z) -#define F4(x, y, z) (y ^ (z & (x ^ y))) /* z ? x : y */ -#define F5(x, y, z) (x ^ (y | ~z)) - -#define ROUND(a, b, c, d, e, f, k, x, s) { \ - (a) += f((b), (c), (d)) + le32_to_cpup(&(x)) + (k); \ - (a) = rol32((a), (s)) + (e); \ - (c) = rol32((c), 10); \ -} - -static void rmd320_transform(u32 *state, const __le32 *in) -{ - u32 aa, bb, cc, dd, ee, aaa, bbb, ccc, ddd, eee; - - /* Initialize left lane */ - aa = state[0]; - bb = state[1]; - cc = state[2]; - dd = state[3]; - ee = state[4]; - - /* Initialize right lane */ - aaa = state[5]; - bbb = state[6]; - ccc = state[7]; - ddd = state[8]; - eee = state[9]; - - /* round 1: left lane */ - ROUND(aa, bb, cc, dd, ee, F1, K1, in[0], 11); - ROUND(ee, aa, bb, cc, dd, F1, K1, in[1], 14); - ROUND(dd, ee, aa, bb, cc, F1, K1, in[2], 15); - ROUND(cc, dd, ee, aa, bb, F1, K1, in[3], 12); - ROUND(bb, cc, dd, ee, aa, F1, K1, in[4], 5); - ROUND(aa, bb, cc, dd, ee, F1, K1, in[5], 8); - ROUND(ee, aa, bb, cc, dd, F1, K1, in[6], 7); - ROUND(dd, ee, aa, bb, cc, F1, K1, in[7], 9); - ROUND(cc, dd, ee, aa, bb, F1, K1, in[8], 11); - ROUND(bb, cc, dd, ee, aa, F1, K1, in[9], 13); - ROUND(aa, bb, cc, dd, ee, F1, K1, in[10], 14); - ROUND(ee, aa, bb, cc, dd, F1, K1, in[11], 15); - ROUND(dd, ee, aa, bb, cc, F1, K1, in[12], 6); - ROUND(cc, dd, ee, aa, bb, F1, K1, in[13], 7); - ROUND(bb, cc, dd, ee, aa, F1, K1, in[14], 9); - ROUND(aa, bb, cc, dd, ee, F1, K1, in[15], 8); - - /* round 1: right lane */ - ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[5], 8); - ROUND(eee, aaa, bbb, ccc, ddd, F5, KK1, in[14], 9); - ROUND(ddd, eee, aaa, bbb, ccc, F5, KK1, in[7], 9); - ROUND(ccc, ddd, eee, aaa, bbb, F5, KK1, in[0], 11); - ROUND(bbb, ccc, ddd, eee, aaa, F5, KK1, in[9], 13); - ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[2], 15); - ROUND(eee, aaa, bbb, ccc, ddd, F5, KK1, in[11], 15); - ROUND(ddd, eee, aaa, bbb, ccc, F5, KK1, in[4], 5); - ROUND(ccc, ddd, eee, aaa, bbb, F5, KK1, in[13], 7); - ROUND(bbb, ccc, ddd, eee, aaa, F5, KK1, in[6], 7); - ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[15], 8); - ROUND(eee, aaa, bbb, ccc, ddd, F5, KK1, in[8], 11); - ROUND(ddd, eee, aaa, bbb, ccc, F5, KK1, in[1], 14); - ROUND(ccc, ddd, eee, aaa, bbb, F5, KK1, in[10], 14); - ROUND(bbb, ccc, ddd, eee, aaa, F5, KK1, in[3], 12); - ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[12], 6); - - /* Swap contents of "a" registers */ - swap(aa, aaa); - - /* round 2: left lane" */ - ROUND(ee, aa, bb, cc, dd, F2, K2, in[7], 7); - ROUND(dd, ee, aa, bb, cc, F2, K2, in[4], 6); - ROUND(cc, dd, ee, aa, bb, F2, K2, in[13], 8); - ROUND(bb, cc, dd, ee, aa, F2, K2, in[1], 13); - ROUND(aa, bb, cc, dd, ee, F2, K2, in[10], 11); - ROUND(ee, aa, bb, cc, dd, F2, K2, in[6], 9); - ROUND(dd, ee, aa, bb, cc, F2, K2, in[15], 7); - ROUND(cc, dd, ee, aa, bb, F2, K2, in[3], 15); - ROUND(bb, cc, dd, ee, aa, F2, K2, in[12], 7); - ROUND(aa, bb, cc, dd, ee, F2, K2, in[0], 12); - ROUND(ee, aa, bb, cc, dd, F2, K2, in[9], 15); - ROUND(dd, ee, aa, bb, cc, F2, K2, in[5], 9); - ROUND(cc, dd, ee, aa, bb, F2, K2, in[2], 11); - ROUND(bb, cc, dd, ee, aa, F2, K2, in[14], 7); - ROUND(aa, bb, cc, dd, ee, F2, K2, in[11], 13); - ROUND(ee, aa, bb, cc, dd, F2, K2, in[8], 12); - - /* round 2: right lane */ - ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[6], 9); - ROUND(ddd, eee, aaa, bbb, ccc, F4, KK2, in[11], 13); - ROUND(ccc, ddd, eee, aaa, bbb, F4, KK2, in[3], 15); - ROUND(bbb, ccc, ddd, eee, aaa, F4, KK2, in[7], 7); - ROUND(aaa, bbb, ccc, ddd, eee, F4, KK2, in[0], 12); - ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[13], 8); - ROUND(ddd, eee, aaa, bbb, ccc, F4, KK2, in[5], 9); - ROUND(ccc, ddd, eee, aaa, bbb, F4, KK2, in[10], 11); - ROUND(bbb, ccc, ddd, eee, aaa, F4, KK2, in[14], 7); - ROUND(aaa, bbb, ccc, ddd, eee, F4, KK2, in[15], 7); - ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[8], 12); - ROUND(ddd, eee, aaa, bbb, ccc, F4, KK2, in[12], 7); - ROUND(ccc, ddd, eee, aaa, bbb, F4, KK2, in[4], 6); - ROUND(bbb, ccc, ddd, eee, aaa, F4, KK2, in[9], 15); - ROUND(aaa, bbb, ccc, ddd, eee, F4, KK2, in[1], 13); - ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[2], 11); - - /* Swap contents of "b" registers */ - swap(bb, bbb); - - /* round 3: left lane" */ - ROUND(dd, ee, aa, bb, cc, F3, K3, in[3], 11); - ROUND(cc, dd, ee, aa, bb, F3, K3, in[10], 13); - ROUND(bb, cc, dd, ee, aa, F3, K3, in[14], 6); - ROUND(aa, bb, cc, dd, ee, F3, K3, in[4], 7); - ROUND(ee, aa, bb, cc, dd, F3, K3, in[9], 14); - ROUND(dd, ee, aa, bb, cc, F3, K3, in[15], 9); - ROUND(cc, dd, ee, aa, bb, F3, K3, in[8], 13); - ROUND(bb, cc, dd, ee, aa, F3, K3, in[1], 15); - ROUND(aa, bb, cc, dd, ee, F3, K3, in[2], 14); - ROUND(ee, aa, bb, cc, dd, F3, K3, in[7], 8); - ROUND(dd, ee, aa, bb, cc, F3, K3, in[0], 13); - ROUND(cc, dd, ee, aa, bb, F3, K3, in[6], 6); - ROUND(bb, cc, dd, ee, aa, F3, K3, in[13], 5); - ROUND(aa, bb, cc, dd, ee, F3, K3, in[11], 12); - ROUND(ee, aa, bb, cc, dd, F3, K3, in[5], 7); - ROUND(dd, ee, aa, bb, cc, F3, K3, in[12], 5); - - /* round 3: right lane */ - ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[15], 9); - ROUND(ccc, ddd, eee, aaa, bbb, F3, KK3, in[5], 7); - ROUND(bbb, ccc, ddd, eee, aaa, F3, KK3, in[1], 15); - ROUND(aaa, bbb, ccc, ddd, eee, F3, KK3, in[3], 11); - ROUND(eee, aaa, bbb, ccc, ddd, F3, KK3, in[7], 8); - ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[14], 6); - ROUND(ccc, ddd, eee, aaa, bbb, F3, KK3, in[6], 6); - ROUND(bbb, ccc, ddd, eee, aaa, F3, KK3, in[9], 14); - ROUND(aaa, bbb, ccc, ddd, eee, F3, KK3, in[11], 12); - ROUND(eee, aaa, bbb, ccc, ddd, F3, KK3, in[8], 13); - ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[12], 5); - ROUND(ccc, ddd, eee, aaa, bbb, F3, KK3, in[2], 14); - ROUND(bbb, ccc, ddd, eee, aaa, F3, KK3, in[10], 13); - ROUND(aaa, bbb, ccc, ddd, eee, F3, KK3, in[0], 13); - ROUND(eee, aaa, bbb, ccc, ddd, F3, KK3, in[4], 7); - ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[13], 5); - - /* Swap contents of "c" registers */ - swap(cc, ccc); - - /* round 4: left lane" */ - ROUND(cc, dd, ee, aa, bb, F4, K4, in[1], 11); - ROUND(bb, cc, dd, ee, aa, F4, K4, in[9], 12); - ROUND(aa, bb, cc, dd, ee, F4, K4, in[11], 14); - ROUND(ee, aa, bb, cc, dd, F4, K4, in[10], 15); - ROUND(dd, ee, aa, bb, cc, F4, K4, in[0], 14); - ROUND(cc, dd, ee, aa, bb, F4, K4, in[8], 15); - ROUND(bb, cc, dd, ee, aa, F4, K4, in[12], 9); - ROUND(aa, bb, cc, dd, ee, F4, K4, in[4], 8); - ROUND(ee, aa, bb, cc, dd, F4, K4, in[13], 9); - ROUND(dd, ee, aa, bb, cc, F4, K4, in[3], 14); - ROUND(cc, dd, ee, aa, bb, F4, K4, in[7], 5); - ROUND(bb, cc, dd, ee, aa, F4, K4, in[15], 6); - ROUND(aa, bb, cc, dd, ee, F4, K4, in[14], 8); - ROUND(ee, aa, bb, cc, dd, F4, K4, in[5], 6); - ROUND(dd, ee, aa, bb, cc, F4, K4, in[6], 5); - ROUND(cc, dd, ee, aa, bb, F4, K4, in[2], 12); - - /* round 4: right lane */ - ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[8], 15); - ROUND(bbb, ccc, ddd, eee, aaa, F2, KK4, in[6], 5); - ROUND(aaa, bbb, ccc, ddd, eee, F2, KK4, in[4], 8); - ROUND(eee, aaa, bbb, ccc, ddd, F2, KK4, in[1], 11); - ROUND(ddd, eee, aaa, bbb, ccc, F2, KK4, in[3], 14); - ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[11], 14); - ROUND(bbb, ccc, ddd, eee, aaa, F2, KK4, in[15], 6); - ROUND(aaa, bbb, ccc, ddd, eee, F2, KK4, in[0], 14); - ROUND(eee, aaa, bbb, ccc, ddd, F2, KK4, in[5], 6); - ROUND(ddd, eee, aaa, bbb, ccc, F2, KK4, in[12], 9); - ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[2], 12); - ROUND(bbb, ccc, ddd, eee, aaa, F2, KK4, in[13], 9); - ROUND(aaa, bbb, ccc, ddd, eee, F2, KK4, in[9], 12); - ROUND(eee, aaa, bbb, ccc, ddd, F2, KK4, in[7], 5); - ROUND(ddd, eee, aaa, bbb, ccc, F2, KK4, in[10], 15); - ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[14], 8); - - /* Swap contents of "d" registers */ - swap(dd, ddd); - - /* round 5: left lane" */ - ROUND(bb, cc, dd, ee, aa, F5, K5, in[4], 9); - ROUND(aa, bb, cc, dd, ee, F5, K5, in[0], 15); - ROUND(ee, aa, bb, cc, dd, F5, K5, in[5], 5); - ROUND(dd, ee, aa, bb, cc, F5, K5, in[9], 11); - ROUND(cc, dd, ee, aa, bb, F5, K5, in[7], 6); - ROUND(bb, cc, dd, ee, aa, F5, K5, in[12], 8); - ROUND(aa, bb, cc, dd, ee, F5, K5, in[2], 13); - ROUND(ee, aa, bb, cc, dd, F5, K5, in[10], 12); - ROUND(dd, ee, aa, bb, cc, F5, K5, in[14], 5); - ROUND(cc, dd, ee, aa, bb, F5, K5, in[1], 12); - ROUND(bb, cc, dd, ee, aa, F5, K5, in[3], 13); - ROUND(aa, bb, cc, dd, ee, F5, K5, in[8], 14); - ROUND(ee, aa, bb, cc, dd, F5, K5, in[11], 11); - ROUND(dd, ee, aa, bb, cc, F5, K5, in[6], 8); - ROUND(cc, dd, ee, aa, bb, F5, K5, in[15], 5); - ROUND(bb, cc, dd, ee, aa, F5, K5, in[13], 6); - - /* round 5: right lane */ - ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[12], 8); - ROUND(aaa, bbb, ccc, ddd, eee, F1, KK5, in[15], 5); - ROUND(eee, aaa, bbb, ccc, ddd, F1, KK5, in[10], 12); - ROUND(ddd, eee, aaa, bbb, ccc, F1, KK5, in[4], 9); - ROUND(ccc, ddd, eee, aaa, bbb, F1, KK5, in[1], 12); - ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[5], 5); - ROUND(aaa, bbb, ccc, ddd, eee, F1, KK5, in[8], 14); - ROUND(eee, aaa, bbb, ccc, ddd, F1, KK5, in[7], 6); - ROUND(ddd, eee, aaa, bbb, ccc, F1, KK5, in[6], 8); - ROUND(ccc, ddd, eee, aaa, bbb, F1, KK5, in[2], 13); - ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[13], 6); - ROUND(aaa, bbb, ccc, ddd, eee, F1, KK5, in[14], 5); - ROUND(eee, aaa, bbb, ccc, ddd, F1, KK5, in[0], 15); - ROUND(ddd, eee, aaa, bbb, ccc, F1, KK5, in[3], 13); - ROUND(ccc, ddd, eee, aaa, bbb, F1, KK5, in[9], 11); - ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[11], 11); - - /* Swap contents of "e" registers */ - swap(ee, eee); - - /* combine results */ - state[0] += aa; - state[1] += bb; - state[2] += cc; - state[3] += dd; - state[4] += ee; - state[5] += aaa; - state[6] += bbb; - state[7] += ccc; - state[8] += ddd; - state[9] += eee; -} - -static int rmd320_init(struct shash_desc *desc) -{ - struct rmd320_ctx *rctx = shash_desc_ctx(desc); - - rctx->byte_count = 0; - - rctx->state[0] = RMD_H0; - rctx->state[1] = RMD_H1; - rctx->state[2] = RMD_H2; - rctx->state[3] = RMD_H3; - rctx->state[4] = RMD_H4; - rctx->state[5] = RMD_H5; - rctx->state[6] = RMD_H6; - rctx->state[7] = RMD_H7; - rctx->state[8] = RMD_H8; - rctx->state[9] = RMD_H9; - - memset(rctx->buffer, 0, sizeof(rctx->buffer)); - - return 0; -} - -static int rmd320_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct rmd320_ctx *rctx = shash_desc_ctx(desc); - const u32 avail = sizeof(rctx->buffer) - (rctx->byte_count & 0x3f); - - rctx->byte_count += len; - - /* Enough space in buffer? If so copy and we're done */ - if (avail > len) { - memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail), - data, len); - goto out; - } - - memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail), - data, avail); - - rmd320_transform(rctx->state, rctx->buffer); - data += avail; - len -= avail; - - while (len >= sizeof(rctx->buffer)) { - memcpy(rctx->buffer, data, sizeof(rctx->buffer)); - rmd320_transform(rctx->state, rctx->buffer); - data += sizeof(rctx->buffer); - len -= sizeof(rctx->buffer); - } - - memcpy(rctx->buffer, data, len); - -out: - return 0; -} - -/* Add padding and return the message digest. */ -static int rmd320_final(struct shash_desc *desc, u8 *out) -{ - struct rmd320_ctx *rctx = shash_desc_ctx(desc); - u32 i, index, padlen; - __le64 bits; - __le32 *dst = (__le32 *)out; - static const u8 padding[64] = { 0x80, }; - - bits = cpu_to_le64(rctx->byte_count << 3); - - /* Pad out to 56 mod 64 */ - index = rctx->byte_count & 0x3f; - padlen = (index < 56) ? (56 - index) : ((64+56) - index); - rmd320_update(desc, padding, padlen); - - /* Append length */ - rmd320_update(desc, (const u8 *)&bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i < 10; i++) - dst[i] = cpu_to_le32p(&rctx->state[i]); - - /* Wipe context */ - memset(rctx, 0, sizeof(*rctx)); - - return 0; -} - -static struct shash_alg alg = { - .digestsize = RMD320_DIGEST_SIZE, - .init = rmd320_init, - .update = rmd320_update, - .final = rmd320_final, - .descsize = sizeof(struct rmd320_ctx), - .base = { - .cra_name = "rmd320", - .cra_driver_name = "rmd320-generic", - .cra_blocksize = RMD320_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init rmd320_mod_init(void) -{ - return crypto_register_shash(&alg); -} - -static void __exit rmd320_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -subsys_initcall(rmd320_mod_init); -module_exit(rmd320_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Adrian-Ken Rueegsegger "); -MODULE_DESCRIPTION("RIPEMD-320 Message Digest"); -MODULE_ALIAS_CRYPTO("rmd320"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 3fb842cb2c67..a231df72ca7d 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -71,7 +71,7 @@ static const char *check[] = { "blowfish", "twofish", "serpent", "sha384", "sha512", "md4", "aes", "cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea", "khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt", - "camellia", "seed", "salsa20", "rmd160", "rmd320", + "camellia", "seed", "salsa20", "rmd160", "lzo", "lzo-rle", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", "streebog256", "streebog512", NULL @@ -1871,10 +1871,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("rmd160"); break; - case 42: - ret += tcrypt_test("rmd320"); - break; - case 43: ret += tcrypt_test("ecb(seed)"); break; @@ -2401,10 +2397,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_hash_speed("rmd160", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; fallthrough; - case 317: - test_hash_speed("rmd320", sec, generic_hash_speed_template); - if (mode > 300 && mode < 400) break; - fallthrough; case 318: klen = 16; test_hash_speed("ghash", sec, generic_hash_speed_template); @@ -2517,10 +2509,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_ahash_speed("rmd160", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; fallthrough; - case 417: - test_ahash_speed("rmd320", sec, generic_hash_speed_template); - if (mode > 400 && mode < 500) break; - fallthrough; case 418: test_ahash_speed("sha3-224", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index c35de56fc25a..d12cec6ab003 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -5275,12 +5275,6 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(rmd160_tv_template) } - }, { - .alg = "rmd320", - .test = alg_test_hash, - .suite = { - .hash = __VECS(rmd320_tv_template) - } }, { .alg = "rsa", .test = alg_test_akcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 86abd1f79aab..5625164cda54 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -3200,70 +3200,6 @@ static const struct hash_testvec rmd160_tv_template[] = { } }; -/* - * RIPEMD-320 test vectors - */ -static const struct hash_testvec rmd320_tv_template[] = { - { - .digest = "\x22\xd6\x5d\x56\x61\x53\x6c\xdc\x75\xc1" - "\xfd\xf5\xc6\xde\x7b\x41\xb9\xf2\x73\x25" - "\xeb\xc6\x1e\x85\x57\x17\x7d\x70\x5a\x0e" - "\xc8\x80\x15\x1c\x3a\x32\xa0\x08\x99\xb8", - }, { - .plaintext = "a", - .psize = 1, - .digest = "\xce\x78\x85\x06\x38\xf9\x26\x58\xa5\xa5" - "\x85\x09\x75\x79\x92\x6d\xda\x66\x7a\x57" - "\x16\x56\x2c\xfc\xf6\xfb\xe7\x7f\x63\x54" - "\x2f\x99\xb0\x47\x05\xd6\x97\x0d\xff\x5d", - }, { - .plaintext = "abc", - .psize = 3, - .digest = "\xde\x4c\x01\xb3\x05\x4f\x89\x30\xa7\x9d" - "\x09\xae\x73\x8e\x92\x30\x1e\x5a\x17\x08" - "\x5b\xef\xfd\xc1\xb8\xd1\x16\x71\x3e\x74" - "\xf8\x2f\xa9\x42\xd6\x4c\xdb\xc4\x68\x2d", - }, { - .plaintext = "message digest", - .psize = 14, - .digest = "\x3a\x8e\x28\x50\x2e\xd4\x5d\x42\x2f\x68" - "\x84\x4f\x9d\xd3\x16\xe7\xb9\x85\x33\xfa" - "\x3f\x2a\x91\xd2\x9f\x84\xd4\x25\xc8\x8d" - "\x6b\x4e\xff\x72\x7d\xf6\x6a\x7c\x01\x97", - }, { - .plaintext = "abcdefghijklmnopqrstuvwxyz", - .psize = 26, - .digest = "\xca\xbd\xb1\x81\x0b\x92\x47\x0a\x20\x93" - "\xaa\x6b\xce\x05\x95\x2c\x28\x34\x8c\xf4" - "\x3f\xf6\x08\x41\x97\x51\x66\xbb\x40\xed" - "\x23\x40\x04\xb8\x82\x44\x63\xe6\xb0\x09", - }, { - .plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde" - "fghijklmnopqrstuvwxyz0123456789", - .psize = 62, - .digest = "\xed\x54\x49\x40\xc8\x6d\x67\xf2\x50\xd2" - "\x32\xc3\x0b\x7b\x3e\x57\x70\xe0\xc6\x0c" - "\x8c\xb9\xa4\xca\xfe\x3b\x11\x38\x8a\xf9" - "\x92\x0e\x1b\x99\x23\x0b\x84\x3c\x86\xa4", - }, { - .plaintext = "1234567890123456789012345678901234567890" - "1234567890123456789012345678901234567890", - .psize = 80, - .digest = "\x55\x78\x88\xaf\x5f\x6d\x8e\xd6\x2a\xb6" - "\x69\x45\xc6\xd2\xa0\xa4\x7e\xcd\x53\x41" - "\xe9\x15\xeb\x8f\xea\x1d\x05\x24\x95\x5f" - "\x82\x5d\xc7\x17\xe4\xa0\x08\xab\x2d\x42", - }, { - .plaintext = "abcdbcdecdefdefgefghfghighij" - "hijkijkljklmklmnlmnomnopnopq", - .psize = 56, - .digest = "\xd0\x34\xa7\x95\x0c\xf7\x22\x02\x1b\xa4" - "\xb8\x4d\xf7\x69\xa5\xde\x20\x60\xe2\x59" - "\xdf\x4c\x9b\xb4\xa4\x26\x8c\x0e\x93\x5b" - "\xbc\x74\x70\xa9\x69\xc9\xd0\x72\xa1\xac", - } -}; - static const struct hash_testvec crct10dif_tv_template[] = { { .plaintext = "abc", From 87cd723f8978c59bc4e28593da45d09ebf5d92a2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 21 Jan 2021 14:07:32 +0100 Subject: [PATCH 111/154] crypto: tgr192 - remove Tiger 128/160/192 hash algorithms Tiger is never referenced anywhere in the kernel, and unlikely to be depended upon by userspace via AF_ALG. So let's remove it. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/Kconfig | 13 - crypto/Makefile | 1 - crypto/tcrypt.c | 36 --- crypto/testmgr.c | 18 -- crypto/testmgr.h | 126 --------- crypto/tgr192.c | 682 ----------------------------------------------- 6 files changed, 876 deletions(-) delete mode 100644 crypto/tgr192.c diff --git a/crypto/Kconfig b/crypto/Kconfig index a32e25cca2b4..8d25d689a705 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1009,19 +1009,6 @@ config CRYPTO_STREEBOG https://tc26.ru/upload/iblock/fed/feddbb4d26b685903faa2ba11aea43f6.pdf https://tools.ietf.org/html/rfc6986 -config CRYPTO_TGR192 - tristate "Tiger digest algorithms" - select CRYPTO_HASH - help - Tiger hash algorithm 192, 160 and 128-bit hashes - - Tiger is a hash function optimized for 64-bit processors while - still having decent performance on 32-bit processors. - Tiger was developed by Ross Anderson and Eli Biham. - - See also: - . - config CRYPTO_WP512 tristate "Whirlpool digest algorithms" select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index 946e821f1874..6b9622f21f7f 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -77,7 +77,6 @@ obj-$(CONFIG_CRYPTO_SM3) += sm3_generic.o obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 -obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index a231df72ca7d..696c44ef465e 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1815,18 +1815,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("cbc(anubis)"); break; - case 27: - ret += tcrypt_test("tgr192"); - break; - - case 28: - ret += tcrypt_test("tgr160"); - break; - - case 29: - ret += tcrypt_test("tgr128"); - break; - case 30: ret += tcrypt_test("ecb(xeta)"); break; @@ -2377,18 +2365,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_hash_speed("wp512", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; fallthrough; - case 310: - test_hash_speed("tgr128", sec, generic_hash_speed_template); - if (mode > 300 && mode < 400) break; - fallthrough; - case 311: - test_hash_speed("tgr160", sec, generic_hash_speed_template); - if (mode > 300 && mode < 400) break; - fallthrough; - case 312: - test_hash_speed("tgr192", sec, generic_hash_speed_template); - if (mode > 300 && mode < 400) break; - fallthrough; case 313: test_hash_speed("sha224", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; @@ -2489,18 +2465,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_ahash_speed("wp512", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; fallthrough; - case 410: - test_ahash_speed("tgr128", sec, generic_hash_speed_template); - if (mode > 400 && mode < 500) break; - fallthrough; - case 411: - test_ahash_speed("tgr160", sec, generic_hash_speed_template); - if (mode > 400 && mode < 500) break; - fallthrough; - case 412: - test_ahash_speed("tgr192", sec, generic_hash_speed_template); - if (mode > 400 && mode < 500) break; - fallthrough; case 413: test_ahash_speed("sha224", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index d12cec6ab003..b87802ffb554 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -5375,24 +5375,6 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(streebog512_tv_template) } - }, { - .alg = "tgr128", - .test = alg_test_hash, - .suite = { - .hash = __VECS(tgr128_tv_template) - } - }, { - .alg = "tgr160", - .test = alg_test_hash, - .suite = { - .hash = __VECS(tgr160_tv_template) - } - }, { - .alg = "tgr192", - .test = alg_test_hash, - .suite = { - .hash = __VECS(tgr192_tv_template) - } }, { .alg = "vmac64(aes)", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 5625164cda54..851c107a5584 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -4950,132 +4950,6 @@ static const struct hash_testvec wp256_tv_template[] = { }, }; -/* - * TIGER test vectors from Tiger website - */ -static const struct hash_testvec tgr192_tv_template[] = { - { - .plaintext = "", - .psize = 0, - .digest = "\x24\xf0\x13\x0c\x63\xac\x93\x32" - "\x16\x16\x6e\x76\xb1\xbb\x92\x5f" - "\xf3\x73\xde\x2d\x49\x58\x4e\x7a", - }, { - .plaintext = "abc", - .psize = 3, - .digest = "\xf2\x58\xc1\xe8\x84\x14\xab\x2a" - "\x52\x7a\xb5\x41\xff\xc5\xb8\xbf" - "\x93\x5f\x7b\x95\x1c\x13\x29\x51", - }, { - .plaintext = "Tiger", - .psize = 5, - .digest = "\x9f\x00\xf5\x99\x07\x23\x00\xdd" - "\x27\x6a\xbb\x38\xc8\xeb\x6d\xec" - "\x37\x79\x0c\x11\x6f\x9d\x2b\xdf", - }, { - .plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-", - .psize = 64, - .digest = "\x87\xfb\x2a\x90\x83\x85\x1c\xf7" - "\x47\x0d\x2c\xf8\x10\xe6\xdf\x9e" - "\xb5\x86\x44\x50\x34\xa5\xa3\x86", - }, { - .plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZ=abcdefghijklmnopqrstuvwxyz+0123456789", - .psize = 64, - .digest = "\x46\x7d\xb8\x08\x63\xeb\xce\x48" - "\x8d\xf1\xcd\x12\x61\x65\x5d\xe9" - "\x57\x89\x65\x65\x97\x5f\x91\x97", - }, { - .plaintext = "Tiger - A Fast New Hash Function, " - "by Ross Anderson and Eli Biham, " - "proceedings of Fast Software Encryption 3, " - "Cambridge, 1996.", - .psize = 125, - .digest = "\x3d\x9a\xeb\x03\xd1\xbd\x1a\x63" - "\x57\xb2\x77\x4d\xfd\x6d\x5b\x24" - "\xdd\x68\x15\x1d\x50\x39\x74\xfc", - }, -}; - -static const struct hash_testvec tgr160_tv_template[] = { - { - .plaintext = "", - .psize = 0, - .digest = "\x24\xf0\x13\x0c\x63\xac\x93\x32" - "\x16\x16\x6e\x76\xb1\xbb\x92\x5f" - "\xf3\x73\xde\x2d", - }, { - .plaintext = "abc", - .psize = 3, - .digest = "\xf2\x58\xc1\xe8\x84\x14\xab\x2a" - "\x52\x7a\xb5\x41\xff\xc5\xb8\xbf" - "\x93\x5f\x7b\x95", - }, { - .plaintext = "Tiger", - .psize = 5, - .digest = "\x9f\x00\xf5\x99\x07\x23\x00\xdd" - "\x27\x6a\xbb\x38\xc8\xeb\x6d\xec" - "\x37\x79\x0c\x11", - }, { - .plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-", - .psize = 64, - .digest = "\x87\xfb\x2a\x90\x83\x85\x1c\xf7" - "\x47\x0d\x2c\xf8\x10\xe6\xdf\x9e" - "\xb5\x86\x44\x50", - }, { - .plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZ=abcdefghijklmnopqrstuvwxyz+0123456789", - .psize = 64, - .digest = "\x46\x7d\xb8\x08\x63\xeb\xce\x48" - "\x8d\xf1\xcd\x12\x61\x65\x5d\xe9" - "\x57\x89\x65\x65", - }, { - .plaintext = "Tiger - A Fast New Hash Function, " - "by Ross Anderson and Eli Biham, " - "proceedings of Fast Software Encryption 3, " - "Cambridge, 1996.", - .psize = 125, - .digest = "\x3d\x9a\xeb\x03\xd1\xbd\x1a\x63" - "\x57\xb2\x77\x4d\xfd\x6d\x5b\x24" - "\xdd\x68\x15\x1d", - }, -}; - -static const struct hash_testvec tgr128_tv_template[] = { - { - .plaintext = "", - .psize = 0, - .digest = "\x24\xf0\x13\x0c\x63\xac\x93\x32" - "\x16\x16\x6e\x76\xb1\xbb\x92\x5f", - }, { - .plaintext = "abc", - .psize = 3, - .digest = "\xf2\x58\xc1\xe8\x84\x14\xab\x2a" - "\x52\x7a\xb5\x41\xff\xc5\xb8\xbf", - }, { - .plaintext = "Tiger", - .psize = 5, - .digest = "\x9f\x00\xf5\x99\x07\x23\x00\xdd" - "\x27\x6a\xbb\x38\xc8\xeb\x6d\xec", - }, { - .plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-", - .psize = 64, - .digest = "\x87\xfb\x2a\x90\x83\x85\x1c\xf7" - "\x47\x0d\x2c\xf8\x10\xe6\xdf\x9e", - }, { - .plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZ=abcdefghijklmnopqrstuvwxyz+0123456789", - .psize = 64, - .digest = "\x46\x7d\xb8\x08\x63\xeb\xce\x48" - "\x8d\xf1\xcd\x12\x61\x65\x5d\xe9", - }, { - .plaintext = "Tiger - A Fast New Hash Function, " - "by Ross Anderson and Eli Biham, " - "proceedings of Fast Software Encryption 3, " - "Cambridge, 1996.", - .psize = 125, - .digest = "\x3d\x9a\xeb\x03\xd1\xbd\x1a\x63" - "\x57\xb2\x77\x4d\xfd\x6d\x5b\x24", - }, -}; - static const struct hash_testvec ghash_tv_template[] = { { diff --git a/crypto/tgr192.c b/crypto/tgr192.c deleted file mode 100644 index aa29c529b44e..000000000000 --- a/crypto/tgr192.c +++ /dev/null @@ -1,682 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Cryptographic API. - * - * Tiger hashing Algorithm - * - * Copyright (C) 1998 Free Software Foundation, Inc. - * - * The Tiger algorithm was developed by Ross Anderson and Eli Biham. - * It was optimized for 64-bit processors while still delievering - * decent performance on 32 and 16-bit processors. - * - * This version is derived from the GnuPG implementation and the - * Tiger-Perl interface written by Rafael Sevilla - * - * Adapted for Linux Kernel Crypto by Aaron Grothe - * ajgrothe@yahoo.com, February 22, 2005 - */ -#include -#include -#include -#include -#include -#include -#include - -#define TGR192_DIGEST_SIZE 24 -#define TGR160_DIGEST_SIZE 20 -#define TGR128_DIGEST_SIZE 16 - -#define TGR192_BLOCK_SIZE 64 - -struct tgr192_ctx { - u64 a, b, c; - u8 hash[64]; - int count; - u32 nblocks; -}; - -static const u64 sbox1[256] = { - 0x02aab17cf7e90c5eULL, 0xac424b03e243a8ecULL, 0x72cd5be30dd5fcd3ULL, - 0x6d019b93f6f97f3aULL, 0xcd9978ffd21f9193ULL, 0x7573a1c9708029e2ULL, - 0xb164326b922a83c3ULL, 0x46883eee04915870ULL, 0xeaace3057103ece6ULL, - 0xc54169b808a3535cULL, 0x4ce754918ddec47cULL, 0x0aa2f4dfdc0df40cULL, - 0x10b76f18a74dbefaULL, 0xc6ccb6235ad1ab6aULL, 0x13726121572fe2ffULL, - 0x1a488c6f199d921eULL, 0x4bc9f9f4da0007caULL, 0x26f5e6f6e85241c7ULL, - 0x859079dbea5947b6ULL, 0x4f1885c5c99e8c92ULL, 0xd78e761ea96f864bULL, - 0x8e36428c52b5c17dULL, 0x69cf6827373063c1ULL, 0xb607c93d9bb4c56eULL, - 0x7d820e760e76b5eaULL, 0x645c9cc6f07fdc42ULL, 0xbf38a078243342e0ULL, - 0x5f6b343c9d2e7d04ULL, 0xf2c28aeb600b0ec6ULL, 0x6c0ed85f7254bcacULL, - 0x71592281a4db4fe5ULL, 0x1967fa69ce0fed9fULL, 0xfd5293f8b96545dbULL, - 0xc879e9d7f2a7600bULL, 0x860248920193194eULL, 0xa4f9533b2d9cc0b3ULL, - 0x9053836c15957613ULL, 0xdb6dcf8afc357bf1ULL, 0x18beea7a7a370f57ULL, - 0x037117ca50b99066ULL, 0x6ab30a9774424a35ULL, 0xf4e92f02e325249bULL, - 0x7739db07061ccae1ULL, 0xd8f3b49ceca42a05ULL, 0xbd56be3f51382f73ULL, - 0x45faed5843b0bb28ULL, 0x1c813d5c11bf1f83ULL, 0x8af0e4b6d75fa169ULL, - 0x33ee18a487ad9999ULL, 0x3c26e8eab1c94410ULL, 0xb510102bc0a822f9ULL, - 0x141eef310ce6123bULL, 0xfc65b90059ddb154ULL, 0xe0158640c5e0e607ULL, - 0x884e079826c3a3cfULL, 0x930d0d9523c535fdULL, 0x35638d754e9a2b00ULL, - 0x4085fccf40469dd5ULL, 0xc4b17ad28be23a4cULL, 0xcab2f0fc6a3e6a2eULL, - 0x2860971a6b943fcdULL, 0x3dde6ee212e30446ULL, 0x6222f32ae01765aeULL, - 0x5d550bb5478308feULL, 0xa9efa98da0eda22aULL, 0xc351a71686c40da7ULL, - 0x1105586d9c867c84ULL, 0xdcffee85fda22853ULL, 0xccfbd0262c5eef76ULL, - 0xbaf294cb8990d201ULL, 0xe69464f52afad975ULL, 0x94b013afdf133e14ULL, - 0x06a7d1a32823c958ULL, 0x6f95fe5130f61119ULL, 0xd92ab34e462c06c0ULL, - 0xed7bde33887c71d2ULL, 0x79746d6e6518393eULL, 0x5ba419385d713329ULL, - 0x7c1ba6b948a97564ULL, 0x31987c197bfdac67ULL, 0xde6c23c44b053d02ULL, - 0x581c49fed002d64dULL, 0xdd474d6338261571ULL, 0xaa4546c3e473d062ULL, - 0x928fce349455f860ULL, 0x48161bbacaab94d9ULL, 0x63912430770e6f68ULL, - 0x6ec8a5e602c6641cULL, 0x87282515337ddd2bULL, 0x2cda6b42034b701bULL, - 0xb03d37c181cb096dULL, 0xe108438266c71c6fULL, 0x2b3180c7eb51b255ULL, - 0xdf92b82f96c08bbcULL, 0x5c68c8c0a632f3baULL, 0x5504cc861c3d0556ULL, - 0xabbfa4e55fb26b8fULL, 0x41848b0ab3baceb4ULL, 0xb334a273aa445d32ULL, - 0xbca696f0a85ad881ULL, 0x24f6ec65b528d56cULL, 0x0ce1512e90f4524aULL, - 0x4e9dd79d5506d35aULL, 0x258905fac6ce9779ULL, 0x2019295b3e109b33ULL, - 0xf8a9478b73a054ccULL, 0x2924f2f934417eb0ULL, 0x3993357d536d1bc4ULL, - 0x38a81ac21db6ff8bULL, 0x47c4fbf17d6016bfULL, 0x1e0faadd7667e3f5ULL, - 0x7abcff62938beb96ULL, 0xa78dad948fc179c9ULL, 0x8f1f98b72911e50dULL, - 0x61e48eae27121a91ULL, 0x4d62f7ad31859808ULL, 0xeceba345ef5ceaebULL, - 0xf5ceb25ebc9684ceULL, 0xf633e20cb7f76221ULL, 0xa32cdf06ab8293e4ULL, - 0x985a202ca5ee2ca4ULL, 0xcf0b8447cc8a8fb1ULL, 0x9f765244979859a3ULL, - 0xa8d516b1a1240017ULL, 0x0bd7ba3ebb5dc726ULL, 0xe54bca55b86adb39ULL, - 0x1d7a3afd6c478063ULL, 0x519ec608e7669eddULL, 0x0e5715a2d149aa23ULL, - 0x177d4571848ff194ULL, 0xeeb55f3241014c22ULL, 0x0f5e5ca13a6e2ec2ULL, - 0x8029927b75f5c361ULL, 0xad139fabc3d6e436ULL, 0x0d5df1a94ccf402fULL, - 0x3e8bd948bea5dfc8ULL, 0xa5a0d357bd3ff77eULL, 0xa2d12e251f74f645ULL, - 0x66fd9e525e81a082ULL, 0x2e0c90ce7f687a49ULL, 0xc2e8bcbeba973bc5ULL, - 0x000001bce509745fULL, 0x423777bbe6dab3d6ULL, 0xd1661c7eaef06eb5ULL, - 0xa1781f354daacfd8ULL, 0x2d11284a2b16affcULL, 0xf1fc4f67fa891d1fULL, - 0x73ecc25dcb920adaULL, 0xae610c22c2a12651ULL, 0x96e0a810d356b78aULL, - 0x5a9a381f2fe7870fULL, 0xd5ad62ede94e5530ULL, 0xd225e5e8368d1427ULL, - 0x65977b70c7af4631ULL, 0x99f889b2de39d74fULL, 0x233f30bf54e1d143ULL, - 0x9a9675d3d9a63c97ULL, 0x5470554ff334f9a8ULL, 0x166acb744a4f5688ULL, - 0x70c74caab2e4aeadULL, 0xf0d091646f294d12ULL, 0x57b82a89684031d1ULL, - 0xefd95a5a61be0b6bULL, 0x2fbd12e969f2f29aULL, 0x9bd37013feff9fe8ULL, - 0x3f9b0404d6085a06ULL, 0x4940c1f3166cfe15ULL, 0x09542c4dcdf3defbULL, - 0xb4c5218385cd5ce3ULL, 0xc935b7dc4462a641ULL, 0x3417f8a68ed3b63fULL, - 0xb80959295b215b40ULL, 0xf99cdaef3b8c8572ULL, 0x018c0614f8fcb95dULL, - 0x1b14accd1a3acdf3ULL, 0x84d471f200bb732dULL, 0xc1a3110e95e8da16ULL, - 0x430a7220bf1a82b8ULL, 0xb77e090d39df210eULL, 0x5ef4bd9f3cd05e9dULL, - 0x9d4ff6da7e57a444ULL, 0xda1d60e183d4a5f8ULL, 0xb287c38417998e47ULL, - 0xfe3edc121bb31886ULL, 0xc7fe3ccc980ccbefULL, 0xe46fb590189bfd03ULL, - 0x3732fd469a4c57dcULL, 0x7ef700a07cf1ad65ULL, 0x59c64468a31d8859ULL, - 0x762fb0b4d45b61f6ULL, 0x155baed099047718ULL, 0x68755e4c3d50baa6ULL, - 0xe9214e7f22d8b4dfULL, 0x2addbf532eac95f4ULL, 0x32ae3909b4bd0109ULL, - 0x834df537b08e3450ULL, 0xfa209da84220728dULL, 0x9e691d9b9efe23f7ULL, - 0x0446d288c4ae8d7fULL, 0x7b4cc524e169785bULL, 0x21d87f0135ca1385ULL, - 0xcebb400f137b8aa5ULL, 0x272e2b66580796beULL, 0x3612264125c2b0deULL, - 0x057702bdad1efbb2ULL, 0xd4babb8eacf84be9ULL, 0x91583139641bc67bULL, - 0x8bdc2de08036e024ULL, 0x603c8156f49f68edULL, 0xf7d236f7dbef5111ULL, - 0x9727c4598ad21e80ULL, 0xa08a0896670a5fd7ULL, 0xcb4a8f4309eba9cbULL, - 0x81af564b0f7036a1ULL, 0xc0b99aa778199abdULL, 0x959f1ec83fc8e952ULL, - 0x8c505077794a81b9ULL, 0x3acaaf8f056338f0ULL, 0x07b43f50627a6778ULL, - 0x4a44ab49f5eccc77ULL, 0x3bc3d6e4b679ee98ULL, 0x9cc0d4d1cf14108cULL, - 0x4406c00b206bc8a0ULL, 0x82a18854c8d72d89ULL, 0x67e366b35c3c432cULL, - 0xb923dd61102b37f2ULL, 0x56ab2779d884271dULL, 0xbe83e1b0ff1525afULL, - 0xfb7c65d4217e49a9ULL, 0x6bdbe0e76d48e7d4ULL, 0x08df828745d9179eULL, - 0x22ea6a9add53bd34ULL, 0xe36e141c5622200aULL, 0x7f805d1b8cb750eeULL, - 0xafe5c7a59f58e837ULL, 0xe27f996a4fb1c23cULL, 0xd3867dfb0775f0d0ULL, - 0xd0e673de6e88891aULL, 0x123aeb9eafb86c25ULL, 0x30f1d5d5c145b895ULL, - 0xbb434a2dee7269e7ULL, 0x78cb67ecf931fa38ULL, 0xf33b0372323bbf9cULL, - 0x52d66336fb279c74ULL, 0x505f33ac0afb4eaaULL, 0xe8a5cd99a2cce187ULL, - 0x534974801e2d30bbULL, 0x8d2d5711d5876d90ULL, 0x1f1a412891bc038eULL, - 0xd6e2e71d82e56648ULL, 0x74036c3a497732b7ULL, 0x89b67ed96361f5abULL, - 0xffed95d8f1ea02a2ULL, 0xe72b3bd61464d43dULL, 0xa6300f170bdc4820ULL, - 0xebc18760ed78a77aULL -}; - -static const u64 sbox2[256] = { - 0xe6a6be5a05a12138ULL, 0xb5a122a5b4f87c98ULL, 0x563c6089140b6990ULL, - 0x4c46cb2e391f5dd5ULL, 0xd932addbc9b79434ULL, 0x08ea70e42015aff5ULL, - 0xd765a6673e478cf1ULL, 0xc4fb757eab278d99ULL, 0xdf11c6862d6e0692ULL, - 0xddeb84f10d7f3b16ULL, 0x6f2ef604a665ea04ULL, 0x4a8e0f0ff0e0dfb3ULL, - 0xa5edeef83dbcba51ULL, 0xfc4f0a2a0ea4371eULL, 0xe83e1da85cb38429ULL, - 0xdc8ff882ba1b1ce2ULL, 0xcd45505e8353e80dULL, 0x18d19a00d4db0717ULL, - 0x34a0cfeda5f38101ULL, 0x0be77e518887caf2ULL, 0x1e341438b3c45136ULL, - 0xe05797f49089ccf9ULL, 0xffd23f9df2591d14ULL, 0x543dda228595c5cdULL, - 0x661f81fd99052a33ULL, 0x8736e641db0f7b76ULL, 0x15227725418e5307ULL, - 0xe25f7f46162eb2faULL, 0x48a8b2126c13d9feULL, 0xafdc541792e76eeaULL, - 0x03d912bfc6d1898fULL, 0x31b1aafa1b83f51bULL, 0xf1ac2796e42ab7d9ULL, - 0x40a3a7d7fcd2ebacULL, 0x1056136d0afbbcc5ULL, 0x7889e1dd9a6d0c85ULL, - 0xd33525782a7974aaULL, 0xa7e25d09078ac09bULL, 0xbd4138b3eac6edd0ULL, - 0x920abfbe71eb9e70ULL, 0xa2a5d0f54fc2625cULL, 0xc054e36b0b1290a3ULL, - 0xf6dd59ff62fe932bULL, 0x3537354511a8ac7dULL, 0xca845e9172fadcd4ULL, - 0x84f82b60329d20dcULL, 0x79c62ce1cd672f18ULL, 0x8b09a2add124642cULL, - 0xd0c1e96a19d9e726ULL, 0x5a786a9b4ba9500cULL, 0x0e020336634c43f3ULL, - 0xc17b474aeb66d822ULL, 0x6a731ae3ec9baac2ULL, 0x8226667ae0840258ULL, - 0x67d4567691caeca5ULL, 0x1d94155c4875adb5ULL, 0x6d00fd985b813fdfULL, - 0x51286efcb774cd06ULL, 0x5e8834471fa744afULL, 0xf72ca0aee761ae2eULL, - 0xbe40e4cdaee8e09aULL, 0xe9970bbb5118f665ULL, 0x726e4beb33df1964ULL, - 0x703b000729199762ULL, 0x4631d816f5ef30a7ULL, 0xb880b5b51504a6beULL, - 0x641793c37ed84b6cULL, 0x7b21ed77f6e97d96ULL, 0x776306312ef96b73ULL, - 0xae528948e86ff3f4ULL, 0x53dbd7f286a3f8f8ULL, 0x16cadce74cfc1063ULL, - 0x005c19bdfa52c6ddULL, 0x68868f5d64d46ad3ULL, 0x3a9d512ccf1e186aULL, - 0x367e62c2385660aeULL, 0xe359e7ea77dcb1d7ULL, 0x526c0773749abe6eULL, - 0x735ae5f9d09f734bULL, 0x493fc7cc8a558ba8ULL, 0xb0b9c1533041ab45ULL, - 0x321958ba470a59bdULL, 0x852db00b5f46c393ULL, 0x91209b2bd336b0e5ULL, - 0x6e604f7d659ef19fULL, 0xb99a8ae2782ccb24ULL, 0xccf52ab6c814c4c7ULL, - 0x4727d9afbe11727bULL, 0x7e950d0c0121b34dULL, 0x756f435670ad471fULL, - 0xf5add442615a6849ULL, 0x4e87e09980b9957aULL, 0x2acfa1df50aee355ULL, - 0xd898263afd2fd556ULL, 0xc8f4924dd80c8fd6ULL, 0xcf99ca3d754a173aULL, - 0xfe477bacaf91bf3cULL, 0xed5371f6d690c12dULL, 0x831a5c285e687094ULL, - 0xc5d3c90a3708a0a4ULL, 0x0f7f903717d06580ULL, 0x19f9bb13b8fdf27fULL, - 0xb1bd6f1b4d502843ULL, 0x1c761ba38fff4012ULL, 0x0d1530c4e2e21f3bULL, - 0x8943ce69a7372c8aULL, 0xe5184e11feb5ce66ULL, 0x618bdb80bd736621ULL, - 0x7d29bad68b574d0bULL, 0x81bb613e25e6fe5bULL, 0x071c9c10bc07913fULL, - 0xc7beeb7909ac2d97ULL, 0xc3e58d353bc5d757ULL, 0xeb017892f38f61e8ULL, - 0xd4effb9c9b1cc21aULL, 0x99727d26f494f7abULL, 0xa3e063a2956b3e03ULL, - 0x9d4a8b9a4aa09c30ULL, 0x3f6ab7d500090fb4ULL, 0x9cc0f2a057268ac0ULL, - 0x3dee9d2dedbf42d1ULL, 0x330f49c87960a972ULL, 0xc6b2720287421b41ULL, - 0x0ac59ec07c00369cULL, 0xef4eac49cb353425ULL, 0xf450244eef0129d8ULL, - 0x8acc46e5caf4deb6ULL, 0x2ffeab63989263f7ULL, 0x8f7cb9fe5d7a4578ULL, - 0x5bd8f7644e634635ULL, 0x427a7315bf2dc900ULL, 0x17d0c4aa2125261cULL, - 0x3992486c93518e50ULL, 0xb4cbfee0a2d7d4c3ULL, 0x7c75d6202c5ddd8dULL, - 0xdbc295d8e35b6c61ULL, 0x60b369d302032b19ULL, 0xce42685fdce44132ULL, - 0x06f3ddb9ddf65610ULL, 0x8ea4d21db5e148f0ULL, 0x20b0fce62fcd496fULL, - 0x2c1b912358b0ee31ULL, 0xb28317b818f5a308ULL, 0xa89c1e189ca6d2cfULL, - 0x0c6b18576aaadbc8ULL, 0xb65deaa91299fae3ULL, 0xfb2b794b7f1027e7ULL, - 0x04e4317f443b5bebULL, 0x4b852d325939d0a6ULL, 0xd5ae6beefb207ffcULL, - 0x309682b281c7d374ULL, 0xbae309a194c3b475ULL, 0x8cc3f97b13b49f05ULL, - 0x98a9422ff8293967ULL, 0x244b16b01076ff7cULL, 0xf8bf571c663d67eeULL, - 0x1f0d6758eee30da1ULL, 0xc9b611d97adeb9b7ULL, 0xb7afd5887b6c57a2ULL, - 0x6290ae846b984fe1ULL, 0x94df4cdeacc1a5fdULL, 0x058a5bd1c5483affULL, - 0x63166cc142ba3c37ULL, 0x8db8526eb2f76f40ULL, 0xe10880036f0d6d4eULL, - 0x9e0523c9971d311dULL, 0x45ec2824cc7cd691ULL, 0x575b8359e62382c9ULL, - 0xfa9e400dc4889995ULL, 0xd1823ecb45721568ULL, 0xdafd983b8206082fULL, - 0xaa7d29082386a8cbULL, 0x269fcd4403b87588ULL, 0x1b91f5f728bdd1e0ULL, - 0xe4669f39040201f6ULL, 0x7a1d7c218cf04adeULL, 0x65623c29d79ce5ceULL, - 0x2368449096c00bb1ULL, 0xab9bf1879da503baULL, 0xbc23ecb1a458058eULL, - 0x9a58df01bb401eccULL, 0xa070e868a85f143dULL, 0x4ff188307df2239eULL, - 0x14d565b41a641183ULL, 0xee13337452701602ULL, 0x950e3dcf3f285e09ULL, - 0x59930254b9c80953ULL, 0x3bf299408930da6dULL, 0xa955943f53691387ULL, - 0xa15edecaa9cb8784ULL, 0x29142127352be9a0ULL, 0x76f0371fff4e7afbULL, - 0x0239f450274f2228ULL, 0xbb073af01d5e868bULL, 0xbfc80571c10e96c1ULL, - 0xd267088568222e23ULL, 0x9671a3d48e80b5b0ULL, 0x55b5d38ae193bb81ULL, - 0x693ae2d0a18b04b8ULL, 0x5c48b4ecadd5335fULL, 0xfd743b194916a1caULL, - 0x2577018134be98c4ULL, 0xe77987e83c54a4adULL, 0x28e11014da33e1b9ULL, - 0x270cc59e226aa213ULL, 0x71495f756d1a5f60ULL, 0x9be853fb60afef77ULL, - 0xadc786a7f7443dbfULL, 0x0904456173b29a82ULL, 0x58bc7a66c232bd5eULL, - 0xf306558c673ac8b2ULL, 0x41f639c6b6c9772aULL, 0x216defe99fda35daULL, - 0x11640cc71c7be615ULL, 0x93c43694565c5527ULL, 0xea038e6246777839ULL, - 0xf9abf3ce5a3e2469ULL, 0x741e768d0fd312d2ULL, 0x0144b883ced652c6ULL, - 0xc20b5a5ba33f8552ULL, 0x1ae69633c3435a9dULL, 0x97a28ca4088cfdecULL, - 0x8824a43c1e96f420ULL, 0x37612fa66eeea746ULL, 0x6b4cb165f9cf0e5aULL, - 0x43aa1c06a0abfb4aULL, 0x7f4dc26ff162796bULL, 0x6cbacc8e54ed9b0fULL, - 0xa6b7ffefd2bb253eULL, 0x2e25bc95b0a29d4fULL, 0x86d6a58bdef1388cULL, - 0xded74ac576b6f054ULL, 0x8030bdbc2b45805dULL, 0x3c81af70e94d9289ULL, - 0x3eff6dda9e3100dbULL, 0xb38dc39fdfcc8847ULL, 0x123885528d17b87eULL, - 0xf2da0ed240b1b642ULL, 0x44cefadcd54bf9a9ULL, 0x1312200e433c7ee6ULL, - 0x9ffcc84f3a78c748ULL, 0xf0cd1f72248576bbULL, 0xec6974053638cfe4ULL, - 0x2ba7b67c0cec4e4cULL, 0xac2f4df3e5ce32edULL, 0xcb33d14326ea4c11ULL, - 0xa4e9044cc77e58bcULL, 0x5f513293d934fcefULL, 0x5dc9645506e55444ULL, - 0x50de418f317de40aULL, 0x388cb31a69dde259ULL, 0x2db4a83455820a86ULL, - 0x9010a91e84711ae9ULL, 0x4df7f0b7b1498371ULL, 0xd62a2eabc0977179ULL, - 0x22fac097aa8d5c0eULL -}; - -static const u64 sbox3[256] = { - 0xf49fcc2ff1daf39bULL, 0x487fd5c66ff29281ULL, 0xe8a30667fcdca83fULL, - 0x2c9b4be3d2fcce63ULL, 0xda3ff74b93fbbbc2ULL, 0x2fa165d2fe70ba66ULL, - 0xa103e279970e93d4ULL, 0xbecdec77b0e45e71ULL, 0xcfb41e723985e497ULL, - 0xb70aaa025ef75017ULL, 0xd42309f03840b8e0ULL, 0x8efc1ad035898579ULL, - 0x96c6920be2b2abc5ULL, 0x66af4163375a9172ULL, 0x2174abdcca7127fbULL, - 0xb33ccea64a72ff41ULL, 0xf04a4933083066a5ULL, 0x8d970acdd7289af5ULL, - 0x8f96e8e031c8c25eULL, 0xf3fec02276875d47ULL, 0xec7bf310056190ddULL, - 0xf5adb0aebb0f1491ULL, 0x9b50f8850fd58892ULL, 0x4975488358b74de8ULL, - 0xa3354ff691531c61ULL, 0x0702bbe481d2c6eeULL, 0x89fb24057deded98ULL, - 0xac3075138596e902ULL, 0x1d2d3580172772edULL, 0xeb738fc28e6bc30dULL, - 0x5854ef8f63044326ULL, 0x9e5c52325add3bbeULL, 0x90aa53cf325c4623ULL, - 0xc1d24d51349dd067ULL, 0x2051cfeea69ea624ULL, 0x13220f0a862e7e4fULL, - 0xce39399404e04864ULL, 0xd9c42ca47086fcb7ULL, 0x685ad2238a03e7ccULL, - 0x066484b2ab2ff1dbULL, 0xfe9d5d70efbf79ecULL, 0x5b13b9dd9c481854ULL, - 0x15f0d475ed1509adULL, 0x0bebcd060ec79851ULL, 0xd58c6791183ab7f8ULL, - 0xd1187c5052f3eee4ULL, 0xc95d1192e54e82ffULL, 0x86eea14cb9ac6ca2ULL, - 0x3485beb153677d5dULL, 0xdd191d781f8c492aULL, 0xf60866baa784ebf9ULL, - 0x518f643ba2d08c74ULL, 0x8852e956e1087c22ULL, 0xa768cb8dc410ae8dULL, - 0x38047726bfec8e1aULL, 0xa67738b4cd3b45aaULL, 0xad16691cec0dde19ULL, - 0xc6d4319380462e07ULL, 0xc5a5876d0ba61938ULL, 0x16b9fa1fa58fd840ULL, - 0x188ab1173ca74f18ULL, 0xabda2f98c99c021fULL, 0x3e0580ab134ae816ULL, - 0x5f3b05b773645abbULL, 0x2501a2be5575f2f6ULL, 0x1b2f74004e7e8ba9ULL, - 0x1cd7580371e8d953ULL, 0x7f6ed89562764e30ULL, 0xb15926ff596f003dULL, - 0x9f65293da8c5d6b9ULL, 0x6ecef04dd690f84cULL, 0x4782275fff33af88ULL, - 0xe41433083f820801ULL, 0xfd0dfe409a1af9b5ULL, 0x4325a3342cdb396bULL, - 0x8ae77e62b301b252ULL, 0xc36f9e9f6655615aULL, 0x85455a2d92d32c09ULL, - 0xf2c7dea949477485ULL, 0x63cfb4c133a39ebaULL, 0x83b040cc6ebc5462ULL, - 0x3b9454c8fdb326b0ULL, 0x56f56a9e87ffd78cULL, 0x2dc2940d99f42bc6ULL, - 0x98f7df096b096e2dULL, 0x19a6e01e3ad852bfULL, 0x42a99ccbdbd4b40bULL, - 0xa59998af45e9c559ULL, 0x366295e807d93186ULL, 0x6b48181bfaa1f773ULL, - 0x1fec57e2157a0a1dULL, 0x4667446af6201ad5ULL, 0xe615ebcacfb0f075ULL, - 0xb8f31f4f68290778ULL, 0x22713ed6ce22d11eULL, 0x3057c1a72ec3c93bULL, - 0xcb46acc37c3f1f2fULL, 0xdbb893fd02aaf50eULL, 0x331fd92e600b9fcfULL, - 0xa498f96148ea3ad6ULL, 0xa8d8426e8b6a83eaULL, 0xa089b274b7735cdcULL, - 0x87f6b3731e524a11ULL, 0x118808e5cbc96749ULL, 0x9906e4c7b19bd394ULL, - 0xafed7f7e9b24a20cULL, 0x6509eadeeb3644a7ULL, 0x6c1ef1d3e8ef0edeULL, - 0xb9c97d43e9798fb4ULL, 0xa2f2d784740c28a3ULL, 0x7b8496476197566fULL, - 0x7a5be3e6b65f069dULL, 0xf96330ed78be6f10ULL, 0xeee60de77a076a15ULL, - 0x2b4bee4aa08b9bd0ULL, 0x6a56a63ec7b8894eULL, 0x02121359ba34fef4ULL, - 0x4cbf99f8283703fcULL, 0x398071350caf30c8ULL, 0xd0a77a89f017687aULL, - 0xf1c1a9eb9e423569ULL, 0x8c7976282dee8199ULL, 0x5d1737a5dd1f7abdULL, - 0x4f53433c09a9fa80ULL, 0xfa8b0c53df7ca1d9ULL, 0x3fd9dcbc886ccb77ULL, - 0xc040917ca91b4720ULL, 0x7dd00142f9d1dcdfULL, 0x8476fc1d4f387b58ULL, - 0x23f8e7c5f3316503ULL, 0x032a2244e7e37339ULL, 0x5c87a5d750f5a74bULL, - 0x082b4cc43698992eULL, 0xdf917becb858f63cULL, 0x3270b8fc5bf86ddaULL, - 0x10ae72bb29b5dd76ULL, 0x576ac94e7700362bULL, 0x1ad112dac61efb8fULL, - 0x691bc30ec5faa427ULL, 0xff246311cc327143ULL, 0x3142368e30e53206ULL, - 0x71380e31e02ca396ULL, 0x958d5c960aad76f1ULL, 0xf8d6f430c16da536ULL, - 0xc8ffd13f1be7e1d2ULL, 0x7578ae66004ddbe1ULL, 0x05833f01067be646ULL, - 0xbb34b5ad3bfe586dULL, 0x095f34c9a12b97f0ULL, 0x247ab64525d60ca8ULL, - 0xdcdbc6f3017477d1ULL, 0x4a2e14d4decad24dULL, 0xbdb5e6d9be0a1eebULL, - 0x2a7e70f7794301abULL, 0xdef42d8a270540fdULL, 0x01078ec0a34c22c1ULL, - 0xe5de511af4c16387ULL, 0x7ebb3a52bd9a330aULL, 0x77697857aa7d6435ULL, - 0x004e831603ae4c32ULL, 0xe7a21020ad78e312ULL, 0x9d41a70c6ab420f2ULL, - 0x28e06c18ea1141e6ULL, 0xd2b28cbd984f6b28ULL, 0x26b75f6c446e9d83ULL, - 0xba47568c4d418d7fULL, 0xd80badbfe6183d8eULL, 0x0e206d7f5f166044ULL, - 0xe258a43911cbca3eULL, 0x723a1746b21dc0bcULL, 0xc7caa854f5d7cdd3ULL, - 0x7cac32883d261d9cULL, 0x7690c26423ba942cULL, 0x17e55524478042b8ULL, - 0xe0be477656a2389fULL, 0x4d289b5e67ab2da0ULL, 0x44862b9c8fbbfd31ULL, - 0xb47cc8049d141365ULL, 0x822c1b362b91c793ULL, 0x4eb14655fb13dfd8ULL, - 0x1ecbba0714e2a97bULL, 0x6143459d5cde5f14ULL, 0x53a8fbf1d5f0ac89ULL, - 0x97ea04d81c5e5b00ULL, 0x622181a8d4fdb3f3ULL, 0xe9bcd341572a1208ULL, - 0x1411258643cce58aULL, 0x9144c5fea4c6e0a4ULL, 0x0d33d06565cf620fULL, - 0x54a48d489f219ca1ULL, 0xc43e5eac6d63c821ULL, 0xa9728b3a72770dafULL, - 0xd7934e7b20df87efULL, 0xe35503b61a3e86e5ULL, 0xcae321fbc819d504ULL, - 0x129a50b3ac60bfa6ULL, 0xcd5e68ea7e9fb6c3ULL, 0xb01c90199483b1c7ULL, - 0x3de93cd5c295376cULL, 0xaed52edf2ab9ad13ULL, 0x2e60f512c0a07884ULL, - 0xbc3d86a3e36210c9ULL, 0x35269d9b163951ceULL, 0x0c7d6e2ad0cdb5faULL, - 0x59e86297d87f5733ULL, 0x298ef221898db0e7ULL, 0x55000029d1a5aa7eULL, - 0x8bc08ae1b5061b45ULL, 0xc2c31c2b6c92703aULL, 0x94cc596baf25ef42ULL, - 0x0a1d73db22540456ULL, 0x04b6a0f9d9c4179aULL, 0xeffdafa2ae3d3c60ULL, - 0xf7c8075bb49496c4ULL, 0x9cc5c7141d1cd4e3ULL, 0x78bd1638218e5534ULL, - 0xb2f11568f850246aULL, 0xedfabcfa9502bc29ULL, 0x796ce5f2da23051bULL, - 0xaae128b0dc93537cULL, 0x3a493da0ee4b29aeULL, 0xb5df6b2c416895d7ULL, - 0xfcabbd25122d7f37ULL, 0x70810b58105dc4b1ULL, 0xe10fdd37f7882a90ULL, - 0x524dcab5518a3f5cULL, 0x3c9e85878451255bULL, 0x4029828119bd34e2ULL, - 0x74a05b6f5d3ceccbULL, 0xb610021542e13ecaULL, 0x0ff979d12f59e2acULL, - 0x6037da27e4f9cc50ULL, 0x5e92975a0df1847dULL, 0xd66de190d3e623feULL, - 0x5032d6b87b568048ULL, 0x9a36b7ce8235216eULL, 0x80272a7a24f64b4aULL, - 0x93efed8b8c6916f7ULL, 0x37ddbff44cce1555ULL, 0x4b95db5d4b99bd25ULL, - 0x92d3fda169812fc0ULL, 0xfb1a4a9a90660bb6ULL, 0x730c196946a4b9b2ULL, - 0x81e289aa7f49da68ULL, 0x64669a0f83b1a05fULL, 0x27b3ff7d9644f48bULL, - 0xcc6b615c8db675b3ULL, 0x674f20b9bcebbe95ULL, 0x6f31238275655982ULL, - 0x5ae488713e45cf05ULL, 0xbf619f9954c21157ULL, 0xeabac46040a8eae9ULL, - 0x454c6fe9f2c0c1cdULL, 0x419cf6496412691cULL, 0xd3dc3bef265b0f70ULL, - 0x6d0e60f5c3578a9eULL -}; - -static const u64 sbox4[256] = { - 0x5b0e608526323c55ULL, 0x1a46c1a9fa1b59f5ULL, 0xa9e245a17c4c8ffaULL, - 0x65ca5159db2955d7ULL, 0x05db0a76ce35afc2ULL, 0x81eac77ea9113d45ULL, - 0x528ef88ab6ac0a0dULL, 0xa09ea253597be3ffULL, 0x430ddfb3ac48cd56ULL, - 0xc4b3a67af45ce46fULL, 0x4ececfd8fbe2d05eULL, 0x3ef56f10b39935f0ULL, - 0x0b22d6829cd619c6ULL, 0x17fd460a74df2069ULL, 0x6cf8cc8e8510ed40ULL, - 0xd6c824bf3a6ecaa7ULL, 0x61243d581a817049ULL, 0x048bacb6bbc163a2ULL, - 0xd9a38ac27d44cc32ULL, 0x7fddff5baaf410abULL, 0xad6d495aa804824bULL, - 0xe1a6a74f2d8c9f94ULL, 0xd4f7851235dee8e3ULL, 0xfd4b7f886540d893ULL, - 0x247c20042aa4bfdaULL, 0x096ea1c517d1327cULL, 0xd56966b4361a6685ULL, - 0x277da5c31221057dULL, 0x94d59893a43acff7ULL, 0x64f0c51ccdc02281ULL, - 0x3d33bcc4ff6189dbULL, 0xe005cb184ce66af1ULL, 0xff5ccd1d1db99beaULL, - 0xb0b854a7fe42980fULL, 0x7bd46a6a718d4b9fULL, 0xd10fa8cc22a5fd8cULL, - 0xd31484952be4bd31ULL, 0xc7fa975fcb243847ULL, 0x4886ed1e5846c407ULL, - 0x28cddb791eb70b04ULL, 0xc2b00be2f573417fULL, 0x5c9590452180f877ULL, - 0x7a6bddfff370eb00ULL, 0xce509e38d6d9d6a4ULL, 0xebeb0f00647fa702ULL, - 0x1dcc06cf76606f06ULL, 0xe4d9f28ba286ff0aULL, 0xd85a305dc918c262ULL, - 0x475b1d8732225f54ULL, 0x2d4fb51668ccb5feULL, 0xa679b9d9d72bba20ULL, - 0x53841c0d912d43a5ULL, 0x3b7eaa48bf12a4e8ULL, 0x781e0e47f22f1ddfULL, - 0xeff20ce60ab50973ULL, 0x20d261d19dffb742ULL, 0x16a12b03062a2e39ULL, - 0x1960eb2239650495ULL, 0x251c16fed50eb8b8ULL, 0x9ac0c330f826016eULL, - 0xed152665953e7671ULL, 0x02d63194a6369570ULL, 0x5074f08394b1c987ULL, - 0x70ba598c90b25ce1ULL, 0x794a15810b9742f6ULL, 0x0d5925e9fcaf8c6cULL, - 0x3067716cd868744eULL, 0x910ab077e8d7731bULL, 0x6a61bbdb5ac42f61ULL, - 0x93513efbf0851567ULL, 0xf494724b9e83e9d5ULL, 0xe887e1985c09648dULL, - 0x34b1d3c675370cfdULL, 0xdc35e433bc0d255dULL, 0xd0aab84234131be0ULL, - 0x08042a50b48b7eafULL, 0x9997c4ee44a3ab35ULL, 0x829a7b49201799d0ULL, - 0x263b8307b7c54441ULL, 0x752f95f4fd6a6ca6ULL, 0x927217402c08c6e5ULL, - 0x2a8ab754a795d9eeULL, 0xa442f7552f72943dULL, 0x2c31334e19781208ULL, - 0x4fa98d7ceaee6291ULL, 0x55c3862f665db309ULL, 0xbd0610175d53b1f3ULL, - 0x46fe6cb840413f27ULL, 0x3fe03792df0cfa59ULL, 0xcfe700372eb85e8fULL, - 0xa7be29e7adbce118ULL, 0xe544ee5cde8431ddULL, 0x8a781b1b41f1873eULL, - 0xa5c94c78a0d2f0e7ULL, 0x39412e2877b60728ULL, 0xa1265ef3afc9a62cULL, - 0xbcc2770c6a2506c5ULL, 0x3ab66dd5dce1ce12ULL, 0xe65499d04a675b37ULL, - 0x7d8f523481bfd216ULL, 0x0f6f64fcec15f389ULL, 0x74efbe618b5b13c8ULL, - 0xacdc82b714273e1dULL, 0xdd40bfe003199d17ULL, 0x37e99257e7e061f8ULL, - 0xfa52626904775aaaULL, 0x8bbbf63a463d56f9ULL, 0xf0013f1543a26e64ULL, - 0xa8307e9f879ec898ULL, 0xcc4c27a4150177ccULL, 0x1b432f2cca1d3348ULL, - 0xde1d1f8f9f6fa013ULL, 0x606602a047a7ddd6ULL, 0xd237ab64cc1cb2c7ULL, - 0x9b938e7225fcd1d3ULL, 0xec4e03708e0ff476ULL, 0xfeb2fbda3d03c12dULL, - 0xae0bced2ee43889aULL, 0x22cb8923ebfb4f43ULL, 0x69360d013cf7396dULL, - 0x855e3602d2d4e022ULL, 0x073805bad01f784cULL, 0x33e17a133852f546ULL, - 0xdf4874058ac7b638ULL, 0xba92b29c678aa14aULL, 0x0ce89fc76cfaadcdULL, - 0x5f9d4e0908339e34ULL, 0xf1afe9291f5923b9ULL, 0x6e3480f60f4a265fULL, - 0xeebf3a2ab29b841cULL, 0xe21938a88f91b4adULL, 0x57dfeff845c6d3c3ULL, - 0x2f006b0bf62caaf2ULL, 0x62f479ef6f75ee78ULL, 0x11a55ad41c8916a9ULL, - 0xf229d29084fed453ULL, 0x42f1c27b16b000e6ULL, 0x2b1f76749823c074ULL, - 0x4b76eca3c2745360ULL, 0x8c98f463b91691bdULL, 0x14bcc93cf1ade66aULL, - 0x8885213e6d458397ULL, 0x8e177df0274d4711ULL, 0xb49b73b5503f2951ULL, - 0x10168168c3f96b6bULL, 0x0e3d963b63cab0aeULL, 0x8dfc4b5655a1db14ULL, - 0xf789f1356e14de5cULL, 0x683e68af4e51dac1ULL, 0xc9a84f9d8d4b0fd9ULL, - 0x3691e03f52a0f9d1ULL, 0x5ed86e46e1878e80ULL, 0x3c711a0e99d07150ULL, - 0x5a0865b20c4e9310ULL, 0x56fbfc1fe4f0682eULL, 0xea8d5de3105edf9bULL, - 0x71abfdb12379187aULL, 0x2eb99de1bee77b9cULL, 0x21ecc0ea33cf4523ULL, - 0x59a4d7521805c7a1ULL, 0x3896f5eb56ae7c72ULL, 0xaa638f3db18f75dcULL, - 0x9f39358dabe9808eULL, 0xb7defa91c00b72acULL, 0x6b5541fd62492d92ULL, - 0x6dc6dee8f92e4d5bULL, 0x353f57abc4beea7eULL, 0x735769d6da5690ceULL, - 0x0a234aa642391484ULL, 0xf6f9508028f80d9dULL, 0xb8e319a27ab3f215ULL, - 0x31ad9c1151341a4dULL, 0x773c22a57bef5805ULL, 0x45c7561a07968633ULL, - 0xf913da9e249dbe36ULL, 0xda652d9b78a64c68ULL, 0x4c27a97f3bc334efULL, - 0x76621220e66b17f4ULL, 0x967743899acd7d0bULL, 0xf3ee5bcae0ed6782ULL, - 0x409f753600c879fcULL, 0x06d09a39b5926db6ULL, 0x6f83aeb0317ac588ULL, - 0x01e6ca4a86381f21ULL, 0x66ff3462d19f3025ULL, 0x72207c24ddfd3bfbULL, - 0x4af6b6d3e2ece2ebULL, 0x9c994dbec7ea08deULL, 0x49ace597b09a8bc4ULL, - 0xb38c4766cf0797baULL, 0x131b9373c57c2a75ULL, 0xb1822cce61931e58ULL, - 0x9d7555b909ba1c0cULL, 0x127fafdd937d11d2ULL, 0x29da3badc66d92e4ULL, - 0xa2c1d57154c2ecbcULL, 0x58c5134d82f6fe24ULL, 0x1c3ae3515b62274fULL, - 0xe907c82e01cb8126ULL, 0xf8ed091913e37fcbULL, 0x3249d8f9c80046c9ULL, - 0x80cf9bede388fb63ULL, 0x1881539a116cf19eULL, 0x5103f3f76bd52457ULL, - 0x15b7e6f5ae47f7a8ULL, 0xdbd7c6ded47e9ccfULL, 0x44e55c410228bb1aULL, - 0xb647d4255edb4e99ULL, 0x5d11882bb8aafc30ULL, 0xf5098bbb29d3212aULL, - 0x8fb5ea14e90296b3ULL, 0x677b942157dd025aULL, 0xfb58e7c0a390acb5ULL, - 0x89d3674c83bd4a01ULL, 0x9e2da4df4bf3b93bULL, 0xfcc41e328cab4829ULL, - 0x03f38c96ba582c52ULL, 0xcad1bdbd7fd85db2ULL, 0xbbb442c16082ae83ULL, - 0xb95fe86ba5da9ab0ULL, 0xb22e04673771a93fULL, 0x845358c9493152d8ULL, - 0xbe2a488697b4541eULL, 0x95a2dc2dd38e6966ULL, 0xc02c11ac923c852bULL, - 0x2388b1990df2a87bULL, 0x7c8008fa1b4f37beULL, 0x1f70d0c84d54e503ULL, - 0x5490adec7ece57d4ULL, 0x002b3c27d9063a3aULL, 0x7eaea3848030a2bfULL, - 0xc602326ded2003c0ULL, 0x83a7287d69a94086ULL, 0xc57a5fcb30f57a8aULL, - 0xb56844e479ebe779ULL, 0xa373b40f05dcbce9ULL, 0xd71a786e88570ee2ULL, - 0x879cbacdbde8f6a0ULL, 0x976ad1bcc164a32fULL, 0xab21e25e9666d78bULL, - 0x901063aae5e5c33cULL, 0x9818b34448698d90ULL, 0xe36487ae3e1e8abbULL, - 0xafbdf931893bdcb4ULL, 0x6345a0dc5fbbd519ULL, 0x8628fe269b9465caULL, - 0x1e5d01603f9c51ecULL, 0x4de44006a15049b7ULL, 0xbf6c70e5f776cbb1ULL, - 0x411218f2ef552bedULL, 0xcb0c0708705a36a3ULL, 0xe74d14754f986044ULL, - 0xcd56d9430ea8280eULL, 0xc12591d7535f5065ULL, 0xc83223f1720aef96ULL, - 0xc3a0396f7363a51fULL -}; - - -static void tgr192_round(u64 * ra, u64 * rb, u64 * rc, u64 x, int mul) -{ - u64 a = *ra; - u64 b = *rb; - u64 c = *rc; - - c ^= x; - a -= sbox1[c & 0xff] ^ sbox2[(c >> 16) & 0xff] - ^ sbox3[(c >> 32) & 0xff] ^ sbox4[(c >> 48) & 0xff]; - b += sbox4[(c >> 8) & 0xff] ^ sbox3[(c >> 24) & 0xff] - ^ sbox2[(c >> 40) & 0xff] ^ sbox1[(c >> 56) & 0xff]; - b *= mul; - - *ra = a; - *rb = b; - *rc = c; -} - - -static void tgr192_pass(u64 * ra, u64 * rb, u64 * rc, u64 * x, int mul) -{ - u64 a = *ra; - u64 b = *rb; - u64 c = *rc; - - tgr192_round(&a, &b, &c, x[0], mul); - tgr192_round(&b, &c, &a, x[1], mul); - tgr192_round(&c, &a, &b, x[2], mul); - tgr192_round(&a, &b, &c, x[3], mul); - tgr192_round(&b, &c, &a, x[4], mul); - tgr192_round(&c, &a, &b, x[5], mul); - tgr192_round(&a, &b, &c, x[6], mul); - tgr192_round(&b, &c, &a, x[7], mul); - - *ra = a; - *rb = b; - *rc = c; -} - - -static void tgr192_key_schedule(u64 * x) -{ - x[0] -= x[7] ^ 0xa5a5a5a5a5a5a5a5ULL; - x[1] ^= x[0]; - x[2] += x[1]; - x[3] -= x[2] ^ ((~x[1]) << 19); - x[4] ^= x[3]; - x[5] += x[4]; - x[6] -= x[5] ^ ((~x[4]) >> 23); - x[7] ^= x[6]; - x[0] += x[7]; - x[1] -= x[0] ^ ((~x[7]) << 19); - x[2] ^= x[1]; - x[3] += x[2]; - x[4] -= x[3] ^ ((~x[2]) >> 23); - x[5] ^= x[4]; - x[6] += x[5]; - x[7] -= x[6] ^ 0x0123456789abcdefULL; -} - - -/**************** - * Transform the message DATA which consists of 512 bytes (8 words) - */ - -static void tgr192_transform(struct tgr192_ctx *tctx, const u8 * data) -{ - u64 a, b, c, aa, bb, cc; - u64 x[8]; - int i; - - for (i = 0; i < 8; i++) - x[i] = get_unaligned_le64(data + i * sizeof(__le64)); - - /* save */ - a = aa = tctx->a; - b = bb = tctx->b; - c = cc = tctx->c; - - tgr192_pass(&a, &b, &c, x, 5); - tgr192_key_schedule(x); - tgr192_pass(&c, &a, &b, x, 7); - tgr192_key_schedule(x); - tgr192_pass(&b, &c, &a, x, 9); - - - /* feedforward */ - a ^= aa; - b -= bb; - c += cc; - /* store */ - tctx->a = a; - tctx->b = b; - tctx->c = c; -} - -static int tgr192_init(struct shash_desc *desc) -{ - struct tgr192_ctx *tctx = shash_desc_ctx(desc); - - tctx->a = 0x0123456789abcdefULL; - tctx->b = 0xfedcba9876543210ULL; - tctx->c = 0xf096a5b4c3b2e187ULL; - tctx->nblocks = 0; - tctx->count = 0; - - return 0; -} - - -/* Update the message digest with the contents - * of INBUF with length INLEN. */ -static int tgr192_update(struct shash_desc *desc, const u8 *inbuf, - unsigned int len) -{ - struct tgr192_ctx *tctx = shash_desc_ctx(desc); - - if (tctx->count == 64) { /* flush the buffer */ - tgr192_transform(tctx, tctx->hash); - tctx->count = 0; - tctx->nblocks++; - } - if (!inbuf) { - return 0; - } - if (tctx->count) { - for (; len && tctx->count < 64; len--) { - tctx->hash[tctx->count++] = *inbuf++; - } - tgr192_update(desc, NULL, 0); - if (!len) { - return 0; - } - - } - - while (len >= 64) { - tgr192_transform(tctx, inbuf); - tctx->count = 0; - tctx->nblocks++; - len -= 64; - inbuf += 64; - } - for (; len && tctx->count < 64; len--) { - tctx->hash[tctx->count++] = *inbuf++; - } - - return 0; -} - - - -/* The routine terminates the computation */ -static int tgr192_final(struct shash_desc *desc, u8 * out) -{ - struct tgr192_ctx *tctx = shash_desc_ctx(desc); - __be64 *dst = (__be64 *)out; - __be64 *be64p; - __le32 *le32p; - u32 t, msb, lsb; - - tgr192_update(desc, NULL, 0); /* flush */ - - msb = 0; - t = tctx->nblocks; - if ((lsb = t << 6) < t) { /* multiply by 64 to make a byte count */ - msb++; - } - msb += t >> 26; - t = lsb; - if ((lsb = t + tctx->count) < t) { /* add the count */ - msb++; - } - t = lsb; - if ((lsb = t << 3) < t) { /* multiply by 8 to make a bit count */ - msb++; - } - msb += t >> 29; - - if (tctx->count < 56) { /* enough room */ - tctx->hash[tctx->count++] = 0x01; /* pad */ - while (tctx->count < 56) { - tctx->hash[tctx->count++] = 0; /* pad */ - } - } else { /* need one extra block */ - tctx->hash[tctx->count++] = 0x01; /* pad character */ - while (tctx->count < 64) { - tctx->hash[tctx->count++] = 0; - } - tgr192_update(desc, NULL, 0); /* flush */ - memset(tctx->hash, 0, 56); /* fill next block with zeroes */ - } - /* append the 64 bit count */ - le32p = (__le32 *)&tctx->hash[56]; - le32p[0] = cpu_to_le32(lsb); - le32p[1] = cpu_to_le32(msb); - - tgr192_transform(tctx, tctx->hash); - - be64p = (__be64 *)tctx->hash; - dst[0] = be64p[0] = cpu_to_be64(tctx->a); - dst[1] = be64p[1] = cpu_to_be64(tctx->b); - dst[2] = be64p[2] = cpu_to_be64(tctx->c); - - return 0; -} - -static int tgr160_final(struct shash_desc *desc, u8 * out) -{ - u8 D[64]; - - tgr192_final(desc, D); - memcpy(out, D, TGR160_DIGEST_SIZE); - memzero_explicit(D, TGR192_DIGEST_SIZE); - - return 0; -} - -static int tgr128_final(struct shash_desc *desc, u8 * out) -{ - u8 D[64]; - - tgr192_final(desc, D); - memcpy(out, D, TGR128_DIGEST_SIZE); - memzero_explicit(D, TGR192_DIGEST_SIZE); - - return 0; -} - -static struct shash_alg tgr_algs[3] = { { - .digestsize = TGR192_DIGEST_SIZE, - .init = tgr192_init, - .update = tgr192_update, - .final = tgr192_final, - .descsize = sizeof(struct tgr192_ctx), - .base = { - .cra_name = "tgr192", - .cra_driver_name = "tgr192-generic", - .cra_blocksize = TGR192_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = TGR160_DIGEST_SIZE, - .init = tgr192_init, - .update = tgr192_update, - .final = tgr160_final, - .descsize = sizeof(struct tgr192_ctx), - .base = { - .cra_name = "tgr160", - .cra_driver_name = "tgr160-generic", - .cra_blocksize = TGR192_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = TGR128_DIGEST_SIZE, - .init = tgr192_init, - .update = tgr192_update, - .final = tgr128_final, - .descsize = sizeof(struct tgr192_ctx), - .base = { - .cra_name = "tgr128", - .cra_driver_name = "tgr128-generic", - .cra_blocksize = TGR192_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init tgr192_mod_init(void) -{ - return crypto_register_shashes(tgr_algs, ARRAY_SIZE(tgr_algs)); -} - -static void __exit tgr192_mod_fini(void) -{ - crypto_unregister_shashes(tgr_algs, ARRAY_SIZE(tgr_algs)); -} - -MODULE_ALIAS_CRYPTO("tgr192"); -MODULE_ALIAS_CRYPTO("tgr160"); -MODULE_ALIAS_CRYPTO("tgr128"); - -subsys_initcall(tgr192_mod_init); -module_exit(tgr192_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Tiger Message Digest Algorithm"); From 663f63ee6d9cdc68adf9afca5427e5c2b5b4ae2d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 21 Jan 2021 14:07:33 +0100 Subject: [PATCH 112/154] crypto: salsa20 - remove Salsa20 stream cipher algorithm Salsa20 is not used anywhere in the kernel, is not suitable for disk encryption, and widely considered to have been superseded by ChaCha20. So let's remove it. Signed-off-by: Ard Biesheuvel Acked-by: Mike Snitzer Signed-off-by: Herbert Xu --- .../device-mapper/dm-integrity.rst | 4 +- crypto/Kconfig | 12 - crypto/Makefile | 1 - crypto/salsa20_generic.c | 212 --- crypto/tcrypt.c | 11 +- crypto/testmgr.c | 6 - crypto/testmgr.h | 1162 ----------------- 7 files changed, 3 insertions(+), 1405 deletions(-) delete mode 100644 crypto/salsa20_generic.c diff --git a/Documentation/admin-guide/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst index 4e6f504474ac..d56112e2e354 100644 --- a/Documentation/admin-guide/device-mapper/dm-integrity.rst +++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst @@ -143,8 +143,8 @@ recalculate journal_crypt:algorithm(:key) (the key is optional) Encrypt the journal using given algorithm to make sure that the attacker can't read the journal. You can use a block cipher here - (such as "cbc(aes)") or a stream cipher (for example "chacha20", - "salsa20" or "ctr(aes)"). + (such as "cbc(aes)") or a stream cipher (for example "chacha20" + or "ctr(aes)"). The journal contains history of last writes to the block device, an attacker reading the journal could see the last sector numbers diff --git a/crypto/Kconfig b/crypto/Kconfig index 8d25d689a705..9779c7f7531f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1400,18 +1400,6 @@ config CRYPTO_KHAZAD See also: -config CRYPTO_SALSA20 - tristate "Salsa20 stream cipher algorithm" - select CRYPTO_SKCIPHER - help - Salsa20 stream cipher algorithm. - - Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT - Stream Cipher Project. See - - The Salsa20 stream cipher algorithm is designed by Daniel J. - Bernstein . See - config CRYPTO_CHACHA20 tristate "ChaCha stream cipher algorithms" select CRYPTO_LIB_CHACHA_GENERIC diff --git a/crypto/Makefile b/crypto/Makefile index 6b9622f21f7f..cf23affb1678 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -138,7 +138,6 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o obj-$(CONFIG_CRYPTO_SEED) += seed.o -obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o obj-$(CONFIG_CRYPTO_CHACHA20) += chacha_generic.o obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c deleted file mode 100644 index 3418869dabef..000000000000 --- a/crypto/salsa20_generic.c +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Salsa20: Salsa20 stream cipher algorithm - * - * Copyright (c) 2007 Tan Swee Heng - * - * Derived from: - * - salsa20.c: Public domain C code by Daniel J. Bernstein - * - * Salsa20 is a stream cipher candidate in eSTREAM, the ECRYPT Stream - * Cipher Project. It is designed by Daniel J. Bernstein . - * More information about eSTREAM and Salsa20 can be found here: - * https://www.ecrypt.eu.org/stream/ - * https://cr.yp.to/snuffle.html - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include -#include -#include - -#define SALSA20_IV_SIZE 8 -#define SALSA20_MIN_KEY_SIZE 16 -#define SALSA20_MAX_KEY_SIZE 32 -#define SALSA20_BLOCK_SIZE 64 - -struct salsa20_ctx { - u32 initial_state[16]; -}; - -static void salsa20_block(u32 *state, __le32 *stream) -{ - u32 x[16]; - int i; - - memcpy(x, state, sizeof(x)); - - for (i = 0; i < 20; i += 2) { - x[ 4] ^= rol32((x[ 0] + x[12]), 7); - x[ 8] ^= rol32((x[ 4] + x[ 0]), 9); - x[12] ^= rol32((x[ 8] + x[ 4]), 13); - x[ 0] ^= rol32((x[12] + x[ 8]), 18); - x[ 9] ^= rol32((x[ 5] + x[ 1]), 7); - x[13] ^= rol32((x[ 9] + x[ 5]), 9); - x[ 1] ^= rol32((x[13] + x[ 9]), 13); - x[ 5] ^= rol32((x[ 1] + x[13]), 18); - x[14] ^= rol32((x[10] + x[ 6]), 7); - x[ 2] ^= rol32((x[14] + x[10]), 9); - x[ 6] ^= rol32((x[ 2] + x[14]), 13); - x[10] ^= rol32((x[ 6] + x[ 2]), 18); - x[ 3] ^= rol32((x[15] + x[11]), 7); - x[ 7] ^= rol32((x[ 3] + x[15]), 9); - x[11] ^= rol32((x[ 7] + x[ 3]), 13); - x[15] ^= rol32((x[11] + x[ 7]), 18); - x[ 1] ^= rol32((x[ 0] + x[ 3]), 7); - x[ 2] ^= rol32((x[ 1] + x[ 0]), 9); - x[ 3] ^= rol32((x[ 2] + x[ 1]), 13); - x[ 0] ^= rol32((x[ 3] + x[ 2]), 18); - x[ 6] ^= rol32((x[ 5] + x[ 4]), 7); - x[ 7] ^= rol32((x[ 6] + x[ 5]), 9); - x[ 4] ^= rol32((x[ 7] + x[ 6]), 13); - x[ 5] ^= rol32((x[ 4] + x[ 7]), 18); - x[11] ^= rol32((x[10] + x[ 9]), 7); - x[ 8] ^= rol32((x[11] + x[10]), 9); - x[ 9] ^= rol32((x[ 8] + x[11]), 13); - x[10] ^= rol32((x[ 9] + x[ 8]), 18); - x[12] ^= rol32((x[15] + x[14]), 7); - x[13] ^= rol32((x[12] + x[15]), 9); - x[14] ^= rol32((x[13] + x[12]), 13); - x[15] ^= rol32((x[14] + x[13]), 18); - } - - for (i = 0; i < 16; i++) - stream[i] = cpu_to_le32(x[i] + state[i]); - - if (++state[8] == 0) - state[9]++; -} - -static void salsa20_docrypt(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes) -{ - __le32 stream[SALSA20_BLOCK_SIZE / sizeof(__le32)]; - - while (bytes >= SALSA20_BLOCK_SIZE) { - salsa20_block(state, stream); - crypto_xor_cpy(dst, src, (const u8 *)stream, - SALSA20_BLOCK_SIZE); - bytes -= SALSA20_BLOCK_SIZE; - dst += SALSA20_BLOCK_SIZE; - src += SALSA20_BLOCK_SIZE; - } - if (bytes) { - salsa20_block(state, stream); - crypto_xor_cpy(dst, src, (const u8 *)stream, bytes); - } -} - -static void salsa20_init(u32 *state, const struct salsa20_ctx *ctx, - const u8 *iv) -{ - memcpy(state, ctx->initial_state, sizeof(ctx->initial_state)); - state[6] = get_unaligned_le32(iv + 0); - state[7] = get_unaligned_le32(iv + 4); -} - -static int salsa20_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize) -{ - static const char sigma[16] = "expand 32-byte k"; - static const char tau[16] = "expand 16-byte k"; - struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm); - const char *constants; - - if (keysize != SALSA20_MIN_KEY_SIZE && - keysize != SALSA20_MAX_KEY_SIZE) - return -EINVAL; - - ctx->initial_state[1] = get_unaligned_le32(key + 0); - ctx->initial_state[2] = get_unaligned_le32(key + 4); - ctx->initial_state[3] = get_unaligned_le32(key + 8); - ctx->initial_state[4] = get_unaligned_le32(key + 12); - if (keysize == 32) { /* recommended */ - key += 16; - constants = sigma; - } else { /* keysize == 16 */ - constants = tau; - } - ctx->initial_state[11] = get_unaligned_le32(key + 0); - ctx->initial_state[12] = get_unaligned_le32(key + 4); - ctx->initial_state[13] = get_unaligned_le32(key + 8); - ctx->initial_state[14] = get_unaligned_le32(key + 12); - ctx->initial_state[0] = get_unaligned_le32(constants + 0); - ctx->initial_state[5] = get_unaligned_le32(constants + 4); - ctx->initial_state[10] = get_unaligned_le32(constants + 8); - ctx->initial_state[15] = get_unaligned_le32(constants + 12); - - /* space for the nonce; it will be overridden for each request */ - ctx->initial_state[6] = 0; - ctx->initial_state[7] = 0; - - /* initial block number */ - ctx->initial_state[8] = 0; - ctx->initial_state[9] = 0; - - return 0; -} - -static int salsa20_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - u32 state[16]; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - salsa20_init(state, ctx, req->iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - salsa20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - return err; -} - -static struct skcipher_alg alg = { - .base.cra_name = "salsa20", - .base.cra_driver_name = "salsa20-generic", - .base.cra_priority = 100, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct salsa20_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = SALSA20_MIN_KEY_SIZE, - .max_keysize = SALSA20_MAX_KEY_SIZE, - .ivsize = SALSA20_IV_SIZE, - .chunksize = SALSA20_BLOCK_SIZE, - .setkey = salsa20_setkey, - .encrypt = salsa20_crypt, - .decrypt = salsa20_crypt, -}; - -static int __init salsa20_generic_mod_init(void) -{ - return crypto_register_skcipher(&alg); -} - -static void __exit salsa20_generic_mod_fini(void) -{ - crypto_unregister_skcipher(&alg); -} - -subsys_initcall(salsa20_generic_mod_init); -module_exit(salsa20_generic_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm"); -MODULE_ALIAS_CRYPTO("salsa20"); -MODULE_ALIAS_CRYPTO("salsa20-generic"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 696c44ef465e..2877b88cfa45 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -71,7 +71,7 @@ static const char *check[] = { "blowfish", "twofish", "serpent", "sha384", "sha512", "md4", "aes", "cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea", "khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt", - "camellia", "seed", "salsa20", "rmd160", + "camellia", "seed", "rmd160", "lzo", "lzo-rle", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", "streebog256", "streebog512", NULL @@ -1835,10 +1835,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("sha224"); break; - case 34: - ret += tcrypt_test("salsa20"); - break; - case 35: ret += tcrypt_test("gcm(aes)"); break; @@ -2153,11 +2149,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_32_48_64); break; - case 206: - test_cipher_speed("salsa20", ENCRYPT, sec, NULL, 0, - speed_template_16_32); - break; - case 207: test_cipher_speed("ecb(serpent)", ENCRYPT, sec, NULL, 0, speed_template_16_32); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index b87802ffb554..1a4103b1b202 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -5282,12 +5282,6 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .akcipher = __VECS(rsa_tv_template) } - }, { - .alg = "salsa20", - .test = alg_test_skcipher, - .suite = { - .cipher = __VECS(salsa20_stream_tv_template) - } }, { .alg = "sha1", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 851c107a5584..99aca08263d2 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -24409,1168 +24409,6 @@ static const struct cipher_testvec seed_tv_template[] = { } }; -static const struct cipher_testvec salsa20_stream_tv_template[] = { - /* - * Testvectors from verified.test-vectors submitted to ECRYPT. - * They are truncated to size 39, 64, 111, 129 to test a variety - * of input length. - */ - { /* Set 3, vector 0 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", - .klen = 16, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00", - .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00", - .ctext = "\x2D\xD5\xC3\xF7\xBA\x2B\x20\xF7" - "\x68\x02\x41\x0C\x68\x86\x88\x89" - "\x5A\xD8\xC1\xBD\x4E\xA6\xC9\xB1" - "\x40\xFB\x9B\x90\xE2\x10\x49\xBF" - "\x58\x3F\x52\x79\x70\xEB\xC1", - .len = 39, - }, { /* Set 5, vector 0 */ - .key = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .klen = 16, - .iv = "\x80\x00\x00\x00\x00\x00\x00\x00", - .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .ctext = "\xB6\x6C\x1E\x44\x46\xDD\x95\x57" - "\xE5\x78\xE2\x23\xB0\xB7\x68\x01" - "\x7B\x23\xB2\x67\xBB\x02\x34\xAE" - "\x46\x26\xBF\x44\x3F\x21\x97\x76" - "\x43\x6F\xB1\x9F\xD0\xE8\x86\x6F" - "\xCD\x0D\xE9\xA9\x53\x8F\x4A\x09" - "\xCA\x9A\xC0\x73\x2E\x30\xBC\xF9" - "\x8E\x4F\x13\xE4\xB9\xE2\x01\xD9", - .len = 64, - }, { /* Set 3, vector 27 */ - .key = "\x1B\x1C\x1D\x1E\x1F\x20\x21\x22" - "\x23\x24\x25\x26\x27\x28\x29\x2A" - "\x2B\x2C\x2D\x2E\x2F\x30\x31\x32" - "\x33\x34\x35\x36\x37\x38\x39\x3A", - .klen = 32, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00", - .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00", - .ctext = "\xAE\x39\x50\x8E\xAC\x9A\xEC\xE7" - "\xBF\x97\xBB\x20\xB9\xDE\xE4\x1F" - "\x87\xD9\x47\xF8\x28\x91\x35\x98" - "\xDB\x72\xCC\x23\x29\x48\x56\x5E" - "\x83\x7E\x0B\xF3\x7D\x5D\x38\x7B" - "\x2D\x71\x02\xB4\x3B\xB5\xD8\x23" - "\xB0\x4A\xDF\x3C\xEC\xB6\xD9\x3B" - "\x9B\xA7\x52\xBE\xC5\xD4\x50\x59" - "\x15\x14\xB4\x0E\x40\xE6\x53\xD1" - "\x83\x9C\x5B\xA0\x92\x29\x6B\x5E" - "\x96\x5B\x1E\x2F\xD3\xAC\xC1\x92" - "\xB1\x41\x3F\x19\x2F\xC4\x3B\xC6" - "\x95\x46\x45\x54\xE9\x75\x03\x08" - "\x44\xAF\xE5\x8A\x81\x12\x09", - .len = 111, - }, { /* Set 5, vector 27 */ - .key = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .klen = 32, - .iv = "\x00\x00\x00\x10\x00\x00\x00\x00", - .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00", - .ctext = "\xD2\xDB\x1A\x5C\xF1\xC1\xAC\xDB" - "\xE8\x1A\x7A\x43\x40\xEF\x53\x43" - "\x5E\x7F\x4B\x1A\x50\x52\x3F\x8D" - "\x28\x3D\xCF\x85\x1D\x69\x6E\x60" - "\xF2\xDE\x74\x56\x18\x1B\x84\x10" - "\xD4\x62\xBA\x60\x50\xF0\x61\xF2" - "\x1C\x78\x7F\xC1\x24\x34\xAF\x58" - "\xBF\x2C\x59\xCA\x90\x77\xF3\xB0" - "\x5B\x4A\xDF\x89\xCE\x2C\x2F\xFC" - "\x67\xF0\xE3\x45\xE8\xB3\xB3\x75" - "\xA0\x95\x71\xA1\x29\x39\x94\xCA" - "\x45\x2F\xBD\xCB\x10\xB6\xBE\x9F" - "\x8E\xF9\xB2\x01\x0A\x5A\x0A\xB7" - "\x6B\x9D\x70\x8E\x4B\xD6\x2F\xCD" - "\x2E\x40\x48\x75\xE9\xE2\x21\x45" - "\x0B\xC9\xB6\xB5\x66\xBC\x9A\x59" - "\x5A", - .len = 129, - }, { /* large test vector generated using Crypto++ */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", - .klen = 32, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .ptext = - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x03\x06\x09\x0c\x0f\x12\x15" - "\x18\x1b\x1e\x21\x24\x27\x2a\x2d" - "\x30\x33\x36\x39\x3c\x3f\x42\x45" - "\x48\x4b\x4e\x51\x54\x57\x5a\x5d" - "\x60\x63\x66\x69\x6c\x6f\x72\x75" - "\x78\x7b\x7e\x81\x84\x87\x8a\x8d" - "\x90\x93\x96\x99\x9c\x9f\xa2\xa5" - "\xa8\xab\xae\xb1\xb4\xb7\xba\xbd" - "\xc0\xc3\xc6\xc9\xcc\xcf\xd2\xd5" - "\xd8\xdb\xde\xe1\xe4\xe7\xea\xed" - "\xf0\xf3\xf6\xf9\xfc\xff\x02\x05" - "\x08\x0b\x0e\x11\x14\x17\x1a\x1d" - "\x20\x23\x26\x29\x2c\x2f\x32\x35" - "\x38\x3b\x3e\x41\x44\x47\x4a\x4d" - "\x50\x53\x56\x59\x5c\x5f\x62\x65" - "\x68\x6b\x6e\x71\x74\x77\x7a\x7d" - "\x80\x83\x86\x89\x8c\x8f\x92\x95" - "\x98\x9b\x9e\xa1\xa4\xa7\xaa\xad" - "\xb0\xb3\xb6\xb9\xbc\xbf\xc2\xc5" - "\xc8\xcb\xce\xd1\xd4\xd7\xda\xdd" - "\xe0\xe3\xe6\xe9\xec\xef\xf2\xf5" - "\xf8\xfb\xfe\x01\x04\x07\x0a\x0d" - "\x10\x13\x16\x19\x1c\x1f\x22\x25" - "\x28\x2b\x2e\x31\x34\x37\x3a\x3d" - "\x40\x43\x46\x49\x4c\x4f\x52\x55" - "\x58\x5b\x5e\x61\x64\x67\x6a\x6d" - "\x70\x73\x76\x79\x7c\x7f\x82\x85" - "\x88\x8b\x8e\x91\x94\x97\x9a\x9d" - "\xa0\xa3\xa6\xa9\xac\xaf\xb2\xb5" - "\xb8\xbb\xbe\xc1\xc4\xc7\xca\xcd" - "\xd0\xd3\xd6\xd9\xdc\xdf\xe2\xe5" - "\xe8\xeb\xee\xf1\xf4\xf7\xfa\xfd" - "\x00\x05\x0a\x0f\x14\x19\x1e\x23" - "\x28\x2d\x32\x37\x3c\x41\x46\x4b" - "\x50\x55\x5a\x5f\x64\x69\x6e\x73" - "\x78\x7d\x82\x87\x8c\x91\x96\x9b" - "\xa0\xa5\xaa\xaf\xb4\xb9\xbe\xc3" - "\xc8\xcd\xd2\xd7\xdc\xe1\xe6\xeb" - "\xf0\xf5\xfa\xff\x04\x09\x0e\x13" - "\x18\x1d\x22\x27\x2c\x31\x36\x3b" - "\x40\x45\x4a\x4f\x54\x59\x5e\x63" - "\x68\x6d\x72\x77\x7c\x81\x86\x8b" - "\x90\x95\x9a\x9f\xa4\xa9\xae\xb3" - "\xb8\xbd\xc2\xc7\xcc\xd1\xd6\xdb" - "\xe0\xe5\xea\xef\xf4\xf9\xfe\x03" - "\x08\x0d\x12\x17\x1c\x21\x26\x2b" - "\x30\x35\x3a\x3f\x44\x49\x4e\x53" - "\x58\x5d\x62\x67\x6c\x71\x76\x7b" - "\x80\x85\x8a\x8f\x94\x99\x9e\xa3" - "\xa8\xad\xb2\xb7\xbc\xc1\xc6\xcb" - "\xd0\xd5\xda\xdf\xe4\xe9\xee\xf3" - "\xf8\xfd\x02\x07\x0c\x11\x16\x1b" - "\x20\x25\x2a\x2f\x34\x39\x3e\x43" - "\x48\x4d\x52\x57\x5c\x61\x66\x6b" - "\x70\x75\x7a\x7f\x84\x89\x8e\x93" - "\x98\x9d\xa2\xa7\xac\xb1\xb6\xbb" - "\xc0\xc5\xca\xcf\xd4\xd9\xde\xe3" - "\xe8\xed\xf2\xf7\xfc\x01\x06\x0b" - "\x10\x15\x1a\x1f\x24\x29\x2e\x33" - "\x38\x3d\x42\x47\x4c\x51\x56\x5b" - "\x60\x65\x6a\x6f\x74\x79\x7e\x83" - "\x88\x8d\x92\x97\x9c\xa1\xa6\xab" - "\xb0\xb5\xba\xbf\xc4\xc9\xce\xd3" - "\xd8\xdd\xe2\xe7\xec\xf1\xf6\xfb" - "\x00\x07\x0e\x15\x1c\x23\x2a\x31" - "\x38\x3f\x46\x4d\x54\x5b\x62\x69" - "\x70\x77\x7e\x85\x8c\x93\x9a\xa1" - "\xa8\xaf\xb6\xbd\xc4\xcb\xd2\xd9" - "\xe0\xe7\xee\xf5\xfc\x03\x0a\x11" - "\x18\x1f\x26\x2d\x34\x3b\x42\x49" - "\x50\x57\x5e\x65\x6c\x73\x7a\x81" - "\x88\x8f\x96\x9d\xa4\xab\xb2\xb9" - "\xc0\xc7\xce\xd5\xdc\xe3\xea\xf1" - "\xf8\xff\x06\x0d\x14\x1b\x22\x29" - "\x30\x37\x3e\x45\x4c\x53\x5a\x61" - "\x68\x6f\x76\x7d\x84\x8b\x92\x99" - "\xa0\xa7\xae\xb5\xbc\xc3\xca\xd1" - "\xd8\xdf\xe6\xed\xf4\xfb\x02\x09" - "\x10\x17\x1e\x25\x2c\x33\x3a\x41" - "\x48\x4f\x56\x5d\x64\x6b\x72\x79" - "\x80\x87\x8e\x95\x9c\xa3\xaa\xb1" - "\xb8\xbf\xc6\xcd\xd4\xdb\xe2\xe9" - "\xf0\xf7\xfe\x05\x0c\x13\x1a\x21" - "\x28\x2f\x36\x3d\x44\x4b\x52\x59" - "\x60\x67\x6e\x75\x7c\x83\x8a\x91" - "\x98\x9f\xa6\xad\xb4\xbb\xc2\xc9" - "\xd0\xd7\xde\xe5\xec\xf3\xfa\x01" - "\x08\x0f\x16\x1d\x24\x2b\x32\x39" - "\x40\x47\x4e\x55\x5c\x63\x6a\x71" - "\x78\x7f\x86\x8d\x94\x9b\xa2\xa9" - "\xb0\xb7\xbe\xc5\xcc\xd3\xda\xe1" - "\xe8\xef\xf6\xfd\x04\x0b\x12\x19" - "\x20\x27\x2e\x35\x3c\x43\x4a\x51" - "\x58\x5f\x66\x6d\x74\x7b\x82\x89" - "\x90\x97\x9e\xa5\xac\xb3\xba\xc1" - "\xc8\xcf\xd6\xdd\xe4\xeb\xf2\xf9" - "\x00\x09\x12\x1b\x24\x2d\x36\x3f" - "\x48\x51\x5a\x63\x6c\x75\x7e\x87" - "\x90\x99\xa2\xab\xb4\xbd\xc6\xcf" - "\xd8\xe1\xea\xf3\xfc\x05\x0e\x17" - "\x20\x29\x32\x3b\x44\x4d\x56\x5f" - "\x68\x71\x7a\x83\x8c\x95\x9e\xa7" - "\xb0\xb9\xc2\xcb\xd4\xdd\xe6\xef" - "\xf8\x01\x0a\x13\x1c\x25\x2e\x37" - "\x40\x49\x52\x5b\x64\x6d\x76\x7f" - "\x88\x91\x9a\xa3\xac\xb5\xbe\xc7" - "\xd0\xd9\xe2\xeb\xf4\xfd\x06\x0f" - "\x18\x21\x2a\x33\x3c\x45\x4e\x57" - "\x60\x69\x72\x7b\x84\x8d\x96\x9f" - "\xa8\xb1\xba\xc3\xcc\xd5\xde\xe7" - "\xf0\xf9\x02\x0b\x14\x1d\x26\x2f" - "\x38\x41\x4a\x53\x5c\x65\x6e\x77" - "\x80\x89\x92\x9b\xa4\xad\xb6\xbf" - "\xc8\xd1\xda\xe3\xec\xf5\xfe\x07" - "\x10\x19\x22\x2b\x34\x3d\x46\x4f" - "\x58\x61\x6a\x73\x7c\x85\x8e\x97" - "\xa0\xa9\xb2\xbb\xc4\xcd\xd6\xdf" - "\xe8\xf1\xfa\x03\x0c\x15\x1e\x27" - "\x30\x39\x42\x4b\x54\x5d\x66\x6f" - "\x78\x81\x8a\x93\x9c\xa5\xae\xb7" - "\xc0\xc9\xd2\xdb\xe4\xed\xf6\xff" - "\x08\x11\x1a\x23\x2c\x35\x3e\x47" - "\x50\x59\x62\x6b\x74\x7d\x86\x8f" - "\x98\xa1\xaa\xb3\xbc\xc5\xce\xd7" - "\xe0\xe9\xf2\xfb\x04\x0d\x16\x1f" - "\x28\x31\x3a\x43\x4c\x55\x5e\x67" - "\x70\x79\x82\x8b\x94\x9d\xa6\xaf" - "\xb8\xc1\xca\xd3\xdc\xe5\xee\xf7" - "\x00\x0b\x16\x21\x2c\x37\x42\x4d" - "\x58\x63\x6e\x79\x84\x8f\x9a\xa5" - "\xb0\xbb\xc6\xd1\xdc\xe7\xf2\xfd" - "\x08\x13\x1e\x29\x34\x3f\x4a\x55" - "\x60\x6b\x76\x81\x8c\x97\xa2\xad" - "\xb8\xc3\xce\xd9\xe4\xef\xfa\x05" - "\x10\x1b\x26\x31\x3c\x47\x52\x5d" - "\x68\x73\x7e\x89\x94\x9f\xaa\xb5" - "\xc0\xcb\xd6\xe1\xec\xf7\x02\x0d" - "\x18\x23\x2e\x39\x44\x4f\x5a\x65" - "\x70\x7b\x86\x91\x9c\xa7\xb2\xbd" - "\xc8\xd3\xde\xe9\xf4\xff\x0a\x15" - "\x20\x2b\x36\x41\x4c\x57\x62\x6d" - "\x78\x83\x8e\x99\xa4\xaf\xba\xc5" - "\xd0\xdb\xe6\xf1\xfc\x07\x12\x1d" - "\x28\x33\x3e\x49\x54\x5f\x6a\x75" - "\x80\x8b\x96\xa1\xac\xb7\xc2\xcd" - "\xd8\xe3\xee\xf9\x04\x0f\x1a\x25" - "\x30\x3b\x46\x51\x5c\x67\x72\x7d" - "\x88\x93\x9e\xa9\xb4\xbf\xca\xd5" - "\xe0\xeb\xf6\x01\x0c\x17\x22\x2d" - "\x38\x43\x4e\x59\x64\x6f\x7a\x85" - "\x90\x9b\xa6\xb1\xbc\xc7\xd2\xdd" - "\xe8\xf3\xfe\x09\x14\x1f\x2a\x35" - "\x40\x4b\x56\x61\x6c\x77\x82\x8d" - "\x98\xa3\xae\xb9\xc4\xcf\xda\xe5" - "\xf0\xfb\x06\x11\x1c\x27\x32\x3d" - "\x48\x53\x5e\x69\x74\x7f\x8a\x95" - "\xa0\xab\xb6\xc1\xcc\xd7\xe2\xed" - "\xf8\x03\x0e\x19\x24\x2f\x3a\x45" - "\x50\x5b\x66\x71\x7c\x87\x92\x9d" - "\xa8\xb3\xbe\xc9\xd4\xdf\xea\xf5" - "\x00\x0d\x1a\x27\x34\x41\x4e\x5b" - "\x68\x75\x82\x8f\x9c\xa9\xb6\xc3" - "\xd0\xdd\xea\xf7\x04\x11\x1e\x2b" - "\x38\x45\x52\x5f\x6c\x79\x86\x93" - "\xa0\xad\xba\xc7\xd4\xe1\xee\xfb" - "\x08\x15\x22\x2f\x3c\x49\x56\x63" - "\x70\x7d\x8a\x97\xa4\xb1\xbe\xcb" - "\xd8\xe5\xf2\xff\x0c\x19\x26\x33" - "\x40\x4d\x5a\x67\x74\x81\x8e\x9b" - "\xa8\xb5\xc2\xcf\xdc\xe9\xf6\x03" - "\x10\x1d\x2a\x37\x44\x51\x5e\x6b" - "\x78\x85\x92\x9f\xac\xb9\xc6\xd3" - "\xe0\xed\xfa\x07\x14\x21\x2e\x3b" - "\x48\x55\x62\x6f\x7c\x89\x96\xa3" - "\xb0\xbd\xca\xd7\xe4\xf1\xfe\x0b" - "\x18\x25\x32\x3f\x4c\x59\x66\x73" - "\x80\x8d\x9a\xa7\xb4\xc1\xce\xdb" - "\xe8\xf5\x02\x0f\x1c\x29\x36\x43" - "\x50\x5d\x6a\x77\x84\x91\x9e\xab" - "\xb8\xc5\xd2\xdf\xec\xf9\x06\x13" - "\x20\x2d\x3a\x47\x54\x61\x6e\x7b" - "\x88\x95\xa2\xaf\xbc\xc9\xd6\xe3" - "\xf0\xfd\x0a\x17\x24\x31\x3e\x4b" - "\x58\x65\x72\x7f\x8c\x99\xa6\xb3" - "\xc0\xcd\xda\xe7\xf4\x01\x0e\x1b" - "\x28\x35\x42\x4f\x5c\x69\x76\x83" - "\x90\x9d\xaa\xb7\xc4\xd1\xde\xeb" - "\xf8\x05\x12\x1f\x2c\x39\x46\x53" - "\x60\x6d\x7a\x87\x94\xa1\xae\xbb" - "\xc8\xd5\xe2\xef\xfc\x09\x16\x23" - "\x30\x3d\x4a\x57\x64\x71\x7e\x8b" - "\x98\xa5\xb2\xbf\xcc\xd9\xe6\xf3" - "\x00\x0f\x1e\x2d\x3c\x4b\x5a\x69" - "\x78\x87\x96\xa5\xb4\xc3\xd2\xe1" - "\xf0\xff\x0e\x1d\x2c\x3b\x4a\x59" - "\x68\x77\x86\x95\xa4\xb3\xc2\xd1" - "\xe0\xef\xfe\x0d\x1c\x2b\x3a\x49" - "\x58\x67\x76\x85\x94\xa3\xb2\xc1" - "\xd0\xdf\xee\xfd\x0c\x1b\x2a\x39" - "\x48\x57\x66\x75\x84\x93\xa2\xb1" - "\xc0\xcf\xde\xed\xfc\x0b\x1a\x29" - "\x38\x47\x56\x65\x74\x83\x92\xa1" - "\xb0\xbf\xce\xdd\xec\xfb\x0a\x19" - "\x28\x37\x46\x55\x64\x73\x82\x91" - "\xa0\xaf\xbe\xcd\xdc\xeb\xfa\x09" - "\x18\x27\x36\x45\x54\x63\x72\x81" - "\x90\x9f\xae\xbd\xcc\xdb\xea\xf9" - "\x08\x17\x26\x35\x44\x53\x62\x71" - "\x80\x8f\x9e\xad\xbc\xcb\xda\xe9" - "\xf8\x07\x16\x25\x34\x43\x52\x61" - "\x70\x7f\x8e\x9d\xac\xbb\xca\xd9" - "\xe8\xf7\x06\x15\x24\x33\x42\x51" - "\x60\x6f\x7e\x8d\x9c\xab\xba\xc9" - "\xd8\xe7\xf6\x05\x14\x23\x32\x41" - "\x50\x5f\x6e\x7d\x8c\x9b\xaa\xb9" - "\xc8\xd7\xe6\xf5\x04\x13\x22\x31" - "\x40\x4f\x5e\x6d\x7c\x8b\x9a\xa9" - "\xb8\xc7\xd6\xe5\xf4\x03\x12\x21" - "\x30\x3f\x4e\x5d\x6c\x7b\x8a\x99" - "\xa8\xb7\xc6\xd5\xe4\xf3\x02\x11" - "\x20\x2f\x3e\x4d\x5c\x6b\x7a\x89" - "\x98\xa7\xb6\xc5\xd4\xe3\xf2\x01" - "\x10\x1f\x2e\x3d\x4c\x5b\x6a\x79" - "\x88\x97\xa6\xb5\xc4\xd3\xe2\xf1" - "\x00\x11\x22\x33\x44\x55\x66\x77" - "\x88\x99\xaa\xbb\xcc\xdd\xee\xff" - "\x10\x21\x32\x43\x54\x65\x76\x87" - "\x98\xa9\xba\xcb\xdc\xed\xfe\x0f" - "\x20\x31\x42\x53\x64\x75\x86\x97" - "\xa8\xb9\xca\xdb\xec\xfd\x0e\x1f" - "\x30\x41\x52\x63\x74\x85\x96\xa7" - "\xb8\xc9\xda\xeb\xfc\x0d\x1e\x2f" - "\x40\x51\x62\x73\x84\x95\xa6\xb7" - "\xc8\xd9\xea\xfb\x0c\x1d\x2e\x3f" - "\x50\x61\x72\x83\x94\xa5\xb6\xc7" - "\xd8\xe9\xfa\x0b\x1c\x2d\x3e\x4f" - "\x60\x71\x82\x93\xa4\xb5\xc6\xd7" - "\xe8\xf9\x0a\x1b\x2c\x3d\x4e\x5f" - "\x70\x81\x92\xa3\xb4\xc5\xd6\xe7" - "\xf8\x09\x1a\x2b\x3c\x4d\x5e\x6f" - "\x80\x91\xa2\xb3\xc4\xd5\xe6\xf7" - "\x08\x19\x2a\x3b\x4c\x5d\x6e\x7f" - "\x90\xa1\xb2\xc3\xd4\xe5\xf6\x07" - "\x18\x29\x3a\x4b\x5c\x6d\x7e\x8f" - "\xa0\xb1\xc2\xd3\xe4\xf5\x06\x17" - "\x28\x39\x4a\x5b\x6c\x7d\x8e\x9f" - "\xb0\xc1\xd2\xe3\xf4\x05\x16\x27" - "\x38\x49\x5a\x6b\x7c\x8d\x9e\xaf" - "\xc0\xd1\xe2\xf3\x04\x15\x26\x37" - "\x48\x59\x6a\x7b\x8c\x9d\xae\xbf" - "\xd0\xe1\xf2\x03\x14\x25\x36\x47" - "\x58\x69\x7a\x8b\x9c\xad\xbe\xcf" - "\xe0\xf1\x02\x13\x24\x35\x46\x57" - "\x68\x79\x8a\x9b\xac\xbd\xce\xdf" - "\xf0\x01\x12\x23\x34\x45\x56\x67" - "\x78\x89\x9a\xab\xbc\xcd\xde\xef" - "\x00\x13\x26\x39\x4c\x5f\x72\x85" - "\x98\xab\xbe\xd1\xe4\xf7\x0a\x1d" - "\x30\x43\x56\x69\x7c\x8f\xa2\xb5" - "\xc8\xdb\xee\x01\x14\x27\x3a\x4d" - "\x60\x73\x86\x99\xac\xbf\xd2\xe5" - "\xf8\x0b\x1e\x31\x44\x57\x6a\x7d" - "\x90\xa3\xb6\xc9\xdc\xef\x02\x15" - "\x28\x3b\x4e\x61\x74\x87\x9a\xad" - "\xc0\xd3\xe6\xf9\x0c\x1f\x32\x45" - "\x58\x6b\x7e\x91\xa4\xb7\xca\xdd" - "\xf0\x03\x16\x29\x3c\x4f\x62\x75" - "\x88\x9b\xae\xc1\xd4\xe7\xfa\x0d" - "\x20\x33\x46\x59\x6c\x7f\x92\xa5" - "\xb8\xcb\xde\xf1\x04\x17\x2a\x3d" - "\x50\x63\x76\x89\x9c\xaf\xc2\xd5" - "\xe8\xfb\x0e\x21\x34\x47\x5a\x6d" - "\x80\x93\xa6\xb9\xcc\xdf\xf2\x05" - "\x18\x2b\x3e\x51\x64\x77\x8a\x9d" - "\xb0\xc3\xd6\xe9\xfc\x0f\x22\x35" - "\x48\x5b\x6e\x81\x94\xa7\xba\xcd" - "\xe0\xf3\x06\x19\x2c\x3f\x52\x65" - "\x78\x8b\x9e\xb1\xc4\xd7\xea\xfd" - "\x10\x23\x36\x49\x5c\x6f\x82\x95" - "\xa8\xbb\xce\xe1\xf4\x07\x1a\x2d" - "\x40\x53\x66\x79\x8c\x9f\xb2\xc5" - "\xd8\xeb\xfe\x11\x24\x37\x4a\x5d" - "\x70\x83\x96\xa9\xbc\xcf\xe2\xf5" - "\x08\x1b\x2e\x41\x54\x67\x7a\x8d" - "\xa0\xb3\xc6\xd9\xec\xff\x12\x25" - "\x38\x4b\x5e\x71\x84\x97\xaa\xbd" - "\xd0\xe3\xf6\x09\x1c\x2f\x42\x55" - "\x68\x7b\x8e\xa1\xb4\xc7\xda\xed" - "\x00\x15\x2a\x3f\x54\x69\x7e\x93" - "\xa8\xbd\xd2\xe7\xfc\x11\x26\x3b" - "\x50\x65\x7a\x8f\xa4\xb9\xce\xe3" - "\xf8\x0d\x22\x37\x4c\x61\x76\x8b" - "\xa0\xb5\xca\xdf\xf4\x09\x1e\x33" - "\x48\x5d\x72\x87\x9c\xb1\xc6\xdb" - "\xf0\x05\x1a\x2f\x44\x59\x6e\x83" - "\x98\xad\xc2\xd7\xec\x01\x16\x2b" - "\x40\x55\x6a\x7f\x94\xa9\xbe\xd3" - "\xe8\xfd\x12\x27\x3c\x51\x66\x7b" - "\x90\xa5\xba\xcf\xe4\xf9\x0e\x23" - "\x38\x4d\x62\x77\x8c\xa1\xb6\xcb" - "\xe0\xf5\x0a\x1f\x34\x49\x5e\x73" - "\x88\x9d\xb2\xc7\xdc\xf1\x06\x1b" - "\x30\x45\x5a\x6f\x84\x99\xae\xc3" - "\xd8\xed\x02\x17\x2c\x41\x56\x6b" - "\x80\x95\xaa\xbf\xd4\xe9\xfe\x13" - "\x28\x3d\x52\x67\x7c\x91\xa6\xbb" - "\xd0\xe5\xfa\x0f\x24\x39\x4e\x63" - "\x78\x8d\xa2\xb7\xcc\xe1\xf6\x0b" - "\x20\x35\x4a\x5f\x74\x89\x9e\xb3" - "\xc8\xdd\xf2\x07\x1c\x31\x46\x5b" - "\x70\x85\x9a\xaf\xc4\xd9\xee\x03" - "\x18\x2d\x42\x57\x6c\x81\x96\xab" - "\xc0\xd5\xea\xff\x14\x29\x3e\x53" - "\x68\x7d\x92\xa7\xbc\xd1\xe6\xfb" - "\x10\x25\x3a\x4f\x64\x79\x8e\xa3" - "\xb8\xcd\xe2\xf7\x0c\x21\x36\x4b" - "\x60\x75\x8a\x9f\xb4\xc9\xde\xf3" - "\x08\x1d\x32\x47\x5c\x71\x86\x9b" - "\xb0\xc5\xda\xef\x04\x19\x2e\x43" - "\x58\x6d\x82\x97\xac\xc1\xd6\xeb" - "\x00\x17\x2e\x45\x5c\x73\x8a\xa1" - "\xb8\xcf\xe6\xfd\x14\x2b\x42\x59" - "\x70\x87\x9e\xb5\xcc\xe3\xfa\x11" - "\x28\x3f\x56\x6d\x84\x9b\xb2\xc9" - "\xe0\xf7\x0e\x25\x3c\x53\x6a\x81" - "\x98\xaf\xc6\xdd\xf4\x0b\x22\x39" - "\x50\x67\x7e\x95\xac\xc3\xda\xf1" - "\x08\x1f\x36\x4d\x64\x7b\x92\xa9" - "\xc0\xd7\xee\x05\x1c\x33\x4a\x61" - "\x78\x8f\xa6\xbd\xd4\xeb\x02\x19" - "\x30\x47\x5e\x75\x8c\xa3\xba\xd1" - "\xe8\xff\x16\x2d\x44\x5b\x72\x89" - "\xa0\xb7\xce\xe5\xfc\x13\x2a\x41" - "\x58\x6f\x86\x9d\xb4\xcb\xe2\xf9" - "\x10\x27\x3e\x55\x6c\x83\x9a\xb1" - "\xc8\xdf\xf6\x0d\x24\x3b\x52\x69" - "\x80\x97\xae\xc5\xdc\xf3\x0a\x21" - "\x38\x4f\x66\x7d\x94\xab\xc2\xd9" - "\xf0\x07\x1e\x35\x4c\x63\x7a\x91" - "\xa8\xbf\xd6\xed\x04\x1b\x32\x49" - "\x60\x77\x8e\xa5\xbc\xd3\xea\x01" - "\x18\x2f\x46\x5d\x74\x8b\xa2\xb9" - "\xd0\xe7\xfe\x15\x2c\x43\x5a\x71" - "\x88\x9f\xb6\xcd\xe4\xfb\x12\x29" - "\x40\x57\x6e\x85\x9c\xb3\xca\xe1" - "\xf8\x0f\x26\x3d\x54\x6b\x82\x99" - "\xb0\xc7\xde\xf5\x0c\x23\x3a\x51" - "\x68\x7f\x96\xad\xc4\xdb\xf2\x09" - "\x20\x37\x4e\x65\x7c\x93\xaa\xc1" - "\xd8\xef\x06\x1d\x34\x4b\x62\x79" - "\x90\xa7\xbe\xd5\xec\x03\x1a\x31" - "\x48\x5f\x76\x8d\xa4\xbb\xd2\xe9" - "\x00\x19\x32\x4b\x64\x7d\x96\xaf" - "\xc8\xe1\xfa\x13\x2c\x45\x5e\x77" - "\x90\xa9\xc2\xdb\xf4\x0d\x26\x3f" - "\x58\x71\x8a\xa3\xbc\xd5\xee\x07" - "\x20\x39\x52\x6b\x84\x9d\xb6\xcf" - "\xe8\x01\x1a\x33\x4c\x65\x7e\x97" - "\xb0\xc9\xe2\xfb\x14\x2d\x46\x5f" - "\x78\x91\xaa\xc3\xdc\xf5\x0e\x27" - "\x40\x59\x72\x8b\xa4\xbd\xd6\xef" - "\x08\x21\x3a\x53\x6c\x85\x9e\xb7" - "\xd0\xe9\x02\x1b\x34\x4d\x66\x7f" - "\x98\xb1\xca\xe3\xfc\x15\x2e\x47" - "\x60\x79\x92\xab\xc4\xdd\xf6\x0f" - "\x28\x41\x5a\x73\x8c\xa5\xbe\xd7" - "\xf0\x09\x22\x3b\x54\x6d\x86\x9f" - "\xb8\xd1\xea\x03\x1c\x35\x4e\x67" - "\x80\x99\xb2\xcb\xe4\xfd\x16\x2f" - "\x48\x61\x7a\x93\xac\xc5\xde\xf7" - "\x10\x29\x42\x5b\x74\x8d\xa6\xbf" - "\xd8\xf1\x0a\x23\x3c\x55\x6e\x87" - "\xa0\xb9\xd2\xeb\x04\x1d\x36\x4f" - "\x68\x81\x9a\xb3\xcc\xe5\xfe\x17" - "\x30\x49\x62\x7b\x94\xad\xc6\xdf" - "\xf8\x11\x2a\x43\x5c\x75\x8e\xa7" - "\xc0\xd9\xf2\x0b\x24\x3d\x56\x6f" - "\x88\xa1\xba\xd3\xec\x05\x1e\x37" - "\x50\x69\x82\x9b\xb4\xcd\xe6\xff" - "\x18\x31\x4a\x63\x7c\x95\xae\xc7" - "\xe0\xf9\x12\x2b\x44\x5d\x76\x8f" - "\xa8\xc1\xda\xf3\x0c\x25\x3e\x57" - "\x70\x89\xa2\xbb\xd4\xed\x06\x1f" - "\x38\x51\x6a\x83\x9c\xb5\xce\xe7" - "\x00\x1b\x36\x51\x6c\x87\xa2\xbd" - "\xd8\xf3\x0e\x29\x44\x5f\x7a\x95" - "\xb0\xcb\xe6\x01\x1c\x37\x52\x6d" - "\x88\xa3\xbe\xd9\xf4\x0f\x2a\x45" - "\x60\x7b\x96\xb1\xcc\xe7\x02\x1d" - "\x38\x53\x6e\x89\xa4\xbf\xda\xf5" - "\x10\x2b\x46\x61\x7c\x97\xb2\xcd" - "\xe8\x03\x1e\x39\x54\x6f\x8a\xa5" - "\xc0\xdb\xf6\x11\x2c\x47\x62\x7d" - "\x98\xb3\xce\xe9\x04\x1f\x3a\x55" - "\x70\x8b\xa6\xc1\xdc\xf7\x12\x2d" - "\x48\x63\x7e\x99\xb4\xcf\xea\x05" - "\x20\x3b\x56\x71\x8c\xa7\xc2\xdd" - "\xf8\x13\x2e\x49\x64\x7f\x9a\xb5" - "\xd0\xeb\x06\x21\x3c\x57\x72\x8d" - "\xa8\xc3\xde\xf9\x14\x2f\x4a\x65" - "\x80\x9b\xb6\xd1\xec\x07\x22\x3d" - "\x58\x73\x8e\xa9\xc4\xdf\xfa\x15" - "\x30\x4b\x66\x81\x9c\xb7\xd2\xed" - "\x08\x23\x3e\x59\x74\x8f\xaa\xc5" - "\xe0\xfb\x16\x31\x4c\x67\x82\x9d" - "\xb8\xd3\xee\x09\x24\x3f\x5a\x75" - "\x90\xab\xc6\xe1\xfc\x17\x32\x4d" - "\x68\x83\x9e\xb9\xd4\xef\x0a\x25" - "\x40\x5b\x76\x91\xac\xc7\xe2\xfd" - "\x18\x33\x4e\x69\x84\x9f\xba\xd5" - "\xf0\x0b\x26\x41\x5c\x77\x92\xad" - "\xc8\xe3\xfe\x19\x34\x4f\x6a\x85" - "\xa0\xbb\xd6\xf1\x0c\x27\x42\x5d" - "\x78\x93\xae\xc9\xe4\xff\x1a\x35" - "\x50\x6b\x86\xa1\xbc\xd7\xf2\x0d" - "\x28\x43\x5e\x79\x94\xaf\xca\xe5" - "\x00\x1d\x3a\x57\x74\x91\xae\xcb" - "\xe8\x05\x22\x3f\x5c\x79\x96\xb3" - "\xd0\xed\x0a\x27\x44\x61\x7e\x9b" - "\xb8\xd5\xf2\x0f\x2c\x49\x66\x83" - "\xa0\xbd\xda\xf7\x14\x31\x4e\x6b" - "\x88\xa5\xc2\xdf\xfc\x19\x36\x53" - "\x70\x8d\xaa\xc7\xe4\x01\x1e\x3b" - "\x58\x75\x92\xaf\xcc\xe9\x06\x23" - "\x40\x5d\x7a\x97\xb4\xd1\xee\x0b" - "\x28\x45\x62\x7f\x9c\xb9\xd6\xf3" - "\x10\x2d\x4a\x67\x84\xa1\xbe\xdb" - "\xf8\x15\x32\x4f\x6c\x89\xa6\xc3" - "\xe0\xfd\x1a\x37\x54\x71\x8e\xab" - "\xc8\xe5\x02\x1f\x3c\x59\x76\x93" - "\xb0\xcd\xea\x07\x24\x41\x5e\x7b" - "\x98\xb5\xd2\xef\x0c\x29\x46\x63" - "\x80\x9d\xba\xd7\xf4\x11\x2e\x4b" - "\x68\x85\xa2\xbf\xdc\xf9\x16\x33" - "\x50\x6d\x8a\xa7\xc4\xe1\xfe\x1b" - "\x38\x55\x72\x8f\xac\xc9\xe6\x03" - "\x20\x3d\x5a\x77\x94\xb1\xce\xeb" - "\x08\x25\x42\x5f\x7c\x99\xb6\xd3" - "\xf0\x0d\x2a\x47\x64\x81\x9e\xbb" - "\xd8\xf5\x12\x2f\x4c\x69\x86\xa3" - "\xc0\xdd\xfa\x17\x34\x51\x6e\x8b" - "\xa8\xc5\xe2\xff\x1c\x39\x56\x73" - "\x90\xad\xca\xe7\x04\x21\x3e\x5b" - "\x78\x95\xb2\xcf\xec\x09\x26\x43" - "\x60\x7d\x9a\xb7\xd4\xf1\x0e\x2b" - "\x48\x65\x82\x9f\xbc\xd9\xf6\x13" - "\x30\x4d\x6a\x87\xa4\xc1\xde\xfb" - "\x18\x35\x52\x6f\x8c\xa9\xc6\xe3" - "\x00\x1f\x3e\x5d\x7c\x9b\xba\xd9" - "\xf8\x17\x36\x55\x74\x93\xb2\xd1" - "\xf0\x0f\x2e\x4d\x6c\x8b\xaa\xc9" - "\xe8\x07\x26\x45\x64\x83\xa2\xc1" - "\xe0\xff\x1e\x3d\x5c\x7b\x9a\xb9" - "\xd8\xf7\x16\x35\x54\x73\x92\xb1" - "\xd0\xef\x0e\x2d\x4c\x6b\x8a\xa9" - "\xc8\xe7\x06\x25\x44\x63\x82\xa1" - "\xc0\xdf\xfe\x1d\x3c\x5b\x7a\x99" - "\xb8\xd7\xf6\x15\x34\x53\x72\x91" - "\xb0\xcf\xee\x0d\x2c\x4b\x6a\x89" - "\xa8\xc7\xe6\x05\x24\x43\x62\x81" - "\xa0\xbf\xde\xfd\x1c\x3b\x5a\x79" - "\x98\xb7\xd6\xf5\x14\x33\x52\x71" - "\x90\xaf\xce\xed\x0c\x2b\x4a\x69" - "\x88\xa7\xc6\xe5\x04\x23\x42\x61" - "\x80\x9f\xbe\xdd\xfc\x1b\x3a\x59" - "\x78\x97\xb6\xd5\xf4\x13\x32\x51" - "\x70\x8f\xae\xcd\xec\x0b\x2a\x49" - "\x68\x87\xa6\xc5\xe4\x03\x22\x41" - "\x60\x7f\x9e\xbd\xdc\xfb\x1a\x39" - "\x58\x77\x96\xb5\xd4\xf3\x12\x31" - "\x50\x6f\x8e\xad\xcc\xeb\x0a\x29" - "\x48\x67\x86\xa5\xc4\xe3\x02\x21" - "\x40\x5f\x7e\x9d\xbc\xdb\xfa\x19" - "\x38\x57\x76\x95\xb4\xd3\xf2\x11" - "\x30\x4f\x6e\x8d\xac\xcb\xea\x09" - "\x28\x47\x66\x85\xa4\xc3\xe2\x01" - "\x20\x3f\x5e\x7d\x9c\xbb\xda\xf9" - "\x18\x37\x56\x75\x94\xb3\xd2\xf1" - "\x10\x2f\x4e\x6d\x8c\xab\xca\xe9" - "\x08\x27\x46\x65\x84\xa3\xc2\xe1" - "\x00\x21\x42\x63", - .ctext = - "\xb5\x81\xf5\x64\x18\x73\xe3\xf0" - "\x4c\x13\xf2\x77\x18\x60\x65\x5e" - "\x29\x01\xce\x98\x55\x53\xf9\x0c" - "\x2a\x08\xd5\x09\xb3\x57\x55\x56" - "\xc5\xe9\x56\x90\xcb\x6a\xa3\xc0" - "\xff\xc4\x79\xb4\xd2\x97\x5d\xc4" - "\x43\xd1\xfe\x94\x7b\x88\x06\x5a" - "\xb2\x9e\x2c\xfc\x44\x03\xb7\x90" - "\xa0\xc1\xba\x6a\x33\xb8\xc7\xb2" - "\x9d\xe1\x12\x4f\xc0\x64\xd4\x01" - "\xfe\x8c\x7a\x66\xf7\xe6\x5a\x91" - "\xbb\xde\x56\x86\xab\x65\x21\x30" - "\x00\x84\x65\x24\xa5\x7d\x85\xb4" - "\xe3\x17\xed\x3a\xb7\x6f\xb4\x0b" - "\x0b\xaf\x15\xae\x5a\x8f\xf2\x0c" - "\x2f\x27\xf4\x09\xd8\xd2\x96\xb7" - "\x71\xf2\xc5\x99\x4d\x7e\x7f\x75" - "\x77\x89\x30\x8b\x59\xdb\xa2\xb2" - "\xa0\xf3\x19\x39\x2b\xc5\x7e\x3f" - "\x4f\xd9\xd3\x56\x28\x97\x44\xdc" - "\xc0\x8b\x77\x24\xd9\x52\xe7\xc5" - "\xaf\xf6\x7d\x59\xb2\x44\x05\x1d" - "\xb1\xb0\x11\xa5\x0f\xec\x33\xe1" - "\x6d\x1b\x4e\x1f\xff\x57\x91\xb4" - "\x5b\x9a\x96\xc5\x53\xbc\xae\x20" - "\x3c\xbb\x14\xe2\xe8\x22\x33\xc1" - "\x5e\x76\x9e\x46\x99\xf6\x2a\x15" - "\xc6\x97\x02\xa0\x66\x43\xd1\xa6" - "\x31\xa6\x9f\xfb\xf4\xd3\x69\xe5" - "\xcd\x76\x95\xb8\x7a\x82\x7f\x21" - "\x45\xff\x3f\xce\x55\xf6\x95\x10" - "\x08\x77\x10\x43\xc6\xf3\x09\xe5" - "\x68\xe7\x3c\xad\x00\x52\x45\x0d" - "\xfe\x2d\xc6\xc2\x94\x8c\x12\x1d" - "\xe6\x25\xae\x98\x12\x8e\x19\x9c" - "\x81\x68\xb1\x11\xf6\x69\xda\xe3" - "\x62\x08\x18\x7a\x25\x49\x28\xac" - "\xba\x71\x12\x0b\xe4\xa2\xe5\xc7" - "\x5d\x8e\xec\x49\x40\x21\xbf\x5a" - "\x98\xf3\x02\x68\x55\x03\x7f\x8a" - "\xe5\x94\x0c\x32\x5c\x07\x82\x63" - "\xaf\x6f\x91\x40\x84\x8e\x52\x25" - "\xd0\xb0\x29\x53\x05\xe2\x50\x7a" - "\x34\xeb\xc9\x46\x20\xa8\x3d\xde" - "\x7f\x16\x5f\x36\xc5\x2e\xdc\xd1" - "\x15\x47\xc7\x50\x40\x6d\x91\xc5" - "\xe7\x93\x95\x1a\xd3\x57\xbc\x52" - "\x33\xee\x14\x19\x22\x52\x89\xa7" - "\x4a\x25\x56\x77\x4b\xca\xcf\x0a" - "\xe1\xf5\x35\x85\x30\x7e\x59\x4a" - "\xbd\x14\x5b\xdf\xe3\x46\xcb\xac" - "\x1f\x6c\x96\x0e\xf4\x81\xd1\x99" - "\xca\x88\x63\x3d\x02\x58\x6b\xa9" - "\xe5\x9f\xb3\x00\xb2\x54\xc6\x74" - "\x1c\xbf\x46\xab\x97\xcc\xf8\x54" - "\x04\x07\x08\x52\xe6\xc0\xda\x93" - "\x74\x7d\x93\x99\x5d\x78\x68\xa6" - "\x2e\x6b\xd3\x6a\x69\xcc\x12\x6b" - "\xd4\xc7\xa5\xc6\xe7\xf6\x03\x04" - "\x5d\xcd\x61\x5e\x17\x40\xdc\xd1" - "\x5c\xf5\x08\xdf\x5c\x90\x85\xa4" - "\xaf\xf6\x78\xbb\x0d\xf1\xf4\xa4" - "\x54\x26\x72\x9e\x61\xfa\x86\xcf" - "\xe8\x9e\xa1\xe0\xc7\x48\x23\xae" - "\x5a\x90\xae\x75\x0a\x74\x18\x89" - "\x05\xb1\x92\xb2\x7f\xd0\x1b\xa6" - "\x62\x07\x25\x01\xc7\xc2\x4f\xf9" - "\xe8\xfe\x63\x95\x80\x07\xb4\x26" - "\xcc\xd1\x26\xb6\xc4\x3f\x9e\xcb" - "\x8e\x3b\x2e\x44\x16\xd3\x10\x9a" - "\x95\x08\xeb\xc8\xcb\xeb\xbf\x6f" - "\x0b\xcd\x1f\xc8\xca\x86\xaa\xec" - "\x33\xe6\x69\xf4\x45\x25\x86\x3a" - "\x22\x94\x4f\x00\x23\x6a\x44\xc2" - "\x49\x97\x33\xab\x36\x14\x0a\x70" - "\x24\xc3\xbe\x04\x3b\x79\xa0\xf9" - "\xb8\xe7\x76\x29\x22\x83\xd7\xf2" - "\x94\xf4\x41\x49\xba\x5f\x7b\x07" - "\xb5\xfb\xdb\x03\x1a\x9f\xb6\x4c" - "\xc2\x2e\x37\x40\x49\xc3\x38\x16" - "\xe2\x4f\x77\x82\xb0\x68\x4c\x71" - "\x1d\x57\x61\x9c\xd9\x4e\x54\x99" - "\x47\x13\x28\x73\x3c\xbb\x00\x90" - "\xf3\x4d\xc9\x0e\xfd\xe7\xb1\x71" - "\xd3\x15\x79\xbf\xcc\x26\x2f\xbd" - "\xad\x6c\x50\x69\x6c\x3e\x6d\x80" - "\x9a\xea\x78\xaf\x19\xb2\x0d\x4d" - "\xad\x04\x07\xae\x22\x90\x4a\x93" - "\x32\x0e\x36\x9b\x1b\x46\xba\x3b" - "\xb4\xac\xc6\xd1\xa2\x31\x53\x3b" - "\x2a\x3d\x45\xfe\x03\x61\x10\x85" - "\x17\x69\xa6\x78\xcc\x6c\x87\x49" - "\x53\xf9\x80\x10\xde\x80\xa2\x41" - "\x6a\xc3\x32\x02\xad\x6d\x3c\x56" - "\x00\x71\x51\x06\xa7\xbd\xfb\xef" - "\x3c\xb5\x9f\xfc\x48\x7d\x53\x7c" - "\x66\xb0\x49\x23\xc4\x47\x10\x0e" - "\xe5\x6c\x74\x13\xe6\xc5\x3f\xaa" - "\xde\xff\x07\x44\xdd\x56\x1b\xad" - "\x09\x77\xfb\x5b\x12\xb8\x0d\x38" - "\x17\x37\x35\x7b\x9b\xbc\xfe\xd4" - "\x7e\x8b\xda\x7e\x5b\x04\xa7\x22" - "\xa7\x31\xa1\x20\x86\xc7\x1b\x99" - "\xdb\xd1\x89\xf4\x94\xa3\x53\x69" - "\x8d\xe7\xe8\x74\x11\x8d\x74\xd6" - "\x07\x37\x91\x9f\xfd\x67\x50\x3a" - "\xc9\xe1\xf4\x36\xd5\xa0\x47\xd1" - "\xf9\xe5\x39\xa3\x31\xac\x07\x36" - "\x23\xf8\x66\x18\x14\x28\x34\x0f" - "\xb8\xd0\xe7\x29\xb3\x04\x4b\x55" - "\x01\x41\xb2\x75\x8d\xcb\x96\x85" - "\x3a\xfb\xab\x2b\x9e\xfa\x58\x20" - "\x44\x1f\xc0\x14\x22\x75\x61\xe8" - "\xaa\x19\xcf\xf1\x82\x56\xf4\xd7" - "\x78\x7b\x3d\x5f\xb3\x9e\x0b\x8a" - "\x57\x50\xdb\x17\x41\x65\x4d\xa3" - "\x02\xc9\x9c\x9c\x53\xfb\x39\x39" - "\x9b\x1d\x72\x24\xda\xb7\x39\xbe" - "\x13\x3b\xfa\x29\xda\x9e\x54\x64" - "\x6e\xba\xd8\xa1\xcb\xb3\x36\xfa" - "\xcb\x47\x85\xe9\x61\x38\xbc\xbe" - "\xc5\x00\x38\x2a\x54\xf7\xc4\xb9" - "\xb3\xd3\x7b\xa0\xa0\xf8\x72\x7f" - "\x8c\x8e\x82\x0e\xc6\x1c\x75\x9d" - "\xca\x8e\x61\x87\xde\xad\x80\xd2" - "\xf5\xf9\x80\xef\x15\x75\xaf\xf5" - "\x80\xfb\xff\x6d\x1e\x25\xb7\x40" - "\x61\x6a\x39\x5a\x6a\xb5\x31\xab" - "\x97\x8a\x19\x89\x44\x40\xc0\xa6" - "\xb4\x4e\x30\x32\x7b\x13\xe7\x67" - "\xa9\x8b\x57\x04\xc2\x01\xa6\xf4" - "\x28\x99\xad\x2c\x76\xa3\x78\xc2" - "\x4a\xe6\xca\x5c\x50\x6a\xc1\xb0" - "\x62\x4b\x10\x8e\x7c\x17\x43\xb3" - "\x17\x66\x1c\x3e\x8d\x69\xf0\x5a" - "\x71\xf5\x97\xdc\xd1\x45\xdd\x28" - "\xf3\x5d\xdf\x53\x7b\x11\xe5\xbc" - "\x4c\xdb\x1b\x51\x6b\xe9\xfb\x3d" - "\xc1\xc3\x2c\xb9\x71\xf5\xb6\xb2" - "\x13\x36\x79\x80\x53\xe8\xd3\xa6" - "\x0a\xaf\xfd\x56\x97\xf7\x40\x8e" - "\x45\xce\xf8\xb0\x9e\x5c\x33\x82" - "\xb0\x44\x56\xfc\x05\x09\xe9\x2a" - "\xac\x26\x80\x14\x1d\xc8\x3a\x35" - "\x4c\x82\x97\xfd\x76\xb7\xa9\x0a" - "\x35\x58\x79\x8e\x0f\x66\xea\xaf" - "\x51\x6c\x09\xa9\x6e\x9b\xcb\x9a" - "\x31\x47\xa0\x2f\x7c\x71\xb4\x4a" - "\x11\xaa\x8c\x66\xc5\x64\xe6\x3a" - "\x54\xda\x24\x6a\xc4\x41\x65\x46" - "\x82\xa0\x0a\x0f\x5f\xfb\x25\xd0" - "\x2c\x91\xa7\xee\xc4\x81\x07\x86" - "\x75\x5e\x33\x69\x97\xe4\x2c\xa8" - "\x9d\x9f\x0b\x6a\xbe\xad\x98\xda" - "\x6d\x94\x41\xda\x2c\x1e\x89\xc4" - "\xc2\xaf\x1e\x00\x05\x0b\x83\x60" - "\xbd\x43\xea\x15\x23\x7f\xb9\xac" - "\xee\x4f\x2c\xaf\x2a\xf3\xdf\xd0" - "\xf3\x19\x31\xbb\x4a\x74\x84\x17" - "\x52\x32\x2c\x7d\x61\xe4\xcb\xeb" - "\x80\x38\x15\x52\xcb\x6f\xea\xe5" - "\x73\x9c\xd9\x24\x69\xc6\x95\x32" - "\x21\xc8\x11\xe4\xdc\x36\xd7\x93" - "\x38\x66\xfb\xb2\x7f\x3a\xb9\xaf" - "\x31\xdd\x93\x75\x78\x8a\x2c\x94" - "\x87\x1a\x58\xec\x9e\x7d\x4d\xba" - "\xe1\xe5\x4d\xfc\xbc\xa4\x2a\x14" - "\xef\xcc\xa7\xec\xab\x43\x09\x18" - "\xd3\xab\x68\xd1\x07\x99\x44\x47" - "\xd6\x83\x85\x3b\x30\xea\xa9\x6b" - "\x63\xea\xc4\x07\xfb\x43\x2f\xa4" - "\xaa\xb0\xab\x03\x89\xce\x3f\x8c" - "\x02\x7c\x86\x54\xbc\x88\xaf\x75" - "\xd2\xdc\x63\x17\xd3\x26\xf6\x96" - "\xa9\x3c\xf1\x61\x8c\x11\x18\xcc" - "\xd6\xea\x5b\xe2\xcd\xf0\xf1\xb2" - "\xe5\x35\x90\x1f\x85\x4c\x76\x5b" - "\x66\xce\x44\xa4\x32\x9f\xe6\x7b" - "\x71\x6e\x9f\x58\x15\x67\x72\x87" - "\x64\x8e\x3a\x44\x45\xd4\x76\xfa" - "\xc2\xf6\xef\x85\x05\x18\x7a\x9b" - "\xba\x41\x54\xac\xf0\xfc\x59\x12" - "\x3f\xdf\xa0\xe5\x8a\x65\xfd\x3a" - "\x62\x8d\x83\x2c\x03\xbe\x05\x76" - "\x2e\x53\x49\x97\x94\x33\xae\x40" - "\x81\x15\xdb\x6e\xad\xaa\xf5\x4b" - "\xe3\x98\x70\xdf\xe0\x7c\xcd\xdb" - "\x02\xd4\x7d\x2f\xc1\xe6\xb4\xf3" - "\xd7\x0d\x7a\xd9\x23\x9e\x87\x2d" - "\xce\x87\xad\xcc\x72\x05\x00\x29" - "\xdc\x73\x7f\x64\xc1\x15\x0e\xc2" - "\xdf\xa7\x5f\xeb\x41\xa1\xcd\xef" - "\x5c\x50\x79\x2a\x56\x56\x71\x8c" - "\xac\xc0\x79\x50\x69\xca\x59\x32" - "\x65\xf2\x54\xe4\x52\x38\x76\xd1" - "\x5e\xde\x26\x9e\xfb\x75\x2e\x11" - "\xb5\x10\xf4\x17\x73\xf5\x89\xc7" - "\x4f\x43\x5c\x8e\x7c\xb9\x05\x52" - "\x24\x40\x99\xfe\x9b\x85\x0b\x6c" - "\x22\x3e\x8b\xae\x86\xa1\xd2\x79" - "\x05\x68\x6b\xab\xe3\x41\x49\xed" - "\x15\xa1\x8d\x40\x2d\x61\xdf\x1a" - "\x59\xc9\x26\x8b\xef\x30\x4c\x88" - "\x4b\x10\xf8\x8d\xa6\x92\x9f\x4b" - "\xf3\xc4\x53\x0b\x89\x5d\x28\x92" - "\xcf\x78\xb2\xc0\x5d\xed\x7e\xfc" - "\xc0\x12\x23\x5f\x5a\x78\x86\x43" - "\x6e\x27\xf7\x5a\xa7\x6a\xed\x19" - "\x04\xf0\xb3\x12\xd1\xbd\x0e\x89" - "\x6e\xbc\x96\xa8\xd8\x49\x39\x9f" - "\x7e\x67\xf0\x2e\x3e\x01\xa9\xba" - "\xec\x8b\x62\x8e\xcb\x4a\x70\x43" - "\xc7\xc2\xc4\xca\x82\x03\x73\xe9" - "\x11\xdf\xcf\x54\xea\xc9\xb0\x95" - "\x51\xc0\x13\x3d\x92\x05\xfa\xf4" - "\xa9\x34\xc8\xce\x6c\x3d\x54\xcc" - "\xc4\xaf\xf1\xdc\x11\x44\x26\xa2" - "\xaf\xf1\x85\x75\x7d\x03\x61\x68" - "\x4e\x78\xc6\x92\x7d\x86\x7d\x77" - "\xdc\x71\x72\xdb\xc6\xae\xa1\xcb" - "\x70\x9a\x0b\x19\xbe\x4a\x6c\x2a" - "\xe2\xba\x6c\x64\x9a\x13\x28\xdf" - "\x85\x75\xe6\x43\xf6\x87\x08\x68" - "\x6e\xba\x6e\x79\x9f\x04\xbc\x23" - "\x50\xf6\x33\x5c\x1f\x24\x25\xbe" - "\x33\x47\x80\x45\x56\xa3\xa7\xd7" - "\x7a\xb1\x34\x0b\x90\x3c\x9c\xad" - "\x44\x5f\x9e\x0e\x9d\xd4\xbd\x93" - "\x5e\xfa\x3c\xe0\xb0\xd9\xed\xf3" - "\xd6\x2e\xff\x24\xd8\x71\x6c\xed" - "\xaf\x55\xeb\x22\xac\x93\x68\x32" - "\x05\x5b\x47\xdd\xc6\x4a\xcb\xc7" - "\x10\xe1\x3c\x92\x1a\xf3\x23\x78" - "\x2b\xa1\xd2\x80\xf4\x12\xb1\x20" - "\x8f\xff\x26\x35\xdd\xfb\xc7\x4e" - "\x78\xf1\x2d\x50\x12\x77\xa8\x60" - "\x7c\x0f\xf5\x16\x2f\x63\x70\x2a" - "\xc0\x96\x80\x4e\x0a\xb4\x93\x35" - "\x5d\x1d\x3f\x56\xf7\x2f\xbb\x90" - "\x11\x16\x8f\xa2\xec\x47\xbe\xac" - "\x56\x01\x26\x56\xb1\x8c\xb2\x10" - "\xf9\x1a\xca\xf5\xd1\xb7\x39\x20" - "\x63\xf1\x69\x20\x4f\x13\x12\x1f" - "\x5b\x65\xfc\x98\xf7\xc4\x7a\xbe" - "\xf7\x26\x4d\x2b\x84\x7b\x42\xad" - "\xd8\x7a\x0a\xb4\xd8\x74\xbf\xc1" - "\xf0\x6e\xb4\x29\xa3\xbb\xca\x46" - "\x67\x70\x6a\x2d\xce\x0e\xa2\x8a" - "\xa9\x87\xbf\x05\xc4\xc1\x04\xa3" - "\xab\xd4\x45\x43\x8c\xb6\x02\xb0" - "\x41\xc8\xfc\x44\x3d\x59\xaa\x2e" - "\x44\x21\x2a\x8d\x88\x9d\x57\xf4" - "\xa0\x02\x77\xb8\xa6\xa0\xe6\x75" - "\x5c\x82\x65\x3e\x03\x5c\x29\x8f" - "\x38\x55\xab\x33\x26\xef\x9f\x43" - "\x52\xfd\x68\xaf\x36\xb4\xbb\x9a" - "\x58\x09\x09\x1b\xc3\x65\x46\x46" - "\x1d\xa7\x94\x18\x23\x50\x2c\xca" - "\x2c\x55\x19\x97\x01\x9d\x93\x3b" - "\x63\x86\xf2\x03\x67\x45\xd2\x72" - "\x28\x52\x6c\xf4\xe3\x1c\xb5\x11" - "\x13\xf1\xeb\x21\xc7\xd9\x56\x82" - "\x2b\x82\x39\xbd\x69\x54\xed\x62" - "\xc3\xe2\xde\x73\xd4\x6a\x12\xae" - "\x13\x21\x7f\x4b\x5b\xfc\xbf\xe8" - "\x2b\xbe\x56\xba\x68\x8b\x9a\xb1" - "\x6e\xfa\xbf\x7e\x5a\x4b\xf1\xac" - "\x98\x65\x85\xd1\x93\x53\xd3\x7b" - "\x09\xdd\x4b\x10\x6d\x84\xb0\x13" - "\x65\xbd\xcf\x52\x09\xc4\x85\xe2" - "\x84\x74\x15\x65\xb7\xf7\x51\xaf" - "\x55\xad\xa4\xd1\x22\x54\x70\x94" - "\xa0\x1c\x90\x41\xfd\x99\xd7\x5a" - "\x31\xef\xaa\x25\xd0\x7f\x4f\xea" - "\x1d\x55\x42\xe5\x49\xb0\xd0\x46" - "\x62\x36\x43\xb2\x82\x15\x75\x50" - "\xa4\x72\xeb\x54\x27\x1f\x8a\xe4" - "\x7d\xe9\x66\xc5\xf1\x53\xa4\xd1" - "\x0c\xeb\xb8\xf8\xbc\xd4\xe2\xe7" - "\xe1\xf8\x4b\xcb\xa9\xa1\xaf\x15" - "\x83\xcb\x72\xd0\x33\x79\x00\x2d" - "\x9f\xd7\xf1\x2e\x1e\x10\xe4\x45" - "\xc0\x75\x3a\x39\xea\x68\xf7\x5d" - "\x1b\x73\x8f\xe9\x8e\x0f\x72\x47" - "\xae\x35\x0a\x31\x7a\x14\x4d\x4a" - "\x6f\x47\xf7\x7e\x91\x6e\x74\x8b" - "\x26\x47\xf9\xc3\xf9\xde\x70\xf5" - "\x61\xab\xa9\x27\x9f\x82\xe4\x9c" - "\x89\x91\x3f\x2e\x6a\xfd\xb5\x49" - "\xe9\xfd\x59\x14\x36\x49\x40\x6d" - "\x32\xd8\x85\x42\xf3\xa5\xdf\x0c" - "\xa8\x27\xd7\x54\xe2\x63\x2f\xf2" - "\x7e\x8b\x8b\xe7\xf1\x9a\x95\x35" - "\x43\xdc\x3a\xe4\xb6\xf4\xd0\xdf" - "\x9c\xcb\x94\xf3\x21\xa0\x77\x50" - "\xe2\xc6\xc4\xc6\x5f\x09\x64\x5b" - "\x92\x90\xd8\xe1\xd1\xed\x4b\x42" - "\xd7\x37\xaf\x65\x3d\x11\x39\xb6" - "\x24\x8a\x60\xae\xd6\x1e\xbf\x0e" - "\x0d\xd7\xdc\x96\x0e\x65\x75\x4e" - "\x29\x06\x9d\xa4\x51\x3a\x10\x63" - "\x8f\x17\x07\xd5\x8e\x3c\xf4\x28" - "\x00\x5a\x5b\x05\x19\xd8\xc0\x6c" - "\xe5\x15\xe4\x9c\x9d\x71\x9d\x5e" - "\x94\x29\x1a\xa7\x80\xfa\x0e\x33" - "\x03\xdd\xb7\x3e\x9a\xa9\x26\x18" - "\x37\xa9\x64\x08\x4d\x94\x5a\x88" - "\xca\x35\xce\x81\x02\xe3\x1f\x1b" - "\x89\x1a\x77\x85\xe3\x41\x6d\x32" - "\x42\x19\x23\x7d\xc8\x73\xee\x25" - "\x85\x0d\xf8\x31\x25\x79\x1b\x6f" - "\x79\x25\xd2\xd8\xd4\x23\xfd\xf7" - "\x82\x36\x6a\x0c\x46\x22\x15\xe9" - "\xff\x72\x41\x91\x91\x7d\x3a\xb7" - "\xdd\x65\x99\x70\xf6\x8d\x84\xf8" - "\x67\x15\x20\x11\xd6\xb2\x55\x7b" - "\xdb\x87\xee\xef\x55\x89\x2a\x59" - "\x2b\x07\x8f\x43\x8a\x59\x3c\x01" - "\x8b\x65\x54\xa1\x66\xd5\x38\xbd" - "\xc6\x30\xa9\xcc\x49\xb6\xa8\x1b" - "\xb8\xc0\x0e\xe3\x45\x28\xe2\xff" - "\x41\x9f\x7e\x7c\xd1\xae\x9e\x25" - "\x3f\x4c\x7c\x7c\xf4\xa8\x26\x4d" - "\x5c\xfd\x4b\x27\x18\xf9\x61\x76" - "\x48\xba\x0c\x6b\xa9\x4d\xfc\xf5" - "\x3b\x35\x7e\x2f\x4a\xa9\xc2\x9a" - "\xae\xab\x86\x09\x89\xc9\xc2\x40" - "\x39\x2c\x81\xb3\xb8\x17\x67\xc2" - "\x0d\x32\x4a\x3a\x67\x81\xd7\x1a" - "\x34\x52\xc5\xdb\x0a\xf5\x63\x39" - "\xea\x1f\xe1\x7c\xa1\x9e\xc1\x35" - "\xe3\xb1\x18\x45\x67\xf9\x22\x38" - "\x95\xd9\x34\x34\x86\xc6\x41\x94" - "\x15\xf9\x5b\x41\xa6\x87\x8b\xf8" - "\xd5\xe1\x1b\xe2\x5b\xf3\x86\x10" - "\xff\xe6\xae\x69\x76\xbc\x0d\xb4" - "\x09\x90\x0c\xa2\x65\x0c\xad\x74" - "\xf5\xd7\xff\xda\xc1\xce\x85\xbe" - "\x00\xa7\xff\x4d\x2f\x65\xd3\x8c" - "\x86\x2d\x05\xe8\xed\x3e\x6b\x8b" - "\x0f\x3d\x83\x8c\xf1\x1d\x5b\x96" - "\x2e\xb1\x9c\xc2\x98\xe1\x70\xb9" - "\xba\x5c\x8a\x43\xd6\x34\xa7\x2d" - "\xc9\x92\xae\xf2\xa5\x7b\x05\x49" - "\xa7\x33\x34\x86\xca\xe4\x96\x23" - "\x76\x5b\xf2\xc6\xf1\x51\x28\x42" - "\x7b\xcc\x76\x8f\xfa\xa2\xad\x31" - "\xd4\xd6\x7a\x6d\x25\x25\x54\xe4" - "\x3f\x50\x59\xe1\x5c\x05\xb7\x27" - "\x48\xbf\x07\xec\x1b\x13\xbe\x2b" - "\xa1\x57\x2b\xd5\xab\xd7\xd0\x4c" - "\x1e\xcb\x71\x9b\xc5\x90\x85\xd3" - "\xde\x59\xec\x71\xeb\x89\xbb\xd0" - "\x09\x50\xe1\x16\x3f\xfd\x1c\x34" - "\xc3\x1c\xa1\x10\x77\x53\x98\xef" - "\xf2\xfd\xa5\x01\x59\xc2\x9b\x26" - "\xc7\x42\xd9\x49\xda\x58\x2b\x6e" - "\x9f\x53\x19\x76\x7e\xd9\xc9\x0e" - "\x68\xc8\x7f\x51\x22\x42\xef\x49" - "\xa4\x55\xb6\x36\xac\x09\xc7\x31" - "\x88\x15\x4b\x2e\x8f\x3a\x08\xf7" - "\xd8\xf7\xa8\xc5\xa9\x33\xa6\x45" - "\xe4\xc4\x94\x76\xf3\x0d\x8f\x7e" - "\xc8\xf6\xbc\x23\x0a\xb6\x4c\xd3" - "\x6a\xcd\x36\xc2\x90\x5c\x5c\x3c" - "\x65\x7b\xc2\xd6\xcc\xe6\x0d\x87" - "\x73\x2e\x71\x79\x16\x06\x63\x28" - "\x09\x15\xd8\x89\x38\x38\x3d\xb5" - "\x42\x1c\x08\x24\xf7\x2a\xd2\x9d" - "\xc8\xca\xef\xf9\x27\xd8\x07\x86" - "\xf7\x43\x0b\x55\x15\x3f\x9f\x83" - "\xef\xdc\x49\x9d\x2a\xc1\x54\x62" - "\xbd\x9b\x66\x55\x9f\xb7\x12\xf3" - "\x1b\x4d\x9d\x2a\x5c\xed\x87\x75" - "\x87\x26\xec\x61\x2c\xb4\x0f\x89" - "\xb0\xfb\x2e\x68\x5d\x15\xc7\x8d" - "\x2e\xc0\xd9\xec\xaf\x4f\xd2\x25" - "\x29\xe8\xd2\x26\x2b\x67\xe9\xfc" - "\x2b\xa8\x67\x96\x12\x1f\x5b\x96" - "\xc6\x14\x53\xaf\x44\xea\xd6\xe2" - "\x94\x98\xe4\x12\x93\x4c\x92\xe0" - "\x18\xa5\x8d\x2d\xe4\x71\x3c\x47" - "\x4c\xf7\xe6\x47\x9e\xc0\x68\xdf" - "\xd4\xf5\x5a\x74\xb1\x2b\x29\x03" - "\x19\x07\xaf\x90\x62\x5c\x68\x98" - "\x48\x16\x11\x02\x9d\xee\xb4\x9b" - "\xe5\x42\x7f\x08\xfd\x16\x32\x0b" - "\xd0\xb3\xfa\x2b\xb7\x99\xf9\x29" - "\xcd\x20\x45\x9f\xb3\x1a\x5d\xa2" - "\xaf\x4d\xe0\xbd\x42\x0d\xbc\x74" - "\x99\x9c\x8e\x53\x1a\xb4\x3e\xbd" - "\xa2\x9a\x2d\xf7\xf8\x39\x0f\x67" - "\x63\xfc\x6b\xc0\xaf\xb3\x4b\x4f" - "\x55\xc4\xcf\xa7\xc8\x04\x11\x3e" - "\x14\x32\xbb\x1b\x38\x77\xd6\x7f" - "\x54\x4c\xdf\x75\xf3\x07\x2d\x33" - "\x9b\xa8\x20\xe1\x7b\x12\xb5\xf3" - "\xef\x2f\xce\x72\xe5\x24\x60\xc1" - "\x30\xe2\xab\xa1\x8e\x11\x09\xa8" - "\x21\x33\x44\xfe\x7f\x35\x32\x93" - "\x39\xa7\xad\x8b\x79\x06\xb2\xcb" - "\x4e\xa9\x5f\xc7\xba\x74\x29\xec" - "\x93\xa0\x4e\x54\x93\xc0\xbc\x55" - "\x64\xf0\x48\xe5\x57\x99\xee\x75" - "\xd6\x79\x0f\x66\xb7\xc6\x57\x76" - "\xf7\xb7\xf3\x9c\xc5\x60\xe8\x7f" - "\x83\x76\xd6\x0e\xaa\xe6\x90\x39" - "\x1d\xa6\x32\x6a\x34\xe3\x55\xf8" - "\x58\xa0\x58\x7d\x33\xe0\x22\x39" - "\x44\x64\x87\x86\x5a\x2f\xa7\x7e" - "\x0f\x38\xea\xb0\x30\xcc\x61\xa5" - "\x6a\x32\xae\x1e\xf7\xe9\xd0\xa9" - "\x0c\x32\x4b\xb5\x49\x28\xab\x85" - "\x2f\x8e\x01\x36\x38\x52\xd0\xba" - "\xd6\x02\x78\xf8\x0e\x3e\x9c\x8b" - "\x6b\x45\x99\x3f\x5c\xfe\x58\xf1" - "\x5c\x94\x04\xe1\xf5\x18\x6d\x51" - "\xb2\x5d\x18\x20\xb6\xc2\x9a\x42" - "\x1d\xb3\xab\x3c\xb6\x3a\x13\x03" - "\xb2\x46\x82\x4f\xfc\x64\xbc\x4f" - "\xca\xfa\x9c\xc0\xd5\xa7\xbd\x11" - "\xb7\xe4\x5a\xf6\x6f\x4d\x4d\x54" - "\xea\xa4\x98\x66\xd4\x22\x3b\xd3" - "\x8f\x34\x47\xd9\x7c\xf4\x72\x3b" - "\x4d\x02\x77\xf6\xd6\xdd\x08\x0a" - "\x81\xe1\x86\x89\x3e\x56\x10\x3c" - "\xba\xd7\x81\x8c\x08\xbc\x8b\xe2" - "\x53\xec\xa7\x89\xee\xc8\x56\xb5" - "\x36\x2c\xb2\x03\xba\x99\xdd\x7c" - "\x48\xa0\xb0\xbc\x91\x33\xe9\xa8" - "\xcb\xcd\xcf\x59\x5f\x1f\x15\xe2" - "\x56\xf5\x4e\x01\x35\x27\x45\x77" - "\x47\xc8\xbc\xcb\x7e\x39\xc1\x97" - "\x28\xd3\x84\xfc\x2c\x3e\xc8\xad" - "\x9c\xf8\x8a\x61\x9c\x28\xaa\xc5" - "\x99\x20\x43\x85\x9d\xa5\xe2\x8b" - "\xb8\xae\xeb\xd0\x32\x0d\x52\x78" - "\x09\x56\x3f\xc7\xd8\x7e\x26\xfc" - "\x37\xfb\x6f\x04\xfc\xfa\x92\x10" - "\xac\xf8\x3e\x21\xdc\x8c\x21\x16" - "\x7d\x67\x6e\xf6\xcd\xda\xb6\x98" - "\x23\xab\x23\x3c\xb2\x10\xa0\x53" - "\x5a\x56\x9f\xc5\xd0\xff\xbb\xe4" - "\x98\x3c\x69\x1e\xdb\x38\x8f\x7e" - "\x0f\xd2\x98\x88\x81\x8b\x45\x67" - "\xea\x33\xf1\xeb\xe9\x97\x55\x2e" - "\xd9\xaa\xeb\x5a\xec\xda\xe1\x68" - "\xa8\x9d\x3c\x84\x7c\x05\x3d\x62" - "\x87\x8f\x03\x21\x28\x95\x0c\x89" - "\x25\x22\x4a\xb0\x93\xa9\x50\xa2" - "\x2f\x57\x6e\x18\x42\x19\x54\x0c" - "\x55\x67\xc6\x11\x49\xf4\x5c\xd2" - "\xe9\x3d\xdd\x8b\x48\x71\x21\x00" - "\xc3\x9a\x6c\x85\x74\x28\x83\x4a" - "\x1b\x31\x05\xe1\x06\x92\xe7\xda" - "\x85\x73\x78\x45\x20\x7f\xae\x13" - "\x7c\x33\x06\x22\xf4\x83\xf9\x35" - "\x3f\x6c\x71\xa8\x4e\x48\xbe\x9b" - "\xce\x8a\xba\xda\xbe\x28\x08\xf7" - "\xe2\x14\x8c\x71\xea\x72\xf9\x33" - "\xf2\x88\x3f\xd7\xbb\x69\x6c\x29" - "\x19\xdc\x84\xce\x1f\x12\x4f\xc8" - "\xaf\xa5\x04\xba\x5a\xab\xb0\xd9" - "\x14\x1f\x6c\x68\x98\x39\x89\x7a" - "\xd9\xd8\x2f\xdf\xa8\x47\x4a\x25" - "\xe2\xfb\x33\xf4\x59\x78\xe1\x68" - "\x85\xcf\xfe\x59\x20\xd4\x05\x1d" - "\x80\x99\xae\xbc\xca\xae\x0f\x2f" - "\x65\x43\x34\x8e\x7e\xac\xd3\x93" - "\x2f\xac\x6d\x14\x3d\x02\x07\x70" - "\x9d\xa4\xf3\x1b\x5c\x36\xfc\x01" - "\x73\x34\x85\x0c\x6c\xd6\xf1\xbd" - "\x3f\xdf\xee\xf5\xd9\xba\x56\xef" - "\xf4\x9b\x6b\xee\x9f\x5a\x78\x6d" - "\x32\x19\xf4\xf7\xf8\x4c\x69\x0b" - "\x4b\xbc\xbb\xb7\xf2\x85\xaf\x70" - "\x75\x24\x6c\x54\xa7\x0e\x4d\x1d" - "\x01\xbf\x08\xac\xcf\x7f\x2c\xe3" - "\x14\x89\x5e\x70\x5a\x99\x92\xcd" - "\x01\x84\xc8\xd2\xab\xe5\x4f\x58" - "\xe7\x0f\x2f\x0e\xff\x68\xea\xfd" - "\x15\xb3\x17\xe6\xb0\xe7\x85\xd8" - "\x23\x2e\x05\xc7\xc9\xc4\x46\x1f" - "\xe1\x9e\x49\x20\x23\x24\x4d\x7e" - "\x29\x65\xff\xf4\xb6\xfd\x1a\x85" - "\xc4\x16\xec\xfc\xea\x7b\xd6\x2c" - "\x43\xf8\xb7\xbf\x79\xc0\x85\xcd" - "\xef\xe1\x98\xd3\xa5\xf7\x90\x8c" - "\xe9\x7f\x80\x6b\xd2\xac\x4c\x30" - "\xa7\xc6\x61\x6c\xd2\xf9\x2c\xff" - "\x30\xbc\x22\x81\x7d\x93\x12\xe4" - "\x0a\xcd\xaf\xdd\xe8\xab\x0a\x1e" - "\x13\xa4\x27\xc3\x5f\xf7\x4b\xbb" - "\x37\x09\x4b\x91\x6f\x92\x4f\xaf" - "\x52\xee\xdf\xef\x09\x6f\xf7\x5c" - "\x6e\x12\x17\x72\x63\x57\xc7\xba" - "\x3b\x6b\x38\x32\x73\x1b\x9c\x80" - "\xc1\x7a\xc6\xcf\xcd\x35\xc0\x6b" - "\x31\x1a\x6b\xe9\xd8\x2c\x29\x3f" - "\x96\xfb\xb6\xcd\x13\x91\x3b\xc2" - "\xd2\xa3\x31\x8d\xa4\xcd\x57\xcd" - "\x13\x3d\x64\xfd\x06\xce\xe6\xdc" - "\x0c\x24\x43\x31\x40\x57\xf1\x72" - "\x17\xe3\x3a\x63\x6d\x35\xcf\x5d" - "\x97\x40\x59\xdd\xf7\x3c\x02\xf7" - "\x1c\x7e\x05\xbb\xa9\x0d\x01\xb1" - "\x8e\xc0\x30\xa9\x53\x24\xc9\x89" - "\x84\x6d\xaa\xd0\xcd\x91\xc2\x4d" - "\x91\xb0\x89\xe2\xbf\x83\x44\xaa" - "\x28\x72\x23\xa0\xc2\xad\xad\x1c" - "\xfc\x3f\x09\x7a\x0b\xdc\xc5\x1b" - "\x87\x13\xc6\x5b\x59\x8d\xf2\xc8" - "\xaf\xdf\x11\x95", - .len = 4100, - }, -}; - static const struct cipher_testvec chacha20_tv_template[] = { { /* RFC7539 A.2. Test Vector #1 */ .key = "\x00\x00\x00\x00\x00\x00\x00\x00" From b2cd1d812b955c4a06bb411d3363abf5e9009f9c Mon Sep 17 00:00:00 2001 From: dingsenjie Date: Tue, 26 Jan 2021 11:45:53 +0800 Subject: [PATCH 113/154] crypto: ccree - fix spelling typo of allocated allocted -> allocated Signed-off-by: dingsenjie Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_cipher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index cdfee501fbd9..78833491f534 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -921,7 +921,7 @@ static int cc_cipher_process(struct skcipher_request *req, return crypto_skcipher_decrypt(subreq); } - /* The IV we are handed may be allocted from the stack so + /* The IV we are handed may be allocated from the stack so * we must copy it to a DMAable buffer before use. */ req_ctx->iv = kmemdup(iv, ivsize, flags); From b33fa5ff8190befed1eb0ac4783e15adfa7f7135 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 27 Jan 2021 08:25:59 +0300 Subject: [PATCH 114/154] crypto: octeontx2 - fix signedness bug in cptvf_register_interrupts() The "num_vec" has to be signed for the error handling to work. Fixes: 19d8e8c7be15 ("crypto: octeontx2 - add virtual function driver support") Signed-off-by: Dan Carpenter Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c index 9663be38ee40..47f378731024 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c @@ -34,7 +34,7 @@ static void cptvf_disable_pfvf_mbox_intrs(struct otx2_cptvf_dev *cptvf) static int cptvf_register_interrupts(struct otx2_cptvf_dev *cptvf) { int ret, irq; - u32 num_vec; + int num_vec; num_vec = pci_msix_vec_count(cptvf->pdev); if (num_vec <= 0) From b4ea2220b86afe6ea962c3bd51f61eaf56cd8212 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 29 Jan 2021 16:48:56 +1100 Subject: [PATCH 115/154] crypto: octeontx2 - Add dependency on NET_VENDOR_MARVELL The crypto octeontx2 driver depends on the mbox code in the network tree. It tries to select the MBOX Kconfig option but that option itself depends on many other options which are not selected, e.g., CONFIG_NET_VENDOR_MARVELL. It would be inappropriate to select them all as randomly prompting the user for network options which would oterhwise be disabled just because a crypto driver has been enabled makes no sense. This patch fixes this by adding a dependency on NET_VENDOR_MARVELL. This makes the crypto driver invisible if the network option is off. If the crypto driver must be visible even without the network stack then the shared mbox code should be moved out of drivers/net. Reported-by: Randy Dunlap Reported-by: kernel test robot Fixes: 5e8ce8334734 ("crypto: marvell - add Marvell OcteonTX2 CPT...") Signed-off-by: Herbert Xu Acked-by: Randy Dunlap # build-tested Signed-off-by: Herbert Xu --- drivers/crypto/marvell/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig index 2efbd79180ce..a188ad1fadd3 100644 --- a/drivers/crypto/marvell/Kconfig +++ b/drivers/crypto/marvell/Kconfig @@ -41,6 +41,7 @@ config CRYPTO_DEV_OCTEONTX2_CPT depends on ARM64 || COMPILE_TEST depends on PCI_MSI && 64BIT depends on CRYPTO_LIB_AES + depends on NET_VENDOR_MARVELL select OCTEONTX2_MBOX select CRYPTO_DEV_MARVELL select CRYPTO_SKCIPHER From 63b8ee4f548c36658c2854d353270b3474d45aeb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 30 Jan 2021 14:55:38 -0800 Subject: [PATCH 116/154] crypto: crypto4xx - Avoid linking failure with HW_RANDOM=m It is currently possible to build CONFIG_HW_RANDOM_PPC4XX=y with CONFIG_HW_RANDOM=m which would lead to the inability of linking with devm_hwrng_{register,unregister}. We cannot have the framework modular and the consumer of that framework built-in, so make that dependency explicit. Reported-by: kernel test robot Signed-off-by: Florian Fainelli Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index cc29bc3f6a6c..e3e37a715fa7 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -348,7 +348,7 @@ config CRYPTO_DEV_PPC4XX config HW_RANDOM_PPC4XX bool "PowerPC 4xx generic true random number generator support" - depends on CRYPTO_DEV_PPC4XX && HW_RANDOM + depends on CRYPTO_DEV_PPC4XX && HW_RANDOM=y default y help This option provides the kernel-side support for the TRNG hardware From 42e6f351dcb05fd1f3e4197fc2286de9eb354b30 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 1 Feb 2021 14:44:31 +0100 Subject: [PATCH 117/154] crypto: marvell - CRYPTO_DEV_OCTEONTX2_CPT should depend on ARCH_THUNDER2 The Marvell OcteonTX2 CPT physical function PCI device is present only on OcteonTx2 SoC, and not available as an independent PCIe endpoint. Hence add a dependency on ARCH_THUNDER2, to prevent asking the user about this driver when configuring a kernel without OcteonTx2 platform support. Fixes: 5e8ce8334734c5f2 ("crypto: marvell - add Marvell OcteonTX2 CPT PF driver") Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/marvell/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig index a188ad1fadd3..9125199f1702 100644 --- a/drivers/crypto/marvell/Kconfig +++ b/drivers/crypto/marvell/Kconfig @@ -38,7 +38,7 @@ config CRYPTO_DEV_OCTEONTX_CPT config CRYPTO_DEV_OCTEONTX2_CPT tristate "Marvell OcteonTX2 CPT driver" - depends on ARM64 || COMPILE_TEST + depends on ARCH_THUNDER2 || COMPILE_TEST depends on PCI_MSI && 64BIT depends on CRYPTO_LIB_AES depends on NET_VENDOR_MARVELL From e145f5565dc48ccaf4cb50b7cfc48777bed8c100 Mon Sep 17 00:00:00 2001 From: Jan Henrik Weinstock Date: Mon, 1 Feb 2021 16:14:59 +0100 Subject: [PATCH 118/154] hwrng: timeriomem - Fix cooldown period calculation Ensure cooldown period tolerance of 1% is actually accounted for. Fixes: ca3bff70ab32 ("hwrng: timeriomem - Improve performance...") Signed-off-by: Jan Henrik Weinstock Signed-off-by: Herbert Xu --- drivers/char/hw_random/timeriomem-rng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c index e262445fed5f..f35f0f31f52a 100644 --- a/drivers/char/hw_random/timeriomem-rng.c +++ b/drivers/char/hw_random/timeriomem-rng.c @@ -69,7 +69,7 @@ static int timeriomem_rng_read(struct hwrng *hwrng, void *data, */ if (retval > 0) usleep_range(period_us, - period_us + min(1, period_us / 100)); + period_us + max(1, period_us / 100)); *(u32 *)data = readl(priv->io_base); retval += sizeof(u32); From e1b2d980f03b833442768c1987d5ad0b9a58cfe7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2021 19:02:29 +0100 Subject: [PATCH 119/154] crypto: michael_mic - fix broken misalignment handling The Michael MIC driver uses the cra_alignmask to ensure that pointers presented to its update and finup/final methods are 32-bit aligned. However, due to the way the shash API works, this is no guarantee that the 32-bit reads occurring in the update method are also aligned, as the size of the buffer presented to update may be of uneven length. For instance, an update() of 3 bytes followed by a misaligned update() of 4 or more bytes will result in a misaligned access using an accessor that is not suitable for this. On most architectures, this does not matter, and so setting the cra_alignmask is pointless. On architectures where this does matter, setting the cra_alignmask does not actually solve the problem. So let's get rid of the cra_alignmask, and use unaligned accessors instead, where appropriate. Cc: Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/michael_mic.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/crypto/michael_mic.c b/crypto/michael_mic.c index 63350c4ad461..f4c31049601c 100644 --- a/crypto/michael_mic.c +++ b/crypto/michael_mic.c @@ -7,7 +7,7 @@ * Copyright (c) 2004 Jouni Malinen */ #include -#include +#include #include #include #include @@ -19,7 +19,7 @@ struct michael_mic_ctx { }; struct michael_mic_desc_ctx { - u8 pending[4]; + __le32 pending; size_t pending_len; u32 l, r; @@ -60,13 +60,12 @@ static int michael_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct michael_mic_desc_ctx *mctx = shash_desc_ctx(desc); - const __le32 *src; if (mctx->pending_len) { int flen = 4 - mctx->pending_len; if (flen > len) flen = len; - memcpy(&mctx->pending[mctx->pending_len], data, flen); + memcpy((u8 *)&mctx->pending + mctx->pending_len, data, flen); mctx->pending_len += flen; data += flen; len -= flen; @@ -74,23 +73,21 @@ static int michael_update(struct shash_desc *desc, const u8 *data, if (mctx->pending_len < 4) return 0; - src = (const __le32 *)mctx->pending; - mctx->l ^= le32_to_cpup(src); + mctx->l ^= le32_to_cpu(mctx->pending); michael_block(mctx->l, mctx->r); mctx->pending_len = 0; } - src = (const __le32 *)data; - while (len >= 4) { - mctx->l ^= le32_to_cpup(src++); + mctx->l ^= get_unaligned_le32(data); michael_block(mctx->l, mctx->r); + data += 4; len -= 4; } if (len > 0) { mctx->pending_len = len; - memcpy(mctx->pending, src, len); + memcpy(&mctx->pending, data, len); } return 0; @@ -100,8 +97,7 @@ static int michael_update(struct shash_desc *desc, const u8 *data, static int michael_final(struct shash_desc *desc, u8 *out) { struct michael_mic_desc_ctx *mctx = shash_desc_ctx(desc); - u8 *data = mctx->pending; - __le32 *dst = (__le32 *)out; + u8 *data = (u8 *)&mctx->pending; /* Last block and padding (0x5a, 4..7 x 0) */ switch (mctx->pending_len) { @@ -123,8 +119,8 @@ static int michael_final(struct shash_desc *desc, u8 *out) /* l ^= 0; */ michael_block(mctx->l, mctx->r); - dst[0] = cpu_to_le32(mctx->l); - dst[1] = cpu_to_le32(mctx->r); + put_unaligned_le32(mctx->l, out); + put_unaligned_le32(mctx->r, out + 4); return 0; } @@ -135,13 +131,11 @@ static int michael_setkey(struct crypto_shash *tfm, const u8 *key, { struct michael_mic_ctx *mctx = crypto_shash_ctx(tfm); - const __le32 *data = (const __le32 *)key; - if (keylen != 8) return -EINVAL; - mctx->l = le32_to_cpu(data[0]); - mctx->r = le32_to_cpu(data[1]); + mctx->l = get_unaligned_le32(key); + mctx->r = get_unaligned_le32(key + 4); return 0; } @@ -156,7 +150,6 @@ static struct shash_alg alg = { .cra_name = "michael_mic", .cra_driver_name = "michael_mic-generic", .cra_blocksize = 8, - .cra_alignmask = 3, .cra_ctxsize = sizeof(struct michael_mic_ctx), .cra_module = THIS_MODULE, } From 784506a1df57737fc8460fd644b30ac8fecaedf0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2021 19:02:30 +0100 Subject: [PATCH 120/154] crypto: serpent - get rid of obsolete tnepres variant It is not trivial to trace back why exactly the tnepres variant of serpent was added ~17 years ago - Google searches come up mostly empty, but it seems to be related with the 'kerneli' version, which was based on an incorrect interpretation of the serpent spec. In other words, nobody is likely to care anymore today, so let's get rid of it. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/Kconfig | 3 +- crypto/serpent_generic.c | 82 +++------------------------------------- crypto/tcrypt.c | 6 +-- crypto/testmgr.c | 6 --- crypto/testmgr.h | 79 -------------------------------------- 5 files changed, 7 insertions(+), 169 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 9779c7f7531f..15c9c28d9f53 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1460,8 +1460,7 @@ config CRYPTO_SERPENT Serpent cipher algorithm, by Anderson, Biham & Knudsen. Keys are allowed to be from 0 to 256 bits in length, in steps - of 8 bits. Also includes the 'Tnepres' algorithm, a reversed - variant of Serpent for compatibility with old kerneli.org code. + of 8 bits. See also: diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c index 492c1d0bfe06..a932e0b2964f 100644 --- a/crypto/serpent_generic.c +++ b/crypto/serpent_generic.c @@ -5,11 +5,6 @@ * Serpent Cipher Algorithm. * * Copyright (C) 2002 Dag Arne Osvik - * 2003 Herbert Valerio Riedel - * - * Added tnepres support: - * Ruben Jesus Garcia Hernandez , 18.10.2004 - * Based on code by hvr */ #include @@ -576,59 +571,7 @@ static void serpent_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) __serpent_decrypt(ctx, dst, src); } -static int tnepres_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - u8 rev_key[SERPENT_MAX_KEY_SIZE]; - int i; - - for (i = 0; i < keylen; ++i) - rev_key[keylen - i - 1] = key[i]; - - return serpent_setkey(tfm, rev_key, keylen); -} - -static void tnepres_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - const u32 * const s = (const u32 * const)src; - u32 * const d = (u32 * const)dst; - - u32 rs[4], rd[4]; - - rs[0] = swab32(s[3]); - rs[1] = swab32(s[2]); - rs[2] = swab32(s[1]); - rs[3] = swab32(s[0]); - - serpent_encrypt(tfm, (u8 *)rd, (u8 *)rs); - - d[0] = swab32(rd[3]); - d[1] = swab32(rd[2]); - d[2] = swab32(rd[1]); - d[3] = swab32(rd[0]); -} - -static void tnepres_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - const u32 * const s = (const u32 * const)src; - u32 * const d = (u32 * const)dst; - - u32 rs[4], rd[4]; - - rs[0] = swab32(s[3]); - rs[1] = swab32(s[2]); - rs[2] = swab32(s[1]); - rs[3] = swab32(s[0]); - - serpent_decrypt(tfm, (u8 *)rd, (u8 *)rs); - - d[0] = swab32(rd[3]); - d[1] = swab32(rd[2]); - d[2] = swab32(rd[1]); - d[3] = swab32(rd[0]); -} - -static struct crypto_alg srp_algs[2] = { { +static struct crypto_alg srp_alg = { .cra_name = "serpent", .cra_driver_name = "serpent-generic", .cra_priority = 100, @@ -643,38 +586,23 @@ static struct crypto_alg srp_algs[2] = { { .cia_setkey = serpent_setkey, .cia_encrypt = serpent_encrypt, .cia_decrypt = serpent_decrypt } } -}, { - .cra_name = "tnepres", - .cra_driver_name = "tnepres-generic", - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_u = { .cipher = { - .cia_min_keysize = SERPENT_MIN_KEY_SIZE, - .cia_max_keysize = SERPENT_MAX_KEY_SIZE, - .cia_setkey = tnepres_setkey, - .cia_encrypt = tnepres_encrypt, - .cia_decrypt = tnepres_decrypt } } -} }; +}; static int __init serpent_mod_init(void) { - return crypto_register_algs(srp_algs, ARRAY_SIZE(srp_algs)); + return crypto_register_alg(&srp_alg); } static void __exit serpent_mod_fini(void) { - crypto_unregister_algs(srp_algs, ARRAY_SIZE(srp_algs)); + crypto_unregister_alg(&srp_alg); } subsys_initcall(serpent_mod_init); module_exit(serpent_mod_fini); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Serpent and tnepres (kerneli compatible serpent reversed) Cipher Algorithm"); +MODULE_DESCRIPTION("Serpent Cipher Algorithm"); MODULE_AUTHOR("Dag Arne Osvik "); -MODULE_ALIAS_CRYPTO("tnepres"); MODULE_ALIAS_CRYPTO("serpent"); MODULE_ALIAS_CRYPTO("serpent-generic"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 2877b88cfa45..6b7c158dc508 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -70,7 +70,7 @@ static const char *check[] = { "des", "md5", "des3_ede", "rot13", "sha1", "sha224", "sha256", "sm3", "blowfish", "twofish", "serpent", "sha384", "sha512", "md4", "aes", "cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea", - "khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt", + "khazad", "wp512", "wp384", "wp256", "xeta", "fcrypt", "camellia", "seed", "rmd160", "lzo", "lzo-rle", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", "streebog256", "streebog512", @@ -1806,10 +1806,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("wp256"); break; - case 25: - ret += tcrypt_test("ecb(tnepres)"); - break; - case 26: ret += tcrypt_test("ecb(anubis)"); ret += tcrypt_test("cbc(anubis)"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 1a4103b1b202..93359999c94b 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4876,12 +4876,6 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .cipher = __VECS(tea_tv_template) } - }, { - .alg = "ecb(tnepres)", - .test = alg_test_skcipher, - .suite = { - .cipher = __VECS(tnepres_tv_template) - } }, { .alg = "ecb(twofish)", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 99aca08263d2..ced56ea0c9b4 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -11415,85 +11415,6 @@ static const struct cipher_testvec serpent_tv_template[] = { }, }; -static const struct cipher_testvec tnepres_tv_template[] = { - { /* KeySize=0 */ - .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .ctext = "\x41\xcc\x6b\x31\x59\x31\x45\x97" - "\x6d\x6f\xbb\x38\x4b\x37\x21\x28", - .len = 16, - }, - { /* KeySize=128, PT=0, I=1 */ - .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .key = "\x80\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .klen = 16, - .ctext = "\x49\xaf\xbf\xad\x9d\x5a\x34\x05" - "\x2c\xd8\xff\xa5\x98\x6b\xd2\xdd", - .len = 16, - }, { /* KeySize=128 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .klen = 16, - .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .ctext = "\xea\xf4\xd7\xfc\xd8\x01\x34\x47" - "\x81\x45\x0b\xfa\x0c\xd6\xad\x6e", - .len = 16, - }, { /* KeySize=128, I=121 */ - .key = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80", - .klen = 16, - .ptext = zeroed_string, - .ctext = "\x3d\xda\xbf\xc0\x06\xda\xab\x06" - "\x46\x2a\xf4\xef\x81\x54\x4e\x26", - .len = 16, - }, { /* KeySize=192, PT=0, I=1 */ - .key = "\x80\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .klen = 24, - .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .ctext = "\xe7\x8e\x54\x02\xc7\x19\x55\x68" - "\xac\x36\x78\xf7\xa3\xf6\x0c\x66", - .len = 16, - }, { /* KeySize=256, PT=0, I=1 */ - .key = "\x80\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .klen = 32, - .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .ctext = "\xab\xed\x96\xe7\x66\xbf\x28\xcb" - "\xc0\xeb\xd2\x1a\x82\xef\x08\x19", - .len = 16, - }, { /* KeySize=256, I=257 */ - .key = "\x1f\x1e\x1d\x1c\x1b\x1a\x19\x18" - "\x17\x16\x15\x14\x13\x12\x11\x10" - "\x0f\x0e\x0d\x0c\x0b\x0a\x09\x08" - "\x07\x06\x05\x04\x03\x02\x01\x00", - .klen = 32, - .ptext = "\x0f\x0e\x0d\x0c\x0b\x0a\x09\x08" - "\x07\x06\x05\x04\x03\x02\x01\x00", - .ctext = "\x5c\xe7\x1c\x70\xd2\x88\x2e\x5b" - "\xb8\x32\xe4\x33\xf8\x9f\x26\xde", - .len = 16, - }, { /* KeySize=256 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", - .klen = 32, - .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .ctext = "\x64\xa9\x1a\x37\xed\x9f\xe7\x49" - "\xa8\x4e\x76\xd6\xf5\x0d\x78\xee", - .len = 16, - } -}; - static const struct cipher_testvec serpent_cbc_tv_template[] = { { /* Generated with Crypto++ */ .key = "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9" From 81d091a293a24912a61c22e073824d29496301d5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2021 19:02:31 +0100 Subject: [PATCH 121/154] crypto: serpent - use unaligned accessors instead of alignmask Instead of using an alignmask of 0x3 to ensure 32-bit alignment of the Serpent input and output blocks, which propagates to mode drivers, and results in pointless copying on architectures that don't care about alignment, use the unaligned accessors, which will do the right thing on each respective architecture, avoiding the need for double buffering. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/serpent_generic.c | 44 ++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c index a932e0b2964f..236c87547a17 100644 --- a/crypto/serpent_generic.c +++ b/crypto/serpent_generic.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -448,19 +448,12 @@ void __serpent_encrypt(const void *c, u8 *dst, const u8 *src) { const struct serpent_ctx *ctx = c; const u32 *k = ctx->expkey; - const __le32 *s = (const __le32 *)src; - __le32 *d = (__le32 *)dst; u32 r0, r1, r2, r3, r4; -/* - * Note: The conversions between u8* and u32* might cause trouble - * on architectures with stricter alignment rules than x86 - */ - - r0 = le32_to_cpu(s[0]); - r1 = le32_to_cpu(s[1]); - r2 = le32_to_cpu(s[2]); - r3 = le32_to_cpu(s[3]); + r0 = get_unaligned_le32(src); + r1 = get_unaligned_le32(src + 4); + r2 = get_unaligned_le32(src + 8); + r3 = get_unaligned_le32(src + 12); K(r0, r1, r2, r3, 0); S0(r0, r1, r2, r3, r4); LK(r2, r1, r3, r0, r4, 1); @@ -496,10 +489,10 @@ void __serpent_encrypt(const void *c, u8 *dst, const u8 *src) S6(r0, r1, r3, r2, r4); LK(r3, r4, r1, r2, r0, 31); S7(r3, r4, r1, r2, r0); K(r0, r1, r2, r3, 32); - d[0] = cpu_to_le32(r0); - d[1] = cpu_to_le32(r1); - d[2] = cpu_to_le32(r2); - d[3] = cpu_to_le32(r3); + put_unaligned_le32(r0, dst); + put_unaligned_le32(r1, dst + 4); + put_unaligned_le32(r2, dst + 8); + put_unaligned_le32(r3, dst + 12); } EXPORT_SYMBOL_GPL(__serpent_encrypt); @@ -514,14 +507,12 @@ void __serpent_decrypt(const void *c, u8 *dst, const u8 *src) { const struct serpent_ctx *ctx = c; const u32 *k = ctx->expkey; - const __le32 *s = (const __le32 *)src; - __le32 *d = (__le32 *)dst; u32 r0, r1, r2, r3, r4; - r0 = le32_to_cpu(s[0]); - r1 = le32_to_cpu(s[1]); - r2 = le32_to_cpu(s[2]); - r3 = le32_to_cpu(s[3]); + r0 = get_unaligned_le32(src); + r1 = get_unaligned_le32(src + 4); + r2 = get_unaligned_le32(src + 8); + r3 = get_unaligned_le32(src + 12); K(r0, r1, r2, r3, 32); SI7(r0, r1, r2, r3, r4); KL(r1, r3, r0, r4, r2, 31); @@ -557,10 +548,10 @@ void __serpent_decrypt(const void *c, u8 *dst, const u8 *src) SI1(r3, r1, r2, r0, r4); KL(r4, r1, r2, r0, r3, 1); SI0(r4, r1, r2, r0, r3); K(r2, r3, r1, r4, 0); - d[0] = cpu_to_le32(r2); - d[1] = cpu_to_le32(r3); - d[2] = cpu_to_le32(r1); - d[3] = cpu_to_le32(r4); + put_unaligned_le32(r2, dst); + put_unaligned_le32(r3, dst + 4); + put_unaligned_le32(r1, dst + 8); + put_unaligned_le32(r4, dst + 12); } EXPORT_SYMBOL_GPL(__serpent_decrypt); @@ -578,7 +569,6 @@ static struct crypto_alg srp_alg = { .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = SERPENT_BLOCK_SIZE, .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 3, .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = SERPENT_MIN_KEY_SIZE, From 50a3a9fae3e0c3662786875b941c93dcdd26eee6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2021 19:02:32 +0100 Subject: [PATCH 122/154] crypto: blowfish - use unaligned accessors instead of alignmask Instead of using an alignmask of 0x3 to ensure 32-bit alignment of the Blowfish input and output blocks, which propagates to mode drivers, and results in pointless copying on architectures that don't care about alignment, use the unaligned accessors, which will do the right thing on each respective architecture, avoiding the need for double buffering. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/blowfish_generic.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/crypto/blowfish_generic.c b/crypto/blowfish_generic.c index c3c2041fe0c5..003b52c6880e 100644 --- a/crypto/blowfish_generic.c +++ b/crypto/blowfish_generic.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -36,12 +36,10 @@ static void bf_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct bf_ctx *ctx = crypto_tfm_ctx(tfm); - const __be32 *in_blk = (const __be32 *)src; - __be32 *const out_blk = (__be32 *)dst; const u32 *P = ctx->p; const u32 *S = ctx->s; - u32 yl = be32_to_cpu(in_blk[0]); - u32 yr = be32_to_cpu(in_blk[1]); + u32 yl = get_unaligned_be32(src); + u32 yr = get_unaligned_be32(src + 4); ROUND(yr, yl, 0); ROUND(yl, yr, 1); @@ -63,19 +61,17 @@ static void bf_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) yl ^= P[16]; yr ^= P[17]; - out_blk[0] = cpu_to_be32(yr); - out_blk[1] = cpu_to_be32(yl); + put_unaligned_be32(yr, dst); + put_unaligned_be32(yl, dst + 4); } static void bf_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct bf_ctx *ctx = crypto_tfm_ctx(tfm); - const __be32 *in_blk = (const __be32 *)src; - __be32 *const out_blk = (__be32 *)dst; const u32 *P = ctx->p; const u32 *S = ctx->s; - u32 yl = be32_to_cpu(in_blk[0]); - u32 yr = be32_to_cpu(in_blk[1]); + u32 yl = get_unaligned_be32(src); + u32 yr = get_unaligned_be32(src + 4); ROUND(yr, yl, 17); ROUND(yl, yr, 16); @@ -97,8 +93,8 @@ static void bf_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) yl ^= P[1]; yr ^= P[0]; - out_blk[0] = cpu_to_be32(yr); - out_blk[1] = cpu_to_be32(yl); + put_unaligned_be32(yr, dst); + put_unaligned_be32(yl, dst + 4); } static struct crypto_alg alg = { @@ -108,7 +104,6 @@ static struct crypto_alg alg = { .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = BF_BLOCK_SIZE, .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 3, .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = BF_MIN_KEY_SIZE, From 83385415100591248b25d0b89a2796a9cb3bea5c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2021 19:02:33 +0100 Subject: [PATCH 123/154] crypto: camellia - use unaligned accessors instead of alignmask Instead of using an alignmask of 0x3 to ensure 32-bit alignment of the Camellia input and output blocks, which propagates to mode drivers, and results in pointless copying on architectures that don't care about alignment, use the unaligned accessors, which will do the right thing on each respective architecture, avoiding the need for double buffering. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/camellia_generic.c | 45 ++++++++++++++------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c index 0b9f409f7370..fd1a88af9e77 100644 --- a/crypto/camellia_generic.c +++ b/crypto/camellia_generic.c @@ -9,14 +9,6 @@ * https://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html */ -/* - * - * NOTE --- NOTE --- NOTE --- NOTE - * This implementation assumes that all memory addresses passed - * as parameters are four-byte aligned. - * - */ - #include #include #include @@ -994,16 +986,14 @@ camellia_set_key(struct crypto_tfm *tfm, const u8 *in_key, static void camellia_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct camellia_ctx *cctx = crypto_tfm_ctx(tfm); - const __be32 *src = (const __be32 *)in; - __be32 *dst = (__be32 *)out; unsigned int max; u32 tmp[4]; - tmp[0] = be32_to_cpu(src[0]); - tmp[1] = be32_to_cpu(src[1]); - tmp[2] = be32_to_cpu(src[2]); - tmp[3] = be32_to_cpu(src[3]); + tmp[0] = get_unaligned_be32(in); + tmp[1] = get_unaligned_be32(in + 4); + tmp[2] = get_unaligned_be32(in + 8); + tmp[3] = get_unaligned_be32(in + 12); if (cctx->key_length == 16) max = 24; @@ -1013,25 +1003,23 @@ static void camellia_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) camellia_do_encrypt(cctx->key_table, tmp, max); /* do_encrypt returns 0,1 swapped with 2,3 */ - dst[0] = cpu_to_be32(tmp[2]); - dst[1] = cpu_to_be32(tmp[3]); - dst[2] = cpu_to_be32(tmp[0]); - dst[3] = cpu_to_be32(tmp[1]); + put_unaligned_be32(tmp[2], out); + put_unaligned_be32(tmp[3], out + 4); + put_unaligned_be32(tmp[0], out + 8); + put_unaligned_be32(tmp[1], out + 12); } static void camellia_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct camellia_ctx *cctx = crypto_tfm_ctx(tfm); - const __be32 *src = (const __be32 *)in; - __be32 *dst = (__be32 *)out; unsigned int max; u32 tmp[4]; - tmp[0] = be32_to_cpu(src[0]); - tmp[1] = be32_to_cpu(src[1]); - tmp[2] = be32_to_cpu(src[2]); - tmp[3] = be32_to_cpu(src[3]); + tmp[0] = get_unaligned_be32(in); + tmp[1] = get_unaligned_be32(in + 4); + tmp[2] = get_unaligned_be32(in + 8); + tmp[3] = get_unaligned_be32(in + 12); if (cctx->key_length == 16) max = 24; @@ -1041,10 +1029,10 @@ static void camellia_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) camellia_do_decrypt(cctx->key_table, tmp, max); /* do_decrypt returns 0,1 swapped with 2,3 */ - dst[0] = cpu_to_be32(tmp[2]); - dst[1] = cpu_to_be32(tmp[3]); - dst[2] = cpu_to_be32(tmp[0]); - dst[3] = cpu_to_be32(tmp[1]); + put_unaligned_be32(tmp[2], out); + put_unaligned_be32(tmp[3], out + 4); + put_unaligned_be32(tmp[0], out + 8); + put_unaligned_be32(tmp[1], out + 12); } static struct crypto_alg camellia_alg = { @@ -1054,7 +1042,6 @@ static struct crypto_alg camellia_alg = { .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = CAMELLIA_BLOCK_SIZE, .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 3, .cra_module = THIS_MODULE, .cra_u = { .cipher = { From 24a2ee44f2fb0b90b3322c1ecef3b7bfb86880be Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2021 19:02:34 +0100 Subject: [PATCH 124/154] crypto: cast5 - use unaligned accessors instead of alignmask Instead of using an alignmask of 0x3 to ensure 32-bit alignment of the CAST5 input and output blocks, which propagates to mode drivers, and results in pointless copying on architectures that don't care about alignment, use the unaligned accessors, which will do the right thing on each respective architecture, avoiding the need for double buffering. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/cast5_generic.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/crypto/cast5_generic.c b/crypto/cast5_generic.c index 4095085d4e51..0257c14cefc2 100644 --- a/crypto/cast5_generic.c +++ b/crypto/cast5_generic.c @@ -13,7 +13,7 @@ */ -#include +#include #include #include #include @@ -302,8 +302,6 @@ static const u32 sb8[256] = { void __cast5_encrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) { - const __be32 *src = (const __be32 *)inbuf; - __be32 *dst = (__be32 *)outbuf; u32 l, r, t; u32 I; /* used by the Fx macros */ u32 *Km; @@ -315,8 +313,8 @@ void __cast5_encrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) /* (L0,R0) <-- (m1...m64). (Split the plaintext into left and * right 32-bit halves L0 = m1...m32 and R0 = m33...m64.) */ - l = be32_to_cpu(src[0]); - r = be32_to_cpu(src[1]); + l = get_unaligned_be32(inbuf); + r = get_unaligned_be32(inbuf + 4); /* (16 rounds) for i from 1 to 16, compute Li and Ri as follows: * Li = Ri-1; @@ -347,8 +345,8 @@ void __cast5_encrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and * concatenate to form the ciphertext.) */ - dst[0] = cpu_to_be32(r); - dst[1] = cpu_to_be32(l); + put_unaligned_be32(r, outbuf); + put_unaligned_be32(l, outbuf + 4); } EXPORT_SYMBOL_GPL(__cast5_encrypt); @@ -359,8 +357,6 @@ static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) void __cast5_decrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) { - const __be32 *src = (const __be32 *)inbuf; - __be32 *dst = (__be32 *)outbuf; u32 l, r, t; u32 I; u32 *Km; @@ -369,8 +365,8 @@ void __cast5_decrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) Km = c->Km; Kr = c->Kr; - l = be32_to_cpu(src[0]); - r = be32_to_cpu(src[1]); + l = get_unaligned_be32(inbuf); + r = get_unaligned_be32(inbuf + 4); if (!(c->rr)) { t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]); @@ -391,8 +387,8 @@ void __cast5_decrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); - dst[0] = cpu_to_be32(r); - dst[1] = cpu_to_be32(l); + put_unaligned_be32(r, outbuf); + put_unaligned_be32(l, outbuf + 4); } EXPORT_SYMBOL_GPL(__cast5_decrypt); @@ -513,7 +509,6 @@ static struct crypto_alg alg = { .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = CAST5_BLOCK_SIZE, .cra_ctxsize = sizeof(struct cast5_ctx), - .cra_alignmask = 3, .cra_module = THIS_MODULE, .cra_u = { .cipher = { From 80879dd9de7aa34c8de620e9f18e940b919497f7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2021 19:02:35 +0100 Subject: [PATCH 125/154] crypto: cast6 - use unaligned accessors instead of alignmask Instead of using an alignmask of 0x3 to ensure 32-bit alignment of the CAST6 input and output blocks, which propagates to mode drivers, and results in pointless copying on architectures that don't care about alignment, use the unaligned accessors, which will do the right thing on each respective architecture, avoiding the need for double buffering. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/cast6_generic.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c index c77ff6c8a2b2..75346380aa0b 100644 --- a/crypto/cast6_generic.c +++ b/crypto/cast6_generic.c @@ -10,7 +10,7 @@ */ -#include +#include #include #include #include @@ -172,16 +172,14 @@ static inline void QBAR(u32 *block, const u8 *Kr, const u32 *Km) void __cast6_encrypt(const void *ctx, u8 *outbuf, const u8 *inbuf) { const struct cast6_ctx *c = ctx; - const __be32 *src = (const __be32 *)inbuf; - __be32 *dst = (__be32 *)outbuf; u32 block[4]; const u32 *Km; const u8 *Kr; - block[0] = be32_to_cpu(src[0]); - block[1] = be32_to_cpu(src[1]); - block[2] = be32_to_cpu(src[2]); - block[3] = be32_to_cpu(src[3]); + block[0] = get_unaligned_be32(inbuf); + block[1] = get_unaligned_be32(inbuf + 4); + block[2] = get_unaligned_be32(inbuf + 8); + block[3] = get_unaligned_be32(inbuf + 12); Km = c->Km[0]; Kr = c->Kr[0]; Q(block, Kr, Km); Km = c->Km[1]; Kr = c->Kr[1]; Q(block, Kr, Km); @@ -196,10 +194,10 @@ void __cast6_encrypt(const void *ctx, u8 *outbuf, const u8 *inbuf) Km = c->Km[10]; Kr = c->Kr[10]; QBAR(block, Kr, Km); Km = c->Km[11]; Kr = c->Kr[11]; QBAR(block, Kr, Km); - dst[0] = cpu_to_be32(block[0]); - dst[1] = cpu_to_be32(block[1]); - dst[2] = cpu_to_be32(block[2]); - dst[3] = cpu_to_be32(block[3]); + put_unaligned_be32(block[0], outbuf); + put_unaligned_be32(block[1], outbuf + 4); + put_unaligned_be32(block[2], outbuf + 8); + put_unaligned_be32(block[3], outbuf + 12); } EXPORT_SYMBOL_GPL(__cast6_encrypt); @@ -211,16 +209,14 @@ static void cast6_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) void __cast6_decrypt(const void *ctx, u8 *outbuf, const u8 *inbuf) { const struct cast6_ctx *c = ctx; - const __be32 *src = (const __be32 *)inbuf; - __be32 *dst = (__be32 *)outbuf; u32 block[4]; const u32 *Km; const u8 *Kr; - block[0] = be32_to_cpu(src[0]); - block[1] = be32_to_cpu(src[1]); - block[2] = be32_to_cpu(src[2]); - block[3] = be32_to_cpu(src[3]); + block[0] = get_unaligned_be32(inbuf); + block[1] = get_unaligned_be32(inbuf + 4); + block[2] = get_unaligned_be32(inbuf + 8); + block[3] = get_unaligned_be32(inbuf + 12); Km = c->Km[11]; Kr = c->Kr[11]; Q(block, Kr, Km); Km = c->Km[10]; Kr = c->Kr[10]; Q(block, Kr, Km); @@ -235,10 +231,10 @@ void __cast6_decrypt(const void *ctx, u8 *outbuf, const u8 *inbuf) Km = c->Km[1]; Kr = c->Kr[1]; QBAR(block, Kr, Km); Km = c->Km[0]; Kr = c->Kr[0]; QBAR(block, Kr, Km); - dst[0] = cpu_to_be32(block[0]); - dst[1] = cpu_to_be32(block[1]); - dst[2] = cpu_to_be32(block[2]); - dst[3] = cpu_to_be32(block[3]); + put_unaligned_be32(block[0], outbuf); + put_unaligned_be32(block[1], outbuf + 4); + put_unaligned_be32(block[2], outbuf + 8); + put_unaligned_be32(block[3], outbuf + 12); } EXPORT_SYMBOL_GPL(__cast6_decrypt); @@ -254,7 +250,6 @@ static struct crypto_alg alg = { .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = CAST6_BLOCK_SIZE, .cra_ctxsize = sizeof(struct cast6_ctx), - .cra_alignmask = 3, .cra_module = THIS_MODULE, .cra_u = { .cipher = { From e9cbaef5111a403b1e40ddec2bfb9adea2da682f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2021 19:02:36 +0100 Subject: [PATCH 126/154] crypto: fcrypt - drop unneeded alignmask The fcrypt implementation uses memcpy() to access the input and output buffers so there is no need to set an alignmask. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/fcrypt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/fcrypt.c b/crypto/fcrypt.c index 58f935315cf8..c36ea0c8be98 100644 --- a/crypto/fcrypt.c +++ b/crypto/fcrypt.c @@ -396,7 +396,6 @@ static struct crypto_alg fcrypt_alg = { .cra_blocksize = 8, .cra_ctxsize = sizeof(struct fcrypt_ctx), .cra_module = THIS_MODULE, - .cra_alignmask = 3, .cra_u = { .cipher = { .cia_min_keysize = 8, .cia_max_keysize = 8, From af1050a4eca430c49a70e15a2b6972cf5a457f8d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2021 19:02:37 +0100 Subject: [PATCH 127/154] crypto: twofish - use unaligned accessors instead of alignmask Instead of using an alignmask of 0x3 to ensure 32-bit alignment of the Twofish input and output blocks, which propagates to mode drivers, and results in pointless copying on architectures that don't care about alignment, use the unaligned accessors, which will do the right thing on each respective architecture, avoiding the need for double buffering. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/twofish_generic.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/crypto/twofish_generic.c b/crypto/twofish_generic.c index 4f7c033224f9..86b2f067a416 100644 --- a/crypto/twofish_generic.c +++ b/crypto/twofish_generic.c @@ -24,7 +24,7 @@ * Third Edition. */ -#include +#include #include #include #include @@ -83,11 +83,11 @@ * whitening subkey number m. */ #define INPACK(n, x, m) \ - x = le32_to_cpu(src[n]) ^ ctx->w[m] + x = get_unaligned_le32(in + (n) * 4) ^ ctx->w[m] #define OUTUNPACK(n, x, m) \ x ^= ctx->w[m]; \ - dst[n] = cpu_to_le32(x) + put_unaligned_le32(x, out + (n) * 4) @@ -95,8 +95,6 @@ static void twofish_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct twofish_ctx *ctx = crypto_tfm_ctx(tfm); - const __le32 *src = (const __le32 *)in; - __le32 *dst = (__le32 *)out; /* The four 32-bit chunks of the text. */ u32 a, b, c, d; @@ -132,8 +130,6 @@ static void twofish_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) static void twofish_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct twofish_ctx *ctx = crypto_tfm_ctx(tfm); - const __le32 *src = (const __le32 *)in; - __le32 *dst = (__le32 *)out; /* The four 32-bit chunks of the text. */ u32 a, b, c, d; @@ -172,7 +168,6 @@ static struct crypto_alg alg = { .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = TF_BLOCK_SIZE, .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 3, .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = TF_MIN_KEY_SIZE, From 6c2ab5bcb7fa367f055e4ed876de4b6e889f0cb7 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 2 Feb 2021 10:06:15 +0800 Subject: [PATCH 128/154] crypto: caam - Replace DEFINE_SIMPLE_ATTRIBUTE with DEFINE_DEBUGFS_ATTRIBUTE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following coccicheck warning: ./drivers/crypto/caam/debugfs.c:23:0-23: WARNING: caam_fops_u64_ro should be defined with DEFINE_DEBUGFS_ATTRIBUTE. ./drivers/crypto/caam/debugfs.c:22:0-23: WARNING: caam_fops_u32_ro should be defined with DEFINE_DEBUGFS_ATTRIBUTE. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/caam/debugfs.c b/drivers/crypto/caam/debugfs.c index 8ebf18398166..806bb20d2aa1 100644 --- a/drivers/crypto/caam/debugfs.c +++ b/drivers/crypto/caam/debugfs.c @@ -19,8 +19,8 @@ static int caam_debugfs_u32_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n"); -DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n"); #ifdef CONFIG_CAAM_QI /* From 578f23d359bf7c988b1c9026d4711de7112b0c1c Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 2 Feb 2021 11:17:30 +0800 Subject: [PATCH 129/154] crypto: powerpc/sha256 - remove unneeded semicolon Eliminate the following coccicheck warning: ./arch/powerpc/crypto/sha256-spe-glue.c:132:2-3: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Herbert Xu --- arch/powerpc/crypto/sha256-spe-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c index a6e650a97d8f..ffedea7e4bef 100644 --- a/arch/powerpc/crypto/sha256-spe-glue.c +++ b/arch/powerpc/crypto/sha256-spe-glue.c @@ -129,7 +129,7 @@ static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data, src += bytes; len -= bytes; - }; + } memcpy((char *)sctx->buf, src, len); return 0; From a53ab94eb6850c3657392e2d2ce9b38c387a2633 Mon Sep 17 00:00:00 2001 From: Daniele Alessandrelli Date: Wed, 3 Feb 2021 11:28:37 +0000 Subject: [PATCH 130/154] crypto: ecdh_helper - Ensure 'len >= secret.len' in decode_key() The length ('len' parameter) passed to crypto_ecdh_decode_key() is never checked against the length encoded in the passed buffer ('buf' parameter). This could lead to an out-of-bounds access when the passed length is less than the encoded length. Add a check to prevent that. Fixes: 3c4b23901a0c7 ("crypto: ecdh - Add ECDH software support") Signed-off-by: Daniele Alessandrelli Signed-off-by: Herbert Xu --- crypto/ecdh_helper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/ecdh_helper.c b/crypto/ecdh_helper.c index 66fcb2ea8154..fca63b559f65 100644 --- a/crypto/ecdh_helper.c +++ b/crypto/ecdh_helper.c @@ -67,6 +67,9 @@ int crypto_ecdh_decode_key(const char *buf, unsigned int len, if (secret.type != CRYPTO_KPP_SECRET_TYPE_ECDH) return -EINVAL; + if (unlikely(len < secret.len)) + return -EINVAL; + ptr = ecdh_unpack_data(¶ms->curve_id, ptr, sizeof(params->curve_id)); ptr = ecdh_unpack_data(¶ms->key_size, ptr, sizeof(params->key_size)); if (secret.len != crypto_ecdh_key_len(params)) From 5a69e1b73d5460953b8198ab03e9e1c86c5aeb11 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 3 Feb 2021 12:36:19 +0100 Subject: [PATCH 131/154] crypto: arm64/sha1-ce - simplify NEON yield Instead of calling into kernel_neon_end() and kernel_neon_begin() (and potentially into schedule()) from the assembler code when running in task mode and a reschedule is pending, perform only the preempt count check in assembler, but simply return early in this case, and let the C code deal with the consequences. This reverts commit 7df8d164753e6e6f229b72767595072bc6a71f48. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/sha1-ce-core.S | 47 ++++++++++++-------------------- arch/arm64/crypto/sha1-ce-glue.c | 22 ++++++++------- 2 files changed, 29 insertions(+), 40 deletions(-) diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index 92d0d2753e81..8c02bbc2684e 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -62,40 +62,34 @@ .endm /* - * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, - * int blocks) + * int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, + * int blocks) */ SYM_FUNC_START(sha1_ce_transform) - frame_push 3 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - /* load round constants */ -0: loadrc k0.4s, 0x5a827999, w6 + loadrc k0.4s, 0x5a827999, w6 loadrc k1.4s, 0x6ed9eba1, w6 loadrc k2.4s, 0x8f1bbcdc, w6 loadrc k3.4s, 0xca62c1d6, w6 /* load state */ - ld1 {dgav.4s}, [x19] - ldr dgb, [x19, #16] + ld1 {dgav.4s}, [x0] + ldr dgb, [x0, #16] /* load sha1_ce_state::finalize */ ldr_l w4, sha1_ce_offsetof_finalize, x4 - ldr w4, [x19, x4] + ldr w4, [x0, x4] /* load input */ -1: ld1 {v8.4s-v11.4s}, [x20], #64 - sub w21, w21, #1 +0: ld1 {v8.4s-v11.4s}, [x1], #64 + sub w2, w2, #1 CPU_LE( rev32 v8.16b, v8.16b ) CPU_LE( rev32 v9.16b, v9.16b ) CPU_LE( rev32 v10.16b, v10.16b ) CPU_LE( rev32 v11.16b, v11.16b ) -2: add t0.4s, v8.4s, k0.4s +1: add t0.4s, v8.4s, k0.4s mov dg0v.16b, dgav.16b add_update c, ev, k0, 8, 9, 10, 11, dgb @@ -126,25 +120,18 @@ CPU_LE( rev32 v11.16b, v11.16b ) add dgbv.2s, dgbv.2s, dg1v.2s add dgav.4s, dgav.4s, dg0v.4s - cbz w21, 3f - - if_will_cond_yield_neon - st1 {dgav.4s}, [x19] - str dgb, [x19, #16] - do_cond_yield_neon + cbz w2, 2f + cond_yield 3f, x5 b 0b - endif_yield_neon - - b 1b /* * Final block: add padding and total bit count. * Skip if the input size was not a round multiple of the block size, * the padding is handled by the C code in that case. */ -3: cbz x4, 4f +2: cbz x4, 3f ldr_l w4, sha1_ce_offsetof_count, x4 - ldr x4, [x19, x4] + ldr x4, [x0, x4] movi v9.2d, #0 mov x8, #0x80000000 movi v10.2d, #0 @@ -153,11 +140,11 @@ CPU_LE( rev32 v11.16b, v11.16b ) mov x4, #0 mov v11.d[0], xzr mov v11.d[1], x7 - b 2b + b 1b /* store new state */ -4: st1 {dgav.4s}, [x19] - str dgb, [x19, #16] - frame_pop +3: st1 {dgav.4s}, [x0] + str dgb, [x0, #16] + mov w0, w2 ret SYM_FUNC_END(sha1_ce_transform) diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index c1362861765f..71fa4f1122d7 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -29,14 +29,22 @@ struct sha1_ce_state { extern const u32 sha1_ce_offsetof_count; extern const u32 sha1_ce_offsetof_finalize; -asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, - int blocks); +asmlinkage int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, + int blocks); static void __sha1_ce_transform(struct sha1_state *sst, u8 const *src, int blocks) { - sha1_ce_transform(container_of(sst, struct sha1_ce_state, sst), src, - blocks); + while (blocks) { + int rem; + + kernel_neon_begin(); + rem = sha1_ce_transform(container_of(sst, struct sha1_ce_state, + sst), src, blocks); + kernel_neon_end(); + src += (blocks - rem) * SHA1_BLOCK_SIZE; + blocks = rem; + } } const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count); @@ -51,9 +59,7 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data, return crypto_sha1_update(desc, data, len); sctx->finalize = 0; - kernel_neon_begin(); sha1_base_do_update(desc, data, len, __sha1_ce_transform); - kernel_neon_end(); return 0; } @@ -73,11 +79,9 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, */ sctx->finalize = finalize; - kernel_neon_begin(); sha1_base_do_update(desc, data, len, __sha1_ce_transform); if (!finalize) sha1_base_do_finalize(desc, __sha1_ce_transform); - kernel_neon_end(); return sha1_base_finish(desc, out); } @@ -89,9 +93,7 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out) return crypto_sha1_finup(desc, NULL, 0, out); sctx->finalize = 0; - kernel_neon_begin(); sha1_base_do_finalize(desc, __sha1_ce_transform); - kernel_neon_end(); return sha1_base_finish(desc, out); } From b2eadbf40e8f82279f145aa841727b2e01f7dc1d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 3 Feb 2021 12:36:20 +0100 Subject: [PATCH 132/154] crypto: arm64/sha2-ce - simplify NEON yield Instead of calling into kernel_neon_end() and kernel_neon_begin() (and potentially into schedule()) from the assembler code when running in task mode and a reschedule is pending, perform only the preempt count check in assembler, but simply return early in this case, and let the C code deal with the consequences. This reverts commit d82f37ab5e2426287013eba38b1212e8b71e5be3. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/sha2-ce-core.S | 38 +++++++++++--------------------- arch/arm64/crypto/sha2-ce-glue.c | 22 +++++++++--------- 2 files changed, 25 insertions(+), 35 deletions(-) diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 3f9d0f326987..6cdea7d56059 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -76,36 +76,30 @@ */ .text SYM_FUNC_START(sha2_ce_transform) - frame_push 3 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - /* load round constants */ -0: adr_l x8, .Lsha2_rcon + adr_l x8, .Lsha2_rcon ld1 { v0.4s- v3.4s}, [x8], #64 ld1 { v4.4s- v7.4s}, [x8], #64 ld1 { v8.4s-v11.4s}, [x8], #64 ld1 {v12.4s-v15.4s}, [x8] /* load state */ - ld1 {dgav.4s, dgbv.4s}, [x19] + ld1 {dgav.4s, dgbv.4s}, [x0] /* load sha256_ce_state::finalize */ ldr_l w4, sha256_ce_offsetof_finalize, x4 - ldr w4, [x19, x4] + ldr w4, [x0, x4] /* load input */ -1: ld1 {v16.4s-v19.4s}, [x20], #64 - sub w21, w21, #1 +0: ld1 {v16.4s-v19.4s}, [x1], #64 + sub w2, w2, #1 CPU_LE( rev32 v16.16b, v16.16b ) CPU_LE( rev32 v17.16b, v17.16b ) CPU_LE( rev32 v18.16b, v18.16b ) CPU_LE( rev32 v19.16b, v19.16b ) -2: add t0.4s, v16.4s, v0.4s +1: add t0.4s, v16.4s, v0.4s mov dg0v.16b, dgav.16b mov dg1v.16b, dgbv.16b @@ -134,24 +128,18 @@ CPU_LE( rev32 v19.16b, v19.16b ) add dgbv.4s, dgbv.4s, dg1v.4s /* handled all input blocks? */ - cbz w21, 3f - - if_will_cond_yield_neon - st1 {dgav.4s, dgbv.4s}, [x19] - do_cond_yield_neon + cbz w2, 2f + cond_yield 3f, x5 b 0b - endif_yield_neon - - b 1b /* * Final block: add padding and total bit count. * Skip if the input size was not a round multiple of the block size, * the padding is handled by the C code in that case. */ -3: cbz x4, 4f +2: cbz x4, 3f ldr_l w4, sha256_ce_offsetof_count, x4 - ldr x4, [x19, x4] + ldr x4, [x0, x4] movi v17.2d, #0 mov x8, #0x80000000 movi v18.2d, #0 @@ -160,10 +148,10 @@ CPU_LE( rev32 v19.16b, v19.16b ) mov x4, #0 mov v19.d[0], xzr mov v19.d[1], x7 - b 2b + b 1b /* store new state */ -4: st1 {dgav.4s, dgbv.4s}, [x19] - frame_pop +3: st1 {dgav.4s, dgbv.4s}, [x0] + mov w0, w2 ret SYM_FUNC_END(sha2_ce_transform) diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index ded3a6488f81..c57a6119fefc 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -30,14 +30,22 @@ struct sha256_ce_state { extern const u32 sha256_ce_offsetof_count; extern const u32 sha256_ce_offsetof_finalize; -asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, - int blocks); +asmlinkage int sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, + int blocks); static void __sha2_ce_transform(struct sha256_state *sst, u8 const *src, int blocks) { - sha2_ce_transform(container_of(sst, struct sha256_ce_state, sst), src, - blocks); + while (blocks) { + int rem; + + kernel_neon_begin(); + rem = sha2_ce_transform(container_of(sst, struct sha256_ce_state, + sst), src, blocks); + kernel_neon_end(); + src += (blocks - rem) * SHA256_BLOCK_SIZE; + blocks = rem; + } } const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state, @@ -63,9 +71,7 @@ static int sha256_ce_update(struct shash_desc *desc, const u8 *data, __sha256_block_data_order); sctx->finalize = 0; - kernel_neon_begin(); sha256_base_do_update(desc, data, len, __sha2_ce_transform); - kernel_neon_end(); return 0; } @@ -90,11 +96,9 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, */ sctx->finalize = finalize; - kernel_neon_begin(); sha256_base_do_update(desc, data, len, __sha2_ce_transform); if (!finalize) sha256_base_do_finalize(desc, __sha2_ce_transform); - kernel_neon_end(); return sha256_base_finish(desc, out); } @@ -108,9 +112,7 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out) } sctx->finalize = 0; - kernel_neon_begin(); sha256_base_do_finalize(desc, __sha2_ce_transform); - kernel_neon_end(); return sha256_base_finish(desc, out); } From 9ecc9f31d0a43d538d80f51debfb25d75da44892 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 3 Feb 2021 12:36:21 +0100 Subject: [PATCH 133/154] crypto: arm64/sha3-ce - simplify NEON yield Instead of calling into kernel_neon_end() and kernel_neon_begin() (and potentially into schedule()) from the assembler code when running in task mode and a reschedule is pending, perform only the preempt count check in assembler, but simply return early in this case, and let the C code deal with the consequences. This reverts commit 7edc86cb1c18b4c274672232117586ea2bef1d9a. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/sha3-ce-core.S | 81 ++++++++++++-------------------- arch/arm64/crypto/sha3-ce-glue.c | 14 ++++-- 2 files changed, 39 insertions(+), 56 deletions(-) diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S index 1cfb768df350..6f5208414fe3 100644 --- a/arch/arm64/crypto/sha3-ce-core.S +++ b/arch/arm64/crypto/sha3-ce-core.S @@ -37,20 +37,13 @@ .endm /* - * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size) + * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size) */ .text SYM_FUNC_START(sha3_ce_transform) - frame_push 4 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - -0: /* load state */ - add x8, x19, #32 - ld1 { v0.1d- v3.1d}, [x19] + /* load state */ + add x8, x0, #32 + ld1 { v0.1d- v3.1d}, [x0] ld1 { v4.1d- v7.1d}, [x8], #32 ld1 { v8.1d-v11.1d}, [x8], #32 ld1 {v12.1d-v15.1d}, [x8], #32 @@ -58,13 +51,13 @@ SYM_FUNC_START(sha3_ce_transform) ld1 {v20.1d-v23.1d}, [x8], #32 ld1 {v24.1d}, [x8] -1: sub w21, w21, #1 +0: sub w2, w2, #1 mov w8, #24 adr_l x9, .Lsha3_rcon /* load input */ - ld1 {v25.8b-v28.8b}, [x20], #32 - ld1 {v29.8b-v31.8b}, [x20], #24 + ld1 {v25.8b-v28.8b}, [x1], #32 + ld1 {v29.8b-v31.8b}, [x1], #24 eor v0.8b, v0.8b, v25.8b eor v1.8b, v1.8b, v26.8b eor v2.8b, v2.8b, v27.8b @@ -73,10 +66,10 @@ SYM_FUNC_START(sha3_ce_transform) eor v5.8b, v5.8b, v30.8b eor v6.8b, v6.8b, v31.8b - tbnz x22, #6, 3f // SHA3-512 + tbnz x3, #6, 2f // SHA3-512 - ld1 {v25.8b-v28.8b}, [x20], #32 - ld1 {v29.8b-v30.8b}, [x20], #16 + ld1 {v25.8b-v28.8b}, [x1], #32 + ld1 {v29.8b-v30.8b}, [x1], #16 eor v7.8b, v7.8b, v25.8b eor v8.8b, v8.8b, v26.8b eor v9.8b, v9.8b, v27.8b @@ -84,34 +77,34 @@ SYM_FUNC_START(sha3_ce_transform) eor v11.8b, v11.8b, v29.8b eor v12.8b, v12.8b, v30.8b - tbnz x22, #4, 2f // SHA3-384 or SHA3-224 + tbnz x3, #4, 1f // SHA3-384 or SHA3-224 // SHA3-256 - ld1 {v25.8b-v28.8b}, [x20], #32 + ld1 {v25.8b-v28.8b}, [x1], #32 eor v13.8b, v13.8b, v25.8b eor v14.8b, v14.8b, v26.8b eor v15.8b, v15.8b, v27.8b eor v16.8b, v16.8b, v28.8b - b 4f + b 3f -2: tbz x22, #2, 4f // bit 2 cleared? SHA-384 +1: tbz x3, #2, 3f // bit 2 cleared? SHA-384 // SHA3-224 - ld1 {v25.8b-v28.8b}, [x20], #32 - ld1 {v29.8b}, [x20], #8 + ld1 {v25.8b-v28.8b}, [x1], #32 + ld1 {v29.8b}, [x1], #8 eor v13.8b, v13.8b, v25.8b eor v14.8b, v14.8b, v26.8b eor v15.8b, v15.8b, v27.8b eor v16.8b, v16.8b, v28.8b eor v17.8b, v17.8b, v29.8b - b 4f + b 3f // SHA3-512 -3: ld1 {v25.8b-v26.8b}, [x20], #16 +2: ld1 {v25.8b-v26.8b}, [x1], #16 eor v7.8b, v7.8b, v25.8b eor v8.8b, v8.8b, v26.8b -4: sub w8, w8, #1 +3: sub w8, w8, #1 eor3 v29.16b, v4.16b, v9.16b, v14.16b eor3 v26.16b, v1.16b, v6.16b, v11.16b @@ -190,33 +183,19 @@ SYM_FUNC_START(sha3_ce_transform) eor v0.16b, v0.16b, v31.16b - cbnz w8, 4b - cbz w21, 5f - - if_will_cond_yield_neon - add x8, x19, #32 - st1 { v0.1d- v3.1d}, [x19] - st1 { v4.1d- v7.1d}, [x8], #32 - st1 { v8.1d-v11.1d}, [x8], #32 - st1 {v12.1d-v15.1d}, [x8], #32 - st1 {v16.1d-v19.1d}, [x8], #32 - st1 {v20.1d-v23.1d}, [x8], #32 - st1 {v24.1d}, [x8] - do_cond_yield_neon - b 0b - endif_yield_neon - - b 1b + cbnz w8, 3b + cond_yield 3f, x8 + cbnz w2, 0b /* save state */ -5: st1 { v0.1d- v3.1d}, [x19], #32 - st1 { v4.1d- v7.1d}, [x19], #32 - st1 { v8.1d-v11.1d}, [x19], #32 - st1 {v12.1d-v15.1d}, [x19], #32 - st1 {v16.1d-v19.1d}, [x19], #32 - st1 {v20.1d-v23.1d}, [x19], #32 - st1 {v24.1d}, [x19] - frame_pop +3: st1 { v0.1d- v3.1d}, [x0], #32 + st1 { v4.1d- v7.1d}, [x0], #32 + st1 { v8.1d-v11.1d}, [x0], #32 + st1 {v12.1d-v15.1d}, [x0], #32 + st1 {v16.1d-v19.1d}, [x0], #32 + st1 {v20.1d-v23.1d}, [x0], #32 + st1 {v24.1d}, [x0] + mov w0, w2 ret SYM_FUNC_END(sha3_ce_transform) diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c index 7288d3046354..8c65cecf560a 100644 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ b/arch/arm64/crypto/sha3-ce-glue.c @@ -28,8 +28,8 @@ MODULE_ALIAS_CRYPTO("sha3-256"); MODULE_ALIAS_CRYPTO("sha3-384"); MODULE_ALIAS_CRYPTO("sha3-512"); -asmlinkage void sha3_ce_transform(u64 *st, const u8 *data, int blocks, - int md_len); +asmlinkage int sha3_ce_transform(u64 *st, const u8 *data, int blocks, + int md_len); static int sha3_update(struct shash_desc *desc, const u8 *data, unsigned int len) @@ -59,11 +59,15 @@ static int sha3_update(struct shash_desc *desc, const u8 *data, blocks = len / sctx->rsiz; len %= sctx->rsiz; - if (blocks) { + while (blocks) { + int rem; + kernel_neon_begin(); - sha3_ce_transform(sctx->st, data, blocks, digest_size); + rem = sha3_ce_transform(sctx->st, data, blocks, + digest_size); kernel_neon_end(); - data += blocks * sctx->rsiz; + data += (blocks - rem) * sctx->rsiz; + blocks = rem; } } From 5f6cb2e6176815cf631593eb7a94a2725d8528e5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 3 Feb 2021 12:36:22 +0100 Subject: [PATCH 134/154] crypto: arm64/sha512-ce - simplify NEON yield Instead of calling into kernel_neon_end() and kernel_neon_begin() (and potentially into schedule()) from the assembler code when running in task mode and a reschedule is pending, perform only the preempt count check in assembler, but simply return early in this case, and let the C code deal with the consequences. This reverts commit 6caf7adc5e458f77f550b6c6ca8effa152d61b4a. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/sha512-ce-core.S | 29 +++++----------- arch/arm64/crypto/sha512-ce-glue.c | 53 +++++++++++++++--------------- 2 files changed, 34 insertions(+), 48 deletions(-) diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S index cde606c0323e..d6e7f6c95fa6 100644 --- a/arch/arm64/crypto/sha512-ce-core.S +++ b/arch/arm64/crypto/sha512-ce-core.S @@ -107,23 +107,17 @@ */ .text SYM_FUNC_START(sha512_ce_transform) - frame_push 3 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - /* load state */ -0: ld1 {v8.2d-v11.2d}, [x19] + ld1 {v8.2d-v11.2d}, [x0] /* load first 4 round constants */ adr_l x3, .Lsha512_rcon ld1 {v20.2d-v23.2d}, [x3], #64 /* load input */ -1: ld1 {v12.2d-v15.2d}, [x20], #64 - ld1 {v16.2d-v19.2d}, [x20], #64 - sub w21, w21, #1 +0: ld1 {v12.2d-v15.2d}, [x1], #64 + ld1 {v16.2d-v19.2d}, [x1], #64 + sub w2, w2, #1 CPU_LE( rev64 v12.16b, v12.16b ) CPU_LE( rev64 v13.16b, v13.16b ) @@ -201,19 +195,12 @@ CPU_LE( rev64 v19.16b, v19.16b ) add v10.2d, v10.2d, v2.2d add v11.2d, v11.2d, v3.2d + cond_yield 3f, x4 /* handled all input blocks? */ - cbz w21, 3f - - if_will_cond_yield_neon - st1 {v8.2d-v11.2d}, [x19] - do_cond_yield_neon - b 0b - endif_yield_neon - - b 1b + cbnz w2, 0b /* store new state */ -3: st1 {v8.2d-v11.2d}, [x19] - frame_pop +3: st1 {v8.2d-v11.2d}, [x0] + mov w0, w2 ret SYM_FUNC_END(sha512_ce_transform) diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index a6b1adf31c56..e62a094a9d52 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c +++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -26,11 +26,25 @@ MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("sha384"); MODULE_ALIAS_CRYPTO("sha512"); -asmlinkage void sha512_ce_transform(struct sha512_state *sst, u8 const *src, - int blocks); +asmlinkage int sha512_ce_transform(struct sha512_state *sst, u8 const *src, + int blocks); asmlinkage void sha512_block_data_order(u64 *digest, u8 const *src, int blocks); +static void __sha512_ce_transform(struct sha512_state *sst, u8 const *src, + int blocks) +{ + while (blocks) { + int rem; + + kernel_neon_begin(); + rem = sha512_ce_transform(sst, src, blocks); + kernel_neon_end(); + src += (blocks - rem) * SHA512_BLOCK_SIZE; + blocks = rem; + } +} + static void __sha512_block_data_order(struct sha512_state *sst, u8 const *src, int blocks) { @@ -40,45 +54,30 @@ static void __sha512_block_data_order(struct sha512_state *sst, u8 const *src, static int sha512_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - if (!crypto_simd_usable()) - return sha512_base_do_update(desc, data, len, - __sha512_block_data_order); - - kernel_neon_begin(); - sha512_base_do_update(desc, data, len, sha512_ce_transform); - kernel_neon_end(); + sha512_block_fn *fn = crypto_simd_usable() ? __sha512_ce_transform + : __sha512_block_data_order; + sha512_base_do_update(desc, data, len, fn); return 0; } static int sha512_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - if (!crypto_simd_usable()) { - if (len) - sha512_base_do_update(desc, data, len, - __sha512_block_data_order); - sha512_base_do_finalize(desc, __sha512_block_data_order); - return sha512_base_finish(desc, out); - } + sha512_block_fn *fn = crypto_simd_usable() ? __sha512_ce_transform + : __sha512_block_data_order; - kernel_neon_begin(); - sha512_base_do_update(desc, data, len, sha512_ce_transform); - sha512_base_do_finalize(desc, sha512_ce_transform); - kernel_neon_end(); + sha512_base_do_update(desc, data, len, fn); + sha512_base_do_finalize(desc, fn); return sha512_base_finish(desc, out); } static int sha512_ce_final(struct shash_desc *desc, u8 *out) { - if (!crypto_simd_usable()) { - sha512_base_do_finalize(desc, __sha512_block_data_order); - return sha512_base_finish(desc, out); - } + sha512_block_fn *fn = crypto_simd_usable() ? __sha512_ce_transform + : __sha512_block_data_order; - kernel_neon_begin(); - sha512_base_do_finalize(desc, sha512_ce_transform); - kernel_neon_end(); + sha512_base_do_finalize(desc, fn); return sha512_base_finish(desc, out); } From f5943ef456f8961ed1266a5713b8faf73019405b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 3 Feb 2021 12:36:23 +0100 Subject: [PATCH 135/154] crypto: arm64/aes-neonbs - remove NEON yield calls There is no need for elaborate yield handling in the bit-sliced NEON implementation of AES, given that skciphers are naturally bounded by the size of the chunks returned by the skcipher_walk API. So remove the yield calls from the asm code. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-neonbs-core.S | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index 63a52ad9a75c..a3405b8c344b 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -613,7 +613,6 @@ SYM_FUNC_END(aesbs_decrypt8) st1 {\o7\().16b}, [x19], #16 cbz x23, 1f - cond_yield_neon b 99b 1: frame_pop @@ -715,7 +714,6 @@ SYM_FUNC_START(aesbs_cbc_decrypt) 1: st1 {v24.16b}, [x24] // store IV cbz x23, 2f - cond_yield_neon b 99b 2: frame_pop @@ -801,7 +799,7 @@ SYM_FUNC_END(__xts_crypt8) mov x23, x4 mov x24, x5 -0: movi v30.2s, #0x1 + movi v30.2s, #0x1 movi v25.2s, #0x87 uzp1 v30.4s, v30.4s, v25.4s ld1 {v25.16b}, [x24] @@ -846,7 +844,6 @@ SYM_FUNC_END(__xts_crypt8) cbz x23, 1f st1 {v25.16b}, [x24] - cond_yield_neon 0b b 99b 1: st1 {v25.16b}, [x24] @@ -889,7 +886,7 @@ SYM_FUNC_START(aesbs_ctr_encrypt) cset x26, ne add x23, x23, x26 // do one extra block if final -98: ldp x7, x8, [x24] + ldp x7, x8, [x24] ld1 {v0.16b}, [x24] CPU_LE( rev x7, x7 ) CPU_LE( rev x8, x8 ) @@ -967,7 +964,6 @@ CPU_LE( rev x8, x8 ) st1 {v0.16b}, [x24] cbz x23, .Lctr_done - cond_yield_neon 98b b 99b .Lctr_done: From f0070f4a7934e4deba83fdde70c79d9798b2366b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 3 Feb 2021 12:36:24 +0100 Subject: [PATCH 136/154] crypto: arm64/aes-ce-mac - simplify NEON yield Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 21 +++++++++----- arch/arm64/crypto/aes-modes.S | 52 +++++++++++++---------------------- 2 files changed, 33 insertions(+), 40 deletions(-) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index e7f116d833b9..17e735931a0c 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -105,9 +105,9 @@ asmlinkage void aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, int blocks, u8 iv[], u32 const rk2[]); -asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds, - int blocks, u8 dg[], int enc_before, - int enc_after); +asmlinkage int aes_mac_update(u8 const in[], u32 const rk[], int rounds, + int blocks, u8 dg[], int enc_before, + int enc_after); struct crypto_aes_xts_ctx { struct crypto_aes_ctx key1; @@ -856,10 +856,17 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks, int rounds = 6 + ctx->key_length / 4; if (crypto_simd_usable()) { - kernel_neon_begin(); - aes_mac_update(in, ctx->key_enc, rounds, blocks, dg, enc_before, - enc_after); - kernel_neon_end(); + int rem; + + do { + kernel_neon_begin(); + rem = aes_mac_update(in, ctx->key_enc, rounds, blocks, + dg, enc_before, enc_after); + kernel_neon_end(); + in += (blocks - rem) * AES_BLOCK_SIZE; + blocks = rem; + enc_before = 0; + } while (blocks); } else { if (enc_before) aes_encrypt(ctx, dg, dg); diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 3d1f97799899..bbdb54702aa7 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -678,61 +678,47 @@ AES_FUNC_END(aes_xts_decrypt) * int blocks, u8 dg[], int enc_before, int enc_after) */ AES_FUNC_START(aes_mac_update) - frame_push 6 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x6 - - ld1 {v0.16b}, [x23] /* get dg */ + ld1 {v0.16b}, [x4] /* get dg */ enc_prepare w2, x1, x7 cbz w5, .Lmacloop4x encrypt_block v0, w2, x1, x7, w8 .Lmacloop4x: - subs w22, w22, #4 + subs w3, w3, #4 bmi .Lmac1x - ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */ + ld1 {v1.16b-v4.16b}, [x0], #64 /* get next pt block */ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ - encrypt_block v0, w21, x20, x7, w8 + encrypt_block v0, w2, x1, x7, w8 eor v0.16b, v0.16b, v2.16b - encrypt_block v0, w21, x20, x7, w8 + encrypt_block v0, w2, x1, x7, w8 eor v0.16b, v0.16b, v3.16b - encrypt_block v0, w21, x20, x7, w8 + encrypt_block v0, w2, x1, x7, w8 eor v0.16b, v0.16b, v4.16b - cmp w22, wzr - csinv x5, x24, xzr, eq + cmp w3, wzr + csinv x5, x6, xzr, eq cbz w5, .Lmacout - encrypt_block v0, w21, x20, x7, w8 - st1 {v0.16b}, [x23] /* return dg */ - cond_yield_neon .Lmacrestart + encrypt_block v0, w2, x1, x7, w8 + st1 {v0.16b}, [x4] /* return dg */ + cond_yield .Lmacout, x7 b .Lmacloop4x .Lmac1x: - add w22, w22, #4 + add w3, w3, #4 .Lmacloop: - cbz w22, .Lmacout - ld1 {v1.16b}, [x19], #16 /* get next pt block */ + cbz w3, .Lmacout + ld1 {v1.16b}, [x0], #16 /* get next pt block */ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ - subs w22, w22, #1 - csinv x5, x24, xzr, eq + subs w3, w3, #1 + csinv x5, x6, xzr, eq cbz w5, .Lmacout .Lmacenc: - encrypt_block v0, w21, x20, x7, w8 + encrypt_block v0, w2, x1, x7, w8 b .Lmacloop .Lmacout: - st1 {v0.16b}, [x23] /* return dg */ - frame_pop + st1 {v0.16b}, [x4] /* return dg */ + mov w0, w3 ret - -.Lmacrestart: - ld1 {v0.16b}, [x23] /* get dg */ - enc_prepare w21, x20, x0 - b .Lmacloop4x AES_FUNC_END(aes_mac_update) From fc754c024a343b836cfbb794afd3c7a87f625dbb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 3 Feb 2021 12:36:25 +0100 Subject: [PATCH 137/154] crypto: arm64/crc-t10dif - move NEON yield to C code Instead of yielding from the bowels of the asm routine if a reschedule is needed, divide up the input into 4 KB chunks in the C glue. This simplifies the code substantially, and avoids scheduling out the task with the asm routine on the call stack, which is undesirable from a CFI/instrumentation point of view. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/crct10dif-ce-core.S | 43 +++++++-------------------- arch/arm64/crypto/crct10dif-ce-glue.c | 30 +++++++++++++++---- 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S index 111d9c9abddd..dce6dcebfca1 100644 --- a/arch/arm64/crypto/crct10dif-ce-core.S +++ b/arch/arm64/crypto/crct10dif-ce-core.S @@ -68,10 +68,10 @@ .text .arch armv8-a+crypto - init_crc .req w19 - buf .req x20 - len .req x21 - fold_consts_ptr .req x22 + init_crc .req w0 + buf .req x1 + len .req x2 + fold_consts_ptr .req x3 fold_consts .req v10 @@ -257,12 +257,6 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 ) .endm .macro crc_t10dif_pmull, p - frame_push 4, 128 - - mov init_crc, w0 - mov buf, x1 - mov len, x2 - __pmull_init_\p // For sizes less than 256 bytes, we can't fold 128 bytes at a time. @@ -317,26 +311,7 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) fold_32_bytes \p, v6, v7 subs len, len, #128 - b.lt .Lfold_128_bytes_loop_done_\@ - - if_will_cond_yield_neon - stp q0, q1, [sp, #.Lframe_local_offset] - stp q2, q3, [sp, #.Lframe_local_offset + 32] - stp q4, q5, [sp, #.Lframe_local_offset + 64] - stp q6, q7, [sp, #.Lframe_local_offset + 96] - do_cond_yield_neon - ldp q0, q1, [sp, #.Lframe_local_offset] - ldp q2, q3, [sp, #.Lframe_local_offset + 32] - ldp q4, q5, [sp, #.Lframe_local_offset + 64] - ldp q6, q7, [sp, #.Lframe_local_offset + 96] - ld1 {fold_consts.2d}, [fold_consts_ptr] - __pmull_init_\p - __pmull_pre_\p fold_consts - endif_yield_neon - - b .Lfold_128_bytes_loop_\@ - -.Lfold_128_bytes_loop_done_\@: + b.ge .Lfold_128_bytes_loop_\@ // Now fold the 112 bytes in v0-v6 into the 16 bytes in v7. @@ -453,7 +428,9 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) // Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of v0. umov w0, v0.h[0] - frame_pop + .ifc \p, p8 + ldp x29, x30, [sp], #16 + .endif ret .Lless_than_256_bytes_\@: @@ -489,7 +466,9 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) // Assumes len >= 16. // SYM_FUNC_START(crc_t10dif_pmull_p8) - crc_t10dif_pmull p8 + stp x29, x30, [sp, #-16]! + mov x29, sp + crc_t10dif_pmull p8 SYM_FUNC_END(crc_t10dif_pmull_p8) .align 5 diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c index ccc3f6067742..09eb1456aed4 100644 --- a/arch/arm64/crypto/crct10dif-ce-glue.c +++ b/arch/arm64/crypto/crct10dif-ce-glue.c @@ -37,9 +37,18 @@ static int crct10dif_update_pmull_p8(struct shash_desc *desc, const u8 *data, u16 *crc = shash_desc_ctx(desc); if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) { - kernel_neon_begin(); - *crc = crc_t10dif_pmull_p8(*crc, data, length); - kernel_neon_end(); + do { + unsigned int chunk = length; + + if (chunk > SZ_4K + CRC_T10DIF_PMULL_CHUNK_SIZE) + chunk = SZ_4K; + + kernel_neon_begin(); + *crc = crc_t10dif_pmull_p8(*crc, data, chunk); + kernel_neon_end(); + data += chunk; + length -= chunk; + } while (length); } else { *crc = crc_t10dif_generic(*crc, data, length); } @@ -53,9 +62,18 @@ static int crct10dif_update_pmull_p64(struct shash_desc *desc, const u8 *data, u16 *crc = shash_desc_ctx(desc); if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) { - kernel_neon_begin(); - *crc = crc_t10dif_pmull_p64(*crc, data, length); - kernel_neon_end(); + do { + unsigned int chunk = length; + + if (chunk > SZ_4K + CRC_T10DIF_PMULL_CHUNK_SIZE) + chunk = SZ_4K; + + kernel_neon_begin(); + *crc = crc_t10dif_pmull_p64(*crc, data, chunk); + kernel_neon_end(); + data += chunk; + length -= chunk; + } while (length); } else { *crc = crc_t10dif_generic(*crc, data, length); } From 14ab6de485dddbb414057417831b2491fe2a8729 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Wed, 3 Feb 2021 20:26:06 +0800 Subject: [PATCH 138/154] hwrng: optee - Use device-managed registration API Use devm_hwrng_register to get rid of manual unregistration. Signed-off-by: Tian Tao Reviewed-by: Sumit Garg Signed-off-by: Herbert Xu --- drivers/char/hw_random/optee-rng.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/hw_random/optee-rng.c b/drivers/char/hw_random/optee-rng.c index a99d82949981..135a82590923 100644 --- a/drivers/char/hw_random/optee-rng.c +++ b/drivers/char/hw_random/optee-rng.c @@ -243,7 +243,7 @@ static int optee_rng_probe(struct device *dev) if (err) goto out_sess; - err = hwrng_register(&pvt_data.optee_rng); + err = devm_hwrng_register(dev, &pvt_data.optee_rng); if (err) { dev_err(dev, "hwrng registration failed (%d)\n", err); goto out_sess; @@ -263,7 +263,6 @@ out_ctx: static int optee_rng_remove(struct device *dev) { - hwrng_unregister(&pvt_data.optee_rng); tee_client_close_session(pvt_data.ctx, pvt_data.session_id); tee_client_close_context(pvt_data.ctx); From cfb28fde083761bfb839bc53059068bab5634b6a Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Wed, 3 Feb 2021 21:09:33 +0530 Subject: [PATCH 139/154] crypto: xor - Fix typo of optimization s/optimzation/optimization/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Signed-off-by: Herbert Xu --- crypto/xor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/xor.c b/crypto/xor.c index eacbf4f93990..c046d074f522 100644 --- a/crypto/xor.c +++ b/crypto/xor.c @@ -95,7 +95,7 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) for (i = 0; i < 3; i++) { start = ktime_get(); for (j = 0; j < REPS; j++) { - mb(); /* prevent loop optimzation */ + mb(); /* prevent loop optimization */ tmpl->do_2(BENCH_SIZE, b1, b2); mb(); } From 0db5bc85c5d871188b6f66ee26bc712a309a4a3d Mon Sep 17 00:00:00 2001 From: Daniele Alessandrelli Date: Wed, 3 Feb 2021 15:42:10 +0000 Subject: [PATCH 140/154] crypto: keembay-ocs-aes - Fix 'q' assignment during CCM B0 generation In ocs_aes_ccm_write_b0(), 'q' (the octet length of the binary representation of the octet length of the payload) is set to 'iv[0]', while it should be set to 'iv[0] & 0x7' (i.e., only the last 3 bits of iv[0] should be used), as documented in NIST Special Publication 800-38C: https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38c.pdf In practice, this is not an issue, since 'iv[0]' is checked to be in the range [1-7] by ocs_aes_validate_inputs(), but let's fix the assignment anyway, in order to make the code more robust. Signed-off-by: Daniele Alessandrelli Signed-off-by: Herbert Xu --- drivers/crypto/keembay/ocs-aes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/keembay/ocs-aes.c b/drivers/crypto/keembay/ocs-aes.c index b85c89477afa..be9f32fc8f42 100644 --- a/drivers/crypto/keembay/ocs-aes.c +++ b/drivers/crypto/keembay/ocs-aes.c @@ -1080,15 +1080,15 @@ static int ocs_aes_ccm_write_b0(const struct ocs_aes_dev *aes_dev, /* * q is the octet length of Q. * q can only be an element of {2, 3, 4, 5, 6, 7, 8} and is encoded as - * q - 1 == iv[0] + * q - 1 == iv[0] & 0x7; */ b0[0] |= iv[0] & 0x7; /* * Copy the Nonce N from IV to B0; N is located in iv[1]..iv[15 - q] * and must be copied to b0[1]..b0[15-q]. - * q == iv[0] + 1 + * q == (iv[0] & 0x7) + 1 */ - q = iv[0] + 1; + q = (iv[0] & 0x7) + 1; for (i = 1; i <= 15 - q; i++) b0[i] = iv[i]; /* From d27fb0460b65d1edb2db6b78d109f2531902b3ca Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Thu, 4 Feb 2021 07:11:33 +0000 Subject: [PATCH 141/154] crypto: cpt - remove casting dma_alloc_coherent Remove casting the values returned by dma_alloc_coherent. Signed-off-by: Xu Wang Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/cptvf_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/cavium/cpt/cptvf_main.c b/drivers/crypto/cavium/cpt/cptvf_main.c index f016448e43bb..112b12a32542 100644 --- a/drivers/crypto/cavium/cpt/cptvf_main.c +++ b/drivers/crypto/cavium/cpt/cptvf_main.c @@ -233,10 +233,10 @@ static int alloc_command_queues(struct cpt_vf *cptvf, c_size = (rem_q_size > qcsize_bytes) ? qcsize_bytes : rem_q_size; - curr->head = (u8 *)dma_alloc_coherent(&pdev->dev, - c_size + CPT_NEXT_CHUNK_PTR_SIZE, - &curr->dma_addr, - GFP_KERNEL); + curr->head = dma_alloc_coherent(&pdev->dev, + c_size + CPT_NEXT_CHUNK_PTR_SIZE, + &curr->dma_addr, + GFP_KERNEL); if (!curr->head) { dev_err(&pdev->dev, "Command Q (%d) chunk (%d) allocation failed\n", i, queue->nchunks); From 2db3e2387ad959d8630942f9df8793e6fd6eacd7 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Thu, 4 Feb 2021 16:59:33 +0800 Subject: [PATCH 142/154] crypto: hisilicon/hpre - adapt the number of clusters HPRE of Kunpeng 930 is updated on cluster numbers, so we try to update this driver to make it running okay on Kunpeng920/Kunpeng930 chips. Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre.h | 8 ++- drivers/crypto/hisilicon/hpre/hpre_main.c | 71 +++++++++++++++-------- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre.h b/drivers/crypto/hisilicon/hpre/hpre.h index f69252b24671..181c109b19f7 100644 --- a/drivers/crypto/hisilicon/hpre/hpre.h +++ b/drivers/crypto/hisilicon/hpre/hpre.h @@ -14,8 +14,7 @@ enum { HPRE_CLUSTER0, HPRE_CLUSTER1, HPRE_CLUSTER2, - HPRE_CLUSTER3, - HPRE_CLUSTERS_NUM, + HPRE_CLUSTER3 }; enum hpre_ctrl_dbgfs_file { @@ -36,7 +35,10 @@ enum hpre_dfx_dbgfs_file { HPRE_DFX_FILE_NUM }; -#define HPRE_DEBUGFS_FILE_NUM (HPRE_DEBUG_FILE_NUM + HPRE_CLUSTERS_NUM - 1) +#define HPRE_CLUSTERS_NUM_V2 (HPRE_CLUSTER3 + 1) +#define HPRE_CLUSTERS_NUM_V3 1 +#define HPRE_CLUSTERS_NUM_MAX HPRE_CLUSTERS_NUM_V2 +#define HPRE_DEBUGFS_FILE_NUM (HPRE_DEBUG_FILE_NUM + HPRE_CLUSTERS_NUM_MAX - 1) struct hpre_debugfs_file { int index; diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 3b3481e7527c..0045261692bc 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -73,7 +73,8 @@ #define HPRE_QM_AXI_CFG_MASK 0xffff #define HPRE_QM_VFG_AX_MASK 0xff #define HPRE_BD_USR_MASK 0x3 -#define HPRE_CLUSTER_CORE_MASK 0xf +#define HPRE_CLUSTER_CORE_MASK_V2 0xf +#define HPRE_CLUSTER_CORE_MASK_V3 0xff #define HPRE_AM_OOO_SHUTDOWN_ENB 0x301044 #define HPRE_AM_OOO_SHUTDOWN_ENABLE BIT(0) @@ -86,6 +87,11 @@ #define HPRE_QM_PM_FLR BIT(11) #define HPRE_QM_SRIOV_FLR BIT(12) +#define HPRE_CLUSTERS_NUM(qm) \ + (((qm)->ver >= QM_HW_V3) ? HPRE_CLUSTERS_NUM_V3 : HPRE_CLUSTERS_NUM_V2) +#define HPRE_CLUSTER_CORE_MASK(qm) \ + (((qm)->ver >= QM_HW_V3) ? HPRE_CLUSTER_CORE_MASK_V3 :\ + HPRE_CLUSTER_CORE_MASK_V2) #define HPRE_VIA_MSI_DSM 1 #define HPRE_SQE_MASK_OFFSET 8 #define HPRE_SQE_MASK_LEN 24 @@ -279,6 +285,38 @@ static int hpre_cfg_by_dsm(struct hisi_qm *qm) return 0; } +static int hpre_set_cluster(struct hisi_qm *qm) +{ + u32 cluster_core_mask = HPRE_CLUSTER_CORE_MASK(qm); + u8 clusters_num = HPRE_CLUSTERS_NUM(qm); + struct device *dev = &qm->pdev->dev; + unsigned long offset; + u32 val = 0; + int ret, i; + + for (i = 0; i < clusters_num; i++) { + offset = i * HPRE_CLSTR_ADDR_INTRVL; + + /* clusters initiating */ + writel(cluster_core_mask, + HPRE_ADDR(qm, offset + HPRE_CORE_ENB)); + writel(0x1, HPRE_ADDR(qm, offset + HPRE_CORE_INI_CFG)); + ret = readl_relaxed_poll_timeout(HPRE_ADDR(qm, offset + + HPRE_CORE_INI_STATUS), val, + ((val & cluster_core_mask) == + cluster_core_mask), + HPRE_REG_RD_INTVRL_US, + HPRE_REG_RD_TMOUT_US); + if (ret) { + dev_err(dev, + "cluster %d int st status timeout!\n", i); + return -ETIMEDOUT; + } + } + + return 0; +} + /* * For Hi1620, we shoul disable FLR triggered by hardware (BME/PM/SRIOV). * Or it may stay in D3 state when we bind and unbind hpre quickly, @@ -298,9 +336,8 @@ static void disable_flr_of_bme(struct hisi_qm *qm) static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; - unsigned long offset; - int ret, i; u32 val; + int ret; writel(HPRE_QM_USR_CFG_MASK, HPRE_ADDR(qm, QM_ARUSER_M_CFG_ENABLE)); writel(HPRE_QM_USR_CFG_MASK, HPRE_ADDR(qm, QM_AWUSER_M_CFG_ENABLE)); @@ -335,25 +372,9 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) return -ETIMEDOUT; } - for (i = 0; i < HPRE_CLUSTERS_NUM; i++) { - offset = i * HPRE_CLSTR_ADDR_INTRVL; - - /* clusters initiating */ - writel(HPRE_CLUSTER_CORE_MASK, - HPRE_ADDR(qm, offset + HPRE_CORE_ENB)); - writel(0x1, HPRE_ADDR(qm, offset + HPRE_CORE_INI_CFG)); - ret = readl_relaxed_poll_timeout(HPRE_ADDR(qm, offset + - HPRE_CORE_INI_STATUS), val, - ((val & HPRE_CLUSTER_CORE_MASK) == - HPRE_CLUSTER_CORE_MASK), - HPRE_REG_RD_INTVRL_US, - HPRE_REG_RD_TMOUT_US); - if (ret) { - dev_err(dev, - "cluster %d int st status timeout!\n", i); - return -ETIMEDOUT; - } - } + ret = hpre_set_cluster(qm); + if (ret) + return -ETIMEDOUT; ret = hpre_cfg_by_dsm(qm); if (ret) @@ -366,6 +387,7 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) static void hpre_cnt_regs_clear(struct hisi_qm *qm) { + u8 clusters_num = HPRE_CLUSTERS_NUM(qm); unsigned long offset; int i; @@ -374,7 +396,7 @@ static void hpre_cnt_regs_clear(struct hisi_qm *qm) writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF); /* clear clusterX/cluster_ctrl */ - for (i = 0; i < HPRE_CLUSTERS_NUM; i++) { + for (i = 0; i < clusters_num; i++) { offset = HPRE_CLSTR_BASE + i * HPRE_CLSTR_ADDR_INTRVL; writel(0x0, qm->io_base + offset + HPRE_CLUSTER_INQURY); } @@ -673,13 +695,14 @@ static int hpre_pf_comm_regs_debugfs_init(struct hisi_qm *qm) static int hpre_cluster_debugfs_init(struct hisi_qm *qm) { + u8 clusters_num = HPRE_CLUSTERS_NUM(qm); struct device *dev = &qm->pdev->dev; char buf[HPRE_DBGFS_VAL_MAX_LEN]; struct debugfs_regset32 *regset; struct dentry *tmp_d; int i, ret; - for (i = 0; i < HPRE_CLUSTERS_NUM; i++) { + for (i = 0; i < clusters_num; i++) { ret = snprintf(buf, HPRE_DBGFS_VAL_MAX_LEN, "cluster%d", i); if (ret < 0) return -EINVAL; From 8db84129d4fffafd732b69f2c5c0e216466165b8 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Thu, 4 Feb 2021 16:59:34 +0800 Subject: [PATCH 143/154] crypto: hisilicon/hpre - tiny fix Update since some special settings only for Kunpeng920. Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 0045261692bc..06448b8b96df 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -318,7 +318,7 @@ static int hpre_set_cluster(struct hisi_qm *qm) } /* - * For Hi1620, we shoul disable FLR triggered by hardware (BME/PM/SRIOV). + * For Kunpeng 920, we shoul disable FLR triggered by hardware (BME/PM/SRIOV). * Or it may stay in D3 state when we bind and unbind hpre quickly, * as it does FLR triggered by hardware. */ @@ -376,11 +376,14 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) if (ret) return -ETIMEDOUT; - ret = hpre_cfg_by_dsm(qm); - if (ret) - dev_err(dev, "acpi_evaluate_dsm err.\n"); + /* This setting is only needed by Kunpeng 920. */ + if (qm->ver == QM_HW_V2) { + ret = hpre_cfg_by_dsm(qm); + if (ret) + dev_err(dev, "acpi_evaluate_dsm err.\n"); - disable_flr_of_bme(qm); + disable_flr_of_bme(qm); + } return ret; } From 553d09b3140035cc5f5f60cfcf1088c99bc9b1cf Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Thu, 4 Feb 2021 16:59:35 +0800 Subject: [PATCH 144/154] crypto: hisilicon/qm - fix use of 'dma_map_single' Calling 'dma_map_single' after the data is written to ensure that the cpu cache and dma cache are consistent. Signed-off-by: Weili Qian Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 50 +++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 904b99a22442..93d4a21cf825 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -1747,12 +1747,6 @@ static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) sqc = kzalloc(sizeof(struct qm_sqc), GFP_KERNEL); if (!sqc) return -ENOMEM; - sqc_dma = dma_map_single(dev, sqc, sizeof(struct qm_sqc), - DMA_TO_DEVICE); - if (dma_mapping_error(dev, sqc_dma)) { - kfree(sqc); - return -ENOMEM; - } INIT_QC_COMMON(sqc, qp->sqe_dma, pasid); if (ver == QM_HW_V1) { @@ -1765,6 +1759,13 @@ static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) sqc->cq_num = cpu_to_le16(qp_id); sqc->w13 = cpu_to_le16(QM_MK_SQC_W13(0, 1, qp->alg_type)); + sqc_dma = dma_map_single(dev, sqc, sizeof(struct qm_sqc), + DMA_TO_DEVICE); + if (dma_mapping_error(dev, sqc_dma)) { + kfree(sqc); + return -ENOMEM; + } + ret = qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0); dma_unmap_single(dev, sqc_dma, sizeof(struct qm_sqc), DMA_TO_DEVICE); kfree(sqc); @@ -1784,12 +1785,6 @@ static int qm_cq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) cqc = kzalloc(sizeof(struct qm_cqc), GFP_KERNEL); if (!cqc) return -ENOMEM; - cqc_dma = dma_map_single(dev, cqc, sizeof(struct qm_cqc), - DMA_TO_DEVICE); - if (dma_mapping_error(dev, cqc_dma)) { - kfree(cqc); - return -ENOMEM; - } INIT_QC_COMMON(cqc, qp->cqe_dma, pasid); if (ver == QM_HW_V1) { @@ -1802,6 +1797,13 @@ static int qm_cq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) } cqc->dw6 = cpu_to_le32(1 << QM_CQ_PHASE_SHIFT | 1 << QM_CQ_FLAG_SHIFT); + cqc_dma = dma_map_single(dev, cqc, sizeof(struct qm_cqc), + DMA_TO_DEVICE); + if (dma_mapping_error(dev, cqc_dma)) { + kfree(cqc); + return -ENOMEM; + } + ret = qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 0); dma_unmap_single(dev, cqc_dma, sizeof(struct qm_cqc), DMA_TO_DEVICE); kfree(cqc); @@ -2558,15 +2560,9 @@ static int qm_eq_ctx_cfg(struct hisi_qm *qm) dma_addr_t eqc_dma; int ret; - eqc = kzalloc(sizeof(struct qm_eqc), GFP_KERNEL); //todo + eqc = kzalloc(sizeof(struct qm_eqc), GFP_KERNEL); if (!eqc) return -ENOMEM; - eqc_dma = dma_map_single(dev, eqc, sizeof(struct qm_eqc), - DMA_TO_DEVICE); - if (dma_mapping_error(dev, eqc_dma)) { - kfree(eqc); - return -ENOMEM; - } eqc->base_l = cpu_to_le32(lower_32_bits(qm->eqe_dma)); eqc->base_h = cpu_to_le32(upper_32_bits(qm->eqe_dma)); @@ -2574,6 +2570,13 @@ static int qm_eq_ctx_cfg(struct hisi_qm *qm) eqc->dw3 = cpu_to_le32(QM_EQE_AEQE_SIZE); eqc->dw6 = cpu_to_le32((QM_EQ_DEPTH - 1) | (1 << QM_EQC_PHASE_SHIFT)); + eqc_dma = dma_map_single(dev, eqc, sizeof(struct qm_eqc), + DMA_TO_DEVICE); + if (dma_mapping_error(dev, eqc_dma)) { + kfree(eqc); + return -ENOMEM; + } + ret = qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0); dma_unmap_single(dev, eqc_dma, sizeof(struct qm_eqc), DMA_TO_DEVICE); kfree(eqc); @@ -2591,6 +2594,11 @@ static int qm_aeq_ctx_cfg(struct hisi_qm *qm) aeqc = kzalloc(sizeof(struct qm_aeqc), GFP_KERNEL); if (!aeqc) return -ENOMEM; + + aeqc->base_l = cpu_to_le32(lower_32_bits(qm->aeqe_dma)); + aeqc->base_h = cpu_to_le32(upper_32_bits(qm->aeqe_dma)); + aeqc->dw6 = cpu_to_le32((QM_Q_DEPTH - 1) | (1 << QM_EQC_PHASE_SHIFT)); + aeqc_dma = dma_map_single(dev, aeqc, sizeof(struct qm_aeqc), DMA_TO_DEVICE); if (dma_mapping_error(dev, aeqc_dma)) { @@ -2598,10 +2606,6 @@ static int qm_aeq_ctx_cfg(struct hisi_qm *qm) return -ENOMEM; } - aeqc->base_l = cpu_to_le32(lower_32_bits(qm->aeqe_dma)); - aeqc->base_h = cpu_to_le32(upper_32_bits(qm->aeqe_dma)); - aeqc->dw6 = cpu_to_le32((QM_Q_DEPTH - 1) | (1 << QM_EQC_PHASE_SHIFT)); - ret = qm_mb(qm, QM_MB_CMD_AEQC, aeqc_dma, 0, 0); dma_unmap_single(dev, aeqc_dma, sizeof(struct qm_aeqc), DMA_TO_DEVICE); kfree(aeqc); From cc3292d1df23539302752bb316b5f42f508f0963 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Thu, 4 Feb 2021 16:59:36 +0800 Subject: [PATCH 145/154] crypto: hisilicon - PASID fixed on Kunpeng 930 Enable PASID by setting 'sqc' and 'cqc' pasid bits per queue in Kunpeng 930. For Kunpeng 920, PASID is effective for all queues once set in SVA scenarios. Signed-off-by: Weili Qian Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 24 ++++++++++++----------- drivers/crypto/hisilicon/qm.c | 11 +++++++++++ drivers/crypto/hisilicon/qm.h | 1 + drivers/crypto/hisilicon/sec2/sec_main.c | 2 +- drivers/crypto/hisilicon/zip/zip_main.c | 2 +- 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 06448b8b96df..67f1fcac670d 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -356,10 +356,6 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) writel(0x0, HPRE_ADDR(qm, HPRE_COMM_CNT_CLR_CE)); writel(0x0, HPRE_ADDR(qm, HPRE_ECC_BYPASS)); - /* Enable data buffer pasid */ - if (qm->use_sva) - hpre_pasid_enable(qm); - writel(HPRE_BD_USR_MASK, HPRE_ADDR(qm, HPRE_BD_ARUSR_CFG)); writel(HPRE_BD_USR_MASK, HPRE_ADDR(qm, HPRE_BD_AWUSR_CFG)); writel(0x1, HPRE_ADDR(qm, HPRE_RDCHN_INI_CFG)); @@ -383,6 +379,10 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) dev_err(dev, "acpi_evaluate_dsm err.\n"); disable_flr_of_bme(qm); + + /* Enable data buffer pasid */ + if (qm->use_sva) + hpre_pasid_enable(qm); } return ret; @@ -993,16 +993,18 @@ static void hpre_remove(struct pci_dev *pdev) return; } } - if (qm->fun_type == QM_HW_PF) { - if (qm->use_sva) - hpre_pasid_disable(qm); - hpre_cnt_regs_clear(qm); - qm->debug.curr_qm_qp_num = 0; - } hpre_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); - hisi_qm_dev_err_uninit(qm); + + if (qm->fun_type == QM_HW_PF) { + if (qm->use_sva && qm->ver == QM_HW_V2) + hpre_pasid_disable(qm); + hpre_cnt_regs_clear(qm); + qm->debug.curr_qm_qp_num = 0; + hisi_qm_dev_err_uninit(qm); + } + hisi_qm_uninit(qm); } diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 93d4a21cf825..c36c4a5244de 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -54,6 +54,8 @@ #define QM_SQ_PRIORITY_SHIFT 0 #define QM_SQ_ORDERS_SHIFT 4 #define QM_SQ_TYPE_SHIFT 8 +#define QM_QC_PASID_ENABLE 0x1 +#define QM_QC_PASID_ENABLE_SHIFT 7 #define QM_SQ_TYPE_MASK GENMASK(3, 0) #define QM_SQ_TAIL_IDX(sqc) ((le16_to_cpu((sqc)->w11) >> 6) & 0x1) @@ -1685,6 +1687,7 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type) qp->req_cb = NULL; qp->qp_id = qp_id; qp->alg_type = alg_type; + qp->is_in_kernel = true; qm->qp_in_used++; atomic_set(&qp->qp_status.flags, QP_INIT); @@ -1759,6 +1762,10 @@ static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) sqc->cq_num = cpu_to_le16(qp_id); sqc->w13 = cpu_to_le16(QM_MK_SQC_W13(0, 1, qp->alg_type)); + if (ver >= QM_HW_V3 && qm->use_sva && !qp->is_in_kernel) + sqc->w11 = cpu_to_le16(QM_QC_PASID_ENABLE << + QM_QC_PASID_ENABLE_SHIFT); + sqc_dma = dma_map_single(dev, sqc, sizeof(struct qm_sqc), DMA_TO_DEVICE); if (dma_mapping_error(dev, sqc_dma)) { @@ -1797,6 +1804,9 @@ static int qm_cq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) } cqc->dw6 = cpu_to_le32(1 << QM_CQ_PHASE_SHIFT | 1 << QM_CQ_FLAG_SHIFT); + if (ver >= QM_HW_V3 && qm->use_sva && !qp->is_in_kernel) + cqc->w11 = cpu_to_le16(QM_QC_PASID_ENABLE); + cqc_dma = dma_map_single(dev, cqc, sizeof(struct qm_cqc), DMA_TO_DEVICE); if (dma_mapping_error(dev, cqc_dma)) { @@ -2067,6 +2077,7 @@ static int hisi_qm_uacce_get_queue(struct uacce_device *uacce, qp->uacce_q = q; qp->event_cb = qm_qp_event_notifier; qp->pasid = arg; + qp->is_in_kernel = false; return 0; } diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index c1dd0fcf5beb..8f5a3b9704d6 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -288,6 +288,7 @@ struct hisi_qp { struct hisi_qm *qm; bool is_resetting; + bool is_in_kernel; u16 pasid; struct uacce_queue *uacce_q; }; diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 4809c19dcdc8..b5c2f7e99eed 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -323,7 +323,7 @@ static int sec_engine_init(struct hisi_qm *qm) reg = readl_relaxed(SEC_ADDR(qm, SEC_INTERFACE_USER_CTRL1_REG)); reg &= SEC_USER1_SMMU_MASK; - if (qm->use_sva) + if (qm->use_sva && qm->ver == QM_HW_V2) reg |= SEC_USER1_SMMU_SVA; else reg |= SEC_USER1_SMMU_NORMAL; diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 9cdecff01bcb..115560f9a8bd 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -292,7 +292,7 @@ static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm) writel(AXUSER_BASE, base + HZIP_SGL_RUSER_32_63); writel(AXUSER_BASE, base + HZIP_BD_WUSER_32_63); - if (qm->use_sva) { + if (qm->use_sva && qm->ver == QM_HW_V2) { writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_RUSER_32_63); writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_WUSER_32_63); } else { From fbc75d03fda048bc821cb27f724ff367d5591ce8 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Thu, 4 Feb 2021 17:00:24 +0800 Subject: [PATCH 146/154] crypto: hisilicon/hpre - enable Elliptic curve cryptography Enable x25519/x448/ecdh/ecdsa/sm2 algorithm on Kunpeng 930. Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 67f1fcac670d..db170fcf556f 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -30,6 +30,8 @@ #define HPRE_BD_ARUSR_CFG 0x301030 #define HPRE_BD_AWUSR_CFG 0x301034 #define HPRE_TYPES_ENB 0x301038 +#define HPRE_RSA_ENB BIT(0) +#define HPRE_ECC_ENB BIT(1) #define HPRE_DATA_RUSER_CFG 0x30103c #define HPRE_DATA_WUSER_CFG 0x301040 #define HPRE_INT_MASK 0x301400 @@ -348,7 +350,12 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) val |= BIT(HPRE_TIMEOUT_ABNML_BIT); writel_relaxed(val, HPRE_ADDR(qm, HPRE_QM_ABNML_INT_MASK)); - writel(0x1, HPRE_ADDR(qm, HPRE_TYPES_ENB)); + if (qm->ver >= QM_HW_V3) + writel(HPRE_RSA_ENB | HPRE_ECC_ENB, + HPRE_ADDR(qm, HPRE_TYPES_ENB)); + else + writel(HPRE_RSA_ENB, HPRE_ADDR(qm, HPRE_TYPES_ENB)); + writel(HPRE_QM_VFG_AX_MASK, HPRE_ADDR(qm, HPRE_VFG_AXCACHE)); writel(0x0, HPRE_ADDR(qm, HPRE_BD_ENDIAN)); writel(0x0, HPRE_ADDR(qm, HPRE_INT_MASK)); From 6956d8be23871a779bf74085c51efdb76ad6638a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Feb 2021 16:42:15 +0100 Subject: [PATCH 147/154] crypto: octeontx2 - fix -Wpointer-bool-conversion warning When CONFIG_CPUMASK_OFFSTACK is disabled, clang reports a warning about a bogus condition: drivers/crypto/marvell/octeontx2/otx2_cptlf.c:334:21: error: address of array 'lfs->lf[slot].affinity_mask' will always evaluate to 'true' [-Werror,-Wpointer-bool-conversion] if (lfs->lf[slot].affinity_mask) ~~ ~~~~~~~~~~~~~~^~~~~~~~~~~~~ In this configuration, the free_cpumask_var() function does nothing, so the condition could be skipped. When the option is enabled, there is no warning, but the check is also redundant because free_cpumask_var() falls back to kfree(), which is documented as ignoring NULL pointers. Remove the check to avoid the warning. Fixes: 64506017030d ("crypto: octeontx2 - add LF framework") Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/otx2_cptlf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c index e27ea8909368..823a4571fd67 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c @@ -331,8 +331,7 @@ void otx2_cptlf_free_irqs_affinity(struct otx2_cptlfs_info *lfs) irq_set_affinity_hint(pci_irq_vector(lfs->pdev, lfs->lf[slot].msix_offset + offs), NULL); - if (lfs->lf[slot].affinity_mask) - free_cpumask_var(lfs->lf[slot].affinity_mask); + free_cpumask_var(lfs->lf[slot].affinity_mask); } } From 3e9954fe36ad3e254d35cc7da5117c850cbc0e50 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Fri, 5 Feb 2021 18:12:53 +0800 Subject: [PATCH 148/154] crypto: hisilicon/qm - removing driver after reset Add waiting logic for resetting as removing driver, otherwise call trace will occur due to releasing resource. Signed-off-by: Weili Qian Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 2 +- drivers/crypto/hisilicon/qm.c | 48 +++++++++++++++-------- drivers/crypto/hisilicon/qm.h | 2 +- drivers/crypto/hisilicon/sec2/sec_main.c | 2 +- drivers/crypto/hisilicon/zip/zip_main.c | 2 +- 5 files changed, 36 insertions(+), 20 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index db170fcf556f..5c56ec4e2174 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -994,7 +994,7 @@ static void hpre_remove(struct pci_dev *pdev) hisi_qm_wait_task_finish(qm, &hpre_devices); hisi_qm_alg_unregister(qm, &hpre_devices); if (qm->fun_type == QM_HW_PF && qm->vfs_num) { - ret = hisi_qm_sriov_disable(pdev, qm->is_frozen); + ret = hisi_qm_sriov_disable(pdev, true); if (ret) { pci_err(pdev, "Disable SRIOV fail!\n"); return; diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index c36c4a5244de..cd72d2d26be9 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -149,7 +149,6 @@ #define QM_RAS_CE_TIMES_PER_IRQ 1 #define QM_RAS_MSI_INT_SEL 0x1040f4 -#define QM_DEV_RESET_FLAG 0 #define QM_RESET_WAIT_TIMEOUT 400 #define QM_PEH_VENDOR_ID 0x1000d8 #define ACC_VENDOR_ID_VALUE 0x5a5a @@ -187,6 +186,10 @@ #define QM_SQE_ADDR_MASK GENMASK(7, 0) #define QM_EQ_DEPTH (1024 * 2) +#define QM_DRIVER_REMOVING 0 +#define QM_RST_SCHED 1 +#define QM_RESETTING 2 + #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \ (((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \ ((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \ @@ -2261,17 +2264,15 @@ static int qm_alloc_uacce(struct hisi_qm *qm) */ static int qm_frozen(struct hisi_qm *qm) { - down_write(&qm->qps_lock); - - if (qm->is_frozen) { - up_write(&qm->qps_lock); + if (test_bit(QM_DRIVER_REMOVING, &qm->misc_ctl)) return 0; - } + + down_write(&qm->qps_lock); if (!qm->qp_in_used) { qm->qp_in_used = qm->qp_num; - qm->is_frozen = true; up_write(&qm->qps_lock); + set_bit(QM_DRIVER_REMOVING, &qm->misc_ctl); return 0; } @@ -2324,6 +2325,10 @@ void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list) msleep(WAIT_PERIOD); } + while (test_bit(QM_RST_SCHED, &qm->misc_ctl) || + test_bit(QM_RESETTING, &qm->misc_ctl)) + msleep(WAIT_PERIOD); + udelay(REMOVE_WAIT_DELAY); } EXPORT_SYMBOL_GPL(hisi_qm_wait_task_finish); @@ -2452,7 +2457,7 @@ static void hisi_qm_pre_init(struct hisi_qm *qm) mutex_init(&qm->mailbox_lock); init_rwsem(&qm->qps_lock); qm->qp_in_used = 0; - qm->is_frozen = false; + qm->misc_ctl = false; } static void hisi_qm_pci_uninit(struct hisi_qm *qm) @@ -3263,7 +3268,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_sriov_disable); int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs) { if (num_vfs == 0) - return hisi_qm_sriov_disable(pdev, 0); + return hisi_qm_sriov_disable(pdev, false); else return hisi_qm_sriov_enable(pdev, num_vfs); } @@ -3480,7 +3485,7 @@ static int qm_reset_prepare_ready(struct hisi_qm *qm) int delay = 0; /* All reset requests need to be queued for processing */ - while (test_and_set_bit(QM_DEV_RESET_FLAG, &pf_qm->reset_flag)) { + while (test_and_set_bit(QM_RESETTING, &pf_qm->misc_ctl)) { msleep(++delay); if (delay > QM_RESET_WAIT_TIMEOUT) return -EBUSY; @@ -3504,6 +3509,7 @@ static int qm_controller_reset_prepare(struct hisi_qm *qm) ret = qm_vf_reset_prepare(qm, QM_SOFT_RESET); if (ret) { pci_err(pdev, "Fails to stop VFs!\n"); + clear_bit(QM_RESETTING, &qm->misc_ctl); return ret; } } @@ -3511,9 +3517,12 @@ static int qm_controller_reset_prepare(struct hisi_qm *qm) ret = hisi_qm_stop(qm, QM_SOFT_RESET); if (ret) { pci_err(pdev, "Fails to stop QM!\n"); + clear_bit(QM_RESETTING, &qm->misc_ctl); return ret; } + clear_bit(QM_RST_SCHED, &qm->misc_ctl); + return 0; } @@ -3751,7 +3760,7 @@ static int qm_controller_reset_done(struct hisi_qm *qm) hisi_qm_dev_err_init(qm); qm_restart_done(qm); - clear_bit(QM_DEV_RESET_FLAG, &qm->reset_flag); + clear_bit(QM_RESETTING, &qm->misc_ctl); return 0; } @@ -3764,18 +3773,23 @@ static int qm_controller_reset(struct hisi_qm *qm) pci_info(pdev, "Controller resetting...\n"); ret = qm_controller_reset_prepare(qm); - if (ret) + if (ret) { + clear_bit(QM_RST_SCHED, &qm->misc_ctl); return ret; + } ret = qm_soft_reset(qm); if (ret) { pci_err(pdev, "Controller reset failed (%d)\n", ret); + clear_bit(QM_RESETTING, &qm->misc_ctl); return ret; } ret = qm_controller_reset_done(qm); - if (ret) + if (ret) { + clear_bit(QM_RESETTING, &qm->misc_ctl); return ret; + } pci_info(pdev, "Controller reset complete\n"); @@ -3882,8 +3896,6 @@ static bool qm_flr_reset_complete(struct pci_dev *pdev) return false; } - clear_bit(QM_DEV_RESET_FLAG, &qm->reset_flag); - return true; } @@ -3927,6 +3939,8 @@ void hisi_qm_reset_done(struct pci_dev *pdev) flr_done: if (qm_flr_reset_complete(pdev)) pci_info(pdev, "FLR reset complete\n"); + + clear_bit(QM_RESETTING, &qm->misc_ctl); } EXPORT_SYMBOL_GPL(hisi_qm_reset_done); @@ -3937,7 +3951,9 @@ static irqreturn_t qm_abnormal_irq(int irq, void *data) atomic64_inc(&qm->debug.dfx.abnormal_irq_cnt); ret = qm_process_dev_error(qm); - if (ret == ACC_ERR_NEED_RESET) + if (ret == ACC_ERR_NEED_RESET && + !test_bit(QM_DRIVER_REMOVING, &qm->misc_ctl) && + !test_and_set_bit(QM_RST_SCHED, &qm->misc_ctl)) schedule_work(&qm->rst_work); return IRQ_HANDLED; diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 8f5a3b9704d6..c3f8b741119a 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -230,7 +230,7 @@ struct hisi_qm { struct hisi_qm_status status; const struct hisi_qm_err_ini *err_ini; struct hisi_qm_err_status err_status; - unsigned long reset_flag; + unsigned long misc_ctl; /* driver removing and reset sched */ struct rw_semaphore qps_lock; struct idr qp_idr; diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index b5c2f7e99eed..086722795d5c 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -949,7 +949,7 @@ static void sec_remove(struct pci_dev *pdev) hisi_qm_wait_task_finish(qm, &sec_devices); hisi_qm_alg_unregister(qm, &sec_devices); if (qm->fun_type == QM_HW_PF && qm->vfs_num) - hisi_qm_sriov_disable(pdev, qm->is_frozen); + hisi_qm_sriov_disable(pdev, true); sec_debugfs_exit(qm); diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 115560f9a8bd..24ddd0dd85da 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -901,7 +901,7 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_qm_alg_unregister(qm, &zip_devices); if (qm->fun_type == QM_HW_PF && qm->vfs_num) - hisi_qm_sriov_disable(pdev, qm->is_frozen); + hisi_qm_sriov_disable(pdev, true); hisi_zip_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); From 80d89fa2b42b83d7ed7d45d6988b4da41a87cc48 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Fri, 5 Feb 2021 18:12:54 +0800 Subject: [PATCH 149/154] crypto: hisilicon/qm - fix request missing error Add 'qp_stop_fail_cb' to ensure it is called as device is resetting. Signed-off-by: Weili Qian Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index cd72d2d26be9..4a4ad82b08f2 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -624,6 +624,9 @@ static void qm_cq_head_update(struct hisi_qp *qp) static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm) { + if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP)) + return; + if (qp->event_cb) { qp->event_cb(qp); return; @@ -1879,6 +1882,28 @@ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg) } EXPORT_SYMBOL_GPL(hisi_qm_start_qp); +/** + * qp_stop_fail_cb() - call request cb. + * @qp: stopped failed qp. + * + * Callback function should be called whether task completed or not. + */ +static void qp_stop_fail_cb(struct hisi_qp *qp) +{ + int qp_used = atomic_read(&qp->qp_status.used); + u16 cur_tail = qp->qp_status.sq_tail; + u16 cur_head = (cur_tail + QM_Q_DEPTH - qp_used) % QM_Q_DEPTH; + struct hisi_qm *qm = qp->qm; + u16 pos; + int i; + + for (i = 0; i < qp_used; i++) { + pos = (i + cur_head) % QM_Q_DEPTH; + qp->req_cb(qp, qp->sqe + (u32)(qm->sqe_size * pos)); + atomic_dec(&qp->qp_status.used); + } +} + /** * qm_drain_qp() - Drain a qp. * @qp: The qp we want to drain. @@ -1974,6 +1999,9 @@ static int qm_stop_qp_nolock(struct hisi_qp *qp) else flush_work(&qp->qm->work); + if (unlikely(qp->is_resetting && atomic_read(&qp->qp_status.used))) + qp_stop_fail_cb(qp); + dev_dbg(dev, "stop queue %u!", qp->qp_id); return 0; From 7f5151e5efbe8fc2293a77cb853679ceff46991b Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Fri, 5 Feb 2021 18:12:55 +0800 Subject: [PATCH 150/154] crypto: hisilicon/qm - fix the value of 'QM_SQC_VFT_BASE_MASK_V2' Since the size of base number is 16 bits, update the value of 'QM_SQC_VFT_BASE_MASK_V2' as 'GENMASK(15, 0)'. Signed-off-by: Weili Qian Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 4a4ad82b08f2..0495113f952a 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -122,7 +122,7 @@ #define QM_CQC_VFT_VALID (1ULL << 28) #define QM_SQC_VFT_BASE_SHIFT_V2 28 -#define QM_SQC_VFT_BASE_MASK_V2 GENMASK(5, 0) +#define QM_SQC_VFT_BASE_MASK_V2 GENMASK(15, 0) #define QM_SQC_VFT_NUM_SHIFT_V2 45 #define QM_SQC_VFT_NUM_MASK_v2 GENMASK(9, 0) From 87c356548fcc13b02e18e455cc145e9c817a33e9 Mon Sep 17 00:00:00 2001 From: Sihang Chen Date: Fri, 5 Feb 2021 18:12:56 +0800 Subject: [PATCH 151/154] crypto: hisilicon/qm - update irqflag There is no need to share IRQ among several devices, and set 'irqflag' as 0. Signed-off-by: Sihang Chen Signed-off-by: Weili Qian Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 0495113f952a..1dea61a0c3c7 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -3993,21 +3993,20 @@ static int qm_irq_register(struct hisi_qm *qm) int ret; ret = request_irq(pci_irq_vector(pdev, QM_EQ_EVENT_IRQ_VECTOR), - qm_irq, IRQF_SHARED, qm->dev_name, qm); + qm_irq, 0, qm->dev_name, qm); if (ret) return ret; if (qm->ver != QM_HW_V1) { ret = request_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), - qm_aeq_irq, IRQF_SHARED, qm->dev_name, qm); + qm_aeq_irq, 0, qm->dev_name, qm); if (ret) goto err_aeq_irq; if (qm->fun_type == QM_HW_PF) { ret = request_irq(pci_irq_vector(pdev, QM_ABNORMAL_EVENT_IRQ_VECTOR), - qm_abnormal_irq, IRQF_SHARED, - qm->dev_name, qm); + qm_abnormal_irq, 0, qm->dev_name, qm); if (ret) goto err_abonormal_irq; } From 1db0016e0d223b644d2c77a4569e8939f5c55a7c Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Fri, 5 Feb 2021 18:12:57 +0800 Subject: [PATCH 152/154] crypto: hisilicon/qm - do not reset hardware when CE happens There is no need to reset hardware when Corrected Error(CE) happens. Signed-off-by: Weili Qian Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 1 + drivers/crypto/hisilicon/qm.c | 23 +++++++++++++++++------ drivers/crypto/hisilicon/qm.h | 1 + drivers/crypto/hisilicon/sec2/sec_main.c | 1 + drivers/crypto/hisilicon/zip/zip_main.c | 5 ++++- 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 5c56ec4e2174..e7a2c70eb9cf 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -881,6 +881,7 @@ static const struct hisi_qm_err_ini hpre_err_ini = { .fe = 0, .ecc_2bits_mask = HPRE_CORE_ECC_2BIT_ERR | HPRE_OOO_ECC_2BIT_ERR, + .dev_ce_mask = HPRE_HAC_RAS_CE_ENABLE, .msi_wr_port = HPRE_WR_MSI_PORT, .acpi_rst = "HRST", } diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 1dea61a0c3c7..5b77c8e70b64 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -1612,7 +1612,7 @@ static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status) static enum acc_err_result qm_hw_error_handle_v2(struct hisi_qm *qm) { - u32 error_status, tmp; + u32 error_status, tmp, val; /* read err sts */ tmp = readl(qm->io_base + QM_ABNORMAL_INT_STATUS); @@ -1623,9 +1623,13 @@ static enum acc_err_result qm_hw_error_handle_v2(struct hisi_qm *qm) qm->err_status.is_qm_ecc_mbit = true; qm_log_hw_error(qm, error_status); - if (error_status == QM_DB_RANDOM_INVALID) { + val = error_status | QM_DB_RANDOM_INVALID | QM_BASE_CE; + /* ce error does not need to be reset */ + if (val == (QM_DB_RANDOM_INVALID | QM_BASE_CE)) { writel(error_status, qm->io_base + QM_ABNORMAL_INT_SOURCE); + writel(qm->err_ini->err_info.nfe, + qm->io_base + QM_RAS_NFE_ENABLE); return ACC_ERR_RECOVERED; } @@ -3317,12 +3321,19 @@ static enum acc_err_result qm_dev_err_handle(struct hisi_qm *qm) if (err_sts & qm->err_ini->err_info.ecc_2bits_mask) qm->err_status.is_dev_ecc_mbit = true; - if (!qm->err_ini->log_dev_hw_err) { - dev_err(&qm->pdev->dev, "Device doesn't support log hw error!\n"); - return ACC_ERR_NEED_RESET; + if (qm->err_ini->log_dev_hw_err) + qm->err_ini->log_dev_hw_err(qm, err_sts); + + /* ce error does not need to be reset */ + if ((err_sts | qm->err_ini->err_info.dev_ce_mask) == + qm->err_ini->err_info.dev_ce_mask) { + if (qm->err_ini->clear_dev_hw_err_status) + qm->err_ini->clear_dev_hw_err_status(qm, + err_sts); + + return ACC_ERR_RECOVERED; } - qm->err_ini->log_dev_hw_err(qm, err_sts); return ACC_ERR_NEED_RESET; } diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index c3f8b741119a..af47b76f4747 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -173,6 +173,7 @@ struct hisi_qm_err_info { char *acpi_rst; u32 msi_wr_port; u32 ecc_2bits_mask; + u32 dev_ce_mask; u32 ce; u32 nfe; u32 fe; diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 086722795d5c..dc68ba76f65e 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -752,6 +752,7 @@ static const struct hisi_qm_err_ini sec_err_ini = { QM_ACC_WB_NOT_READY_TIMEOUT, .fe = 0, .ecc_2bits_mask = SEC_CORE_INT_STATUS_M_ECC, + .dev_ce_mask = SEC_RAS_CE_ENB_MSK, .msi_wr_port = BIT(0), .acpi_rst = "SRST", } diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 24ddd0dd85da..02c445722445 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -66,6 +66,7 @@ #define HZIP_CORE_INT_STATUS_M_ECC BIT(1) #define HZIP_CORE_SRAM_ECC_ERR_INFO 0x301148 #define HZIP_CORE_INT_RAS_CE_ENB 0x301160 +#define HZIP_CORE_INT_RAS_CE_ENABLE 0x1 #define HZIP_CORE_INT_RAS_NFE_ENB 0x301164 #define HZIP_CORE_INT_RAS_FE_ENB 0x301168 #define HZIP_CORE_INT_RAS_NFE_ENABLE 0x7FE @@ -327,7 +328,8 @@ static void hisi_zip_hw_error_enable(struct hisi_qm *qm) writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_SOURCE); /* configure error type */ - writel(0x1, qm->io_base + HZIP_CORE_INT_RAS_CE_ENB); + writel(HZIP_CORE_INT_RAS_CE_ENABLE, + qm->io_base + HZIP_CORE_INT_RAS_CE_ENB); writel(0x0, qm->io_base + HZIP_CORE_INT_RAS_FE_ENB); writel(HZIP_CORE_INT_RAS_NFE_ENABLE, qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB); @@ -727,6 +729,7 @@ static const struct hisi_qm_err_ini hisi_zip_err_ini = { QM_ACC_WB_NOT_READY_TIMEOUT, .fe = 0, .ecc_2bits_mask = HZIP_CORE_INT_STATUS_M_ECC, + .dev_ce_mask = HZIP_CORE_INT_RAS_CE_ENABLE, .msi_wr_port = HZIP_WR_PORT, .acpi_rst = "ZRST", } From 4cf0806ee92a8820f630c2e1ba4479575e393bf3 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Fri, 5 Feb 2021 18:12:58 +0800 Subject: [PATCH 153/154] crypto: hisilicon/qm - fix printing format issue This patch fixes inconsistent of printing format with argument type. Signed-off-by: Weili Qian Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 16 ++++++++-------- drivers/crypto/hisilicon/qm.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 5b77c8e70b64..13cb4216561a 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -725,7 +725,7 @@ static irqreturn_t qm_aeq_irq(int irq, void *data) dev_err(&qm->pdev->dev, "%s overflow\n", qm_fifo_overflow[type]); else - dev_err(&qm->pdev->dev, "unknown error type %d\n", + dev_err(&qm->pdev->dev, "unknown error type %u\n", type); if (qm->status.aeq_head == QM_Q_DEPTH - 1) { @@ -1129,7 +1129,7 @@ static int dump_show(struct hisi_qm *qm, void *info, dev_info(dev, "%s DUMP\n", info_name); for (i = 0; i < info_size; i += BYTE_PER_DW) { - pr_info("DW%d: %02X%02X %02X%02X\n", i / BYTE_PER_DW, + pr_info("DW%u: %02X%02X %02X%02X\n", i / BYTE_PER_DW, info_buf[i], info_buf[i + 1UL], info_buf[i + 2UL], info_buf[i + 3UL]); } @@ -1162,7 +1162,7 @@ static int qm_sqc_dump(struct hisi_qm *qm, const char *s) ret = kstrtou32(s, 0, &qp_id); if (ret || qp_id >= qm->qp_num) { - dev_err(dev, "Please input qp num (0-%d)", qm->qp_num - 1); + dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1); return -EINVAL; } @@ -1208,7 +1208,7 @@ static int qm_cqc_dump(struct hisi_qm *qm, const char *s) ret = kstrtou32(s, 0, &qp_id); if (ret || qp_id >= qm->qp_num) { - dev_err(dev, "Please input qp num (0-%d)", qm->qp_num - 1); + dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1); return -EINVAL; } @@ -1287,7 +1287,7 @@ static int q_dump_param_parse(struct hisi_qm *qm, char *s, ret = kstrtou32(presult, 0, q_id); if (ret || *q_id >= qp_num) { - dev_err(dev, "Please input qp num (0-%d)", qp_num - 1); + dev_err(dev, "Please input qp num (0-%u)", qp_num - 1); return -EINVAL; } @@ -2729,7 +2729,7 @@ int hisi_qm_start(struct hisi_qm *qm) return -EPERM; } - dev_dbg(dev, "qm start with %d queue pairs\n", qm->qp_num); + dev_dbg(dev, "qm start with %u queue pairs\n", qm->qp_num); if (!qm->qp_num) { dev_err(dev, "qp_num should not be 0\n"); @@ -3164,7 +3164,7 @@ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, mutex_unlock(&qm_list->lock); if (ret) - pr_info("Failed to create qps, node[%d], alg[%d], qp[%d]!\n", + pr_info("Failed to create qps, node[%d], alg[%u], qp[%d]!\n", node, alg_type, qp_num); err: @@ -3372,7 +3372,7 @@ pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, if (pdev->is_virtfn) return PCI_ERS_RESULT_NONE; - pci_info(pdev, "PCI error detected, state(=%d)!!\n", state); + pci_info(pdev, "PCI error detected, state(=%u)!!\n", state); if (state == pci_channel_io_perm_failure) return PCI_ERS_RESULT_DISCONNECT; diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index af47b76f4747..54967c6b9c78 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -307,7 +307,7 @@ static inline int q_num_set(const char *val, const struct kernel_param *kp, if (!pdev) { q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2); - pr_info("No device found currently, suppose queue number is %d\n", + pr_info("No device found currently, suppose queue number is %u\n", q_num); } else { if (pdev->revision == QM_HW_V1) From 0de9dc80625b0ca1cb9730c5ed1c5a8cab538369 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Sun, 7 Feb 2021 10:39:05 +0800 Subject: [PATCH 154/154] hwrng: timeriomem - Use device-managed registration API Use devm_hwrng_register to get rid of manual unregistration. Signed-off-by: Tian Tao Signed-off-by: Herbert Xu --- drivers/char/hw_random/timeriomem-rng.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c index f35f0f31f52a..8ea1fc831eb7 100644 --- a/drivers/char/hw_random/timeriomem-rng.c +++ b/drivers/char/hw_random/timeriomem-rng.c @@ -169,7 +169,7 @@ static int timeriomem_rng_probe(struct platform_device *pdev) priv->present = 1; complete(&priv->completion); - err = hwrng_register(&priv->rng_ops); + err = devm_hwrng_register(&pdev->dev, &priv->rng_ops); if (err) { dev_err(&pdev->dev, "problem registering\n"); return err; @@ -185,7 +185,6 @@ static int timeriomem_rng_remove(struct platform_device *pdev) { struct timeriomem_rng_private *priv = platform_get_drvdata(pdev); - hwrng_unregister(&priv->rng_ops); hrtimer_cancel(&priv->timer); return 0;