Linux Audio

Check our new training course

Embedded Linux Audio

Check our new training course
with Creative Commons CC-BY-SA
lecture materials

Bootlin logo

Elixir Cross Referencer

Loading...
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Poly1305 authenticator algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 */

#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/simd.h>

asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
				    const u32 *r, unsigned int blocks);
asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
				     unsigned int blocks, const u32 *u);
asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
				     unsigned int blocks, const u32 *u);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);

static void poly1305_simd_mult(u32 *a, const u32 *b)
{
	u8 m[POLY1305_BLOCK_SIZE];

	memset(m, 0, sizeof(m));
	/* The poly1305 block function adds a hi-bit to the accumulator which
	 * we don't need for key multiplication; compensate for it. */
	a[4] -= 1 << 24;
	poly1305_block_sse2(a, m, b, 1);
}

static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
					   const u8 *src, unsigned int srclen)
{
	unsigned int datalen;

	if (unlikely(!dctx->sset)) {
		datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
		src += srclen - datalen;
		srclen = datalen;
	}
	if (srclen >= POLY1305_BLOCK_SIZE) {
		poly1305_core_blocks(&dctx->h, dctx->r, src,
				     srclen / POLY1305_BLOCK_SIZE, 1);
		srclen %= POLY1305_BLOCK_SIZE;
	}
	return srclen;
}

static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
					 const u8 *src, unsigned int srclen)
{
	unsigned int blocks, datalen;

	if (unlikely(!dctx->sset)) {
		datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
		src += srclen - datalen;
		srclen = datalen;
	}

	if (IS_ENABLED(CONFIG_AS_AVX2) &&
	    static_branch_likely(&poly1305_use_avx2) &&
	    srclen >= POLY1305_BLOCK_SIZE * 4) {
		if (unlikely(dctx->rset < 4)) {
			if (dctx->rset < 2) {
				dctx->r[1] = dctx->r[0];
				poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
			}
			dctx->r[2] = dctx->r[1];
			poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
			dctx->r[3] = dctx->r[2];
			poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
			dctx->rset = 4;
		}
		blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
		poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
				     dctx->r[1].r);
		src += POLY1305_BLOCK_SIZE * 4 * blocks;
		srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
	}

	if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
		if (unlikely(dctx->rset < 2)) {
			dctx->r[1] = dctx->r[0];
			poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
			dctx->rset = 2;
		}
		blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
		poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
				     blocks, dctx->r[1].r);
		src += POLY1305_BLOCK_SIZE * 2 * blocks;
		srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
	}
	if (srclen >= POLY1305_BLOCK_SIZE) {
		poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
		srclen -= POLY1305_BLOCK_SIZE;
	}
	return srclen;
}

void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
{
	poly1305_init_generic(desc, key);
}
EXPORT_SYMBOL(poly1305_init_arch);

void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
			  unsigned int srclen)
{
	unsigned int bytes;

	if (unlikely(dctx->buflen)) {
		bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		srclen -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			if (static_branch_likely(&poly1305_use_simd) &&
			    likely(crypto_simd_usable())) {
				kernel_fpu_begin();
				poly1305_simd_blocks(dctx, dctx->buf,
						     POLY1305_BLOCK_SIZE);
				kernel_fpu_end();
			} else {
				poly1305_scalar_blocks(dctx, dctx->buf,
						       POLY1305_BLOCK_SIZE);
			}
			dctx->buflen = 0;
		}
	}

	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
		if (static_branch_likely(&poly1305_use_simd) &&
		    likely(crypto_simd_usable())) {
			kernel_fpu_begin();
			bytes = poly1305_simd_blocks(dctx, src, srclen);
			kernel_fpu_end();
		} else {
			bytes = poly1305_scalar_blocks(dctx, src, srclen);
		}
		src += srclen - bytes;
		srclen = bytes;
	}

	if (unlikely(srclen)) {
		dctx->buflen = srclen;
		memcpy(dctx->buf, src, srclen);
	}
}
EXPORT_SYMBOL(poly1305_update_arch);

void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest)
{
	poly1305_final_generic(desc, digest);
}
EXPORT_SYMBOL(poly1305_final_arch);

static int crypto_poly1305_init(struct shash_desc *desc)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	poly1305_core_init(&dctx->h);
	dctx->buflen = 0;
	dctx->rset = 0;
	dctx->sset = false;

	return 0;
}

static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (unlikely(!dctx->sset))
		return -ENOKEY;

	poly1305_final_generic(dctx, dst);
	return 0;
}

static int poly1305_simd_update(struct shash_desc *desc,
				const u8 *src, unsigned int srclen)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	poly1305_update_arch(dctx, src, srclen);
	return 0;
}

static struct shash_alg alg = {
	.digestsize	= POLY1305_DIGEST_SIZE,
	.init		= crypto_poly1305_init,
	.update		= poly1305_simd_update,
	.final		= crypto_poly1305_final,
	.descsize	= sizeof(struct poly1305_desc_ctx),
	.base		= {
		.cra_name		= "poly1305",
		.cra_driver_name	= "poly1305-simd",
		.cra_priority		= 300,
		.cra_blocksize		= POLY1305_BLOCK_SIZE,
		.cra_module		= THIS_MODULE,
	},
};

static int __init poly1305_simd_mod_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_XMM2))
		return 0;

	static_branch_enable(&poly1305_use_simd);

	if (IS_ENABLED(CONFIG_AS_AVX2) &&
	    boot_cpu_has(X86_FEATURE_AVX) &&
	    boot_cpu_has(X86_FEATURE_AVX2) &&
	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
		static_branch_enable(&poly1305_use_avx2);

	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0;
}

static void __exit poly1305_simd_mod_exit(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
		crypto_unregister_shash(&alg);
}

module_init(poly1305_simd_mod_init);
module_exit(poly1305_simd_mod_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
MODULE_DESCRIPTION("Poly1305 authenticator");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-simd");