mirror of
https://github.com/hanwckf/immortalwrt-mt798x.git
synced 2025-01-10 03:09:08 +08:00
kernel: 5.4: import wireguard backport
Rather than using the clunky, old, slower wireguard-linux-compat out of tree module, this commit does a patch-by-patch backport of upstream's wireguard to 5.4. This specific backport is in widespread use, being part of SUSE's enterprise kernel, Oracle's enterprise kernel, Google's Android kernel, Gentoo's distro kernel, and probably more I've forgotten about. It's definitely the "more proper" way of adding wireguard to a kernel than the ugly compat.h hell of the wireguard-linux-compat repo. And most importantly for OpenWRT, it allows using the same module configuration code for 5.10 as for 5.4, with no need for bifurcation. These patches are from the backport tree which is maintained in the open here: https://git.zx2c4.com/wireguard-linux/log/?h=backport-5.4.y I'll be sending PRs to update this as needed. Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> (cherry picked from commit 3888fa78802354ab7bbd19b7d061fd80a16ce06b) (cherry picked from commit d54072587146dd0db9bb52b513234d944edabda3) (cherry picked from commit 196f3d586f11d96ba4ab60068cfb12420bcd20fd) (cherry picked from commit 3500fd7938a6d0c0e320295f0aa2fa34b1ebc08d) (cherry picked from commit 23b801d3ba57e34cc609ea40982c7fbed08164e9) (cherry picked from commit 0c0cb97da7f5cc06919449131dd57ed805f8f78d) (cherry picked from commit 2a27f6f90a430342cdbe84806e8b10acff446a2d) Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
This commit is contained in:
parent
aebfc2f6f3
commit
c0cb86e1d5
@ -0,0 +1,112 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:07 +0100
|
||||
Subject: [PATCH] crypto: lib - tidy up lib/crypto Kconfig and Makefile
|
||||
|
||||
commit 746b2e024c67aa605ac12d135cd7085a49cf9dc4 upstream.
|
||||
|
||||
In preparation of introducing a set of crypto library interfaces, tidy
|
||||
up the Makefile and split off the Kconfig symbols into a separate file.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
crypto/Kconfig | 13 +------------
|
||||
lib/crypto/Kconfig | 15 +++++++++++++++
|
||||
lib/crypto/Makefile | 16 ++++++++--------
|
||||
3 files changed, 24 insertions(+), 20 deletions(-)
|
||||
create mode 100644 lib/crypto/Kconfig
|
||||
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -878,9 +878,6 @@ config CRYPTO_SHA1_PPC_SPE
|
||||
SHA-1 secure hash standard (DFIPS 180-4) implemented
|
||||
using powerpc SPE SIMD instruction set.
|
||||
|
||||
-config CRYPTO_LIB_SHA256
|
||||
- tristate
|
||||
-
|
||||
config CRYPTO_SHA256
|
||||
tristate "SHA224 and SHA256 digest algorithm"
|
||||
select CRYPTO_HASH
|
||||
@@ -1019,9 +1016,6 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL
|
||||
|
||||
comment "Ciphers"
|
||||
|
||||
-config CRYPTO_LIB_AES
|
||||
- tristate
|
||||
-
|
||||
config CRYPTO_AES
|
||||
tristate "AES cipher algorithms"
|
||||
select CRYPTO_ALGAPI
|
||||
@@ -1150,9 +1144,6 @@ config CRYPTO_ANUBIS
|
||||
<https://www.cosic.esat.kuleuven.be/nessie/reports/>
|
||||
<http://www.larc.usp.br/~pbarreto/AnubisPage.html>
|
||||
|
||||
-config CRYPTO_LIB_ARC4
|
||||
- tristate
|
||||
-
|
||||
config CRYPTO_ARC4
|
||||
tristate "ARC4 cipher algorithm"
|
||||
select CRYPTO_BLKCIPHER
|
||||
@@ -1339,9 +1330,6 @@ config CRYPTO_CAST6_AVX_X86_64
|
||||
This module provides the Cast6 cipher algorithm that processes
|
||||
eight blocks parallel using the AVX instruction set.
|
||||
|
||||
-config CRYPTO_LIB_DES
|
||||
- tristate
|
||||
-
|
||||
config CRYPTO_DES
|
||||
tristate "DES and Triple DES EDE cipher algorithms"
|
||||
select CRYPTO_ALGAPI
|
||||
@@ -1845,6 +1833,7 @@ config CRYPTO_STATS
|
||||
config CRYPTO_HASH_INFO
|
||||
bool
|
||||
|
||||
+source "lib/crypto/Kconfig"
|
||||
source "drivers/crypto/Kconfig"
|
||||
source "crypto/asymmetric_keys/Kconfig"
|
||||
source "certs/Kconfig"
|
||||
--- /dev/null
|
||||
+++ b/lib/crypto/Kconfig
|
||||
@@ -0,0 +1,15 @@
|
||||
+# SPDX-License-Identifier: GPL-2.0
|
||||
+
|
||||
+comment "Crypto library routines"
|
||||
+
|
||||
+config CRYPTO_LIB_AES
|
||||
+ tristate
|
||||
+
|
||||
+config CRYPTO_LIB_ARC4
|
||||
+ tristate
|
||||
+
|
||||
+config CRYPTO_LIB_DES
|
||||
+ tristate
|
||||
+
|
||||
+config CRYPTO_LIB_SHA256
|
||||
+ tristate
|
||||
--- a/lib/crypto/Makefile
|
||||
+++ b/lib/crypto/Makefile
|
||||
@@ -1,13 +1,13 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
-obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
|
||||
-libaes-y := aes.o
|
||||
+obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
|
||||
+libaes-y := aes.o
|
||||
|
||||
-obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
|
||||
-libarc4-y := arc4.o
|
||||
+obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
|
||||
+libarc4-y := arc4.o
|
||||
|
||||
-obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
|
||||
-libdes-y := des.o
|
||||
+obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
|
||||
+libdes-y := des.o
|
||||
|
||||
-obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
|
||||
-libsha256-y := sha256.o
|
||||
+obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
|
||||
+libsha256-y := sha256.o
|
@ -0,0 +1,668 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:08 +0100
|
||||
Subject: [PATCH] crypto: chacha - move existing library code into lib/crypto
|
||||
|
||||
commit 5fb8ef25803ef33e2eb60b626435828b937bed75 upstream.
|
||||
|
||||
Currently, our generic ChaCha implementation consists of a permute
|
||||
function in lib/chacha.c that operates on the 64-byte ChaCha state
|
||||
directly [and which is always included into the core kernel since it
|
||||
is used by the /dev/random driver], and the crypto API plumbing to
|
||||
expose it as a skcipher.
|
||||
|
||||
In order to support in-kernel users that need the ChaCha streamcipher
|
||||
but have no need [or tolerance] for going through the abstractions of
|
||||
the crypto API, let's expose the streamcipher bits via a library API
|
||||
as well, in a way that permits the implementation to be superseded by
|
||||
an architecture specific one if provided.
|
||||
|
||||
So move the streamcipher code into a separate module in lib/crypto,
|
||||
and expose the init() and crypt() routines to users of the library.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/chacha-neon-glue.c | 2 +-
|
||||
arch/arm64/crypto/chacha-neon-glue.c | 2 +-
|
||||
arch/x86/crypto/chacha_glue.c | 2 +-
|
||||
crypto/Kconfig | 1 +
|
||||
crypto/chacha_generic.c | 60 ++--------------------
|
||||
include/crypto/chacha.h | 77 ++++++++++++++++++++++------
|
||||
include/crypto/internal/chacha.h | 53 +++++++++++++++++++
|
||||
lib/Makefile | 3 +-
|
||||
lib/crypto/Kconfig | 26 ++++++++++
|
||||
lib/crypto/Makefile | 4 ++
|
||||
lib/{ => crypto}/chacha.c | 20 ++++----
|
||||
lib/crypto/libchacha.c | 35 +++++++++++++
|
||||
12 files changed, 199 insertions(+), 86 deletions(-)
|
||||
create mode 100644 include/crypto/internal/chacha.h
|
||||
rename lib/{ => crypto}/chacha.c (88%)
|
||||
create mode 100644 lib/crypto/libchacha.c
|
||||
|
||||
--- a/arch/arm/crypto/chacha-neon-glue.c
|
||||
+++ b/arch/arm/crypto/chacha-neon-glue.c
|
||||
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
-#include <crypto/chacha.h>
|
||||
+#include <crypto/internal/chacha.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/kernel.h>
|
||||
--- a/arch/arm64/crypto/chacha-neon-glue.c
|
||||
+++ b/arch/arm64/crypto/chacha-neon-glue.c
|
||||
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
-#include <crypto/chacha.h>
|
||||
+#include <crypto/internal/chacha.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/kernel.h>
|
||||
--- a/arch/x86/crypto/chacha_glue.c
|
||||
+++ b/arch/x86/crypto/chacha_glue.c
|
||||
@@ -7,7 +7,7 @@
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
-#include <crypto/chacha.h>
|
||||
+#include <crypto/internal/chacha.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/kernel.h>
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -1393,6 +1393,7 @@ config CRYPTO_SALSA20
|
||||
|
||||
config CRYPTO_CHACHA20
|
||||
tristate "ChaCha stream cipher algorithms"
|
||||
+ select CRYPTO_LIB_CHACHA_GENERIC
|
||||
select CRYPTO_BLKCIPHER
|
||||
help
|
||||
The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms.
|
||||
--- a/crypto/chacha_generic.c
|
||||
+++ b/crypto/chacha_generic.c
|
||||
@@ -8,29 +8,10 @@
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/algapi.h>
|
||||
-#include <crypto/chacha.h>
|
||||
+#include <crypto/internal/chacha.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
-static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src,
|
||||
- unsigned int bytes, int nrounds)
|
||||
-{
|
||||
- /* aligned to potentially speed up crypto_xor() */
|
||||
- u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long));
|
||||
-
|
||||
- while (bytes >= CHACHA_BLOCK_SIZE) {
|
||||
- chacha_block(state, stream, nrounds);
|
||||
- crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE);
|
||||
- bytes -= CHACHA_BLOCK_SIZE;
|
||||
- dst += CHACHA_BLOCK_SIZE;
|
||||
- src += CHACHA_BLOCK_SIZE;
|
||||
- }
|
||||
- if (bytes) {
|
||||
- chacha_block(state, stream, nrounds);
|
||||
- crypto_xor_cpy(dst, src, stream, bytes);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
static int chacha_stream_xor(struct skcipher_request *req,
|
||||
const struct chacha_ctx *ctx, const u8 *iv)
|
||||
{
|
||||
@@ -48,8 +29,8 @@ static int chacha_stream_xor(struct skci
|
||||
if (nbytes < walk.total)
|
||||
nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE);
|
||||
|
||||
- chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
|
||||
- nbytes, ctx->nrounds);
|
||||
+ chacha_crypt_generic(state, walk.dst.virt.addr,
|
||||
+ walk.src.virt.addr, nbytes, ctx->nrounds);
|
||||
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
}
|
||||
|
||||
@@ -58,41 +39,10 @@ static int chacha_stream_xor(struct skci
|
||||
|
||||
void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv)
|
||||
{
|
||||
- state[0] = 0x61707865; /* "expa" */
|
||||
- state[1] = 0x3320646e; /* "nd 3" */
|
||||
- state[2] = 0x79622d32; /* "2-by" */
|
||||
- state[3] = 0x6b206574; /* "te k" */
|
||||
- state[4] = ctx->key[0];
|
||||
- state[5] = ctx->key[1];
|
||||
- state[6] = ctx->key[2];
|
||||
- state[7] = ctx->key[3];
|
||||
- state[8] = ctx->key[4];
|
||||
- state[9] = ctx->key[5];
|
||||
- state[10] = ctx->key[6];
|
||||
- state[11] = ctx->key[7];
|
||||
- state[12] = get_unaligned_le32(iv + 0);
|
||||
- state[13] = get_unaligned_le32(iv + 4);
|
||||
- state[14] = get_unaligned_le32(iv + 8);
|
||||
- state[15] = get_unaligned_le32(iv + 12);
|
||||
+ chacha_init_generic(state, ctx->key, iv);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_chacha_init);
|
||||
|
||||
-static int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
- unsigned int keysize, int nrounds)
|
||||
-{
|
||||
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
- int i;
|
||||
-
|
||||
- if (keysize != CHACHA_KEY_SIZE)
|
||||
- return -EINVAL;
|
||||
-
|
||||
- for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
|
||||
- ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
|
||||
-
|
||||
- ctx->nrounds = nrounds;
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
unsigned int keysize)
|
||||
{
|
||||
@@ -126,7 +76,7 @@ int crypto_xchacha_crypt(struct skcipher
|
||||
|
||||
/* Compute the subkey given the original key and first 128 nonce bits */
|
||||
crypto_chacha_init(state, ctx, req->iv);
|
||||
- hchacha_block(state, subctx.key, ctx->nrounds);
|
||||
+ hchacha_block_generic(state, subctx.key, ctx->nrounds);
|
||||
subctx.nrounds = ctx->nrounds;
|
||||
|
||||
/* Build the real IV */
|
||||
--- a/include/crypto/chacha.h
|
||||
+++ b/include/crypto/chacha.h
|
||||
@@ -15,9 +15,8 @@
|
||||
#ifndef _CRYPTO_CHACHA_H
|
||||
#define _CRYPTO_CHACHA_H
|
||||
|
||||
-#include <crypto/skcipher.h>
|
||||
+#include <asm/unaligned.h>
|
||||
#include <linux/types.h>
|
||||
-#include <linux/crypto.h>
|
||||
|
||||
/* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */
|
||||
#define CHACHA_IV_SIZE 16
|
||||
@@ -29,26 +28,70 @@
|
||||
/* 192-bit nonce, then 64-bit stream position */
|
||||
#define XCHACHA_IV_SIZE 32
|
||||
|
||||
-struct chacha_ctx {
|
||||
- u32 key[8];
|
||||
- int nrounds;
|
||||
-};
|
||||
-
|
||||
-void chacha_block(u32 *state, u8 *stream, int nrounds);
|
||||
+void chacha_block_generic(u32 *state, u8 *stream, int nrounds);
|
||||
static inline void chacha20_block(u32 *state, u8 *stream)
|
||||
{
|
||||
- chacha_block(state, stream, 20);
|
||||
+ chacha_block_generic(state, stream, 20);
|
||||
}
|
||||
-void hchacha_block(const u32 *in, u32 *out, int nrounds);
|
||||
|
||||
-void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv);
|
||||
+void hchacha_block_arch(const u32 *state, u32 *out, int nrounds);
|
||||
+void hchacha_block_generic(const u32 *state, u32 *out, int nrounds);
|
||||
+
|
||||
+static inline void hchacha_block(const u32 *state, u32 *out, int nrounds)
|
||||
+{
|
||||
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
|
||||
+ hchacha_block_arch(state, out, nrounds);
|
||||
+ else
|
||||
+ hchacha_block_generic(state, out, nrounds);
|
||||
+}
|
||||
|
||||
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
- unsigned int keysize);
|
||||
-int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
- unsigned int keysize);
|
||||
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv);
|
||||
+static inline void chacha_init_generic(u32 *state, const u32 *key, const u8 *iv)
|
||||
+{
|
||||
+ state[0] = 0x61707865; /* "expa" */
|
||||
+ state[1] = 0x3320646e; /* "nd 3" */
|
||||
+ state[2] = 0x79622d32; /* "2-by" */
|
||||
+ state[3] = 0x6b206574; /* "te k" */
|
||||
+ state[4] = key[0];
|
||||
+ state[5] = key[1];
|
||||
+ state[6] = key[2];
|
||||
+ state[7] = key[3];
|
||||
+ state[8] = key[4];
|
||||
+ state[9] = key[5];
|
||||
+ state[10] = key[6];
|
||||
+ state[11] = key[7];
|
||||
+ state[12] = get_unaligned_le32(iv + 0);
|
||||
+ state[13] = get_unaligned_le32(iv + 4);
|
||||
+ state[14] = get_unaligned_le32(iv + 8);
|
||||
+ state[15] = get_unaligned_le32(iv + 12);
|
||||
+}
|
||||
|
||||
-int crypto_chacha_crypt(struct skcipher_request *req);
|
||||
-int crypto_xchacha_crypt(struct skcipher_request *req);
|
||||
+static inline void chacha_init(u32 *state, const u32 *key, const u8 *iv)
|
||||
+{
|
||||
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
|
||||
+ chacha_init_arch(state, key, iv);
|
||||
+ else
|
||||
+ chacha_init_generic(state, key, iv);
|
||||
+}
|
||||
+
|
||||
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
|
||||
+ unsigned int bytes, int nrounds);
|
||||
+void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src,
|
||||
+ unsigned int bytes, int nrounds);
|
||||
+
|
||||
+static inline void chacha_crypt(u32 *state, u8 *dst, const u8 *src,
|
||||
+ unsigned int bytes, int nrounds)
|
||||
+{
|
||||
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
|
||||
+ chacha_crypt_arch(state, dst, src, bytes, nrounds);
|
||||
+ else
|
||||
+ chacha_crypt_generic(state, dst, src, bytes, nrounds);
|
||||
+}
|
||||
+
|
||||
+static inline void chacha20_crypt(u32 *state, u8 *dst, const u8 *src,
|
||||
+ unsigned int bytes)
|
||||
+{
|
||||
+ chacha_crypt(state, dst, src, bytes, 20);
|
||||
+}
|
||||
|
||||
#endif /* _CRYPTO_CHACHA_H */
|
||||
--- /dev/null
|
||||
+++ b/include/crypto/internal/chacha.h
|
||||
@@ -0,0 +1,53 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 */
|
||||
+
|
||||
+#ifndef _CRYPTO_INTERNAL_CHACHA_H
|
||||
+#define _CRYPTO_INTERNAL_CHACHA_H
|
||||
+
|
||||
+#include <crypto/chacha.h>
|
||||
+#include <crypto/internal/skcipher.h>
|
||||
+#include <linux/crypto.h>
|
||||
+
|
||||
+struct chacha_ctx {
|
||||
+ u32 key[8];
|
||||
+ int nrounds;
|
||||
+};
|
||||
+
|
||||
+void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv);
|
||||
+
|
||||
+static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
+ unsigned int keysize, int nrounds)
|
||||
+{
|
||||
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
+ int i;
|
||||
+
|
||||
+ if (keysize != CHACHA_KEY_SIZE)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
|
||||
+ ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
|
||||
+
|
||||
+ ctx->nrounds = nrounds;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static inline int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
+ unsigned int keysize)
|
||||
+{
|
||||
+ return chacha_setkey(tfm, key, keysize, 20);
|
||||
+}
|
||||
+
|
||||
+static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
+ unsigned int keysize)
|
||||
+{
|
||||
+ return chacha_setkey(tfm, key, keysize, 12);
|
||||
+}
|
||||
+
|
||||
+int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
+ unsigned int keysize);
|
||||
+int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
+ unsigned int keysize);
|
||||
+
|
||||
+int crypto_chacha_crypt(struct skcipher_request *req);
|
||||
+int crypto_xchacha_crypt(struct skcipher_request *req);
|
||||
+
|
||||
+#endif /* _CRYPTO_INTERNAL_CHACHA_H */
|
||||
--- a/lib/Makefile
|
||||
+++ b/lib/Makefile
|
||||
@@ -26,8 +26,7 @@ endif
|
||||
|
||||
lib-y := ctype.o string.o vsprintf.o cmdline.o \
|
||||
rbtree.o radix-tree.o timerqueue.o xarray.o \
|
||||
- idr.o extable.o \
|
||||
- sha1.o chacha.o irq_regs.o argv_split.o \
|
||||
+ idr.o extable.o sha1.o irq_regs.o argv_split.o \
|
||||
flex_proportions.o ratelimit.o show_mem.o \
|
||||
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
|
||||
earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
|
||||
--- a/lib/crypto/Kconfig
|
||||
+++ b/lib/crypto/Kconfig
|
||||
@@ -8,6 +8,32 @@ config CRYPTO_LIB_AES
|
||||
config CRYPTO_LIB_ARC4
|
||||
tristate
|
||||
|
||||
+config CRYPTO_ARCH_HAVE_LIB_CHACHA
|
||||
+ tristate
|
||||
+ help
|
||||
+ Declares whether the architecture provides an arch-specific
|
||||
+ accelerated implementation of the ChaCha library interface,
|
||||
+ either builtin or as a module.
|
||||
+
|
||||
+config CRYPTO_LIB_CHACHA_GENERIC
|
||||
+ tristate
|
||||
+ select CRYPTO_ALGAPI
|
||||
+ help
|
||||
+ This symbol can be depended upon by arch implementations of the
|
||||
+ ChaCha library interface that require the generic code as a
|
||||
+ fallback, e.g., for SIMD implementations. If no arch specific
|
||||
+ implementation is enabled, this implementation serves the users
|
||||
+ of CRYPTO_LIB_CHACHA.
|
||||
+
|
||||
+config CRYPTO_LIB_CHACHA
|
||||
+ tristate "ChaCha library interface"
|
||||
+ depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
|
||||
+ select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n
|
||||
+ help
|
||||
+ Enable the ChaCha library interface. This interface may be fulfilled
|
||||
+ by either the generic implementation or an arch-specific one, if one
|
||||
+ is available and enabled.
|
||||
+
|
||||
config CRYPTO_LIB_DES
|
||||
tristate
|
||||
|
||||
--- a/lib/crypto/Makefile
|
||||
+++ b/lib/crypto/Makefile
|
||||
@@ -1,5 +1,9 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
+# chacha is used by the /dev/random driver which is always builtin
|
||||
+obj-y += chacha.o
|
||||
+obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o
|
||||
+
|
||||
obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
|
||||
libaes-y := aes.o
|
||||
|
||||
--- a/lib/chacha.c
|
||||
+++ /dev/null
|
||||
@@ -1,113 +0,0 @@
|
||||
-// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
-/*
|
||||
- * The "hash function" used as the core of the ChaCha stream cipher (RFC7539)
|
||||
- *
|
||||
- * Copyright (C) 2015 Martin Willi
|
||||
- */
|
||||
-
|
||||
-#include <linux/kernel.h>
|
||||
-#include <linux/export.h>
|
||||
-#include <linux/bitops.h>
|
||||
-#include <linux/cryptohash.h>
|
||||
-#include <asm/unaligned.h>
|
||||
-#include <crypto/chacha.h>
|
||||
-
|
||||
-static void chacha_permute(u32 *x, int nrounds)
|
||||
-{
|
||||
- int i;
|
||||
-
|
||||
- /* whitelist the allowed round counts */
|
||||
- WARN_ON_ONCE(nrounds != 20 && nrounds != 12);
|
||||
-
|
||||
- for (i = 0; i < nrounds; i += 2) {
|
||||
- x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16);
|
||||
- x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16);
|
||||
- x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16);
|
||||
- x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16);
|
||||
-
|
||||
- x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12);
|
||||
- x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12);
|
||||
- x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12);
|
||||
- x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12);
|
||||
-
|
||||
- x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8);
|
||||
- x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8);
|
||||
- x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8);
|
||||
- x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8);
|
||||
-
|
||||
- x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7);
|
||||
- x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7);
|
||||
- x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7);
|
||||
- x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7);
|
||||
-
|
||||
- x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16);
|
||||
- x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16);
|
||||
- x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16);
|
||||
- x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16);
|
||||
-
|
||||
- x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12);
|
||||
- x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12);
|
||||
- x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12);
|
||||
- x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12);
|
||||
-
|
||||
- x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8);
|
||||
- x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8);
|
||||
- x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8);
|
||||
- x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8);
|
||||
-
|
||||
- x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7);
|
||||
- x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7);
|
||||
- x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7);
|
||||
- x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-/**
|
||||
- * chacha_block - generate one keystream block and increment block counter
|
||||
- * @state: input state matrix (16 32-bit words)
|
||||
- * @stream: output keystream block (64 bytes)
|
||||
- * @nrounds: number of rounds (20 or 12; 20 is recommended)
|
||||
- *
|
||||
- * This is the ChaCha core, a function from 64-byte strings to 64-byte strings.
|
||||
- * The caller has already converted the endianness of the input. This function
|
||||
- * also handles incrementing the block counter in the input matrix.
|
||||
- */
|
||||
-void chacha_block(u32 *state, u8 *stream, int nrounds)
|
||||
-{
|
||||
- u32 x[16];
|
||||
- int i;
|
||||
-
|
||||
- memcpy(x, state, 64);
|
||||
-
|
||||
- chacha_permute(x, nrounds);
|
||||
-
|
||||
- for (i = 0; i < ARRAY_SIZE(x); i++)
|
||||
- put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
|
||||
-
|
||||
- state[12]++;
|
||||
-}
|
||||
-EXPORT_SYMBOL(chacha_block);
|
||||
-
|
||||
-/**
|
||||
- * hchacha_block - abbreviated ChaCha core, for XChaCha
|
||||
- * @in: input state matrix (16 32-bit words)
|
||||
- * @out: output (8 32-bit words)
|
||||
- * @nrounds: number of rounds (20 or 12; 20 is recommended)
|
||||
- *
|
||||
- * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step
|
||||
- * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha
|
||||
- * skips the final addition of the initial state, and outputs only certain words
|
||||
- * of the state. It should not be used for streaming directly.
|
||||
- */
|
||||
-void hchacha_block(const u32 *in, u32 *out, int nrounds)
|
||||
-{
|
||||
- u32 x[16];
|
||||
-
|
||||
- memcpy(x, in, 64);
|
||||
-
|
||||
- chacha_permute(x, nrounds);
|
||||
-
|
||||
- memcpy(&out[0], &x[0], 16);
|
||||
- memcpy(&out[4], &x[12], 16);
|
||||
-}
|
||||
-EXPORT_SYMBOL(hchacha_block);
|
||||
--- /dev/null
|
||||
+++ b/lib/crypto/chacha.c
|
||||
@@ -0,0 +1,115 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
+/*
|
||||
+ * The "hash function" used as the core of the ChaCha stream cipher (RFC7539)
|
||||
+ *
|
||||
+ * Copyright (C) 2015 Martin Willi
|
||||
+ */
|
||||
+
|
||||
+#include <linux/bug.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/export.h>
|
||||
+#include <linux/bitops.h>
|
||||
+#include <linux/string.h>
|
||||
+#include <linux/cryptohash.h>
|
||||
+#include <asm/unaligned.h>
|
||||
+#include <crypto/chacha.h>
|
||||
+
|
||||
+static void chacha_permute(u32 *x, int nrounds)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ /* whitelist the allowed round counts */
|
||||
+ WARN_ON_ONCE(nrounds != 20 && nrounds != 12);
|
||||
+
|
||||
+ for (i = 0; i < nrounds; i += 2) {
|
||||
+ x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16);
|
||||
+ x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16);
|
||||
+ x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16);
|
||||
+ x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16);
|
||||
+
|
||||
+ x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12);
|
||||
+ x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12);
|
||||
+ x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12);
|
||||
+ x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12);
|
||||
+
|
||||
+ x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8);
|
||||
+ x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8);
|
||||
+ x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8);
|
||||
+ x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8);
|
||||
+
|
||||
+ x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7);
|
||||
+ x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7);
|
||||
+ x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7);
|
||||
+ x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7);
|
||||
+
|
||||
+ x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16);
|
||||
+ x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16);
|
||||
+ x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16);
|
||||
+ x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16);
|
||||
+
|
||||
+ x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12);
|
||||
+ x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12);
|
||||
+ x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12);
|
||||
+ x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12);
|
||||
+
|
||||
+ x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8);
|
||||
+ x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8);
|
||||
+ x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8);
|
||||
+ x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8);
|
||||
+
|
||||
+ x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7);
|
||||
+ x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7);
|
||||
+ x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7);
|
||||
+ x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * chacha_block_generic - generate one keystream block and increment block counter
|
||||
+ * @state: input state matrix (16 32-bit words)
|
||||
+ * @stream: output keystream block (64 bytes)
|
||||
+ * @nrounds: number of rounds (20 or 12; 20 is recommended)
|
||||
+ *
|
||||
+ * This is the ChaCha core, a function from 64-byte strings to 64-byte strings.
|
||||
+ * The caller has already converted the endianness of the input. This function
|
||||
+ * also handles incrementing the block counter in the input matrix.
|
||||
+ */
|
||||
+void chacha_block_generic(u32 *state, u8 *stream, int nrounds)
|
||||
+{
|
||||
+ u32 x[16];
|
||||
+ int i;
|
||||
+
|
||||
+ memcpy(x, state, 64);
|
||||
+
|
||||
+ chacha_permute(x, nrounds);
|
||||
+
|
||||
+ for (i = 0; i < ARRAY_SIZE(x); i++)
|
||||
+ put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
|
||||
+
|
||||
+ state[12]++;
|
||||
+}
|
||||
+EXPORT_SYMBOL(chacha_block_generic);
|
||||
+
|
||||
+/**
|
||||
+ * hchacha_block_generic - abbreviated ChaCha core, for XChaCha
|
||||
+ * @state: input state matrix (16 32-bit words)
|
||||
+ * @stream: output (8 32-bit words)
|
||||
+ * @nrounds: number of rounds (20 or 12; 20 is recommended)
|
||||
+ *
|
||||
+ * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step
|
||||
+ * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha
|
||||
+ * skips the final addition of the initial state, and outputs only certain words
|
||||
+ * of the state. It should not be used for streaming directly.
|
||||
+ */
|
||||
+void hchacha_block_generic(const u32 *state, u32 *stream, int nrounds)
|
||||
+{
|
||||
+ u32 x[16];
|
||||
+
|
||||
+ memcpy(x, state, 64);
|
||||
+
|
||||
+ chacha_permute(x, nrounds);
|
||||
+
|
||||
+ memcpy(&stream[0], &x[0], 16);
|
||||
+ memcpy(&stream[4], &x[12], 16);
|
||||
+}
|
||||
+EXPORT_SYMBOL(hchacha_block_generic);
|
||||
--- /dev/null
|
||||
+++ b/lib/crypto/libchacha.c
|
||||
@@ -0,0 +1,35 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
+/*
|
||||
+ * The ChaCha stream cipher (RFC7539)
|
||||
+ *
|
||||
+ * Copyright (C) 2015 Martin Willi
|
||||
+ */
|
||||
+
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/export.h>
|
||||
+#include <linux/module.h>
|
||||
+
|
||||
+#include <crypto/algapi.h> // for crypto_xor_cpy
|
||||
+#include <crypto/chacha.h>
|
||||
+
|
||||
+void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src,
|
||||
+ unsigned int bytes, int nrounds)
|
||||
+{
|
||||
+ /* aligned to potentially speed up crypto_xor() */
|
||||
+ u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long));
|
||||
+
|
||||
+ while (bytes >= CHACHA_BLOCK_SIZE) {
|
||||
+ chacha_block_generic(state, stream, nrounds);
|
||||
+ crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE);
|
||||
+ bytes -= CHACHA_BLOCK_SIZE;
|
||||
+ dst += CHACHA_BLOCK_SIZE;
|
||||
+ src += CHACHA_BLOCK_SIZE;
|
||||
+ }
|
||||
+ if (bytes) {
|
||||
+ chacha_block_generic(state, stream, nrounds);
|
||||
+ crypto_xor_cpy(dst, src, stream, bytes);
|
||||
+ }
|
||||
+}
|
||||
+EXPORT_SYMBOL(chacha_crypt_generic);
|
||||
+
|
||||
+MODULE_LICENSE("GPL");
|
@ -0,0 +1,192 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:09 +0100
|
||||
Subject: [PATCH] crypto: x86/chacha - depend on generic chacha library instead
|
||||
of crypto driver
|
||||
|
||||
commit 28e8d89b1ce8d2e7badfb5f69971dd635acb8863 upstream.
|
||||
|
||||
In preparation of extending the x86 ChaCha driver to also expose the ChaCha
|
||||
library interface, drop the dependency on the chacha_generic crypto driver
|
||||
as a non-SIMD fallback, and depend on the generic ChaCha library directly.
|
||||
This way, we only pull in the code we actually need, without registering
|
||||
a set of ChaCha skciphers that we will never use.
|
||||
|
||||
Since turning the FPU on and off is cheap these days, simplify the SIMD
|
||||
routine by dropping the per-page yield, which makes for a cleaner switch
|
||||
to the library API as well. This also allows us to invoke the skcipher
|
||||
walk routines in non-atomic mode.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/chacha_glue.c | 90 ++++++++++++++---------------------
|
||||
crypto/Kconfig | 2 +-
|
||||
2 files changed, 36 insertions(+), 56 deletions(-)
|
||||
|
||||
--- a/arch/x86/crypto/chacha_glue.c
|
||||
+++ b/arch/x86/crypto/chacha_glue.c
|
||||
@@ -123,37 +123,38 @@ static void chacha_dosimd(u32 *state, u8
|
||||
}
|
||||
}
|
||||
|
||||
-static int chacha_simd_stream_xor(struct skcipher_walk *walk,
|
||||
+static int chacha_simd_stream_xor(struct skcipher_request *req,
|
||||
const struct chacha_ctx *ctx, const u8 *iv)
|
||||
{
|
||||
u32 *state, state_buf[16 + 2] __aligned(8);
|
||||
- int next_yield = 4096; /* bytes until next FPU yield */
|
||||
- int err = 0;
|
||||
+ struct skcipher_walk walk;
|
||||
+ int err;
|
||||
+
|
||||
+ err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
|
||||
state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
|
||||
|
||||
- crypto_chacha_init(state, ctx, iv);
|
||||
+ chacha_init_generic(state, ctx->key, iv);
|
||||
|
||||
- while (walk->nbytes > 0) {
|
||||
- unsigned int nbytes = walk->nbytes;
|
||||
+ while (walk.nbytes > 0) {
|
||||
+ unsigned int nbytes = walk.nbytes;
|
||||
|
||||
- if (nbytes < walk->total) {
|
||||
- nbytes = round_down(nbytes, walk->stride);
|
||||
- next_yield -= nbytes;
|
||||
- }
|
||||
-
|
||||
- chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr,
|
||||
- nbytes, ctx->nrounds);
|
||||
+ if (nbytes < walk.total)
|
||||
+ nbytes = round_down(nbytes, walk.stride);
|
||||
|
||||
- if (next_yield <= 0) {
|
||||
- /* temporarily allow preemption */
|
||||
- kernel_fpu_end();
|
||||
+ if (!crypto_simd_usable()) {
|
||||
+ chacha_crypt_generic(state, walk.dst.virt.addr,
|
||||
+ walk.src.virt.addr, nbytes,
|
||||
+ ctx->nrounds);
|
||||
+ } else {
|
||||
kernel_fpu_begin();
|
||||
- next_yield = 4096;
|
||||
+ chacha_dosimd(state, walk.dst.virt.addr,
|
||||
+ walk.src.virt.addr, nbytes,
|
||||
+ ctx->nrounds);
|
||||
+ kernel_fpu_end();
|
||||
}
|
||||
-
|
||||
- err = skcipher_walk_done(walk, walk->nbytes - nbytes);
|
||||
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
@@ -163,55 +164,34 @@ static int chacha_simd(struct skcipher_r
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
- struct skcipher_walk walk;
|
||||
- int err;
|
||||
-
|
||||
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
|
||||
- return crypto_chacha_crypt(req);
|
||||
|
||||
- err = skcipher_walk_virt(&walk, req, true);
|
||||
- if (err)
|
||||
- return err;
|
||||
-
|
||||
- kernel_fpu_begin();
|
||||
- err = chacha_simd_stream_xor(&walk, ctx, req->iv);
|
||||
- kernel_fpu_end();
|
||||
- return err;
|
||||
+ return chacha_simd_stream_xor(req, ctx, req->iv);
|
||||
}
|
||||
|
||||
static int xchacha_simd(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
- struct skcipher_walk walk;
|
||||
- struct chacha_ctx subctx;
|
||||
u32 *state, state_buf[16 + 2] __aligned(8);
|
||||
+ struct chacha_ctx subctx;
|
||||
u8 real_iv[16];
|
||||
- int err;
|
||||
-
|
||||
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
|
||||
- return crypto_xchacha_crypt(req);
|
||||
-
|
||||
- err = skcipher_walk_virt(&walk, req, true);
|
||||
- if (err)
|
||||
- return err;
|
||||
|
||||
BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
|
||||
state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
|
||||
- crypto_chacha_init(state, ctx, req->iv);
|
||||
+ chacha_init_generic(state, ctx->key, req->iv);
|
||||
|
||||
- kernel_fpu_begin();
|
||||
-
|
||||
- hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
|
||||
+ if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
|
||||
+ kernel_fpu_begin();
|
||||
+ hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
|
||||
+ kernel_fpu_end();
|
||||
+ } else {
|
||||
+ hchacha_block_generic(state, subctx.key, ctx->nrounds);
|
||||
+ }
|
||||
subctx.nrounds = ctx->nrounds;
|
||||
|
||||
memcpy(&real_iv[0], req->iv + 24, 8);
|
||||
memcpy(&real_iv[8], req->iv + 16, 8);
|
||||
- err = chacha_simd_stream_xor(&walk, &subctx, real_iv);
|
||||
-
|
||||
- kernel_fpu_end();
|
||||
-
|
||||
- return err;
|
||||
+ return chacha_simd_stream_xor(req, &subctx, real_iv);
|
||||
}
|
||||
|
||||
static struct skcipher_alg algs[] = {
|
||||
@@ -227,7 +207,7 @@ static struct skcipher_alg algs[] = {
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = CHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
- .setkey = crypto_chacha20_setkey,
|
||||
+ .setkey = chacha20_setkey,
|
||||
.encrypt = chacha_simd,
|
||||
.decrypt = chacha_simd,
|
||||
}, {
|
||||
@@ -242,7 +222,7 @@ static struct skcipher_alg algs[] = {
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = XCHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
- .setkey = crypto_chacha20_setkey,
|
||||
+ .setkey = chacha20_setkey,
|
||||
.encrypt = xchacha_simd,
|
||||
.decrypt = xchacha_simd,
|
||||
}, {
|
||||
@@ -257,7 +237,7 @@ static struct skcipher_alg algs[] = {
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = XCHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
- .setkey = crypto_chacha12_setkey,
|
||||
+ .setkey = chacha12_setkey,
|
||||
.encrypt = xchacha_simd,
|
||||
.decrypt = xchacha_simd,
|
||||
},
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -1417,7 +1417,7 @@ config CRYPTO_CHACHA20_X86_64
|
||||
tristate "ChaCha stream cipher algorithms (x86_64/SSSE3/AVX2/AVX-512VL)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_BLKCIPHER
|
||||
- select CRYPTO_CHACHA20
|
||||
+ select CRYPTO_LIB_CHACHA_GENERIC
|
||||
help
|
||||
SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
|
||||
XChaCha20, and XChaCha12 stream ciphers.
|
@ -0,0 +1,205 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:10 +0100
|
||||
Subject: [PATCH] crypto: x86/chacha - expose SIMD ChaCha routine as library
|
||||
function
|
||||
|
||||
commit 84e03fa39fbe95a5567d43bff458c6d3b3a23ad1 upstream.
|
||||
|
||||
Wire the existing x86 SIMD ChaCha code into the new ChaCha library
|
||||
interface, so that users of the library interface will get the
|
||||
accelerated version when available.
|
||||
|
||||
Given that calls into the library API will always go through the
|
||||
routines in this module if it is enabled, switch to static keys
|
||||
to select the optimal implementation available (which may be none
|
||||
at all, in which case we defer to the generic implementation for
|
||||
all invocations).
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/chacha_glue.c | 91 +++++++++++++++++++++++++----------
|
||||
crypto/Kconfig | 1 +
|
||||
include/crypto/chacha.h | 6 +++
|
||||
3 files changed, 73 insertions(+), 25 deletions(-)
|
||||
|
||||
--- a/arch/x86/crypto/chacha_glue.c
|
||||
+++ b/arch/x86/crypto/chacha_glue.c
|
||||
@@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u
|
||||
asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int len, int nrounds);
|
||||
asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
|
||||
-#ifdef CONFIG_AS_AVX2
|
||||
+
|
||||
asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int len, int nrounds);
|
||||
asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int len, int nrounds);
|
||||
asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int len, int nrounds);
|
||||
-static bool chacha_use_avx2;
|
||||
-#ifdef CONFIG_AS_AVX512
|
||||
+
|
||||
asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int len, int nrounds);
|
||||
asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int len, int nrounds);
|
||||
asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int len, int nrounds);
|
||||
-static bool chacha_use_avx512vl;
|
||||
-#endif
|
||||
-#endif
|
||||
+
|
||||
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
|
||||
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
|
||||
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
|
||||
|
||||
static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
|
||||
{
|
||||
@@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsig
|
||||
static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int bytes, int nrounds)
|
||||
{
|
||||
-#ifdef CONFIG_AS_AVX2
|
||||
-#ifdef CONFIG_AS_AVX512
|
||||
- if (chacha_use_avx512vl) {
|
||||
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
|
||||
+ static_branch_likely(&chacha_use_avx512vl)) {
|
||||
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
|
||||
chacha_8block_xor_avx512vl(state, dst, src, bytes,
|
||||
nrounds);
|
||||
@@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8
|
||||
return;
|
||||
}
|
||||
}
|
||||
-#endif
|
||||
- if (chacha_use_avx2) {
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
|
||||
+ static_branch_likely(&chacha_use_avx2)) {
|
||||
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
|
||||
chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
|
||||
bytes -= CHACHA_BLOCK_SIZE * 8;
|
||||
@@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8
|
||||
return;
|
||||
}
|
||||
}
|
||||
-#endif
|
||||
+
|
||||
while (bytes >= CHACHA_BLOCK_SIZE * 4) {
|
||||
chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
|
||||
bytes -= CHACHA_BLOCK_SIZE * 4;
|
||||
@@ -123,6 +123,43 @@ static void chacha_dosimd(u32 *state, u8
|
||||
}
|
||||
}
|
||||
|
||||
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
|
||||
+{
|
||||
+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
|
||||
+
|
||||
+ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
|
||||
+ hchacha_block_generic(state, stream, nrounds);
|
||||
+ } else {
|
||||
+ kernel_fpu_begin();
|
||||
+ hchacha_block_ssse3(state, stream, nrounds);
|
||||
+ kernel_fpu_end();
|
||||
+ }
|
||||
+}
|
||||
+EXPORT_SYMBOL(hchacha_block_arch);
|
||||
+
|
||||
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
|
||||
+{
|
||||
+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
|
||||
+
|
||||
+ chacha_init_generic(state, key, iv);
|
||||
+}
|
||||
+EXPORT_SYMBOL(chacha_init_arch);
|
||||
+
|
||||
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
|
||||
+ int nrounds)
|
||||
+{
|
||||
+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
|
||||
+
|
||||
+ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
|
||||
+ bytes <= CHACHA_BLOCK_SIZE)
|
||||
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
|
||||
+
|
||||
+ kernel_fpu_begin();
|
||||
+ chacha_dosimd(state, dst, src, bytes, nrounds);
|
||||
+ kernel_fpu_end();
|
||||
+}
|
||||
+EXPORT_SYMBOL(chacha_crypt_arch);
|
||||
+
|
||||
static int chacha_simd_stream_xor(struct skcipher_request *req,
|
||||
const struct chacha_ctx *ctx, const u8 *iv)
|
||||
{
|
||||
@@ -143,7 +180,8 @@ static int chacha_simd_stream_xor(struct
|
||||
if (nbytes < walk.total)
|
||||
nbytes = round_down(nbytes, walk.stride);
|
||||
|
||||
- if (!crypto_simd_usable()) {
|
||||
+ if (!static_branch_likely(&chacha_use_simd) ||
|
||||
+ !crypto_simd_usable()) {
|
||||
chacha_crypt_generic(state, walk.dst.virt.addr,
|
||||
walk.src.virt.addr, nbytes,
|
||||
ctx->nrounds);
|
||||
@@ -246,18 +284,21 @@ static struct skcipher_alg algs[] = {
|
||||
static int __init chacha_simd_mod_init(void)
|
||||
{
|
||||
if (!boot_cpu_has(X86_FEATURE_SSSE3))
|
||||
- return -ENODEV;
|
||||
+ return 0;
|
||||
|
||||
-#ifdef CONFIG_AS_AVX2
|
||||
- chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
|
||||
- boot_cpu_has(X86_FEATURE_AVX2) &&
|
||||
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
|
||||
-#ifdef CONFIG_AS_AVX512
|
||||
- chacha_use_avx512vl = chacha_use_avx2 &&
|
||||
- boot_cpu_has(X86_FEATURE_AVX512VL) &&
|
||||
- boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
|
||||
-#endif
|
||||
-#endif
|
||||
+ static_branch_enable(&chacha_use_simd);
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
|
||||
+ boot_cpu_has(X86_FEATURE_AVX) &&
|
||||
+ boot_cpu_has(X86_FEATURE_AVX2) &&
|
||||
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
|
||||
+ static_branch_enable(&chacha_use_avx2);
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
|
||||
+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
|
||||
+ boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
|
||||
+ static_branch_enable(&chacha_use_avx512vl);
|
||||
+ }
|
||||
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -1418,6 +1418,7 @@ config CRYPTO_CHACHA20_X86_64
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_LIB_CHACHA_GENERIC
|
||||
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
|
||||
help
|
||||
SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
|
||||
XChaCha20, and XChaCha12 stream ciphers.
|
||||
--- a/include/crypto/chacha.h
|
||||
+++ b/include/crypto/chacha.h
|
||||
@@ -25,6 +25,12 @@
|
||||
#define CHACHA_BLOCK_SIZE 64
|
||||
#define CHACHAPOLY_IV_SIZE 12
|
||||
|
||||
+#ifdef CONFIG_X86_64
|
||||
+#define CHACHA_STATE_WORDS ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32))
|
||||
+#else
|
||||
+#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32))
|
||||
+#endif
|
||||
+
|
||||
/* 192-bit nonce, then 64-bit stream position */
|
||||
#define XCHACHA_IV_SIZE 32
|
||||
|
@ -0,0 +1,129 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:11 +0100
|
||||
Subject: [PATCH] crypto: arm64/chacha - depend on generic chacha library
|
||||
instead of crypto driver
|
||||
|
||||
commit c77da4867cbb7841177275dbb250f5c09679fae4 upstream.
|
||||
|
||||
Depend on the generic ChaCha library routines instead of pulling in the
|
||||
generic ChaCha skcipher driver, which is more than we need, and makes
|
||||
managing the dependencies between the generic library, generic driver,
|
||||
accelerated library and driver more complicated.
|
||||
|
||||
While at it, drop the logic to prefer the scalar code on short inputs.
|
||||
Turning the NEON on and off is cheap these days, and one major use case
|
||||
for ChaCha20 is ChaCha20-Poly1305, which is guaranteed to hit the scalar
|
||||
path upon every invocation (when doing the Poly1305 nonce generation).
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm64/crypto/Kconfig | 2 +-
|
||||
arch/arm64/crypto/chacha-neon-glue.c | 40 +++++++++++++++-------------
|
||||
2 files changed, 23 insertions(+), 19 deletions(-)
|
||||
|
||||
--- a/arch/arm64/crypto/Kconfig
|
||||
+++ b/arch/arm64/crypto/Kconfig
|
||||
@@ -103,7 +103,7 @@ config CRYPTO_CHACHA20_NEON
|
||||
tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_BLKCIPHER
|
||||
- select CRYPTO_CHACHA20
|
||||
+ select CRYPTO_LIB_CHACHA_GENERIC
|
||||
|
||||
config CRYPTO_NHPOLY1305_NEON
|
||||
tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
|
||||
--- a/arch/arm64/crypto/chacha-neon-glue.c
|
||||
+++ b/arch/arm64/crypto/chacha-neon-glue.c
|
||||
@@ -68,7 +68,7 @@ static int chacha_neon_stream_xor(struct
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
- crypto_chacha_init(state, ctx, iv);
|
||||
+ chacha_init_generic(state, ctx->key, iv);
|
||||
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
@@ -76,10 +76,16 @@ static int chacha_neon_stream_xor(struct
|
||||
if (nbytes < walk.total)
|
||||
nbytes = rounddown(nbytes, walk.stride);
|
||||
|
||||
- kernel_neon_begin();
|
||||
- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
|
||||
- nbytes, ctx->nrounds);
|
||||
- kernel_neon_end();
|
||||
+ if (!crypto_simd_usable()) {
|
||||
+ chacha_crypt_generic(state, walk.dst.virt.addr,
|
||||
+ walk.src.virt.addr, nbytes,
|
||||
+ ctx->nrounds);
|
||||
+ } else {
|
||||
+ kernel_neon_begin();
|
||||
+ chacha_doneon(state, walk.dst.virt.addr,
|
||||
+ walk.src.virt.addr, nbytes, ctx->nrounds);
|
||||
+ kernel_neon_end();
|
||||
+ }
|
||||
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
}
|
||||
|
||||
@@ -91,9 +97,6 @@ static int chacha_neon(struct skcipher_r
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
|
||||
- return crypto_chacha_crypt(req);
|
||||
-
|
||||
return chacha_neon_stream_xor(req, ctx, req->iv);
|
||||
}
|
||||
|
||||
@@ -105,14 +108,15 @@ static int xchacha_neon(struct skcipher_
|
||||
u32 state[16];
|
||||
u8 real_iv[16];
|
||||
|
||||
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
|
||||
- return crypto_xchacha_crypt(req);
|
||||
-
|
||||
- crypto_chacha_init(state, ctx, req->iv);
|
||||
+ chacha_init_generic(state, ctx->key, req->iv);
|
||||
|
||||
- kernel_neon_begin();
|
||||
- hchacha_block_neon(state, subctx.key, ctx->nrounds);
|
||||
- kernel_neon_end();
|
||||
+ if (crypto_simd_usable()) {
|
||||
+ kernel_neon_begin();
|
||||
+ hchacha_block_neon(state, subctx.key, ctx->nrounds);
|
||||
+ kernel_neon_end();
|
||||
+ } else {
|
||||
+ hchacha_block_generic(state, subctx.key, ctx->nrounds);
|
||||
+ }
|
||||
subctx.nrounds = ctx->nrounds;
|
||||
|
||||
memcpy(&real_iv[0], req->iv + 24, 8);
|
||||
@@ -134,7 +138,7 @@ static struct skcipher_alg algs[] = {
|
||||
.ivsize = CHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
.walksize = 5 * CHACHA_BLOCK_SIZE,
|
||||
- .setkey = crypto_chacha20_setkey,
|
||||
+ .setkey = chacha20_setkey,
|
||||
.encrypt = chacha_neon,
|
||||
.decrypt = chacha_neon,
|
||||
}, {
|
||||
@@ -150,7 +154,7 @@ static struct skcipher_alg algs[] = {
|
||||
.ivsize = XCHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
.walksize = 5 * CHACHA_BLOCK_SIZE,
|
||||
- .setkey = crypto_chacha20_setkey,
|
||||
+ .setkey = chacha20_setkey,
|
||||
.encrypt = xchacha_neon,
|
||||
.decrypt = xchacha_neon,
|
||||
}, {
|
||||
@@ -166,7 +170,7 @@ static struct skcipher_alg algs[] = {
|
||||
.ivsize = XCHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
.walksize = 5 * CHACHA_BLOCK_SIZE,
|
||||
- .setkey = crypto_chacha12_setkey,
|
||||
+ .setkey = chacha12_setkey,
|
||||
.encrypt = xchacha_neon,
|
||||
.decrypt = xchacha_neon,
|
||||
}
|
@ -0,0 +1,138 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:12 +0100
|
||||
Subject: [PATCH] crypto: arm64/chacha - expose arm64 ChaCha routine as library
|
||||
function
|
||||
|
||||
commit b3aad5bad26a01a4bd8c49a5c5f52aec665f3b7c upstream.
|
||||
|
||||
Expose the accelerated NEON ChaCha routine directly as a symbol
|
||||
export so that users of the ChaCha library API can use it directly.
|
||||
|
||||
Given that calls into the library API will always go through the
|
||||
routines in this module if it is enabled, switch to static keys
|
||||
to select the optimal implementation available (which may be none
|
||||
at all, in which case we defer to the generic implementation for
|
||||
all invocations).
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm64/crypto/Kconfig | 1 +
|
||||
arch/arm64/crypto/chacha-neon-glue.c | 53 ++++++++++++++++++++++------
|
||||
2 files changed, 43 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/arch/arm64/crypto/Kconfig
|
||||
+++ b/arch/arm64/crypto/Kconfig
|
||||
@@ -104,6 +104,7 @@ config CRYPTO_CHACHA20_NEON
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_LIB_CHACHA_GENERIC
|
||||
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
|
||||
|
||||
config CRYPTO_NHPOLY1305_NEON
|
||||
tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
|
||||
--- a/arch/arm64/crypto/chacha-neon-glue.c
|
||||
+++ b/arch/arm64/crypto/chacha-neon-glue.c
|
||||
@@ -23,6 +23,7 @@
|
||||
#include <crypto/internal/chacha.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
+#include <linux/jump_label.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
@@ -36,6 +37,8 @@ asmlinkage void chacha_4block_xor_neon(u
|
||||
int nrounds, int bytes);
|
||||
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
|
||||
|
||||
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
|
||||
+
|
||||
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
|
||||
int bytes, int nrounds)
|
||||
{
|
||||
@@ -59,6 +62,37 @@ static void chacha_doneon(u32 *state, u8
|
||||
}
|
||||
}
|
||||
|
||||
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
|
||||
+{
|
||||
+ if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
|
||||
+ hchacha_block_generic(state, stream, nrounds);
|
||||
+ } else {
|
||||
+ kernel_neon_begin();
|
||||
+ hchacha_block_neon(state, stream, nrounds);
|
||||
+ kernel_neon_end();
|
||||
+ }
|
||||
+}
|
||||
+EXPORT_SYMBOL(hchacha_block_arch);
|
||||
+
|
||||
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
|
||||
+{
|
||||
+ chacha_init_generic(state, key, iv);
|
||||
+}
|
||||
+EXPORT_SYMBOL(chacha_init_arch);
|
||||
+
|
||||
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
|
||||
+ int nrounds)
|
||||
+{
|
||||
+ if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
|
||||
+ !crypto_simd_usable())
|
||||
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
|
||||
+
|
||||
+ kernel_neon_begin();
|
||||
+ chacha_doneon(state, dst, src, bytes, nrounds);
|
||||
+ kernel_neon_end();
|
||||
+}
|
||||
+EXPORT_SYMBOL(chacha_crypt_arch);
|
||||
+
|
||||
static int chacha_neon_stream_xor(struct skcipher_request *req,
|
||||
const struct chacha_ctx *ctx, const u8 *iv)
|
||||
{
|
||||
@@ -76,7 +110,8 @@ static int chacha_neon_stream_xor(struct
|
||||
if (nbytes < walk.total)
|
||||
nbytes = rounddown(nbytes, walk.stride);
|
||||
|
||||
- if (!crypto_simd_usable()) {
|
||||
+ if (!static_branch_likely(&have_neon) ||
|
||||
+ !crypto_simd_usable()) {
|
||||
chacha_crypt_generic(state, walk.dst.virt.addr,
|
||||
walk.src.virt.addr, nbytes,
|
||||
ctx->nrounds);
|
||||
@@ -109,14 +144,7 @@ static int xchacha_neon(struct skcipher_
|
||||
u8 real_iv[16];
|
||||
|
||||
chacha_init_generic(state, ctx->key, req->iv);
|
||||
-
|
||||
- if (crypto_simd_usable()) {
|
||||
- kernel_neon_begin();
|
||||
- hchacha_block_neon(state, subctx.key, ctx->nrounds);
|
||||
- kernel_neon_end();
|
||||
- } else {
|
||||
- hchacha_block_generic(state, subctx.key, ctx->nrounds);
|
||||
- }
|
||||
+ hchacha_block_arch(state, subctx.key, ctx->nrounds);
|
||||
subctx.nrounds = ctx->nrounds;
|
||||
|
||||
memcpy(&real_iv[0], req->iv + 24, 8);
|
||||
@@ -179,14 +207,17 @@ static struct skcipher_alg algs[] = {
|
||||
static int __init chacha_simd_mod_init(void)
|
||||
{
|
||||
if (!cpu_have_named_feature(ASIMD))
|
||||
- return -ENODEV;
|
||||
+ return 0;
|
||||
+
|
||||
+ static_branch_enable(&have_neon);
|
||||
|
||||
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
static void __exit chacha_simd_mod_fini(void)
|
||||
{
|
||||
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
|
||||
+ if (cpu_have_named_feature(ASIMD))
|
||||
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
module_init(chacha_simd_mod_init);
|
@ -0,0 +1,480 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:13 +0100
|
||||
Subject: [PATCH] crypto: arm/chacha - import Eric Biggers's scalar accelerated
|
||||
ChaCha code
|
||||
|
||||
commit 29621d099f9c642b22a69dc8e7e20c108473a392 upstream.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/chacha-scalar-core.S | 461 +++++++++++++++++++++++++++
|
||||
1 file changed, 461 insertions(+)
|
||||
create mode 100644 arch/arm/crypto/chacha-scalar-core.S
|
||||
|
||||
--- /dev/null
|
||||
+++ b/arch/arm/crypto/chacha-scalar-core.S
|
||||
@@ -0,0 +1,461 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 */
|
||||
+/*
|
||||
+ * Copyright (C) 2018 Google, Inc.
|
||||
+ */
|
||||
+
|
||||
+#include <linux/linkage.h>
|
||||
+#include <asm/assembler.h>
|
||||
+
|
||||
+/*
|
||||
+ * Design notes:
|
||||
+ *
|
||||
+ * 16 registers would be needed to hold the state matrix, but only 14 are
|
||||
+ * available because 'sp' and 'pc' cannot be used. So we spill the elements
|
||||
+ * (x8, x9) to the stack and swap them out with (x10, x11). This adds one
|
||||
+ * 'ldrd' and one 'strd' instruction per round.
|
||||
+ *
|
||||
+ * All rotates are performed using the implicit rotate operand accepted by the
|
||||
+ * 'add' and 'eor' instructions. This is faster than using explicit rotate
|
||||
+ * instructions. To make this work, we allow the values in the second and last
|
||||
+ * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
|
||||
+ * wrong rotation amount. The rotation amount is then fixed up just in time
|
||||
+ * when the values are used. 'brot' is the number of bits the values in row 'b'
|
||||
+ * need to be rotated right to arrive at the correct values, and 'drot'
|
||||
+ * similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such
|
||||
+ * that they end up as (25, 24) after every round.
|
||||
+ */
|
||||
+
|
||||
+ // ChaCha state registers
|
||||
+ X0 .req r0
|
||||
+ X1 .req r1
|
||||
+ X2 .req r2
|
||||
+ X3 .req r3
|
||||
+ X4 .req r4
|
||||
+ X5 .req r5
|
||||
+ X6 .req r6
|
||||
+ X7 .req r7
|
||||
+ X8_X10 .req r8 // shared by x8 and x10
|
||||
+ X9_X11 .req r9 // shared by x9 and x11
|
||||
+ X12 .req r10
|
||||
+ X13 .req r11
|
||||
+ X14 .req r12
|
||||
+ X15 .req r14
|
||||
+
|
||||
+.Lexpand_32byte_k:
|
||||
+ // "expand 32-byte k"
|
||||
+ .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
|
||||
+
|
||||
+#ifdef __thumb2__
|
||||
+# define adrl adr
|
||||
+#endif
|
||||
+
|
||||
+.macro __rev out, in, t0, t1, t2
|
||||
+.if __LINUX_ARM_ARCH__ >= 6
|
||||
+ rev \out, \in
|
||||
+.else
|
||||
+ lsl \t0, \in, #24
|
||||
+ and \t1, \in, #0xff00
|
||||
+ and \t2, \in, #0xff0000
|
||||
+ orr \out, \t0, \in, lsr #24
|
||||
+ orr \out, \out, \t1, lsl #8
|
||||
+ orr \out, \out, \t2, lsr #8
|
||||
+.endif
|
||||
+.endm
|
||||
+
|
||||
+.macro _le32_bswap x, t0, t1, t2
|
||||
+#ifdef __ARMEB__
|
||||
+ __rev \x, \x, \t0, \t1, \t2
|
||||
+#endif
|
||||
+.endm
|
||||
+
|
||||
+.macro _le32_bswap_4x a, b, c, d, t0, t1, t2
|
||||
+ _le32_bswap \a, \t0, \t1, \t2
|
||||
+ _le32_bswap \b, \t0, \t1, \t2
|
||||
+ _le32_bswap \c, \t0, \t1, \t2
|
||||
+ _le32_bswap \d, \t0, \t1, \t2
|
||||
+.endm
|
||||
+
|
||||
+.macro __ldrd a, b, src, offset
|
||||
+#if __LINUX_ARM_ARCH__ >= 6
|
||||
+ ldrd \a, \b, [\src, #\offset]
|
||||
+#else
|
||||
+ ldr \a, [\src, #\offset]
|
||||
+ ldr \b, [\src, #\offset + 4]
|
||||
+#endif
|
||||
+.endm
|
||||
+
|
||||
+.macro __strd a, b, dst, offset
|
||||
+#if __LINUX_ARM_ARCH__ >= 6
|
||||
+ strd \a, \b, [\dst, #\offset]
|
||||
+#else
|
||||
+ str \a, [\dst, #\offset]
|
||||
+ str \b, [\dst, #\offset + 4]
|
||||
+#endif
|
||||
+.endm
|
||||
+
|
||||
+.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2
|
||||
+
|
||||
+ // a += b; d ^= a; d = rol(d, 16);
|
||||
+ add \a1, \a1, \b1, ror #brot
|
||||
+ add \a2, \a2, \b2, ror #brot
|
||||
+ eor \d1, \a1, \d1, ror #drot
|
||||
+ eor \d2, \a2, \d2, ror #drot
|
||||
+ // drot == 32 - 16 == 16
|
||||
+
|
||||
+ // c += d; b ^= c; b = rol(b, 12);
|
||||
+ add \c1, \c1, \d1, ror #16
|
||||
+ add \c2, \c2, \d2, ror #16
|
||||
+ eor \b1, \c1, \b1, ror #brot
|
||||
+ eor \b2, \c2, \b2, ror #brot
|
||||
+ // brot == 32 - 12 == 20
|
||||
+
|
||||
+ // a += b; d ^= a; d = rol(d, 8);
|
||||
+ add \a1, \a1, \b1, ror #20
|
||||
+ add \a2, \a2, \b2, ror #20
|
||||
+ eor \d1, \a1, \d1, ror #16
|
||||
+ eor \d2, \a2, \d2, ror #16
|
||||
+ // drot == 32 - 8 == 24
|
||||
+
|
||||
+ // c += d; b ^= c; b = rol(b, 7);
|
||||
+ add \c1, \c1, \d1, ror #24
|
||||
+ add \c2, \c2, \d2, ror #24
|
||||
+ eor \b1, \c1, \b1, ror #20
|
||||
+ eor \b2, \c2, \b2, ror #20
|
||||
+ // brot == 32 - 7 == 25
|
||||
+.endm
|
||||
+
|
||||
+.macro _doubleround
|
||||
+
|
||||
+ // column round
|
||||
+
|
||||
+ // quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13)
|
||||
+ _halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13
|
||||
+
|
||||
+ // save (x8, x9); restore (x10, x11)
|
||||
+ __strd X8_X10, X9_X11, sp, 0
|
||||
+ __ldrd X8_X10, X9_X11, sp, 8
|
||||
+
|
||||
+ // quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15)
|
||||
+ _halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15
|
||||
+
|
||||
+ .set brot, 25
|
||||
+ .set drot, 24
|
||||
+
|
||||
+ // diagonal round
|
||||
+
|
||||
+ // quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12)
|
||||
+ _halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12
|
||||
+
|
||||
+ // save (x10, x11); restore (x8, x9)
|
||||
+ __strd X8_X10, X9_X11, sp, 8
|
||||
+ __ldrd X8_X10, X9_X11, sp, 0
|
||||
+
|
||||
+ // quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14)
|
||||
+ _halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14
|
||||
+.endm
|
||||
+
|
||||
+.macro _chacha_permute nrounds
|
||||
+ .set brot, 0
|
||||
+ .set drot, 0
|
||||
+ .rept \nrounds / 2
|
||||
+ _doubleround
|
||||
+ .endr
|
||||
+.endm
|
||||
+
|
||||
+.macro _chacha nrounds
|
||||
+
|
||||
+.Lnext_block\@:
|
||||
+ // Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN
|
||||
+ // Registers contain x0-x9,x12-x15.
|
||||
+
|
||||
+ // Do the core ChaCha permutation to update x0-x15.
|
||||
+ _chacha_permute \nrounds
|
||||
+
|
||||
+ add sp, #8
|
||||
+ // Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN
|
||||
+ // Registers contain x0-x9,x12-x15.
|
||||
+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
|
||||
+
|
||||
+ // Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15).
|
||||
+ push {X8_X10, X9_X11, X12, X13, X14, X15}
|
||||
+
|
||||
+ // Load (OUT, IN, LEN).
|
||||
+ ldr r14, [sp, #96]
|
||||
+ ldr r12, [sp, #100]
|
||||
+ ldr r11, [sp, #104]
|
||||
+
|
||||
+ orr r10, r14, r12
|
||||
+
|
||||
+ // Use slow path if fewer than 64 bytes remain.
|
||||
+ cmp r11, #64
|
||||
+ blt .Lxor_slowpath\@
|
||||
+
|
||||
+ // Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on
|
||||
+ // ARMv6+, since ldmia and stmia (used below) still require alignment.
|
||||
+ tst r10, #3
|
||||
+ bne .Lxor_slowpath\@
|
||||
+
|
||||
+ // Fast path: XOR 64 bytes of aligned data.
|
||||
+
|
||||
+ // Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
|
||||
+ // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT.
|
||||
+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
|
||||
+
|
||||
+ // x0-x3
|
||||
+ __ldrd r8, r9, sp, 32
|
||||
+ __ldrd r10, r11, sp, 40
|
||||
+ add X0, X0, r8
|
||||
+ add X1, X1, r9
|
||||
+ add X2, X2, r10
|
||||
+ add X3, X3, r11
|
||||
+ _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
|
||||
+ ldmia r12!, {r8-r11}
|
||||
+ eor X0, X0, r8
|
||||
+ eor X1, X1, r9
|
||||
+ eor X2, X2, r10
|
||||
+ eor X3, X3, r11
|
||||
+ stmia r14!, {X0-X3}
|
||||
+
|
||||
+ // x4-x7
|
||||
+ __ldrd r8, r9, sp, 48
|
||||
+ __ldrd r10, r11, sp, 56
|
||||
+ add X4, r8, X4, ror #brot
|
||||
+ add X5, r9, X5, ror #brot
|
||||
+ ldmia r12!, {X0-X3}
|
||||
+ add X6, r10, X6, ror #brot
|
||||
+ add X7, r11, X7, ror #brot
|
||||
+ _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
|
||||
+ eor X4, X4, X0
|
||||
+ eor X5, X5, X1
|
||||
+ eor X6, X6, X2
|
||||
+ eor X7, X7, X3
|
||||
+ stmia r14!, {X4-X7}
|
||||
+
|
||||
+ // x8-x15
|
||||
+ pop {r0-r7} // (x8-x9,x12-x15,x10-x11)
|
||||
+ __ldrd r8, r9, sp, 32
|
||||
+ __ldrd r10, r11, sp, 40
|
||||
+ add r0, r0, r8 // x8
|
||||
+ add r1, r1, r9 // x9
|
||||
+ add r6, r6, r10 // x10
|
||||
+ add r7, r7, r11 // x11
|
||||
+ _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
|
||||
+ ldmia r12!, {r8-r11}
|
||||
+ eor r0, r0, r8 // x8
|
||||
+ eor r1, r1, r9 // x9
|
||||
+ eor r6, r6, r10 // x10
|
||||
+ eor r7, r7, r11 // x11
|
||||
+ stmia r14!, {r0,r1,r6,r7}
|
||||
+ ldmia r12!, {r0,r1,r6,r7}
|
||||
+ __ldrd r8, r9, sp, 48
|
||||
+ __ldrd r10, r11, sp, 56
|
||||
+ add r2, r8, r2, ror #drot // x12
|
||||
+ add r3, r9, r3, ror #drot // x13
|
||||
+ add r4, r10, r4, ror #drot // x14
|
||||
+ add r5, r11, r5, ror #drot // x15
|
||||
+ _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
|
||||
+ ldr r9, [sp, #72] // load LEN
|
||||
+ eor r2, r2, r0 // x12
|
||||
+ eor r3, r3, r1 // x13
|
||||
+ eor r4, r4, r6 // x14
|
||||
+ eor r5, r5, r7 // x15
|
||||
+ subs r9, #64 // decrement and check LEN
|
||||
+ stmia r14!, {r2-r5}
|
||||
+
|
||||
+ beq .Ldone\@
|
||||
+
|
||||
+.Lprepare_for_next_block\@:
|
||||
+
|
||||
+ // Stack: x0-x15 OUT IN LEN
|
||||
+
|
||||
+ // Increment block counter (x12)
|
||||
+ add r8, #1
|
||||
+
|
||||
+ // Store updated (OUT, IN, LEN)
|
||||
+ str r14, [sp, #64]
|
||||
+ str r12, [sp, #68]
|
||||
+ str r9, [sp, #72]
|
||||
+
|
||||
+ mov r14, sp
|
||||
+
|
||||
+ // Store updated block counter (x12)
|
||||
+ str r8, [sp, #48]
|
||||
+
|
||||
+ sub sp, #16
|
||||
+
|
||||
+ // Reload state and do next block
|
||||
+ ldmia r14!, {r0-r11} // load x0-x11
|
||||
+ __strd r10, r11, sp, 8 // store x10-x11 before state
|
||||
+ ldmia r14, {r10-r12,r14} // load x12-x15
|
||||
+ b .Lnext_block\@
|
||||
+
|
||||
+.Lxor_slowpath\@:
|
||||
+ // Slow path: < 64 bytes remaining, or unaligned input or output buffer.
|
||||
+ // We handle it by storing the 64 bytes of keystream to the stack, then
|
||||
+ // XOR-ing the needed portion with the data.
|
||||
+
|
||||
+ // Allocate keystream buffer
|
||||
+ sub sp, #64
|
||||
+ mov r14, sp
|
||||
+
|
||||
+ // Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
|
||||
+ // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0.
|
||||
+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
|
||||
+
|
||||
+ // Save keystream for x0-x3
|
||||
+ __ldrd r8, r9, sp, 96
|
||||
+ __ldrd r10, r11, sp, 104
|
||||
+ add X0, X0, r8
|
||||
+ add X1, X1, r9
|
||||
+ add X2, X2, r10
|
||||
+ add X3, X3, r11
|
||||
+ _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
|
||||
+ stmia r14!, {X0-X3}
|
||||
+
|
||||
+ // Save keystream for x4-x7
|
||||
+ __ldrd r8, r9, sp, 112
|
||||
+ __ldrd r10, r11, sp, 120
|
||||
+ add X4, r8, X4, ror #brot
|
||||
+ add X5, r9, X5, ror #brot
|
||||
+ add X6, r10, X6, ror #brot
|
||||
+ add X7, r11, X7, ror #brot
|
||||
+ _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
|
||||
+ add r8, sp, #64
|
||||
+ stmia r14!, {X4-X7}
|
||||
+
|
||||
+ // Save keystream for x8-x15
|
||||
+ ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11)
|
||||
+ __ldrd r8, r9, sp, 128
|
||||
+ __ldrd r10, r11, sp, 136
|
||||
+ add r0, r0, r8 // x8
|
||||
+ add r1, r1, r9 // x9
|
||||
+ add r6, r6, r10 // x10
|
||||
+ add r7, r7, r11 // x11
|
||||
+ _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
|
||||
+ stmia r14!, {r0,r1,r6,r7}
|
||||
+ __ldrd r8, r9, sp, 144
|
||||
+ __ldrd r10, r11, sp, 152
|
||||
+ add r2, r8, r2, ror #drot // x12
|
||||
+ add r3, r9, r3, ror #drot // x13
|
||||
+ add r4, r10, r4, ror #drot // x14
|
||||
+ add r5, r11, r5, ror #drot // x15
|
||||
+ _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
|
||||
+ stmia r14, {r2-r5}
|
||||
+
|
||||
+ // Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
|
||||
+ // Registers: r8 is block counter, r12 is IN.
|
||||
+
|
||||
+ ldr r9, [sp, #168] // LEN
|
||||
+ ldr r14, [sp, #160] // OUT
|
||||
+ cmp r9, #64
|
||||
+ mov r0, sp
|
||||
+ movle r1, r9
|
||||
+ movgt r1, #64
|
||||
+ // r1 is number of bytes to XOR, in range [1, 64]
|
||||
+
|
||||
+.if __LINUX_ARM_ARCH__ < 6
|
||||
+ orr r2, r12, r14
|
||||
+ tst r2, #3 // IN or OUT misaligned?
|
||||
+ bne .Lxor_next_byte\@
|
||||
+.endif
|
||||
+
|
||||
+ // XOR a word at a time
|
||||
+.rept 16
|
||||
+ subs r1, #4
|
||||
+ blt .Lxor_words_done\@
|
||||
+ ldr r2, [r12], #4
|
||||
+ ldr r3, [r0], #4
|
||||
+ eor r2, r2, r3
|
||||
+ str r2, [r14], #4
|
||||
+.endr
|
||||
+ b .Lxor_slowpath_done\@
|
||||
+.Lxor_words_done\@:
|
||||
+ ands r1, r1, #3
|
||||
+ beq .Lxor_slowpath_done\@
|
||||
+
|
||||
+ // XOR a byte at a time
|
||||
+.Lxor_next_byte\@:
|
||||
+ ldrb r2, [r12], #1
|
||||
+ ldrb r3, [r0], #1
|
||||
+ eor r2, r2, r3
|
||||
+ strb r2, [r14], #1
|
||||
+ subs r1, #1
|
||||
+ bne .Lxor_next_byte\@
|
||||
+
|
||||
+.Lxor_slowpath_done\@:
|
||||
+ subs r9, #64
|
||||
+ add sp, #96
|
||||
+ bgt .Lprepare_for_next_block\@
|
||||
+
|
||||
+.Ldone\@:
|
||||
+.endm // _chacha
|
||||
+
|
||||
+/*
|
||||
+ * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
|
||||
+ * const u32 iv[4]);
|
||||
+ */
|
||||
+ENTRY(chacha20_arm)
|
||||
+ cmp r2, #0 // len == 0?
|
||||
+ reteq lr
|
||||
+
|
||||
+ push {r0-r2,r4-r11,lr}
|
||||
+
|
||||
+ // Push state x0-x15 onto stack.
|
||||
+ // Also store an extra copy of x10-x11 just before the state.
|
||||
+
|
||||
+ ldr r4, [sp, #48] // iv
|
||||
+ mov r0, sp
|
||||
+ sub sp, #80
|
||||
+
|
||||
+ // iv: x12-x15
|
||||
+ ldm r4, {X12,X13,X14,X15}
|
||||
+ stmdb r0!, {X12,X13,X14,X15}
|
||||
+
|
||||
+ // key: x4-x11
|
||||
+ __ldrd X8_X10, X9_X11, r3, 24
|
||||
+ __strd X8_X10, X9_X11, sp, 8
|
||||
+ stmdb r0!, {X8_X10, X9_X11}
|
||||
+ ldm r3, {X4-X9_X11}
|
||||
+ stmdb r0!, {X4-X9_X11}
|
||||
+
|
||||
+ // constants: x0-x3
|
||||
+ adrl X3, .Lexpand_32byte_k
|
||||
+ ldm X3, {X0-X3}
|
||||
+ __strd X0, X1, sp, 16
|
||||
+ __strd X2, X3, sp, 24
|
||||
+
|
||||
+ _chacha 20
|
||||
+
|
||||
+ add sp, #76
|
||||
+ pop {r4-r11, pc}
|
||||
+ENDPROC(chacha20_arm)
|
||||
+
|
||||
+/*
|
||||
+ * void hchacha20_arm(const u32 state[16], u32 out[8]);
|
||||
+ */
|
||||
+ENTRY(hchacha20_arm)
|
||||
+ push {r1,r4-r11,lr}
|
||||
+
|
||||
+ mov r14, r0
|
||||
+ ldmia r14!, {r0-r11} // load x0-x11
|
||||
+ push {r10-r11} // store x10-x11 to stack
|
||||
+ ldm r14, {r10-r12,r14} // load x12-x15
|
||||
+ sub sp, #8
|
||||
+
|
||||
+ _chacha_permute 20
|
||||
+
|
||||
+ // Skip over (unused0-unused1, x10-x11)
|
||||
+ add sp, #16
|
||||
+
|
||||
+ // Fix up rotations of x12-x15
|
||||
+ ror X12, X12, #drot
|
||||
+ ror X13, X13, #drot
|
||||
+ pop {r4} // load 'out'
|
||||
+ ror X14, X14, #drot
|
||||
+ ror X15, X15, #drot
|
||||
+
|
||||
+ // Store (x0-x3,x12-x15) to 'out'
|
||||
+ stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
|
||||
+
|
||||
+ pop {r4-r11,pc}
|
||||
+ENDPROC(hchacha20_arm)
|
@ -0,0 +1,691 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:14 +0100
|
||||
Subject: [PATCH] crypto: arm/chacha - remove dependency on generic ChaCha
|
||||
driver
|
||||
|
||||
commit b36d8c09e710c71f6a9690b6586fea2d1c9e1e27 upstream.
|
||||
|
||||
Instead of falling back to the generic ChaCha skcipher driver for
|
||||
non-SIMD cases, use a fast scalar implementation for ARM authored
|
||||
by Eric Biggers. This removes the module dependency on chacha-generic
|
||||
altogether, which also simplifies things when we expose the ChaCha
|
||||
library interface from this module.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/Kconfig | 4 +-
|
||||
arch/arm/crypto/Makefile | 3 +-
|
||||
arch/arm/crypto/chacha-glue.c | 304 +++++++++++++++++++++++++++
|
||||
arch/arm/crypto/chacha-neon-glue.c | 202 ------------------
|
||||
arch/arm/crypto/chacha-scalar-core.S | 65 +++---
|
||||
arch/arm64/crypto/chacha-neon-glue.c | 2 +-
|
||||
6 files changed, 340 insertions(+), 240 deletions(-)
|
||||
create mode 100644 arch/arm/crypto/chacha-glue.c
|
||||
delete mode 100644 arch/arm/crypto/chacha-neon-glue.c
|
||||
|
||||
--- a/arch/arm/crypto/Kconfig
|
||||
+++ b/arch/arm/crypto/Kconfig
|
||||
@@ -127,10 +127,8 @@ config CRYPTO_CRC32_ARM_CE
|
||||
select CRYPTO_HASH
|
||||
|
||||
config CRYPTO_CHACHA20_NEON
|
||||
- tristate "NEON accelerated ChaCha stream cipher algorithms"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
|
||||
select CRYPTO_BLKCIPHER
|
||||
- select CRYPTO_CHACHA20
|
||||
|
||||
config CRYPTO_NHPOLY1305_NEON
|
||||
tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
|
||||
--- a/arch/arm/crypto/Makefile
|
||||
+++ b/arch/arm/crypto/Makefile
|
||||
@@ -53,7 +53,8 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glu
|
||||
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
|
||||
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
|
||||
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
|
||||
-chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
|
||||
+chacha-neon-y := chacha-scalar-core.o chacha-glue.o
|
||||
+chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
|
||||
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
|
||||
|
||||
ifdef REGENERATE_ARM_CRYPTO
|
||||
--- /dev/null
|
||||
+++ b/arch/arm/crypto/chacha-glue.c
|
||||
@@ -0,0 +1,304 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0
|
||||
+/*
|
||||
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
|
||||
+ * including ChaCha20 (RFC7539)
|
||||
+ *
|
||||
+ * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
|
||||
+ * Copyright (C) 2015 Martin Willi
|
||||
+ */
|
||||
+
|
||||
+#include <crypto/algapi.h>
|
||||
+#include <crypto/internal/chacha.h>
|
||||
+#include <crypto/internal/simd.h>
|
||||
+#include <crypto/internal/skcipher.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/module.h>
|
||||
+
|
||||
+#include <asm/cputype.h>
|
||||
+#include <asm/hwcap.h>
|
||||
+#include <asm/neon.h>
|
||||
+#include <asm/simd.h>
|
||||
+
|
||||
+asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
+ int nrounds);
|
||||
+asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
+ int nrounds);
|
||||
+asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
|
||||
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
|
||||
+
|
||||
+asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
|
||||
+ const u32 *state, int nrounds);
|
||||
+
|
||||
+static inline bool neon_usable(void)
|
||||
+{
|
||||
+ return crypto_simd_usable();
|
||||
+}
|
||||
+
|
||||
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
|
||||
+ unsigned int bytes, int nrounds)
|
||||
+{
|
||||
+ u8 buf[CHACHA_BLOCK_SIZE];
|
||||
+
|
||||
+ while (bytes >= CHACHA_BLOCK_SIZE * 4) {
|
||||
+ chacha_4block_xor_neon(state, dst, src, nrounds);
|
||||
+ bytes -= CHACHA_BLOCK_SIZE * 4;
|
||||
+ src += CHACHA_BLOCK_SIZE * 4;
|
||||
+ dst += CHACHA_BLOCK_SIZE * 4;
|
||||
+ state[12] += 4;
|
||||
+ }
|
||||
+ while (bytes >= CHACHA_BLOCK_SIZE) {
|
||||
+ chacha_block_xor_neon(state, dst, src, nrounds);
|
||||
+ bytes -= CHACHA_BLOCK_SIZE;
|
||||
+ src += CHACHA_BLOCK_SIZE;
|
||||
+ dst += CHACHA_BLOCK_SIZE;
|
||||
+ state[12]++;
|
||||
+ }
|
||||
+ if (bytes) {
|
||||
+ memcpy(buf, src, bytes);
|
||||
+ chacha_block_xor_neon(state, buf, buf, nrounds);
|
||||
+ memcpy(dst, buf, bytes);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static int chacha_stream_xor(struct skcipher_request *req,
|
||||
+ const struct chacha_ctx *ctx, const u8 *iv,
|
||||
+ bool neon)
|
||||
+{
|
||||
+ struct skcipher_walk walk;
|
||||
+ u32 state[16];
|
||||
+ int err;
|
||||
+
|
||||
+ err = skcipher_walk_virt(&walk, req, false);
|
||||
+
|
||||
+ chacha_init_generic(state, ctx->key, iv);
|
||||
+
|
||||
+ while (walk.nbytes > 0) {
|
||||
+ unsigned int nbytes = walk.nbytes;
|
||||
+
|
||||
+ if (nbytes < walk.total)
|
||||
+ nbytes = round_down(nbytes, walk.stride);
|
||||
+
|
||||
+ if (!neon) {
|
||||
+ chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
+ nbytes, state, ctx->nrounds);
|
||||
+ state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
|
||||
+ } else {
|
||||
+ kernel_neon_begin();
|
||||
+ chacha_doneon(state, walk.dst.virt.addr,
|
||||
+ walk.src.virt.addr, nbytes, ctx->nrounds);
|
||||
+ kernel_neon_end();
|
||||
+ }
|
||||
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
+ }
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static int do_chacha(struct skcipher_request *req, bool neon)
|
||||
+{
|
||||
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
+
|
||||
+ return chacha_stream_xor(req, ctx, req->iv, neon);
|
||||
+}
|
||||
+
|
||||
+static int chacha_arm(struct skcipher_request *req)
|
||||
+{
|
||||
+ return do_chacha(req, false);
|
||||
+}
|
||||
+
|
||||
+static int chacha_neon(struct skcipher_request *req)
|
||||
+{
|
||||
+ return do_chacha(req, neon_usable());
|
||||
+}
|
||||
+
|
||||
+static int do_xchacha(struct skcipher_request *req, bool neon)
|
||||
+{
|
||||
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
+ struct chacha_ctx subctx;
|
||||
+ u32 state[16];
|
||||
+ u8 real_iv[16];
|
||||
+
|
||||
+ chacha_init_generic(state, ctx->key, req->iv);
|
||||
+
|
||||
+ if (!neon) {
|
||||
+ hchacha_block_arm(state, subctx.key, ctx->nrounds);
|
||||
+ } else {
|
||||
+ kernel_neon_begin();
|
||||
+ hchacha_block_neon(state, subctx.key, ctx->nrounds);
|
||||
+ kernel_neon_end();
|
||||
+ }
|
||||
+ subctx.nrounds = ctx->nrounds;
|
||||
+
|
||||
+ memcpy(&real_iv[0], req->iv + 24, 8);
|
||||
+ memcpy(&real_iv[8], req->iv + 16, 8);
|
||||
+ return chacha_stream_xor(req, &subctx, real_iv, neon);
|
||||
+}
|
||||
+
|
||||
+static int xchacha_arm(struct skcipher_request *req)
|
||||
+{
|
||||
+ return do_xchacha(req, false);
|
||||
+}
|
||||
+
|
||||
+static int xchacha_neon(struct skcipher_request *req)
|
||||
+{
|
||||
+ return do_xchacha(req, neon_usable());
|
||||
+}
|
||||
+
|
||||
+static struct skcipher_alg arm_algs[] = {
|
||||
+ {
|
||||
+ .base.cra_name = "chacha20",
|
||||
+ .base.cra_driver_name = "chacha20-arm",
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = 1,
|
||||
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .min_keysize = CHACHA_KEY_SIZE,
|
||||
+ .max_keysize = CHACHA_KEY_SIZE,
|
||||
+ .ivsize = CHACHA_IV_SIZE,
|
||||
+ .chunksize = CHACHA_BLOCK_SIZE,
|
||||
+ .setkey = chacha20_setkey,
|
||||
+ .encrypt = chacha_arm,
|
||||
+ .decrypt = chacha_arm,
|
||||
+ }, {
|
||||
+ .base.cra_name = "xchacha20",
|
||||
+ .base.cra_driver_name = "xchacha20-arm",
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = 1,
|
||||
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .min_keysize = CHACHA_KEY_SIZE,
|
||||
+ .max_keysize = CHACHA_KEY_SIZE,
|
||||
+ .ivsize = XCHACHA_IV_SIZE,
|
||||
+ .chunksize = CHACHA_BLOCK_SIZE,
|
||||
+ .setkey = chacha20_setkey,
|
||||
+ .encrypt = xchacha_arm,
|
||||
+ .decrypt = xchacha_arm,
|
||||
+ }, {
|
||||
+ .base.cra_name = "xchacha12",
|
||||
+ .base.cra_driver_name = "xchacha12-arm",
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = 1,
|
||||
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .min_keysize = CHACHA_KEY_SIZE,
|
||||
+ .max_keysize = CHACHA_KEY_SIZE,
|
||||
+ .ivsize = XCHACHA_IV_SIZE,
|
||||
+ .chunksize = CHACHA_BLOCK_SIZE,
|
||||
+ .setkey = chacha12_setkey,
|
||||
+ .encrypt = xchacha_arm,
|
||||
+ .decrypt = xchacha_arm,
|
||||
+ },
|
||||
+};
|
||||
+
|
||||
+static struct skcipher_alg neon_algs[] = {
|
||||
+ {
|
||||
+ .base.cra_name = "chacha20",
|
||||
+ .base.cra_driver_name = "chacha20-neon",
|
||||
+ .base.cra_priority = 300,
|
||||
+ .base.cra_blocksize = 1,
|
||||
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .min_keysize = CHACHA_KEY_SIZE,
|
||||
+ .max_keysize = CHACHA_KEY_SIZE,
|
||||
+ .ivsize = CHACHA_IV_SIZE,
|
||||
+ .chunksize = CHACHA_BLOCK_SIZE,
|
||||
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
|
||||
+ .setkey = chacha20_setkey,
|
||||
+ .encrypt = chacha_neon,
|
||||
+ .decrypt = chacha_neon,
|
||||
+ }, {
|
||||
+ .base.cra_name = "xchacha20",
|
||||
+ .base.cra_driver_name = "xchacha20-neon",
|
||||
+ .base.cra_priority = 300,
|
||||
+ .base.cra_blocksize = 1,
|
||||
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .min_keysize = CHACHA_KEY_SIZE,
|
||||
+ .max_keysize = CHACHA_KEY_SIZE,
|
||||
+ .ivsize = XCHACHA_IV_SIZE,
|
||||
+ .chunksize = CHACHA_BLOCK_SIZE,
|
||||
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
|
||||
+ .setkey = chacha20_setkey,
|
||||
+ .encrypt = xchacha_neon,
|
||||
+ .decrypt = xchacha_neon,
|
||||
+ }, {
|
||||
+ .base.cra_name = "xchacha12",
|
||||
+ .base.cra_driver_name = "xchacha12-neon",
|
||||
+ .base.cra_priority = 300,
|
||||
+ .base.cra_blocksize = 1,
|
||||
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .min_keysize = CHACHA_KEY_SIZE,
|
||||
+ .max_keysize = CHACHA_KEY_SIZE,
|
||||
+ .ivsize = XCHACHA_IV_SIZE,
|
||||
+ .chunksize = CHACHA_BLOCK_SIZE,
|
||||
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
|
||||
+ .setkey = chacha12_setkey,
|
||||
+ .encrypt = xchacha_neon,
|
||||
+ .decrypt = xchacha_neon,
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+static int __init chacha_simd_mod_init(void)
|
||||
+{
|
||||
+ int err;
|
||||
+
|
||||
+ err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
|
||||
+ int i;
|
||||
+
|
||||
+ switch (read_cpuid_part()) {
|
||||
+ case ARM_CPU_PART_CORTEX_A7:
|
||||
+ case ARM_CPU_PART_CORTEX_A5:
|
||||
+ /*
|
||||
+ * The Cortex-A7 and Cortex-A5 do not perform well with
|
||||
+ * the NEON implementation but perform very well with the
|
||||
+ * scalar one and use less power.
|
||||
+ */
|
||||
+ for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
|
||||
+ neon_algs[i].base.cra_priority = 0;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
|
||||
+ if (err)
|
||||
+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
|
||||
+ }
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static void __exit chacha_simd_mod_fini(void)
|
||||
+{
|
||||
+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
|
||||
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
|
||||
+ crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
|
||||
+}
|
||||
+
|
||||
+module_init(chacha_simd_mod_init);
|
||||
+module_exit(chacha_simd_mod_fini);
|
||||
+
|
||||
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
|
||||
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
+MODULE_LICENSE("GPL v2");
|
||||
+MODULE_ALIAS_CRYPTO("chacha20");
|
||||
+MODULE_ALIAS_CRYPTO("chacha20-arm");
|
||||
+MODULE_ALIAS_CRYPTO("xchacha20");
|
||||
+MODULE_ALIAS_CRYPTO("xchacha20-arm");
|
||||
+MODULE_ALIAS_CRYPTO("xchacha12");
|
||||
+MODULE_ALIAS_CRYPTO("xchacha12-arm");
|
||||
+#ifdef CONFIG_KERNEL_MODE_NEON
|
||||
+MODULE_ALIAS_CRYPTO("chacha20-neon");
|
||||
+MODULE_ALIAS_CRYPTO("xchacha20-neon");
|
||||
+MODULE_ALIAS_CRYPTO("xchacha12-neon");
|
||||
+#endif
|
||||
--- a/arch/arm/crypto/chacha-neon-glue.c
|
||||
+++ /dev/null
|
||||
@@ -1,202 +0,0 @@
|
||||
-/*
|
||||
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
|
||||
- * including ChaCha20 (RFC7539)
|
||||
- *
|
||||
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
|
||||
- *
|
||||
- * This program is free software; you can redistribute it and/or modify
|
||||
- * it under the terms of the GNU General Public License version 2 as
|
||||
- * published by the Free Software Foundation.
|
||||
- *
|
||||
- * Based on:
|
||||
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
|
||||
- *
|
||||
- * Copyright (C) 2015 Martin Willi
|
||||
- *
|
||||
- * This program is free software; you can redistribute it and/or modify
|
||||
- * it under the terms of the GNU General Public License as published by
|
||||
- * the Free Software Foundation; either version 2 of the License, or
|
||||
- * (at your option) any later version.
|
||||
- */
|
||||
-
|
||||
-#include <crypto/algapi.h>
|
||||
-#include <crypto/internal/chacha.h>
|
||||
-#include <crypto/internal/simd.h>
|
||||
-#include <crypto/internal/skcipher.h>
|
||||
-#include <linux/kernel.h>
|
||||
-#include <linux/module.h>
|
||||
-
|
||||
-#include <asm/hwcap.h>
|
||||
-#include <asm/neon.h>
|
||||
-#include <asm/simd.h>
|
||||
-
|
||||
-asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
- int nrounds);
|
||||
-asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
- int nrounds);
|
||||
-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
|
||||
-
|
||||
-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
|
||||
- unsigned int bytes, int nrounds)
|
||||
-{
|
||||
- u8 buf[CHACHA_BLOCK_SIZE];
|
||||
-
|
||||
- while (bytes >= CHACHA_BLOCK_SIZE * 4) {
|
||||
- chacha_4block_xor_neon(state, dst, src, nrounds);
|
||||
- bytes -= CHACHA_BLOCK_SIZE * 4;
|
||||
- src += CHACHA_BLOCK_SIZE * 4;
|
||||
- dst += CHACHA_BLOCK_SIZE * 4;
|
||||
- state[12] += 4;
|
||||
- }
|
||||
- while (bytes >= CHACHA_BLOCK_SIZE) {
|
||||
- chacha_block_xor_neon(state, dst, src, nrounds);
|
||||
- bytes -= CHACHA_BLOCK_SIZE;
|
||||
- src += CHACHA_BLOCK_SIZE;
|
||||
- dst += CHACHA_BLOCK_SIZE;
|
||||
- state[12]++;
|
||||
- }
|
||||
- if (bytes) {
|
||||
- memcpy(buf, src, bytes);
|
||||
- chacha_block_xor_neon(state, buf, buf, nrounds);
|
||||
- memcpy(dst, buf, bytes);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-static int chacha_neon_stream_xor(struct skcipher_request *req,
|
||||
- const struct chacha_ctx *ctx, const u8 *iv)
|
||||
-{
|
||||
- struct skcipher_walk walk;
|
||||
- u32 state[16];
|
||||
- int err;
|
||||
-
|
||||
- err = skcipher_walk_virt(&walk, req, false);
|
||||
-
|
||||
- crypto_chacha_init(state, ctx, iv);
|
||||
-
|
||||
- while (walk.nbytes > 0) {
|
||||
- unsigned int nbytes = walk.nbytes;
|
||||
-
|
||||
- if (nbytes < walk.total)
|
||||
- nbytes = round_down(nbytes, walk.stride);
|
||||
-
|
||||
- kernel_neon_begin();
|
||||
- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
|
||||
- nbytes, ctx->nrounds);
|
||||
- kernel_neon_end();
|
||||
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
- }
|
||||
-
|
||||
- return err;
|
||||
-}
|
||||
-
|
||||
-static int chacha_neon(struct skcipher_request *req)
|
||||
-{
|
||||
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
-
|
||||
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
|
||||
- return crypto_chacha_crypt(req);
|
||||
-
|
||||
- return chacha_neon_stream_xor(req, ctx, req->iv);
|
||||
-}
|
||||
-
|
||||
-static int xchacha_neon(struct skcipher_request *req)
|
||||
-{
|
||||
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
- struct chacha_ctx subctx;
|
||||
- u32 state[16];
|
||||
- u8 real_iv[16];
|
||||
-
|
||||
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
|
||||
- return crypto_xchacha_crypt(req);
|
||||
-
|
||||
- crypto_chacha_init(state, ctx, req->iv);
|
||||
-
|
||||
- kernel_neon_begin();
|
||||
- hchacha_block_neon(state, subctx.key, ctx->nrounds);
|
||||
- kernel_neon_end();
|
||||
- subctx.nrounds = ctx->nrounds;
|
||||
-
|
||||
- memcpy(&real_iv[0], req->iv + 24, 8);
|
||||
- memcpy(&real_iv[8], req->iv + 16, 8);
|
||||
- return chacha_neon_stream_xor(req, &subctx, real_iv);
|
||||
-}
|
||||
-
|
||||
-static struct skcipher_alg algs[] = {
|
||||
- {
|
||||
- .base.cra_name = "chacha20",
|
||||
- .base.cra_driver_name = "chacha20-neon",
|
||||
- .base.cra_priority = 300,
|
||||
- .base.cra_blocksize = 1,
|
||||
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
- .base.cra_module = THIS_MODULE,
|
||||
-
|
||||
- .min_keysize = CHACHA_KEY_SIZE,
|
||||
- .max_keysize = CHACHA_KEY_SIZE,
|
||||
- .ivsize = CHACHA_IV_SIZE,
|
||||
- .chunksize = CHACHA_BLOCK_SIZE,
|
||||
- .walksize = 4 * CHACHA_BLOCK_SIZE,
|
||||
- .setkey = crypto_chacha20_setkey,
|
||||
- .encrypt = chacha_neon,
|
||||
- .decrypt = chacha_neon,
|
||||
- }, {
|
||||
- .base.cra_name = "xchacha20",
|
||||
- .base.cra_driver_name = "xchacha20-neon",
|
||||
- .base.cra_priority = 300,
|
||||
- .base.cra_blocksize = 1,
|
||||
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
- .base.cra_module = THIS_MODULE,
|
||||
-
|
||||
- .min_keysize = CHACHA_KEY_SIZE,
|
||||
- .max_keysize = CHACHA_KEY_SIZE,
|
||||
- .ivsize = XCHACHA_IV_SIZE,
|
||||
- .chunksize = CHACHA_BLOCK_SIZE,
|
||||
- .walksize = 4 * CHACHA_BLOCK_SIZE,
|
||||
- .setkey = crypto_chacha20_setkey,
|
||||
- .encrypt = xchacha_neon,
|
||||
- .decrypt = xchacha_neon,
|
||||
- }, {
|
||||
- .base.cra_name = "xchacha12",
|
||||
- .base.cra_driver_name = "xchacha12-neon",
|
||||
- .base.cra_priority = 300,
|
||||
- .base.cra_blocksize = 1,
|
||||
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
- .base.cra_module = THIS_MODULE,
|
||||
-
|
||||
- .min_keysize = CHACHA_KEY_SIZE,
|
||||
- .max_keysize = CHACHA_KEY_SIZE,
|
||||
- .ivsize = XCHACHA_IV_SIZE,
|
||||
- .chunksize = CHACHA_BLOCK_SIZE,
|
||||
- .walksize = 4 * CHACHA_BLOCK_SIZE,
|
||||
- .setkey = crypto_chacha12_setkey,
|
||||
- .encrypt = xchacha_neon,
|
||||
- .decrypt = xchacha_neon,
|
||||
- }
|
||||
-};
|
||||
-
|
||||
-static int __init chacha_simd_mod_init(void)
|
||||
-{
|
||||
- if (!(elf_hwcap & HWCAP_NEON))
|
||||
- return -ENODEV;
|
||||
-
|
||||
- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
|
||||
-}
|
||||
-
|
||||
-static void __exit chacha_simd_mod_fini(void)
|
||||
-{
|
||||
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
|
||||
-}
|
||||
-
|
||||
-module_init(chacha_simd_mod_init);
|
||||
-module_exit(chacha_simd_mod_fini);
|
||||
-
|
||||
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
|
||||
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
-MODULE_LICENSE("GPL v2");
|
||||
-MODULE_ALIAS_CRYPTO("chacha20");
|
||||
-MODULE_ALIAS_CRYPTO("chacha20-neon");
|
||||
-MODULE_ALIAS_CRYPTO("xchacha20");
|
||||
-MODULE_ALIAS_CRYPTO("xchacha20-neon");
|
||||
-MODULE_ALIAS_CRYPTO("xchacha12");
|
||||
-MODULE_ALIAS_CRYPTO("xchacha12-neon");
|
||||
--- a/arch/arm/crypto/chacha-scalar-core.S
|
||||
+++ b/arch/arm/crypto/chacha-scalar-core.S
|
||||
@@ -41,14 +41,6 @@
|
||||
X14 .req r12
|
||||
X15 .req r14
|
||||
|
||||
-.Lexpand_32byte_k:
|
||||
- // "expand 32-byte k"
|
||||
- .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
|
||||
-
|
||||
-#ifdef __thumb2__
|
||||
-# define adrl adr
|
||||
-#endif
|
||||
-
|
||||
.macro __rev out, in, t0, t1, t2
|
||||
.if __LINUX_ARM_ARCH__ >= 6
|
||||
rev \out, \in
|
||||
@@ -391,61 +383,65 @@
|
||||
.endm // _chacha
|
||||
|
||||
/*
|
||||
- * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
|
||||
- * const u32 iv[4]);
|
||||
+ * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
|
||||
+ * const u32 *state, int nrounds);
|
||||
*/
|
||||
-ENTRY(chacha20_arm)
|
||||
+ENTRY(chacha_doarm)
|
||||
cmp r2, #0 // len == 0?
|
||||
reteq lr
|
||||
|
||||
+ ldr ip, [sp]
|
||||
+ cmp ip, #12
|
||||
+
|
||||
push {r0-r2,r4-r11,lr}
|
||||
|
||||
// Push state x0-x15 onto stack.
|
||||
// Also store an extra copy of x10-x11 just before the state.
|
||||
|
||||
- ldr r4, [sp, #48] // iv
|
||||
- mov r0, sp
|
||||
- sub sp, #80
|
||||
-
|
||||
- // iv: x12-x15
|
||||
- ldm r4, {X12,X13,X14,X15}
|
||||
- stmdb r0!, {X12,X13,X14,X15}
|
||||
+ add X12, r3, #48
|
||||
+ ldm X12, {X12,X13,X14,X15}
|
||||
+ push {X12,X13,X14,X15}
|
||||
+ sub sp, sp, #64
|
||||
|
||||
- // key: x4-x11
|
||||
- __ldrd X8_X10, X9_X11, r3, 24
|
||||
+ __ldrd X8_X10, X9_X11, r3, 40
|
||||
__strd X8_X10, X9_X11, sp, 8
|
||||
- stmdb r0!, {X8_X10, X9_X11}
|
||||
- ldm r3, {X4-X9_X11}
|
||||
- stmdb r0!, {X4-X9_X11}
|
||||
-
|
||||
- // constants: x0-x3
|
||||
- adrl X3, .Lexpand_32byte_k
|
||||
- ldm X3, {X0-X3}
|
||||
+ __strd X8_X10, X9_X11, sp, 56
|
||||
+ ldm r3, {X0-X9_X11}
|
||||
__strd X0, X1, sp, 16
|
||||
__strd X2, X3, sp, 24
|
||||
+ __strd X4, X5, sp, 32
|
||||
+ __strd X6, X7, sp, 40
|
||||
+ __strd X8_X10, X9_X11, sp, 48
|
||||
|
||||
+ beq 1f
|
||||
_chacha 20
|
||||
|
||||
- add sp, #76
|
||||
+0: add sp, #76
|
||||
pop {r4-r11, pc}
|
||||
-ENDPROC(chacha20_arm)
|
||||
+
|
||||
+1: _chacha 12
|
||||
+ b 0b
|
||||
+ENDPROC(chacha_doarm)
|
||||
|
||||
/*
|
||||
- * void hchacha20_arm(const u32 state[16], u32 out[8]);
|
||||
+ * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
|
||||
*/
|
||||
-ENTRY(hchacha20_arm)
|
||||
+ENTRY(hchacha_block_arm)
|
||||
push {r1,r4-r11,lr}
|
||||
|
||||
+ cmp r2, #12 // ChaCha12 ?
|
||||
+
|
||||
mov r14, r0
|
||||
ldmia r14!, {r0-r11} // load x0-x11
|
||||
push {r10-r11} // store x10-x11 to stack
|
||||
ldm r14, {r10-r12,r14} // load x12-x15
|
||||
sub sp, #8
|
||||
|
||||
+ beq 1f
|
||||
_chacha_permute 20
|
||||
|
||||
// Skip over (unused0-unused1, x10-x11)
|
||||
- add sp, #16
|
||||
+0: add sp, #16
|
||||
|
||||
// Fix up rotations of x12-x15
|
||||
ror X12, X12, #drot
|
||||
@@ -458,4 +454,7 @@ ENTRY(hchacha20_arm)
|
||||
stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
|
||||
|
||||
pop {r4-r11,pc}
|
||||
-ENDPROC(hchacha20_arm)
|
||||
+
|
||||
+1: _chacha_permute 12
|
||||
+ b 0b
|
||||
+ENDPROC(hchacha_block_arm)
|
||||
--- a/arch/arm64/crypto/chacha-neon-glue.c
|
||||
+++ b/arch/arm64/crypto/chacha-neon-glue.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
|
||||
+ * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
|
||||
* including ChaCha20 (RFC7539)
|
||||
*
|
||||
* Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
|
@ -0,0 +1,108 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:15 +0100
|
||||
Subject: [PATCH] crypto: arm/chacha - expose ARM ChaCha routine as library
|
||||
function
|
||||
|
||||
commit a44a3430d71bad4ee56788a59fff099b291ea54c upstream.
|
||||
|
||||
Expose the accelerated NEON ChaCha routine directly as a symbol
|
||||
export so that users of the ChaCha library API can use it directly.
|
||||
|
||||
Given that calls into the library API will always go through the
|
||||
routines in this module if it is enabled, switch to static keys
|
||||
to select the optimal implementation available (which may be none
|
||||
at all, in which case we defer to the generic implementation for
|
||||
all invocations).
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/Kconfig | 1 +
|
||||
arch/arm/crypto/chacha-glue.c | 41 ++++++++++++++++++++++++++++++++++-
|
||||
2 files changed, 41 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/arm/crypto/Kconfig
|
||||
+++ b/arch/arm/crypto/Kconfig
|
||||
@@ -129,6 +129,7 @@ config CRYPTO_CRC32_ARM_CE
|
||||
config CRYPTO_CHACHA20_NEON
|
||||
tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
|
||||
select CRYPTO_BLKCIPHER
|
||||
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
|
||||
|
||||
config CRYPTO_NHPOLY1305_NEON
|
||||
tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
|
||||
--- a/arch/arm/crypto/chacha-glue.c
|
||||
+++ b/arch/arm/crypto/chacha-glue.c
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <crypto/internal/chacha.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
+#include <linux/jump_label.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
@@ -29,9 +30,11 @@ asmlinkage void hchacha_block_neon(const
|
||||
asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
|
||||
const u32 *state, int nrounds);
|
||||
|
||||
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
|
||||
+
|
||||
static inline bool neon_usable(void)
|
||||
{
|
||||
- return crypto_simd_usable();
|
||||
+ return static_branch_likely(&use_neon) && crypto_simd_usable();
|
||||
}
|
||||
|
||||
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
|
||||
@@ -60,6 +63,40 @@ static void chacha_doneon(u32 *state, u8
|
||||
}
|
||||
}
|
||||
|
||||
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
|
||||
+{
|
||||
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
|
||||
+ hchacha_block_arm(state, stream, nrounds);
|
||||
+ } else {
|
||||
+ kernel_neon_begin();
|
||||
+ hchacha_block_neon(state, stream, nrounds);
|
||||
+ kernel_neon_end();
|
||||
+ }
|
||||
+}
|
||||
+EXPORT_SYMBOL(hchacha_block_arch);
|
||||
+
|
||||
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
|
||||
+{
|
||||
+ chacha_init_generic(state, key, iv);
|
||||
+}
|
||||
+EXPORT_SYMBOL(chacha_init_arch);
|
||||
+
|
||||
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
|
||||
+ int nrounds)
|
||||
+{
|
||||
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
|
||||
+ bytes <= CHACHA_BLOCK_SIZE) {
|
||||
+ chacha_doarm(dst, src, bytes, state, nrounds);
|
||||
+ state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ kernel_neon_begin();
|
||||
+ chacha_doneon(state, dst, src, bytes, nrounds);
|
||||
+ kernel_neon_end();
|
||||
+}
|
||||
+EXPORT_SYMBOL(chacha_crypt_arch);
|
||||
+
|
||||
static int chacha_stream_xor(struct skcipher_request *req,
|
||||
const struct chacha_ctx *ctx, const u8 *iv,
|
||||
bool neon)
|
||||
@@ -269,6 +306,8 @@ static int __init chacha_simd_mod_init(v
|
||||
for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
|
||||
neon_algs[i].base.cra_priority = 0;
|
||||
break;
|
||||
+ default:
|
||||
+ static_branch_enable(&use_neon);
|
||||
}
|
||||
|
||||
err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
|
@ -0,0 +1,451 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Fri, 8 Nov 2019 13:22:16 +0100
|
||||
Subject: [PATCH] crypto: mips/chacha - import 32r2 ChaCha code from Zinc
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
commit 49aa7c00eddf8d8f462b0256bd82e81762d7b0c6 upstream.
|
||||
|
||||
This imports the accelerated MIPS 32r2 ChaCha20 implementation from the
|
||||
Zinc patch set.
|
||||
|
||||
Co-developed-by: René van Dorst <opensource@vdorst.com>
|
||||
Signed-off-by: René van Dorst <opensource@vdorst.com>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/mips/crypto/chacha-core.S | 424 +++++++++++++++++++++++++++++++++
|
||||
1 file changed, 424 insertions(+)
|
||||
create mode 100644 arch/mips/crypto/chacha-core.S
|
||||
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/crypto/chacha-core.S
|
||||
@@ -0,0 +1,424 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
+/*
|
||||
+ * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
|
||||
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
+ */
|
||||
+
|
||||
+#define MASK_U32 0x3c
|
||||
+#define CHACHA20_BLOCK_SIZE 64
|
||||
+#define STACK_SIZE 32
|
||||
+
|
||||
+#define X0 $t0
|
||||
+#define X1 $t1
|
||||
+#define X2 $t2
|
||||
+#define X3 $t3
|
||||
+#define X4 $t4
|
||||
+#define X5 $t5
|
||||
+#define X6 $t6
|
||||
+#define X7 $t7
|
||||
+#define X8 $t8
|
||||
+#define X9 $t9
|
||||
+#define X10 $v1
|
||||
+#define X11 $s6
|
||||
+#define X12 $s5
|
||||
+#define X13 $s4
|
||||
+#define X14 $s3
|
||||
+#define X15 $s2
|
||||
+/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
|
||||
+#define T0 $s1
|
||||
+#define T1 $s0
|
||||
+#define T(n) T ## n
|
||||
+#define X(n) X ## n
|
||||
+
|
||||
+/* Input arguments */
|
||||
+#define STATE $a0
|
||||
+#define OUT $a1
|
||||
+#define IN $a2
|
||||
+#define BYTES $a3
|
||||
+
|
||||
+/* Output argument */
|
||||
+/* NONCE[0] is kept in a register and not in memory.
|
||||
+ * We don't want to touch original value in memory.
|
||||
+ * Must be incremented every loop iteration.
|
||||
+ */
|
||||
+#define NONCE_0 $v0
|
||||
+
|
||||
+/* SAVED_X and SAVED_CA are set in the jump table.
|
||||
+ * Use regs which are overwritten on exit so we don't leak clear data.
|
||||
+ * They are used to handle the last bytes which are not a multiple of 4.
|
||||
+ */
|
||||
+#define SAVED_X X15
|
||||
+#define SAVED_CA $s7
|
||||
+
|
||||
+#define IS_UNALIGNED $s7
|
||||
+
|
||||
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
+#define MSB 0
|
||||
+#define LSB 3
|
||||
+#define ROTx rotl
|
||||
+#define ROTR(n) rotr n, 24
|
||||
+#define CPU_TO_LE32(n) \
|
||||
+ wsbh n; \
|
||||
+ rotr n, 16;
|
||||
+#else
|
||||
+#define MSB 3
|
||||
+#define LSB 0
|
||||
+#define ROTx rotr
|
||||
+#define CPU_TO_LE32(n)
|
||||
+#define ROTR(n)
|
||||
+#endif
|
||||
+
|
||||
+#define FOR_EACH_WORD(x) \
|
||||
+ x( 0); \
|
||||
+ x( 1); \
|
||||
+ x( 2); \
|
||||
+ x( 3); \
|
||||
+ x( 4); \
|
||||
+ x( 5); \
|
||||
+ x( 6); \
|
||||
+ x( 7); \
|
||||
+ x( 8); \
|
||||
+ x( 9); \
|
||||
+ x(10); \
|
||||
+ x(11); \
|
||||
+ x(12); \
|
||||
+ x(13); \
|
||||
+ x(14); \
|
||||
+ x(15);
|
||||
+
|
||||
+#define FOR_EACH_WORD_REV(x) \
|
||||
+ x(15); \
|
||||
+ x(14); \
|
||||
+ x(13); \
|
||||
+ x(12); \
|
||||
+ x(11); \
|
||||
+ x(10); \
|
||||
+ x( 9); \
|
||||
+ x( 8); \
|
||||
+ x( 7); \
|
||||
+ x( 6); \
|
||||
+ x( 5); \
|
||||
+ x( 4); \
|
||||
+ x( 3); \
|
||||
+ x( 2); \
|
||||
+ x( 1); \
|
||||
+ x( 0);
|
||||
+
|
||||
+#define PLUS_ONE_0 1
|
||||
+#define PLUS_ONE_1 2
|
||||
+#define PLUS_ONE_2 3
|
||||
+#define PLUS_ONE_3 4
|
||||
+#define PLUS_ONE_4 5
|
||||
+#define PLUS_ONE_5 6
|
||||
+#define PLUS_ONE_6 7
|
||||
+#define PLUS_ONE_7 8
|
||||
+#define PLUS_ONE_8 9
|
||||
+#define PLUS_ONE_9 10
|
||||
+#define PLUS_ONE_10 11
|
||||
+#define PLUS_ONE_11 12
|
||||
+#define PLUS_ONE_12 13
|
||||
+#define PLUS_ONE_13 14
|
||||
+#define PLUS_ONE_14 15
|
||||
+#define PLUS_ONE_15 16
|
||||
+#define PLUS_ONE(x) PLUS_ONE_ ## x
|
||||
+#define _CONCAT3(a,b,c) a ## b ## c
|
||||
+#define CONCAT3(a,b,c) _CONCAT3(a,b,c)
|
||||
+
|
||||
+#define STORE_UNALIGNED(x) \
|
||||
+CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
|
||||
+ .if (x != 12); \
|
||||
+ lw T0, (x*4)(STATE); \
|
||||
+ .endif; \
|
||||
+ lwl T1, (x*4)+MSB ## (IN); \
|
||||
+ lwr T1, (x*4)+LSB ## (IN); \
|
||||
+ .if (x == 12); \
|
||||
+ addu X ## x, NONCE_0; \
|
||||
+ .else; \
|
||||
+ addu X ## x, T0; \
|
||||
+ .endif; \
|
||||
+ CPU_TO_LE32(X ## x); \
|
||||
+ xor X ## x, T1; \
|
||||
+ swl X ## x, (x*4)+MSB ## (OUT); \
|
||||
+ swr X ## x, (x*4)+LSB ## (OUT);
|
||||
+
|
||||
+#define STORE_ALIGNED(x) \
|
||||
+CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
|
||||
+ .if (x != 12); \
|
||||
+ lw T0, (x*4)(STATE); \
|
||||
+ .endif; \
|
||||
+ lw T1, (x*4) ## (IN); \
|
||||
+ .if (x == 12); \
|
||||
+ addu X ## x, NONCE_0; \
|
||||
+ .else; \
|
||||
+ addu X ## x, T0; \
|
||||
+ .endif; \
|
||||
+ CPU_TO_LE32(X ## x); \
|
||||
+ xor X ## x, T1; \
|
||||
+ sw X ## x, (x*4) ## (OUT);
|
||||
+
|
||||
+/* Jump table macro.
|
||||
+ * Used for setup and handling the last bytes, which are not a multiple of 4.
|
||||
+ * X15 is free to store Xn
|
||||
+ * Every jumptable entry must be equal in size.
|
||||
+ */
|
||||
+#define JMPTBL_ALIGNED(x) \
|
||||
+.Lchacha20_mips_jmptbl_aligned_ ## x: ; \
|
||||
+ .set noreorder; \
|
||||
+ b .Lchacha20_mips_xor_aligned_ ## x ## _b; \
|
||||
+ .if (x == 12); \
|
||||
+ addu SAVED_X, X ## x, NONCE_0; \
|
||||
+ .else; \
|
||||
+ addu SAVED_X, X ## x, SAVED_CA; \
|
||||
+ .endif; \
|
||||
+ .set reorder
|
||||
+
|
||||
+#define JMPTBL_UNALIGNED(x) \
|
||||
+.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \
|
||||
+ .set noreorder; \
|
||||
+ b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \
|
||||
+ .if (x == 12); \
|
||||
+ addu SAVED_X, X ## x, NONCE_0; \
|
||||
+ .else; \
|
||||
+ addu SAVED_X, X ## x, SAVED_CA; \
|
||||
+ .endif; \
|
||||
+ .set reorder
|
||||
+
|
||||
+#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \
|
||||
+ addu X(A), X(K); \
|
||||
+ addu X(B), X(L); \
|
||||
+ addu X(C), X(M); \
|
||||
+ addu X(D), X(N); \
|
||||
+ xor X(V), X(A); \
|
||||
+ xor X(W), X(B); \
|
||||
+ xor X(Y), X(C); \
|
||||
+ xor X(Z), X(D); \
|
||||
+ rotl X(V), S; \
|
||||
+ rotl X(W), S; \
|
||||
+ rotl X(Y), S; \
|
||||
+ rotl X(Z), S;
|
||||
+
|
||||
+.text
|
||||
+.set reorder
|
||||
+.set noat
|
||||
+.globl chacha20_mips
|
||||
+.ent chacha20_mips
|
||||
+chacha20_mips:
|
||||
+ .frame $sp, STACK_SIZE, $ra
|
||||
+
|
||||
+ addiu $sp, -STACK_SIZE
|
||||
+
|
||||
+ /* Return bytes = 0. */
|
||||
+ beqz BYTES, .Lchacha20_mips_end
|
||||
+
|
||||
+ lw NONCE_0, 48(STATE)
|
||||
+
|
||||
+ /* Save s0-s7 */
|
||||
+ sw $s0, 0($sp)
|
||||
+ sw $s1, 4($sp)
|
||||
+ sw $s2, 8($sp)
|
||||
+ sw $s3, 12($sp)
|
||||
+ sw $s4, 16($sp)
|
||||
+ sw $s5, 20($sp)
|
||||
+ sw $s6, 24($sp)
|
||||
+ sw $s7, 28($sp)
|
||||
+
|
||||
+ /* Test IN or OUT is unaligned.
|
||||
+ * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
|
||||
+ */
|
||||
+ or IS_UNALIGNED, IN, OUT
|
||||
+ andi IS_UNALIGNED, 0x3
|
||||
+
|
||||
+ /* Set number of rounds */
|
||||
+ li $at, 20
|
||||
+
|
||||
+ b .Lchacha20_rounds_start
|
||||
+
|
||||
+.align 4
|
||||
+.Loop_chacha20_rounds:
|
||||
+ addiu IN, CHACHA20_BLOCK_SIZE
|
||||
+ addiu OUT, CHACHA20_BLOCK_SIZE
|
||||
+ addiu NONCE_0, 1
|
||||
+
|
||||
+.Lchacha20_rounds_start:
|
||||
+ lw X0, 0(STATE)
|
||||
+ lw X1, 4(STATE)
|
||||
+ lw X2, 8(STATE)
|
||||
+ lw X3, 12(STATE)
|
||||
+
|
||||
+ lw X4, 16(STATE)
|
||||
+ lw X5, 20(STATE)
|
||||
+ lw X6, 24(STATE)
|
||||
+ lw X7, 28(STATE)
|
||||
+ lw X8, 32(STATE)
|
||||
+ lw X9, 36(STATE)
|
||||
+ lw X10, 40(STATE)
|
||||
+ lw X11, 44(STATE)
|
||||
+
|
||||
+ move X12, NONCE_0
|
||||
+ lw X13, 52(STATE)
|
||||
+ lw X14, 56(STATE)
|
||||
+ lw X15, 60(STATE)
|
||||
+
|
||||
+.Loop_chacha20_xor_rounds:
|
||||
+ addiu $at, -2
|
||||
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
|
||||
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
|
||||
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
|
||||
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
|
||||
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
|
||||
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
|
||||
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
|
||||
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
|
||||
+ bnez $at, .Loop_chacha20_xor_rounds
|
||||
+
|
||||
+ addiu BYTES, -(CHACHA20_BLOCK_SIZE)
|
||||
+
|
||||
+ /* Is data src/dst unaligned? Jump */
|
||||
+ bnez IS_UNALIGNED, .Loop_chacha20_unaligned
|
||||
+
|
||||
+ /* Set number rounds here to fill delayslot. */
|
||||
+ li $at, 20
|
||||
+
|
||||
+ /* BYTES < 0, it has no full block. */
|
||||
+ bltz BYTES, .Lchacha20_mips_no_full_block_aligned
|
||||
+
|
||||
+ FOR_EACH_WORD_REV(STORE_ALIGNED)
|
||||
+
|
||||
+ /* BYTES > 0? Loop again. */
|
||||
+ bgtz BYTES, .Loop_chacha20_rounds
|
||||
+
|
||||
+ /* Place this here to fill delay slot */
|
||||
+ addiu NONCE_0, 1
|
||||
+
|
||||
+ /* BYTES < 0? Handle last bytes */
|
||||
+ bltz BYTES, .Lchacha20_mips_xor_bytes
|
||||
+
|
||||
+.Lchacha20_mips_xor_done:
|
||||
+ /* Restore used registers */
|
||||
+ lw $s0, 0($sp)
|
||||
+ lw $s1, 4($sp)
|
||||
+ lw $s2, 8($sp)
|
||||
+ lw $s3, 12($sp)
|
||||
+ lw $s4, 16($sp)
|
||||
+ lw $s5, 20($sp)
|
||||
+ lw $s6, 24($sp)
|
||||
+ lw $s7, 28($sp)
|
||||
+
|
||||
+ /* Write NONCE_0 back to right location in state */
|
||||
+ sw NONCE_0, 48(STATE)
|
||||
+
|
||||
+.Lchacha20_mips_end:
|
||||
+ addiu $sp, STACK_SIZE
|
||||
+ jr $ra
|
||||
+
|
||||
+.Lchacha20_mips_no_full_block_aligned:
|
||||
+ /* Restore the offset on BYTES */
|
||||
+ addiu BYTES, CHACHA20_BLOCK_SIZE
|
||||
+
|
||||
+ /* Get number of full WORDS */
|
||||
+ andi $at, BYTES, MASK_U32
|
||||
+
|
||||
+ /* Load upper half of jump table addr */
|
||||
+ lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0)
|
||||
+
|
||||
+ /* Calculate lower half jump table offset */
|
||||
+ ins T0, $at, 1, 6
|
||||
+
|
||||
+ /* Add offset to STATE */
|
||||
+ addu T1, STATE, $at
|
||||
+
|
||||
+ /* Add lower half jump table addr */
|
||||
+ addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0)
|
||||
+
|
||||
+ /* Read value from STATE */
|
||||
+ lw SAVED_CA, 0(T1)
|
||||
+
|
||||
+ /* Store remaining bytecounter as negative value */
|
||||
+ subu BYTES, $at, BYTES
|
||||
+
|
||||
+ jr T0
|
||||
+
|
||||
+ /* Jump table */
|
||||
+ FOR_EACH_WORD(JMPTBL_ALIGNED)
|
||||
+
|
||||
+
|
||||
+.Loop_chacha20_unaligned:
|
||||
+ /* Set number rounds here to fill delayslot. */
|
||||
+ li $at, 20
|
||||
+
|
||||
+ /* BYTES > 0, it has no full block. */
|
||||
+ bltz BYTES, .Lchacha20_mips_no_full_block_unaligned
|
||||
+
|
||||
+ FOR_EACH_WORD_REV(STORE_UNALIGNED)
|
||||
+
|
||||
+ /* BYTES > 0? Loop again. */
|
||||
+ bgtz BYTES, .Loop_chacha20_rounds
|
||||
+
|
||||
+ /* Write NONCE_0 back to right location in state */
|
||||
+ sw NONCE_0, 48(STATE)
|
||||
+
|
||||
+ .set noreorder
|
||||
+ /* Fall through to byte handling */
|
||||
+ bgez BYTES, .Lchacha20_mips_xor_done
|
||||
+.Lchacha20_mips_xor_unaligned_0_b:
|
||||
+.Lchacha20_mips_xor_aligned_0_b:
|
||||
+ /* Place this here to fill delay slot */
|
||||
+ addiu NONCE_0, 1
|
||||
+ .set reorder
|
||||
+
|
||||
+.Lchacha20_mips_xor_bytes:
|
||||
+ addu IN, $at
|
||||
+ addu OUT, $at
|
||||
+ /* First byte */
|
||||
+ lbu T1, 0(IN)
|
||||
+ addiu $at, BYTES, 1
|
||||
+ CPU_TO_LE32(SAVED_X)
|
||||
+ ROTR(SAVED_X)
|
||||
+ xor T1, SAVED_X
|
||||
+ sb T1, 0(OUT)
|
||||
+ beqz $at, .Lchacha20_mips_xor_done
|
||||
+ /* Second byte */
|
||||
+ lbu T1, 1(IN)
|
||||
+ addiu $at, BYTES, 2
|
||||
+ ROTx SAVED_X, 8
|
||||
+ xor T1, SAVED_X
|
||||
+ sb T1, 1(OUT)
|
||||
+ beqz $at, .Lchacha20_mips_xor_done
|
||||
+ /* Third byte */
|
||||
+ lbu T1, 2(IN)
|
||||
+ ROTx SAVED_X, 8
|
||||
+ xor T1, SAVED_X
|
||||
+ sb T1, 2(OUT)
|
||||
+ b .Lchacha20_mips_xor_done
|
||||
+
|
||||
+.Lchacha20_mips_no_full_block_unaligned:
|
||||
+ /* Restore the offset on BYTES */
|
||||
+ addiu BYTES, CHACHA20_BLOCK_SIZE
|
||||
+
|
||||
+ /* Get number of full WORDS */
|
||||
+ andi $at, BYTES, MASK_U32
|
||||
+
|
||||
+ /* Load upper half of jump table addr */
|
||||
+ lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0)
|
||||
+
|
||||
+ /* Calculate lower half jump table offset */
|
||||
+ ins T0, $at, 1, 6
|
||||
+
|
||||
+ /* Add offset to STATE */
|
||||
+ addu T1, STATE, $at
|
||||
+
|
||||
+ /* Add lower half jump table addr */
|
||||
+ addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0)
|
||||
+
|
||||
+ /* Read value from STATE */
|
||||
+ lw SAVED_CA, 0(T1)
|
||||
+
|
||||
+ /* Store remaining bytecounter as negative value */
|
||||
+ subu BYTES, $at, BYTES
|
||||
+
|
||||
+ jr T0
|
||||
+
|
||||
+ /* Jump table */
|
||||
+ FOR_EACH_WORD(JMPTBL_UNALIGNED)
|
||||
+.end chacha20_mips
|
||||
+.set at
|
@ -0,0 +1,559 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:17 +0100
|
||||
Subject: [PATCH] crypto: mips/chacha - wire up accelerated 32r2 code from Zinc
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
commit 3a2f58f3ba4f6f44e33d1a48240d5eadb882cb59 upstream.
|
||||
|
||||
This integrates the accelerated MIPS 32r2 implementation of ChaCha
|
||||
into both the API and library interfaces of the kernel crypto stack.
|
||||
|
||||
The significance of this is that, in addition to becoming available
|
||||
as an accelerated library implementation, it can also be used by
|
||||
existing crypto API code such as Adiantum (for block encryption on
|
||||
ultra low performance cores) or IPsec using chacha20poly1305. These
|
||||
are use cases that have already opted into using the abstract crypto
|
||||
API. In order to support Adiantum, the core assembler routine has
|
||||
been adapted to take the round count as a function argument rather
|
||||
than hardcoding it to 20.
|
||||
|
||||
Co-developed-by: René van Dorst <opensource@vdorst.com>
|
||||
Signed-off-by: René van Dorst <opensource@vdorst.com>
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/mips/Makefile | 2 +-
|
||||
arch/mips/crypto/Makefile | 4 +
|
||||
arch/mips/crypto/chacha-core.S | 159 ++++++++++++++++++++++++---------
|
||||
arch/mips/crypto/chacha-glue.c | 150 +++++++++++++++++++++++++++++++
|
||||
crypto/Kconfig | 6 ++
|
||||
5 files changed, 277 insertions(+), 44 deletions(-)
|
||||
create mode 100644 arch/mips/crypto/chacha-glue.c
|
||||
|
||||
--- a/arch/mips/Makefile
|
||||
+++ b/arch/mips/Makefile
|
||||
@@ -334,7 +334,7 @@ libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/m
|
||||
# See arch/mips/Kbuild for content of core part of the kernel
|
||||
core-y += arch/mips/
|
||||
|
||||
-drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/
|
||||
+drivers-y += arch/mips/crypto/
|
||||
drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/
|
||||
|
||||
# suspend and hibernation support
|
||||
--- a/arch/mips/crypto/Makefile
|
||||
+++ b/arch/mips/crypto/Makefile
|
||||
@@ -4,3 +4,7 @@
|
||||
#
|
||||
|
||||
obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
|
||||
+
|
||||
+obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o
|
||||
+chacha-mips-y := chacha-core.o chacha-glue.o
|
||||
+AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
|
||||
--- a/arch/mips/crypto/chacha-core.S
|
||||
+++ b/arch/mips/crypto/chacha-core.S
|
||||
@@ -125,7 +125,7 @@
|
||||
#define CONCAT3(a,b,c) _CONCAT3(a,b,c)
|
||||
|
||||
#define STORE_UNALIGNED(x) \
|
||||
-CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
|
||||
+CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
|
||||
.if (x != 12); \
|
||||
lw T0, (x*4)(STATE); \
|
||||
.endif; \
|
||||
@@ -142,7 +142,7 @@ CONCAT3(.Lchacha20_mips_xor_unaligned_,
|
||||
swr X ## x, (x*4)+LSB ## (OUT);
|
||||
|
||||
#define STORE_ALIGNED(x) \
|
||||
-CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
|
||||
+CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
|
||||
.if (x != 12); \
|
||||
lw T0, (x*4)(STATE); \
|
||||
.endif; \
|
||||
@@ -162,9 +162,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PL
|
||||
* Every jumptable entry must be equal in size.
|
||||
*/
|
||||
#define JMPTBL_ALIGNED(x) \
|
||||
-.Lchacha20_mips_jmptbl_aligned_ ## x: ; \
|
||||
+.Lchacha_mips_jmptbl_aligned_ ## x: ; \
|
||||
.set noreorder; \
|
||||
- b .Lchacha20_mips_xor_aligned_ ## x ## _b; \
|
||||
+ b .Lchacha_mips_xor_aligned_ ## x ## _b; \
|
||||
.if (x == 12); \
|
||||
addu SAVED_X, X ## x, NONCE_0; \
|
||||
.else; \
|
||||
@@ -173,9 +173,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PL
|
||||
.set reorder
|
||||
|
||||
#define JMPTBL_UNALIGNED(x) \
|
||||
-.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \
|
||||
+.Lchacha_mips_jmptbl_unaligned_ ## x: ; \
|
||||
.set noreorder; \
|
||||
- b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \
|
||||
+ b .Lchacha_mips_xor_unaligned_ ## x ## _b; \
|
||||
.if (x == 12); \
|
||||
addu SAVED_X, X ## x, NONCE_0; \
|
||||
.else; \
|
||||
@@ -200,15 +200,18 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PL
|
||||
.text
|
||||
.set reorder
|
||||
.set noat
|
||||
-.globl chacha20_mips
|
||||
-.ent chacha20_mips
|
||||
-chacha20_mips:
|
||||
+.globl chacha_crypt_arch
|
||||
+.ent chacha_crypt_arch
|
||||
+chacha_crypt_arch:
|
||||
.frame $sp, STACK_SIZE, $ra
|
||||
|
||||
+ /* Load number of rounds */
|
||||
+ lw $at, 16($sp)
|
||||
+
|
||||
addiu $sp, -STACK_SIZE
|
||||
|
||||
/* Return bytes = 0. */
|
||||
- beqz BYTES, .Lchacha20_mips_end
|
||||
+ beqz BYTES, .Lchacha_mips_end
|
||||
|
||||
lw NONCE_0, 48(STATE)
|
||||
|
||||
@@ -228,18 +231,15 @@ chacha20_mips:
|
||||
or IS_UNALIGNED, IN, OUT
|
||||
andi IS_UNALIGNED, 0x3
|
||||
|
||||
- /* Set number of rounds */
|
||||
- li $at, 20
|
||||
-
|
||||
- b .Lchacha20_rounds_start
|
||||
+ b .Lchacha_rounds_start
|
||||
|
||||
.align 4
|
||||
-.Loop_chacha20_rounds:
|
||||
+.Loop_chacha_rounds:
|
||||
addiu IN, CHACHA20_BLOCK_SIZE
|
||||
addiu OUT, CHACHA20_BLOCK_SIZE
|
||||
addiu NONCE_0, 1
|
||||
|
||||
-.Lchacha20_rounds_start:
|
||||
+.Lchacha_rounds_start:
|
||||
lw X0, 0(STATE)
|
||||
lw X1, 4(STATE)
|
||||
lw X2, 8(STATE)
|
||||
@@ -259,7 +259,7 @@ chacha20_mips:
|
||||
lw X14, 56(STATE)
|
||||
lw X15, 60(STATE)
|
||||
|
||||
-.Loop_chacha20_xor_rounds:
|
||||
+.Loop_chacha_xor_rounds:
|
||||
addiu $at, -2
|
||||
AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
|
||||
AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
|
||||
@@ -269,31 +269,31 @@ chacha20_mips:
|
||||
AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
|
||||
AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
|
||||
AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
|
||||
- bnez $at, .Loop_chacha20_xor_rounds
|
||||
+ bnez $at, .Loop_chacha_xor_rounds
|
||||
|
||||
addiu BYTES, -(CHACHA20_BLOCK_SIZE)
|
||||
|
||||
/* Is data src/dst unaligned? Jump */
|
||||
- bnez IS_UNALIGNED, .Loop_chacha20_unaligned
|
||||
+ bnez IS_UNALIGNED, .Loop_chacha_unaligned
|
||||
|
||||
/* Set number rounds here to fill delayslot. */
|
||||
- li $at, 20
|
||||
+ lw $at, (STACK_SIZE+16)($sp)
|
||||
|
||||
/* BYTES < 0, it has no full block. */
|
||||
- bltz BYTES, .Lchacha20_mips_no_full_block_aligned
|
||||
+ bltz BYTES, .Lchacha_mips_no_full_block_aligned
|
||||
|
||||
FOR_EACH_WORD_REV(STORE_ALIGNED)
|
||||
|
||||
/* BYTES > 0? Loop again. */
|
||||
- bgtz BYTES, .Loop_chacha20_rounds
|
||||
+ bgtz BYTES, .Loop_chacha_rounds
|
||||
|
||||
/* Place this here to fill delay slot */
|
||||
addiu NONCE_0, 1
|
||||
|
||||
/* BYTES < 0? Handle last bytes */
|
||||
- bltz BYTES, .Lchacha20_mips_xor_bytes
|
||||
+ bltz BYTES, .Lchacha_mips_xor_bytes
|
||||
|
||||
-.Lchacha20_mips_xor_done:
|
||||
+.Lchacha_mips_xor_done:
|
||||
/* Restore used registers */
|
||||
lw $s0, 0($sp)
|
||||
lw $s1, 4($sp)
|
||||
@@ -307,11 +307,11 @@ chacha20_mips:
|
||||
/* Write NONCE_0 back to right location in state */
|
||||
sw NONCE_0, 48(STATE)
|
||||
|
||||
-.Lchacha20_mips_end:
|
||||
+.Lchacha_mips_end:
|
||||
addiu $sp, STACK_SIZE
|
||||
jr $ra
|
||||
|
||||
-.Lchacha20_mips_no_full_block_aligned:
|
||||
+.Lchacha_mips_no_full_block_aligned:
|
||||
/* Restore the offset on BYTES */
|
||||
addiu BYTES, CHACHA20_BLOCK_SIZE
|
||||
|
||||
@@ -319,7 +319,7 @@ chacha20_mips:
|
||||
andi $at, BYTES, MASK_U32
|
||||
|
||||
/* Load upper half of jump table addr */
|
||||
- lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0)
|
||||
+ lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0)
|
||||
|
||||
/* Calculate lower half jump table offset */
|
||||
ins T0, $at, 1, 6
|
||||
@@ -328,7 +328,7 @@ chacha20_mips:
|
||||
addu T1, STATE, $at
|
||||
|
||||
/* Add lower half jump table addr */
|
||||
- addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0)
|
||||
+ addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0)
|
||||
|
||||
/* Read value from STATE */
|
||||
lw SAVED_CA, 0(T1)
|
||||
@@ -342,31 +342,31 @@ chacha20_mips:
|
||||
FOR_EACH_WORD(JMPTBL_ALIGNED)
|
||||
|
||||
|
||||
-.Loop_chacha20_unaligned:
|
||||
+.Loop_chacha_unaligned:
|
||||
/* Set number rounds here to fill delayslot. */
|
||||
- li $at, 20
|
||||
+ lw $at, (STACK_SIZE+16)($sp)
|
||||
|
||||
/* BYTES > 0, it has no full block. */
|
||||
- bltz BYTES, .Lchacha20_mips_no_full_block_unaligned
|
||||
+ bltz BYTES, .Lchacha_mips_no_full_block_unaligned
|
||||
|
||||
FOR_EACH_WORD_REV(STORE_UNALIGNED)
|
||||
|
||||
/* BYTES > 0? Loop again. */
|
||||
- bgtz BYTES, .Loop_chacha20_rounds
|
||||
+ bgtz BYTES, .Loop_chacha_rounds
|
||||
|
||||
/* Write NONCE_0 back to right location in state */
|
||||
sw NONCE_0, 48(STATE)
|
||||
|
||||
.set noreorder
|
||||
/* Fall through to byte handling */
|
||||
- bgez BYTES, .Lchacha20_mips_xor_done
|
||||
-.Lchacha20_mips_xor_unaligned_0_b:
|
||||
-.Lchacha20_mips_xor_aligned_0_b:
|
||||
+ bgez BYTES, .Lchacha_mips_xor_done
|
||||
+.Lchacha_mips_xor_unaligned_0_b:
|
||||
+.Lchacha_mips_xor_aligned_0_b:
|
||||
/* Place this here to fill delay slot */
|
||||
addiu NONCE_0, 1
|
||||
.set reorder
|
||||
|
||||
-.Lchacha20_mips_xor_bytes:
|
||||
+.Lchacha_mips_xor_bytes:
|
||||
addu IN, $at
|
||||
addu OUT, $at
|
||||
/* First byte */
|
||||
@@ -376,22 +376,22 @@ chacha20_mips:
|
||||
ROTR(SAVED_X)
|
||||
xor T1, SAVED_X
|
||||
sb T1, 0(OUT)
|
||||
- beqz $at, .Lchacha20_mips_xor_done
|
||||
+ beqz $at, .Lchacha_mips_xor_done
|
||||
/* Second byte */
|
||||
lbu T1, 1(IN)
|
||||
addiu $at, BYTES, 2
|
||||
ROTx SAVED_X, 8
|
||||
xor T1, SAVED_X
|
||||
sb T1, 1(OUT)
|
||||
- beqz $at, .Lchacha20_mips_xor_done
|
||||
+ beqz $at, .Lchacha_mips_xor_done
|
||||
/* Third byte */
|
||||
lbu T1, 2(IN)
|
||||
ROTx SAVED_X, 8
|
||||
xor T1, SAVED_X
|
||||
sb T1, 2(OUT)
|
||||
- b .Lchacha20_mips_xor_done
|
||||
+ b .Lchacha_mips_xor_done
|
||||
|
||||
-.Lchacha20_mips_no_full_block_unaligned:
|
||||
+.Lchacha_mips_no_full_block_unaligned:
|
||||
/* Restore the offset on BYTES */
|
||||
addiu BYTES, CHACHA20_BLOCK_SIZE
|
||||
|
||||
@@ -399,7 +399,7 @@ chacha20_mips:
|
||||
andi $at, BYTES, MASK_U32
|
||||
|
||||
/* Load upper half of jump table addr */
|
||||
- lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0)
|
||||
+ lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0)
|
||||
|
||||
/* Calculate lower half jump table offset */
|
||||
ins T0, $at, 1, 6
|
||||
@@ -408,7 +408,7 @@ chacha20_mips:
|
||||
addu T1, STATE, $at
|
||||
|
||||
/* Add lower half jump table addr */
|
||||
- addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0)
|
||||
+ addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0)
|
||||
|
||||
/* Read value from STATE */
|
||||
lw SAVED_CA, 0(T1)
|
||||
@@ -420,5 +420,78 @@ chacha20_mips:
|
||||
|
||||
/* Jump table */
|
||||
FOR_EACH_WORD(JMPTBL_UNALIGNED)
|
||||
-.end chacha20_mips
|
||||
+.end chacha_crypt_arch
|
||||
+.set at
|
||||
+
|
||||
+/* Input arguments
|
||||
+ * STATE $a0
|
||||
+ * OUT $a1
|
||||
+ * NROUND $a2
|
||||
+ */
|
||||
+
|
||||
+#undef X12
|
||||
+#undef X13
|
||||
+#undef X14
|
||||
+#undef X15
|
||||
+
|
||||
+#define X12 $a3
|
||||
+#define X13 $at
|
||||
+#define X14 $v0
|
||||
+#define X15 STATE
|
||||
+
|
||||
+.set noat
|
||||
+.globl hchacha_block_arch
|
||||
+.ent hchacha_block_arch
|
||||
+hchacha_block_arch:
|
||||
+ .frame $sp, STACK_SIZE, $ra
|
||||
+
|
||||
+ addiu $sp, -STACK_SIZE
|
||||
+
|
||||
+ /* Save X11(s6) */
|
||||
+ sw X11, 0($sp)
|
||||
+
|
||||
+ lw X0, 0(STATE)
|
||||
+ lw X1, 4(STATE)
|
||||
+ lw X2, 8(STATE)
|
||||
+ lw X3, 12(STATE)
|
||||
+ lw X4, 16(STATE)
|
||||
+ lw X5, 20(STATE)
|
||||
+ lw X6, 24(STATE)
|
||||
+ lw X7, 28(STATE)
|
||||
+ lw X8, 32(STATE)
|
||||
+ lw X9, 36(STATE)
|
||||
+ lw X10, 40(STATE)
|
||||
+ lw X11, 44(STATE)
|
||||
+ lw X12, 48(STATE)
|
||||
+ lw X13, 52(STATE)
|
||||
+ lw X14, 56(STATE)
|
||||
+ lw X15, 60(STATE)
|
||||
+
|
||||
+.Loop_hchacha_xor_rounds:
|
||||
+ addiu $a2, -2
|
||||
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
|
||||
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
|
||||
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
|
||||
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
|
||||
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
|
||||
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
|
||||
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
|
||||
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
|
||||
+ bnez $a2, .Loop_hchacha_xor_rounds
|
||||
+
|
||||
+ /* Restore used register */
|
||||
+ lw X11, 0($sp)
|
||||
+
|
||||
+ sw X0, 0(OUT)
|
||||
+ sw X1, 4(OUT)
|
||||
+ sw X2, 8(OUT)
|
||||
+ sw X3, 12(OUT)
|
||||
+ sw X12, 16(OUT)
|
||||
+ sw X13, 20(OUT)
|
||||
+ sw X14, 24(OUT)
|
||||
+ sw X15, 28(OUT)
|
||||
+
|
||||
+ addiu $sp, STACK_SIZE
|
||||
+ jr $ra
|
||||
+.end hchacha_block_arch
|
||||
.set at
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/crypto/chacha-glue.c
|
||||
@@ -0,0 +1,150 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0
|
||||
+/*
|
||||
+ * MIPS accelerated ChaCha and XChaCha stream ciphers,
|
||||
+ * including ChaCha20 (RFC7539)
|
||||
+ *
|
||||
+ * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
|
||||
+ */
|
||||
+
|
||||
+#include <asm/byteorder.h>
|
||||
+#include <crypto/algapi.h>
|
||||
+#include <crypto/internal/chacha.h>
|
||||
+#include <crypto/internal/skcipher.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/module.h>
|
||||
+
|
||||
+asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
|
||||
+ unsigned int bytes, int nrounds);
|
||||
+EXPORT_SYMBOL(chacha_crypt_arch);
|
||||
+
|
||||
+asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds);
|
||||
+EXPORT_SYMBOL(hchacha_block_arch);
|
||||
+
|
||||
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
|
||||
+{
|
||||
+ chacha_init_generic(state, key, iv);
|
||||
+}
|
||||
+EXPORT_SYMBOL(chacha_init_arch);
|
||||
+
|
||||
+static int chacha_mips_stream_xor(struct skcipher_request *req,
|
||||
+ const struct chacha_ctx *ctx, const u8 *iv)
|
||||
+{
|
||||
+ struct skcipher_walk walk;
|
||||
+ u32 state[16];
|
||||
+ int err;
|
||||
+
|
||||
+ err = skcipher_walk_virt(&walk, req, false);
|
||||
+
|
||||
+ chacha_init_generic(state, ctx->key, iv);
|
||||
+
|
||||
+ while (walk.nbytes > 0) {
|
||||
+ unsigned int nbytes = walk.nbytes;
|
||||
+
|
||||
+ if (nbytes < walk.total)
|
||||
+ nbytes = round_down(nbytes, walk.stride);
|
||||
+
|
||||
+ chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr,
|
||||
+ nbytes, ctx->nrounds);
|
||||
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
+ }
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static int chacha_mips(struct skcipher_request *req)
|
||||
+{
|
||||
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
+
|
||||
+ return chacha_mips_stream_xor(req, ctx, req->iv);
|
||||
+}
|
||||
+
|
||||
+static int xchacha_mips(struct skcipher_request *req)
|
||||
+{
|
||||
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
+ struct chacha_ctx subctx;
|
||||
+ u32 state[16];
|
||||
+ u8 real_iv[16];
|
||||
+
|
||||
+ chacha_init_generic(state, ctx->key, req->iv);
|
||||
+
|
||||
+ hchacha_block(state, subctx.key, ctx->nrounds);
|
||||
+ subctx.nrounds = ctx->nrounds;
|
||||
+
|
||||
+ memcpy(&real_iv[0], req->iv + 24, 8);
|
||||
+ memcpy(&real_iv[8], req->iv + 16, 8);
|
||||
+ return chacha_mips_stream_xor(req, &subctx, real_iv);
|
||||
+}
|
||||
+
|
||||
+static struct skcipher_alg algs[] = {
|
||||
+ {
|
||||
+ .base.cra_name = "chacha20",
|
||||
+ .base.cra_driver_name = "chacha20-mips",
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = 1,
|
||||
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .min_keysize = CHACHA_KEY_SIZE,
|
||||
+ .max_keysize = CHACHA_KEY_SIZE,
|
||||
+ .ivsize = CHACHA_IV_SIZE,
|
||||
+ .chunksize = CHACHA_BLOCK_SIZE,
|
||||
+ .setkey = chacha20_setkey,
|
||||
+ .encrypt = chacha_mips,
|
||||
+ .decrypt = chacha_mips,
|
||||
+ }, {
|
||||
+ .base.cra_name = "xchacha20",
|
||||
+ .base.cra_driver_name = "xchacha20-mips",
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = 1,
|
||||
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .min_keysize = CHACHA_KEY_SIZE,
|
||||
+ .max_keysize = CHACHA_KEY_SIZE,
|
||||
+ .ivsize = XCHACHA_IV_SIZE,
|
||||
+ .chunksize = CHACHA_BLOCK_SIZE,
|
||||
+ .setkey = chacha20_setkey,
|
||||
+ .encrypt = xchacha_mips,
|
||||
+ .decrypt = xchacha_mips,
|
||||
+ }, {
|
||||
+ .base.cra_name = "xchacha12",
|
||||
+ .base.cra_driver_name = "xchacha12-mips",
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = 1,
|
||||
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .min_keysize = CHACHA_KEY_SIZE,
|
||||
+ .max_keysize = CHACHA_KEY_SIZE,
|
||||
+ .ivsize = XCHACHA_IV_SIZE,
|
||||
+ .chunksize = CHACHA_BLOCK_SIZE,
|
||||
+ .setkey = chacha12_setkey,
|
||||
+ .encrypt = xchacha_mips,
|
||||
+ .decrypt = xchacha_mips,
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+static int __init chacha_simd_mod_init(void)
|
||||
+{
|
||||
+ return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
|
||||
+}
|
||||
+
|
||||
+static void __exit chacha_simd_mod_fini(void)
|
||||
+{
|
||||
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
|
||||
+}
|
||||
+
|
||||
+module_init(chacha_simd_mod_init);
|
||||
+module_exit(chacha_simd_mod_fini);
|
||||
+
|
||||
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)");
|
||||
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
+MODULE_LICENSE("GPL v2");
|
||||
+MODULE_ALIAS_CRYPTO("chacha20");
|
||||
+MODULE_ALIAS_CRYPTO("chacha20-mips");
|
||||
+MODULE_ALIAS_CRYPTO("xchacha20");
|
||||
+MODULE_ALIAS_CRYPTO("xchacha20-mips");
|
||||
+MODULE_ALIAS_CRYPTO("xchacha12");
|
||||
+MODULE_ALIAS_CRYPTO("xchacha12-mips");
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -1423,6 +1423,12 @@ config CRYPTO_CHACHA20_X86_64
|
||||
SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
|
||||
XChaCha20, and XChaCha12 stream ciphers.
|
||||
|
||||
+config CRYPTO_CHACHA_MIPS
|
||||
+ tristate "ChaCha stream cipher algorithms (MIPS 32r2 optimized)"
|
||||
+ depends on CPU_MIPS32_R2
|
||||
+ select CRYPTO_BLKCIPHER
|
||||
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
|
||||
+
|
||||
config CRYPTO_SEED
|
||||
tristate "SEED cipher algorithm"
|
||||
select CRYPTO_ALGAPI
|
@ -0,0 +1,115 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:18 +0100
|
||||
Subject: [PATCH] crypto: chacha - unexport chacha_generic routines
|
||||
|
||||
commit 22cf705360707ced15f9fe5423938f313c7df536 upstream.
|
||||
|
||||
Now that all users of generic ChaCha code have moved to the core library,
|
||||
there is no longer a need for the generic ChaCha skcipher driver to
|
||||
export parts of its implementation for reuse by other drivers. So drop
|
||||
the exports, and make the symbols static.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
crypto/chacha_generic.c | 26 ++++++++------------------
|
||||
include/crypto/internal/chacha.h | 10 ----------
|
||||
2 files changed, 8 insertions(+), 28 deletions(-)
|
||||
|
||||
--- a/crypto/chacha_generic.c
|
||||
+++ b/crypto/chacha_generic.c
|
||||
@@ -21,7 +21,7 @@ static int chacha_stream_xor(struct skci
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
- crypto_chacha_init(state, ctx, iv);
|
||||
+ chacha_init_generic(state, ctx->key, iv);
|
||||
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
@@ -37,36 +37,27 @@ static int chacha_stream_xor(struct skci
|
||||
return err;
|
||||
}
|
||||
|
||||
-void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv)
|
||||
-{
|
||||
- chacha_init_generic(state, ctx->key, iv);
|
||||
-}
|
||||
-EXPORT_SYMBOL_GPL(crypto_chacha_init);
|
||||
-
|
||||
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
- unsigned int keysize)
|
||||
+static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
+ unsigned int keysize)
|
||||
{
|
||||
return chacha_setkey(tfm, key, keysize, 20);
|
||||
}
|
||||
-EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
|
||||
|
||||
-int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
- unsigned int keysize)
|
||||
+static int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
+ unsigned int keysize)
|
||||
{
|
||||
return chacha_setkey(tfm, key, keysize, 12);
|
||||
}
|
||||
-EXPORT_SYMBOL_GPL(crypto_chacha12_setkey);
|
||||
|
||||
-int crypto_chacha_crypt(struct skcipher_request *req)
|
||||
+static int crypto_chacha_crypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
return chacha_stream_xor(req, ctx, req->iv);
|
||||
}
|
||||
-EXPORT_SYMBOL_GPL(crypto_chacha_crypt);
|
||||
|
||||
-int crypto_xchacha_crypt(struct skcipher_request *req)
|
||||
+static int crypto_xchacha_crypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
@@ -75,7 +66,7 @@ int crypto_xchacha_crypt(struct skcipher
|
||||
u8 real_iv[16];
|
||||
|
||||
/* Compute the subkey given the original key and first 128 nonce bits */
|
||||
- crypto_chacha_init(state, ctx, req->iv);
|
||||
+ chacha_init_generic(state, ctx->key, req->iv);
|
||||
hchacha_block_generic(state, subctx.key, ctx->nrounds);
|
||||
subctx.nrounds = ctx->nrounds;
|
||||
|
||||
@@ -86,7 +77,6 @@ int crypto_xchacha_crypt(struct skcipher
|
||||
/* Generate the stream and XOR it with the data */
|
||||
return chacha_stream_xor(req, &subctx, real_iv);
|
||||
}
|
||||
-EXPORT_SYMBOL_GPL(crypto_xchacha_crypt);
|
||||
|
||||
static struct skcipher_alg algs[] = {
|
||||
{
|
||||
--- a/include/crypto/internal/chacha.h
|
||||
+++ b/include/crypto/internal/chacha.h
|
||||
@@ -12,8 +12,6 @@ struct chacha_ctx {
|
||||
int nrounds;
|
||||
};
|
||||
|
||||
-void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv);
|
||||
-
|
||||
static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
unsigned int keysize, int nrounds)
|
||||
{
|
||||
@@ -42,12 +40,4 @@ static int inline chacha12_setkey(struct
|
||||
return chacha_setkey(tfm, key, keysize, 12);
|
||||
}
|
||||
|
||||
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
- unsigned int keysize);
|
||||
-int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
- unsigned int keysize);
|
||||
-
|
||||
-int crypto_chacha_crypt(struct skcipher_request *req);
|
||||
-int crypto_xchacha_crypt(struct skcipher_request *req);
|
||||
-
|
||||
#endif /* _CRYPTO_CHACHA_H */
|
@ -0,0 +1,649 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:19 +0100
|
||||
Subject: [PATCH] crypto: poly1305 - move core routines into a separate library
|
||||
|
||||
commit 48ea8c6ebc96bc0990e12ee1c43d0832c23576bb upstream.
|
||||
|
||||
Move the core Poly1305 routines shared between the generic Poly1305
|
||||
shash driver and the Adiantum and NHPoly1305 drivers into a separate
|
||||
library so that using just these pieces does not pull in the crypto
|
||||
API pieces of the generic Poly1305 routine.
|
||||
|
||||
In a subsequent patch, we will augment this generic library with
|
||||
init/update/final routines so that the Poly1305 algorithm can be used
|
||||
directly without the need for using the crypto API's shash abstraction.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/poly1305_glue.c | 2 +-
|
||||
crypto/Kconfig | 5 +-
|
||||
crypto/adiantum.c | 5 +-
|
||||
crypto/nhpoly1305.c | 3 +-
|
||||
crypto/poly1305_generic.c | 195 ++---------------------------
|
||||
include/crypto/internal/poly1305.h | 67 ++++++++++
|
||||
include/crypto/poly1305.h | 23 ----
|
||||
lib/crypto/Kconfig | 3 +
|
||||
lib/crypto/Makefile | 3 +
|
||||
lib/crypto/poly1305.c | 158 +++++++++++++++++++++++
|
||||
10 files changed, 248 insertions(+), 216 deletions(-)
|
||||
create mode 100644 include/crypto/internal/poly1305.h
|
||||
create mode 100644 lib/crypto/poly1305.c
|
||||
|
||||
--- a/arch/x86/crypto/poly1305_glue.c
|
||||
+++ b/arch/x86/crypto/poly1305_glue.c
|
||||
@@ -7,8 +7,8 @@
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
+#include <crypto/internal/poly1305.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
-#include <crypto/poly1305.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -446,7 +446,7 @@ config CRYPTO_KEYWRAP
|
||||
config CRYPTO_NHPOLY1305
|
||||
tristate
|
||||
select CRYPTO_HASH
|
||||
- select CRYPTO_POLY1305
|
||||
+ select CRYPTO_LIB_POLY1305_GENERIC
|
||||
|
||||
config CRYPTO_NHPOLY1305_SSE2
|
||||
tristate "NHPoly1305 hash function (x86_64 SSE2 implementation)"
|
||||
@@ -467,7 +467,7 @@ config CRYPTO_NHPOLY1305_AVX2
|
||||
config CRYPTO_ADIANTUM
|
||||
tristate "Adiantum support"
|
||||
select CRYPTO_CHACHA20
|
||||
- select CRYPTO_POLY1305
|
||||
+ select CRYPTO_LIB_POLY1305_GENERIC
|
||||
select CRYPTO_NHPOLY1305
|
||||
select CRYPTO_MANAGER
|
||||
help
|
||||
@@ -686,6 +686,7 @@ config CRYPTO_GHASH
|
||||
config CRYPTO_POLY1305
|
||||
tristate "Poly1305 authenticator algorithm"
|
||||
select CRYPTO_HASH
|
||||
+ select CRYPTO_LIB_POLY1305_GENERIC
|
||||
help
|
||||
Poly1305 authenticator algorithm, RFC7539.
|
||||
|
||||
--- a/crypto/adiantum.c
|
||||
+++ b/crypto/adiantum.c
|
||||
@@ -33,6 +33,7 @@
|
||||
#include <crypto/b128ops.h>
|
||||
#include <crypto/chacha.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
+#include <crypto/internal/poly1305.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/nhpoly1305.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
@@ -242,11 +243,11 @@ static void adiantum_hash_header(struct
|
||||
|
||||
BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0);
|
||||
poly1305_core_blocks(&state, &tctx->header_hash_key,
|
||||
- &header, sizeof(header) / POLY1305_BLOCK_SIZE);
|
||||
+ &header, sizeof(header) / POLY1305_BLOCK_SIZE, 1);
|
||||
|
||||
BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0);
|
||||
poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv,
|
||||
- TWEAK_SIZE / POLY1305_BLOCK_SIZE);
|
||||
+ TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1);
|
||||
|
||||
poly1305_core_emit(&state, &rctx->header_hash);
|
||||
}
|
||||
--- a/crypto/nhpoly1305.c
|
||||
+++ b/crypto/nhpoly1305.c
|
||||
@@ -33,6 +33,7 @@
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
+#include <crypto/internal/poly1305.h>
|
||||
#include <crypto/nhpoly1305.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/kernel.h>
|
||||
@@ -78,7 +79,7 @@ static void process_nh_hash_value(struct
|
||||
BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0);
|
||||
|
||||
poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash,
|
||||
- NH_HASH_BYTES / POLY1305_BLOCK_SIZE);
|
||||
+ NH_HASH_BYTES / POLY1305_BLOCK_SIZE, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
--- a/crypto/poly1305_generic.c
|
||||
+++ b/crypto/poly1305_generic.c
|
||||
@@ -13,27 +13,12 @@
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
-#include <crypto/poly1305.h>
|
||||
+#include <crypto/internal/poly1305.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
-static inline u64 mlt(u64 a, u64 b)
|
||||
-{
|
||||
- return a * b;
|
||||
-}
|
||||
-
|
||||
-static inline u32 sr(u64 v, u_char n)
|
||||
-{
|
||||
- return v >> n;
|
||||
-}
|
||||
-
|
||||
-static inline u32 and(u32 v, u32 mask)
|
||||
-{
|
||||
- return v & mask;
|
||||
-}
|
||||
-
|
||||
int crypto_poly1305_init(struct shash_desc *desc)
|
||||
{
|
||||
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
@@ -47,124 +32,8 @@ int crypto_poly1305_init(struct shash_de
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_poly1305_init);
|
||||
|
||||
-void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
|
||||
-{
|
||||
- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
|
||||
- key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
|
||||
- key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
|
||||
- key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
|
||||
- key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
|
||||
- key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
|
||||
-}
|
||||
-EXPORT_SYMBOL_GPL(poly1305_core_setkey);
|
||||
-
|
||||
-/*
|
||||
- * Poly1305 requires a unique key for each tag, which implies that we can't set
|
||||
- * it on the tfm that gets accessed by multiple users simultaneously. Instead we
|
||||
- * expect the key as the first 32 bytes in the update() call.
|
||||
- */
|
||||
-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
|
||||
- const u8 *src, unsigned int srclen)
|
||||
-{
|
||||
- if (!dctx->sset) {
|
||||
- if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
|
||||
- poly1305_core_setkey(&dctx->r, src);
|
||||
- src += POLY1305_BLOCK_SIZE;
|
||||
- srclen -= POLY1305_BLOCK_SIZE;
|
||||
- dctx->rset = true;
|
||||
- }
|
||||
- if (srclen >= POLY1305_BLOCK_SIZE) {
|
||||
- dctx->s[0] = get_unaligned_le32(src + 0);
|
||||
- dctx->s[1] = get_unaligned_le32(src + 4);
|
||||
- dctx->s[2] = get_unaligned_le32(src + 8);
|
||||
- dctx->s[3] = get_unaligned_le32(src + 12);
|
||||
- src += POLY1305_BLOCK_SIZE;
|
||||
- srclen -= POLY1305_BLOCK_SIZE;
|
||||
- dctx->sset = true;
|
||||
- }
|
||||
- }
|
||||
- return srclen;
|
||||
-}
|
||||
-EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey);
|
||||
-
|
||||
-static void poly1305_blocks_internal(struct poly1305_state *state,
|
||||
- const struct poly1305_key *key,
|
||||
- const void *src, unsigned int nblocks,
|
||||
- u32 hibit)
|
||||
-{
|
||||
- u32 r0, r1, r2, r3, r4;
|
||||
- u32 s1, s2, s3, s4;
|
||||
- u32 h0, h1, h2, h3, h4;
|
||||
- u64 d0, d1, d2, d3, d4;
|
||||
-
|
||||
- if (!nblocks)
|
||||
- return;
|
||||
-
|
||||
- r0 = key->r[0];
|
||||
- r1 = key->r[1];
|
||||
- r2 = key->r[2];
|
||||
- r3 = key->r[3];
|
||||
- r4 = key->r[4];
|
||||
-
|
||||
- s1 = r1 * 5;
|
||||
- s2 = r2 * 5;
|
||||
- s3 = r3 * 5;
|
||||
- s4 = r4 * 5;
|
||||
-
|
||||
- h0 = state->h[0];
|
||||
- h1 = state->h[1];
|
||||
- h2 = state->h[2];
|
||||
- h3 = state->h[3];
|
||||
- h4 = state->h[4];
|
||||
-
|
||||
- do {
|
||||
- /* h += m[i] */
|
||||
- h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
|
||||
- h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
|
||||
- h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
|
||||
- h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
|
||||
- h4 += (get_unaligned_le32(src + 12) >> 8) | hibit;
|
||||
-
|
||||
- /* h *= r */
|
||||
- d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
|
||||
- mlt(h3, s2) + mlt(h4, s1);
|
||||
- d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
|
||||
- mlt(h3, s3) + mlt(h4, s2);
|
||||
- d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
|
||||
- mlt(h3, s4) + mlt(h4, s3);
|
||||
- d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
|
||||
- mlt(h3, r0) + mlt(h4, s4);
|
||||
- d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
|
||||
- mlt(h3, r1) + mlt(h4, r0);
|
||||
-
|
||||
- /* (partial) h %= p */
|
||||
- d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
|
||||
- d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
|
||||
- d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
|
||||
- d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
|
||||
- h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
|
||||
- h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
|
||||
-
|
||||
- src += POLY1305_BLOCK_SIZE;
|
||||
- } while (--nblocks);
|
||||
-
|
||||
- state->h[0] = h0;
|
||||
- state->h[1] = h1;
|
||||
- state->h[2] = h2;
|
||||
- state->h[3] = h3;
|
||||
- state->h[4] = h4;
|
||||
-}
|
||||
-
|
||||
-void poly1305_core_blocks(struct poly1305_state *state,
|
||||
- const struct poly1305_key *key,
|
||||
- const void *src, unsigned int nblocks)
|
||||
-{
|
||||
- poly1305_blocks_internal(state, key, src, nblocks, 1 << 24);
|
||||
-}
|
||||
-EXPORT_SYMBOL_GPL(poly1305_core_blocks);
|
||||
-
|
||||
-static void poly1305_blocks(struct poly1305_desc_ctx *dctx,
|
||||
- const u8 *src, unsigned int srclen, u32 hibit)
|
||||
+static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
|
||||
+ unsigned int srclen)
|
||||
{
|
||||
unsigned int datalen;
|
||||
|
||||
@@ -174,8 +43,8 @@ static void poly1305_blocks(struct poly1
|
||||
srclen = datalen;
|
||||
}
|
||||
|
||||
- poly1305_blocks_internal(&dctx->h, &dctx->r,
|
||||
- src, srclen / POLY1305_BLOCK_SIZE, hibit);
|
||||
+ poly1305_core_blocks(&dctx->h, &dctx->r, src,
|
||||
+ srclen / POLY1305_BLOCK_SIZE, 1);
|
||||
}
|
||||
|
||||
int crypto_poly1305_update(struct shash_desc *desc,
|
||||
@@ -193,13 +62,13 @@ int crypto_poly1305_update(struct shash_
|
||||
|
||||
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
|
||||
poly1305_blocks(dctx, dctx->buf,
|
||||
- POLY1305_BLOCK_SIZE, 1 << 24);
|
||||
+ POLY1305_BLOCK_SIZE);
|
||||
dctx->buflen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
|
||||
- poly1305_blocks(dctx, src, srclen, 1 << 24);
|
||||
+ poly1305_blocks(dctx, src, srclen);
|
||||
src += srclen - (srclen % POLY1305_BLOCK_SIZE);
|
||||
srclen %= POLY1305_BLOCK_SIZE;
|
||||
}
|
||||
@@ -213,54 +82,6 @@ int crypto_poly1305_update(struct shash_
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_poly1305_update);
|
||||
|
||||
-void poly1305_core_emit(const struct poly1305_state *state, void *dst)
|
||||
-{
|
||||
- u32 h0, h1, h2, h3, h4;
|
||||
- u32 g0, g1, g2, g3, g4;
|
||||
- u32 mask;
|
||||
-
|
||||
- /* fully carry h */
|
||||
- h0 = state->h[0];
|
||||
- h1 = state->h[1];
|
||||
- h2 = state->h[2];
|
||||
- h3 = state->h[3];
|
||||
- h4 = state->h[4];
|
||||
-
|
||||
- h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
|
||||
- h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
|
||||
- h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
|
||||
- h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
|
||||
- h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
|
||||
-
|
||||
- /* compute h + -p */
|
||||
- g0 = h0 + 5;
|
||||
- g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
|
||||
- g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
|
||||
- g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
|
||||
- g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
|
||||
-
|
||||
- /* select h if h < p, or h + -p if h >= p */
|
||||
- mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
|
||||
- g0 &= mask;
|
||||
- g1 &= mask;
|
||||
- g2 &= mask;
|
||||
- g3 &= mask;
|
||||
- g4 &= mask;
|
||||
- mask = ~mask;
|
||||
- h0 = (h0 & mask) | g0;
|
||||
- h1 = (h1 & mask) | g1;
|
||||
- h2 = (h2 & mask) | g2;
|
||||
- h3 = (h3 & mask) | g3;
|
||||
- h4 = (h4 & mask) | g4;
|
||||
-
|
||||
- /* h = h % (2^128) */
|
||||
- put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
|
||||
- put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
|
||||
- put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
|
||||
- put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
|
||||
-}
|
||||
-EXPORT_SYMBOL_GPL(poly1305_core_emit);
|
||||
-
|
||||
int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
|
||||
{
|
||||
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
@@ -274,7 +95,7 @@ int crypto_poly1305_final(struct shash_d
|
||||
dctx->buf[dctx->buflen++] = 1;
|
||||
memset(dctx->buf + dctx->buflen, 0,
|
||||
POLY1305_BLOCK_SIZE - dctx->buflen);
|
||||
- poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
|
||||
+ poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0);
|
||||
}
|
||||
|
||||
poly1305_core_emit(&dctx->h, digest);
|
||||
--- /dev/null
|
||||
+++ b/include/crypto/internal/poly1305.h
|
||||
@@ -0,0 +1,67 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 */
|
||||
+/*
|
||||
+ * Common values for the Poly1305 algorithm
|
||||
+ */
|
||||
+
|
||||
+#ifndef _CRYPTO_INTERNAL_POLY1305_H
|
||||
+#define _CRYPTO_INTERNAL_POLY1305_H
|
||||
+
|
||||
+#include <asm/unaligned.h>
|
||||
+#include <linux/types.h>
|
||||
+#include <crypto/poly1305.h>
|
||||
+
|
||||
+struct shash_desc;
|
||||
+
|
||||
+/*
|
||||
+ * Poly1305 core functions. These implement the ε-almost-∆-universal hash
|
||||
+ * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
|
||||
+ * ("s key") at the end. They also only support block-aligned inputs.
|
||||
+ */
|
||||
+void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
|
||||
+static inline void poly1305_core_init(struct poly1305_state *state)
|
||||
+{
|
||||
+ *state = (struct poly1305_state){};
|
||||
+}
|
||||
+
|
||||
+void poly1305_core_blocks(struct poly1305_state *state,
|
||||
+ const struct poly1305_key *key, const void *src,
|
||||
+ unsigned int nblocks, u32 hibit);
|
||||
+void poly1305_core_emit(const struct poly1305_state *state, void *dst);
|
||||
+
|
||||
+/* Crypto API helper functions for the Poly1305 MAC */
|
||||
+int crypto_poly1305_init(struct shash_desc *desc);
|
||||
+
|
||||
+int crypto_poly1305_update(struct shash_desc *desc,
|
||||
+ const u8 *src, unsigned int srclen);
|
||||
+int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
|
||||
+
|
||||
+/*
|
||||
+ * Poly1305 requires a unique key for each tag, which implies that we can't set
|
||||
+ * it on the tfm that gets accessed by multiple users simultaneously. Instead we
|
||||
+ * expect the key as the first 32 bytes in the update() call.
|
||||
+ */
|
||||
+static inline
|
||||
+unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
|
||||
+ const u8 *src, unsigned int srclen)
|
||||
+{
|
||||
+ if (!dctx->sset) {
|
||||
+ if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
|
||||
+ poly1305_core_setkey(&dctx->r, src);
|
||||
+ src += POLY1305_BLOCK_SIZE;
|
||||
+ srclen -= POLY1305_BLOCK_SIZE;
|
||||
+ dctx->rset = true;
|
||||
+ }
|
||||
+ if (srclen >= POLY1305_BLOCK_SIZE) {
|
||||
+ dctx->s[0] = get_unaligned_le32(src + 0);
|
||||
+ dctx->s[1] = get_unaligned_le32(src + 4);
|
||||
+ dctx->s[2] = get_unaligned_le32(src + 8);
|
||||
+ dctx->s[3] = get_unaligned_le32(src + 12);
|
||||
+ src += POLY1305_BLOCK_SIZE;
|
||||
+ srclen -= POLY1305_BLOCK_SIZE;
|
||||
+ dctx->sset = true;
|
||||
+ }
|
||||
+ }
|
||||
+ return srclen;
|
||||
+}
|
||||
+
|
||||
+#endif
|
||||
--- a/include/crypto/poly1305.h
|
||||
+++ b/include/crypto/poly1305.h
|
||||
@@ -38,27 +38,4 @@ struct poly1305_desc_ctx {
|
||||
bool sset;
|
||||
};
|
||||
|
||||
-/*
|
||||
- * Poly1305 core functions. These implement the ε-almost-∆-universal hash
|
||||
- * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
|
||||
- * ("s key") at the end. They also only support block-aligned inputs.
|
||||
- */
|
||||
-void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
|
||||
-static inline void poly1305_core_init(struct poly1305_state *state)
|
||||
-{
|
||||
- memset(state->h, 0, sizeof(state->h));
|
||||
-}
|
||||
-void poly1305_core_blocks(struct poly1305_state *state,
|
||||
- const struct poly1305_key *key,
|
||||
- const void *src, unsigned int nblocks);
|
||||
-void poly1305_core_emit(const struct poly1305_state *state, void *dst);
|
||||
-
|
||||
-/* Crypto API helper functions for the Poly1305 MAC */
|
||||
-int crypto_poly1305_init(struct shash_desc *desc);
|
||||
-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
|
||||
- const u8 *src, unsigned int srclen);
|
||||
-int crypto_poly1305_update(struct shash_desc *desc,
|
||||
- const u8 *src, unsigned int srclen);
|
||||
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
|
||||
-
|
||||
#endif
|
||||
--- a/lib/crypto/Kconfig
|
||||
+++ b/lib/crypto/Kconfig
|
||||
@@ -37,5 +37,8 @@ config CRYPTO_LIB_CHACHA
|
||||
config CRYPTO_LIB_DES
|
||||
tristate
|
||||
|
||||
+config CRYPTO_LIB_POLY1305_GENERIC
|
||||
+ tristate
|
||||
+
|
||||
config CRYPTO_LIB_SHA256
|
||||
tristate
|
||||
--- a/lib/crypto/Makefile
|
||||
+++ b/lib/crypto/Makefile
|
||||
@@ -13,5 +13,8 @@ libarc4-y := arc4.o
|
||||
obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
|
||||
libdes-y := des.o
|
||||
|
||||
+obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o
|
||||
+libpoly1305-y := poly1305.o
|
||||
+
|
||||
obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
|
||||
libsha256-y := sha256.o
|
||||
--- /dev/null
|
||||
+++ b/lib/crypto/poly1305.c
|
||||
@@ -0,0 +1,158 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
+/*
|
||||
+ * Poly1305 authenticator algorithm, RFC7539
|
||||
+ *
|
||||
+ * Copyright (C) 2015 Martin Willi
|
||||
+ *
|
||||
+ * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
|
||||
+ */
|
||||
+
|
||||
+#include <crypto/internal/poly1305.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/module.h>
|
||||
+#include <asm/unaligned.h>
|
||||
+
|
||||
+static inline u64 mlt(u64 a, u64 b)
|
||||
+{
|
||||
+ return a * b;
|
||||
+}
|
||||
+
|
||||
+static inline u32 sr(u64 v, u_char n)
|
||||
+{
|
||||
+ return v >> n;
|
||||
+}
|
||||
+
|
||||
+static inline u32 and(u32 v, u32 mask)
|
||||
+{
|
||||
+ return v & mask;
|
||||
+}
|
||||
+
|
||||
+void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
|
||||
+{
|
||||
+ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
|
||||
+ key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
|
||||
+ key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
|
||||
+ key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
|
||||
+ key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
|
||||
+ key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(poly1305_core_setkey);
|
||||
+
|
||||
+void poly1305_core_blocks(struct poly1305_state *state,
|
||||
+ const struct poly1305_key *key, const void *src,
|
||||
+ unsigned int nblocks, u32 hibit)
|
||||
+{
|
||||
+ u32 r0, r1, r2, r3, r4;
|
||||
+ u32 s1, s2, s3, s4;
|
||||
+ u32 h0, h1, h2, h3, h4;
|
||||
+ u64 d0, d1, d2, d3, d4;
|
||||
+
|
||||
+ if (!nblocks)
|
||||
+ return;
|
||||
+
|
||||
+ r0 = key->r[0];
|
||||
+ r1 = key->r[1];
|
||||
+ r2 = key->r[2];
|
||||
+ r3 = key->r[3];
|
||||
+ r4 = key->r[4];
|
||||
+
|
||||
+ s1 = r1 * 5;
|
||||
+ s2 = r2 * 5;
|
||||
+ s3 = r3 * 5;
|
||||
+ s4 = r4 * 5;
|
||||
+
|
||||
+ h0 = state->h[0];
|
||||
+ h1 = state->h[1];
|
||||
+ h2 = state->h[2];
|
||||
+ h3 = state->h[3];
|
||||
+ h4 = state->h[4];
|
||||
+
|
||||
+ do {
|
||||
+ /* h += m[i] */
|
||||
+ h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
|
||||
+ h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
|
||||
+ h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
|
||||
+ h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
|
||||
+ h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24);
|
||||
+
|
||||
+ /* h *= r */
|
||||
+ d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
|
||||
+ mlt(h3, s2) + mlt(h4, s1);
|
||||
+ d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
|
||||
+ mlt(h3, s3) + mlt(h4, s2);
|
||||
+ d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
|
||||
+ mlt(h3, s4) + mlt(h4, s3);
|
||||
+ d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
|
||||
+ mlt(h3, r0) + mlt(h4, s4);
|
||||
+ d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
|
||||
+ mlt(h3, r1) + mlt(h4, r0);
|
||||
+
|
||||
+ /* (partial) h %= p */
|
||||
+ d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
|
||||
+ d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
|
||||
+ d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
|
||||
+ d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
|
||||
+ h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
|
||||
+ h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
|
||||
+
|
||||
+ src += POLY1305_BLOCK_SIZE;
|
||||
+ } while (--nblocks);
|
||||
+
|
||||
+ state->h[0] = h0;
|
||||
+ state->h[1] = h1;
|
||||
+ state->h[2] = h2;
|
||||
+ state->h[3] = h3;
|
||||
+ state->h[4] = h4;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(poly1305_core_blocks);
|
||||
+
|
||||
+void poly1305_core_emit(const struct poly1305_state *state, void *dst)
|
||||
+{
|
||||
+ u32 h0, h1, h2, h3, h4;
|
||||
+ u32 g0, g1, g2, g3, g4;
|
||||
+ u32 mask;
|
||||
+
|
||||
+ /* fully carry h */
|
||||
+ h0 = state->h[0];
|
||||
+ h1 = state->h[1];
|
||||
+ h2 = state->h[2];
|
||||
+ h3 = state->h[3];
|
||||
+ h4 = state->h[4];
|
||||
+
|
||||
+ h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
|
||||
+ h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
|
||||
+ h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
|
||||
+ h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
|
||||
+ h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
|
||||
+
|
||||
+ /* compute h + -p */
|
||||
+ g0 = h0 + 5;
|
||||
+ g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
|
||||
+ g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
|
||||
+ g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
|
||||
+ g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
|
||||
+
|
||||
+ /* select h if h < p, or h + -p if h >= p */
|
||||
+ mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
|
||||
+ g0 &= mask;
|
||||
+ g1 &= mask;
|
||||
+ g2 &= mask;
|
||||
+ g3 &= mask;
|
||||
+ g4 &= mask;
|
||||
+ mask = ~mask;
|
||||
+ h0 = (h0 & mask) | g0;
|
||||
+ h1 = (h1 & mask) | g1;
|
||||
+ h2 = (h2 & mask) | g2;
|
||||
+ h3 = (h3 & mask) | g3;
|
||||
+ h4 = (h4 & mask) | g4;
|
||||
+
|
||||
+ /* h = h % (2^128) */
|
||||
+ put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
|
||||
+ put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
|
||||
+ put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
|
||||
+ put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(poly1305_core_emit);
|
||||
+
|
||||
+MODULE_LICENSE("GPL");
|
||||
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
|
@ -0,0 +1,251 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:20 +0100
|
||||
Subject: [PATCH] crypto: x86/poly1305 - unify Poly1305 state struct with
|
||||
generic code
|
||||
|
||||
commit ad8f5b88383ea685f2b8df2a12ee3e08089a1287 upstream.
|
||||
|
||||
In preparation of exposing a Poly1305 library interface directly from
|
||||
the accelerated x86 driver, align the state descriptor of the x86 code
|
||||
with the one used by the generic driver. This is needed to make the
|
||||
library interface unified between all implementations.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/poly1305_glue.c | 88 ++++++++++--------------------
|
||||
crypto/poly1305_generic.c | 6 +-
|
||||
include/crypto/internal/poly1305.h | 4 +-
|
||||
include/crypto/poly1305.h | 18 +++---
|
||||
4 files changed, 43 insertions(+), 73 deletions(-)
|
||||
|
||||
--- a/arch/x86/crypto/poly1305_glue.c
|
||||
+++ b/arch/x86/crypto/poly1305_glue.c
|
||||
@@ -14,40 +14,14 @@
|
||||
#include <linux/module.h>
|
||||
#include <asm/simd.h>
|
||||
|
||||
-struct poly1305_simd_desc_ctx {
|
||||
- struct poly1305_desc_ctx base;
|
||||
- /* derived key u set? */
|
||||
- bool uset;
|
||||
-#ifdef CONFIG_AS_AVX2
|
||||
- /* derived keys r^3, r^4 set? */
|
||||
- bool wset;
|
||||
-#endif
|
||||
- /* derived Poly1305 key r^2 */
|
||||
- u32 u[5];
|
||||
- /* ... silently appended r^3 and r^4 when using AVX2 */
|
||||
-};
|
||||
-
|
||||
asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
|
||||
const u32 *r, unsigned int blocks);
|
||||
asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
|
||||
unsigned int blocks, const u32 *u);
|
||||
-#ifdef CONFIG_AS_AVX2
|
||||
asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
|
||||
unsigned int blocks, const u32 *u);
|
||||
-static bool poly1305_use_avx2;
|
||||
-#endif
|
||||
|
||||
-static int poly1305_simd_init(struct shash_desc *desc)
|
||||
-{
|
||||
- struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
|
||||
-
|
||||
- sctx->uset = false;
|
||||
-#ifdef CONFIG_AS_AVX2
|
||||
- sctx->wset = false;
|
||||
-#endif
|
||||
-
|
||||
- return crypto_poly1305_init(desc);
|
||||
-}
|
||||
+static bool poly1305_use_avx2 __ro_after_init;
|
||||
|
||||
static void poly1305_simd_mult(u32 *a, const u32 *b)
|
||||
{
|
||||
@@ -63,53 +37,49 @@ static void poly1305_simd_mult(u32 *a, c
|
||||
static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
|
||||
const u8 *src, unsigned int srclen)
|
||||
{
|
||||
- struct poly1305_simd_desc_ctx *sctx;
|
||||
unsigned int blocks, datalen;
|
||||
|
||||
- BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
|
||||
- sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
|
||||
-
|
||||
if (unlikely(!dctx->sset)) {
|
||||
datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
|
||||
src += srclen - datalen;
|
||||
srclen = datalen;
|
||||
}
|
||||
|
||||
-#ifdef CONFIG_AS_AVX2
|
||||
- if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
|
||||
- if (unlikely(!sctx->wset)) {
|
||||
- if (!sctx->uset) {
|
||||
- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
|
||||
- poly1305_simd_mult(sctx->u, dctx->r.r);
|
||||
- sctx->uset = true;
|
||||
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
|
||||
+ poly1305_use_avx2 &&
|
||||
+ srclen >= POLY1305_BLOCK_SIZE * 4) {
|
||||
+ if (unlikely(dctx->rset < 4)) {
|
||||
+ if (dctx->rset < 2) {
|
||||
+ dctx->r[1] = dctx->r[0];
|
||||
+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
|
||||
}
|
||||
- memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
|
||||
- poly1305_simd_mult(sctx->u + 5, dctx->r.r);
|
||||
- memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
|
||||
- poly1305_simd_mult(sctx->u + 10, dctx->r.r);
|
||||
- sctx->wset = true;
|
||||
+ dctx->r[2] = dctx->r[1];
|
||||
+ poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
|
||||
+ dctx->r[3] = dctx->r[2];
|
||||
+ poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
|
||||
+ dctx->rset = 4;
|
||||
}
|
||||
blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
|
||||
- poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
|
||||
- sctx->u);
|
||||
+ poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
|
||||
+ dctx->r[1].r);
|
||||
src += POLY1305_BLOCK_SIZE * 4 * blocks;
|
||||
srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
|
||||
}
|
||||
-#endif
|
||||
+
|
||||
if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
|
||||
- if (unlikely(!sctx->uset)) {
|
||||
- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
|
||||
- poly1305_simd_mult(sctx->u, dctx->r.r);
|
||||
- sctx->uset = true;
|
||||
+ if (unlikely(dctx->rset < 2)) {
|
||||
+ dctx->r[1] = dctx->r[0];
|
||||
+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
|
||||
+ dctx->rset = 2;
|
||||
}
|
||||
blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
|
||||
- poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
|
||||
- sctx->u);
|
||||
+ poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
|
||||
+ blocks, dctx->r[1].r);
|
||||
src += POLY1305_BLOCK_SIZE * 2 * blocks;
|
||||
srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
|
||||
}
|
||||
if (srclen >= POLY1305_BLOCK_SIZE) {
|
||||
- poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
|
||||
+ poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
|
||||
srclen -= POLY1305_BLOCK_SIZE;
|
||||
}
|
||||
return srclen;
|
||||
@@ -159,10 +129,10 @@ static int poly1305_simd_update(struct s
|
||||
|
||||
static struct shash_alg alg = {
|
||||
.digestsize = POLY1305_DIGEST_SIZE,
|
||||
- .init = poly1305_simd_init,
|
||||
+ .init = crypto_poly1305_init,
|
||||
.update = poly1305_simd_update,
|
||||
.final = crypto_poly1305_final,
|
||||
- .descsize = sizeof(struct poly1305_simd_desc_ctx),
|
||||
+ .descsize = sizeof(struct poly1305_desc_ctx),
|
||||
.base = {
|
||||
.cra_name = "poly1305",
|
||||
.cra_driver_name = "poly1305-simd",
|
||||
@@ -177,14 +147,14 @@ static int __init poly1305_simd_mod_init
|
||||
if (!boot_cpu_has(X86_FEATURE_XMM2))
|
||||
return -ENODEV;
|
||||
|
||||
-#ifdef CONFIG_AS_AVX2
|
||||
- poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
|
||||
+ poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) &&
|
||||
+ boot_cpu_has(X86_FEATURE_AVX) &&
|
||||
boot_cpu_has(X86_FEATURE_AVX2) &&
|
||||
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
|
||||
- alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
|
||||
+ alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32);
|
||||
if (poly1305_use_avx2)
|
||||
alg.descsize += 10 * sizeof(u32);
|
||||
-#endif
|
||||
+
|
||||
return crypto_register_shash(&alg);
|
||||
}
|
||||
|
||||
--- a/crypto/poly1305_generic.c
|
||||
+++ b/crypto/poly1305_generic.c
|
||||
@@ -25,7 +25,7 @@ int crypto_poly1305_init(struct shash_de
|
||||
|
||||
poly1305_core_init(&dctx->h);
|
||||
dctx->buflen = 0;
|
||||
- dctx->rset = false;
|
||||
+ dctx->rset = 0;
|
||||
dctx->sset = false;
|
||||
|
||||
return 0;
|
||||
@@ -43,7 +43,7 @@ static void poly1305_blocks(struct poly1
|
||||
srclen = datalen;
|
||||
}
|
||||
|
||||
- poly1305_core_blocks(&dctx->h, &dctx->r, src,
|
||||
+ poly1305_core_blocks(&dctx->h, dctx->r, src,
|
||||
srclen / POLY1305_BLOCK_SIZE, 1);
|
||||
}
|
||||
|
||||
@@ -95,7 +95,7 @@ int crypto_poly1305_final(struct shash_d
|
||||
dctx->buf[dctx->buflen++] = 1;
|
||||
memset(dctx->buf + dctx->buflen, 0,
|
||||
POLY1305_BLOCK_SIZE - dctx->buflen);
|
||||
- poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0);
|
||||
+ poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0);
|
||||
}
|
||||
|
||||
poly1305_core_emit(&dctx->h, digest);
|
||||
--- a/include/crypto/internal/poly1305.h
|
||||
+++ b/include/crypto/internal/poly1305.h
|
||||
@@ -46,10 +46,10 @@ unsigned int crypto_poly1305_setdesckey(
|
||||
{
|
||||
if (!dctx->sset) {
|
||||
if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
|
||||
- poly1305_core_setkey(&dctx->r, src);
|
||||
+ poly1305_core_setkey(dctx->r, src);
|
||||
src += POLY1305_BLOCK_SIZE;
|
||||
srclen -= POLY1305_BLOCK_SIZE;
|
||||
- dctx->rset = true;
|
||||
+ dctx->rset = 1;
|
||||
}
|
||||
if (srclen >= POLY1305_BLOCK_SIZE) {
|
||||
dctx->s[0] = get_unaligned_le32(src + 0);
|
||||
--- a/include/crypto/poly1305.h
|
||||
+++ b/include/crypto/poly1305.h
|
||||
@@ -22,20 +22,20 @@ struct poly1305_state {
|
||||
};
|
||||
|
||||
struct poly1305_desc_ctx {
|
||||
- /* key */
|
||||
- struct poly1305_key r;
|
||||
- /* finalize key */
|
||||
- u32 s[4];
|
||||
- /* accumulator */
|
||||
- struct poly1305_state h;
|
||||
/* partial buffer */
|
||||
u8 buf[POLY1305_BLOCK_SIZE];
|
||||
/* bytes used in partial buffer */
|
||||
unsigned int buflen;
|
||||
- /* r key has been set */
|
||||
- bool rset;
|
||||
- /* s key has been set */
|
||||
+ /* how many keys have been set in r[] */
|
||||
+ unsigned short rset;
|
||||
+ /* whether s[] has been set */
|
||||
bool sset;
|
||||
+ /* finalize key */
|
||||
+ u32 s[4];
|
||||
+ /* accumulator */
|
||||
+ struct poly1305_state h;
|
||||
+ /* key */
|
||||
+ struct poly1305_key r[1];
|
||||
};
|
||||
|
||||
#endif
|
@ -0,0 +1,224 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:21 +0100
|
||||
Subject: [PATCH] crypto: poly1305 - expose init/update/final library interface
|
||||
|
||||
commit a1d93064094cc5e24d64e35cf093e7191d0c9344 upstream.
|
||||
|
||||
Expose the existing generic Poly1305 code via an init/update/final
|
||||
library interface so that callers are not required to go through
|
||||
the crypto API's shash abstraction to access it. At the same time,
|
||||
make some preparations so that the library implementation can be
|
||||
superseded by an accelerated arch-specific version in the future.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
crypto/poly1305_generic.c | 22 +-----------
|
||||
include/crypto/poly1305.h | 38 +++++++++++++++++++-
|
||||
lib/crypto/Kconfig | 26 ++++++++++++++
|
||||
lib/crypto/poly1305.c | 74 +++++++++++++++++++++++++++++++++++++++
|
||||
4 files changed, 138 insertions(+), 22 deletions(-)
|
||||
|
||||
--- a/crypto/poly1305_generic.c
|
||||
+++ b/crypto/poly1305_generic.c
|
||||
@@ -85,31 +85,11 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_update
|
||||
int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
|
||||
{
|
||||
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
- __le32 digest[4];
|
||||
- u64 f = 0;
|
||||
|
||||
if (unlikely(!dctx->sset))
|
||||
return -ENOKEY;
|
||||
|
||||
- if (unlikely(dctx->buflen)) {
|
||||
- dctx->buf[dctx->buflen++] = 1;
|
||||
- memset(dctx->buf + dctx->buflen, 0,
|
||||
- POLY1305_BLOCK_SIZE - dctx->buflen);
|
||||
- poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0);
|
||||
- }
|
||||
-
|
||||
- poly1305_core_emit(&dctx->h, digest);
|
||||
-
|
||||
- /* mac = (h + s) % (2^128) */
|
||||
- f = (f >> 32) + le32_to_cpu(digest[0]) + dctx->s[0];
|
||||
- put_unaligned_le32(f, dst + 0);
|
||||
- f = (f >> 32) + le32_to_cpu(digest[1]) + dctx->s[1];
|
||||
- put_unaligned_le32(f, dst + 4);
|
||||
- f = (f >> 32) + le32_to_cpu(digest[2]) + dctx->s[2];
|
||||
- put_unaligned_le32(f, dst + 8);
|
||||
- f = (f >> 32) + le32_to_cpu(digest[3]) + dctx->s[3];
|
||||
- put_unaligned_le32(f, dst + 12);
|
||||
-
|
||||
+ poly1305_final_generic(dctx, dst);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_poly1305_final);
|
||||
--- a/include/crypto/poly1305.h
|
||||
+++ b/include/crypto/poly1305.h
|
||||
@@ -35,7 +35,43 @@ struct poly1305_desc_ctx {
|
||||
/* accumulator */
|
||||
struct poly1305_state h;
|
||||
/* key */
|
||||
- struct poly1305_key r[1];
|
||||
+ struct poly1305_key r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE];
|
||||
};
|
||||
|
||||
+void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key);
|
||||
+void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key);
|
||||
+
|
||||
+static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key)
|
||||
+{
|
||||
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
|
||||
+ poly1305_init_arch(desc, key);
|
||||
+ else
|
||||
+ poly1305_init_generic(desc, key);
|
||||
+}
|
||||
+
|
||||
+void poly1305_update_arch(struct poly1305_desc_ctx *desc, const u8 *src,
|
||||
+ unsigned int nbytes);
|
||||
+void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src,
|
||||
+ unsigned int nbytes);
|
||||
+
|
||||
+static inline void poly1305_update(struct poly1305_desc_ctx *desc,
|
||||
+ const u8 *src, unsigned int nbytes)
|
||||
+{
|
||||
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
|
||||
+ poly1305_update_arch(desc, src, nbytes);
|
||||
+ else
|
||||
+ poly1305_update_generic(desc, src, nbytes);
|
||||
+}
|
||||
+
|
||||
+void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest);
|
||||
+void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *digest);
|
||||
+
|
||||
+static inline void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest)
|
||||
+{
|
||||
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
|
||||
+ poly1305_final_arch(desc, digest);
|
||||
+ else
|
||||
+ poly1305_final_generic(desc, digest);
|
||||
+}
|
||||
+
|
||||
#endif
|
||||
--- a/lib/crypto/Kconfig
|
||||
+++ b/lib/crypto/Kconfig
|
||||
@@ -37,8 +37,34 @@ config CRYPTO_LIB_CHACHA
|
||||
config CRYPTO_LIB_DES
|
||||
tristate
|
||||
|
||||
+config CRYPTO_LIB_POLY1305_RSIZE
|
||||
+ int
|
||||
+ default 1
|
||||
+
|
||||
+config CRYPTO_ARCH_HAVE_LIB_POLY1305
|
||||
+ tristate
|
||||
+ help
|
||||
+ Declares whether the architecture provides an arch-specific
|
||||
+ accelerated implementation of the Poly1305 library interface,
|
||||
+ either builtin or as a module.
|
||||
+
|
||||
config CRYPTO_LIB_POLY1305_GENERIC
|
||||
tristate
|
||||
+ help
|
||||
+ This symbol can be depended upon by arch implementations of the
|
||||
+ Poly1305 library interface that require the generic code as a
|
||||
+ fallback, e.g., for SIMD implementations. If no arch specific
|
||||
+ implementation is enabled, this implementation serves the users
|
||||
+ of CRYPTO_LIB_POLY1305.
|
||||
+
|
||||
+config CRYPTO_LIB_POLY1305
|
||||
+ tristate "Poly1305 library interface"
|
||||
+ depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305
|
||||
+ select CRYPTO_LIB_POLY1305_GENERIC if CRYPTO_ARCH_HAVE_LIB_POLY1305=n
|
||||
+ help
|
||||
+ Enable the Poly1305 library interface. This interface may be fulfilled
|
||||
+ by either the generic implementation or an arch-specific one, if one
|
||||
+ is available and enabled.
|
||||
|
||||
config CRYPTO_LIB_SHA256
|
||||
tristate
|
||||
--- a/lib/crypto/poly1305.c
|
||||
+++ b/lib/crypto/poly1305.c
|
||||
@@ -154,5 +154,79 @@ void poly1305_core_emit(const struct pol
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(poly1305_core_emit);
|
||||
|
||||
+void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key)
|
||||
+{
|
||||
+ poly1305_core_setkey(desc->r, key);
|
||||
+ desc->s[0] = get_unaligned_le32(key + 16);
|
||||
+ desc->s[1] = get_unaligned_le32(key + 20);
|
||||
+ desc->s[2] = get_unaligned_le32(key + 24);
|
||||
+ desc->s[3] = get_unaligned_le32(key + 28);
|
||||
+ poly1305_core_init(&desc->h);
|
||||
+ desc->buflen = 0;
|
||||
+ desc->sset = true;
|
||||
+ desc->rset = 1;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(poly1305_init_generic);
|
||||
+
|
||||
+void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src,
|
||||
+ unsigned int nbytes)
|
||||
+{
|
||||
+ unsigned int bytes;
|
||||
+
|
||||
+ if (unlikely(desc->buflen)) {
|
||||
+ bytes = min(nbytes, POLY1305_BLOCK_SIZE - desc->buflen);
|
||||
+ memcpy(desc->buf + desc->buflen, src, bytes);
|
||||
+ src += bytes;
|
||||
+ nbytes -= bytes;
|
||||
+ desc->buflen += bytes;
|
||||
+
|
||||
+ if (desc->buflen == POLY1305_BLOCK_SIZE) {
|
||||
+ poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 1);
|
||||
+ desc->buflen = 0;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
|
||||
+ poly1305_core_blocks(&desc->h, desc->r, src,
|
||||
+ nbytes / POLY1305_BLOCK_SIZE, 1);
|
||||
+ src += nbytes - (nbytes % POLY1305_BLOCK_SIZE);
|
||||
+ nbytes %= POLY1305_BLOCK_SIZE;
|
||||
+ }
|
||||
+
|
||||
+ if (unlikely(nbytes)) {
|
||||
+ desc->buflen = nbytes;
|
||||
+ memcpy(desc->buf, src, nbytes);
|
||||
+ }
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(poly1305_update_generic);
|
||||
+
|
||||
+void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst)
|
||||
+{
|
||||
+ __le32 digest[4];
|
||||
+ u64 f = 0;
|
||||
+
|
||||
+ if (unlikely(desc->buflen)) {
|
||||
+ desc->buf[desc->buflen++] = 1;
|
||||
+ memset(desc->buf + desc->buflen, 0,
|
||||
+ POLY1305_BLOCK_SIZE - desc->buflen);
|
||||
+ poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 0);
|
||||
+ }
|
||||
+
|
||||
+ poly1305_core_emit(&desc->h, digest);
|
||||
+
|
||||
+ /* mac = (h + s) % (2^128) */
|
||||
+ f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0];
|
||||
+ put_unaligned_le32(f, dst + 0);
|
||||
+ f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1];
|
||||
+ put_unaligned_le32(f, dst + 4);
|
||||
+ f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2];
|
||||
+ put_unaligned_le32(f, dst + 8);
|
||||
+ f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3];
|
||||
+ put_unaligned_le32(f, dst + 12);
|
||||
+
|
||||
+ *desc = (struct poly1305_desc_ctx){};
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(poly1305_final_generic);
|
||||
+
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
|
@ -0,0 +1,217 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:22 +0100
|
||||
Subject: [PATCH] crypto: x86/poly1305 - depend on generic library not generic
|
||||
shash
|
||||
|
||||
commit 1b2c6a5120489d41c8ea3b8dacd0b4586289b158 upstream.
|
||||
|
||||
Remove the dependency on the generic Poly1305 driver. Instead, depend
|
||||
on the generic library so that we only reuse code without pulling in
|
||||
the generic shash implementation as well.
|
||||
|
||||
While at it, remove the logic that prefers the non-SIMD path for short
|
||||
inputs - this is no longer necessary after recent FPU handling changes
|
||||
on x86.
|
||||
|
||||
Since this removes the last remaining user of the routines exported
|
||||
by the generic shash driver, unexport them and make them static.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/poly1305_glue.c | 66 +++++++++++++++++++++++++-----
|
||||
crypto/Kconfig | 2 +-
|
||||
crypto/poly1305_generic.c | 11 ++---
|
||||
include/crypto/internal/poly1305.h | 9 ----
|
||||
4 files changed, 60 insertions(+), 28 deletions(-)
|
||||
|
||||
--- a/arch/x86/crypto/poly1305_glue.c
|
||||
+++ b/arch/x86/crypto/poly1305_glue.c
|
||||
@@ -34,6 +34,24 @@ static void poly1305_simd_mult(u32 *a, c
|
||||
poly1305_block_sse2(a, m, b, 1);
|
||||
}
|
||||
|
||||
+static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
|
||||
+ const u8 *src, unsigned int srclen)
|
||||
+{
|
||||
+ unsigned int datalen;
|
||||
+
|
||||
+ if (unlikely(!dctx->sset)) {
|
||||
+ datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
|
||||
+ src += srclen - datalen;
|
||||
+ srclen = datalen;
|
||||
+ }
|
||||
+ if (srclen >= POLY1305_BLOCK_SIZE) {
|
||||
+ poly1305_core_blocks(&dctx->h, dctx->r, src,
|
||||
+ srclen / POLY1305_BLOCK_SIZE, 1);
|
||||
+ srclen %= POLY1305_BLOCK_SIZE;
|
||||
+ }
|
||||
+ return srclen;
|
||||
+}
|
||||
+
|
||||
static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
|
||||
const u8 *src, unsigned int srclen)
|
||||
{
|
||||
@@ -91,12 +109,6 @@ static int poly1305_simd_update(struct s
|
||||
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
unsigned int bytes;
|
||||
|
||||
- /* kernel_fpu_begin/end is costly, use fallback for small updates */
|
||||
- if (srclen <= 288 || !crypto_simd_usable())
|
||||
- return crypto_poly1305_update(desc, src, srclen);
|
||||
-
|
||||
- kernel_fpu_begin();
|
||||
-
|
||||
if (unlikely(dctx->buflen)) {
|
||||
bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
|
||||
memcpy(dctx->buf + dctx->buflen, src, bytes);
|
||||
@@ -105,25 +117,57 @@ static int poly1305_simd_update(struct s
|
||||
dctx->buflen += bytes;
|
||||
|
||||
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
|
||||
- poly1305_simd_blocks(dctx, dctx->buf,
|
||||
- POLY1305_BLOCK_SIZE);
|
||||
+ if (likely(crypto_simd_usable())) {
|
||||
+ kernel_fpu_begin();
|
||||
+ poly1305_simd_blocks(dctx, dctx->buf,
|
||||
+ POLY1305_BLOCK_SIZE);
|
||||
+ kernel_fpu_end();
|
||||
+ } else {
|
||||
+ poly1305_scalar_blocks(dctx, dctx->buf,
|
||||
+ POLY1305_BLOCK_SIZE);
|
||||
+ }
|
||||
dctx->buflen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
|
||||
- bytes = poly1305_simd_blocks(dctx, src, srclen);
|
||||
+ if (likely(crypto_simd_usable())) {
|
||||
+ kernel_fpu_begin();
|
||||
+ bytes = poly1305_simd_blocks(dctx, src, srclen);
|
||||
+ kernel_fpu_end();
|
||||
+ } else {
|
||||
+ bytes = poly1305_scalar_blocks(dctx, src, srclen);
|
||||
+ }
|
||||
src += srclen - bytes;
|
||||
srclen = bytes;
|
||||
}
|
||||
|
||||
- kernel_fpu_end();
|
||||
-
|
||||
if (unlikely(srclen)) {
|
||||
dctx->buflen = srclen;
|
||||
memcpy(dctx->buf, src, srclen);
|
||||
}
|
||||
+}
|
||||
+
|
||||
+static int crypto_poly1305_init(struct shash_desc *desc)
|
||||
+{
|
||||
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
+
|
||||
+ poly1305_core_init(&dctx->h);
|
||||
+ dctx->buflen = 0;
|
||||
+ dctx->rset = 0;
|
||||
+ dctx->sset = false;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
|
||||
+{
|
||||
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
+
|
||||
+ if (unlikely(!dctx->sset))
|
||||
+ return -ENOKEY;
|
||||
|
||||
+ poly1305_final_generic(dctx, dst);
|
||||
return 0;
|
||||
}
|
||||
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -697,7 +697,7 @@ config CRYPTO_POLY1305
|
||||
config CRYPTO_POLY1305_X86_64
|
||||
tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)"
|
||||
depends on X86 && 64BIT
|
||||
- select CRYPTO_POLY1305
|
||||
+ select CRYPTO_LIB_POLY1305_GENERIC
|
||||
help
|
||||
Poly1305 authenticator algorithm, RFC7539.
|
||||
|
||||
--- a/crypto/poly1305_generic.c
|
||||
+++ b/crypto/poly1305_generic.c
|
||||
@@ -19,7 +19,7 @@
|
||||
#include <linux/module.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
-int crypto_poly1305_init(struct shash_desc *desc)
|
||||
+static int crypto_poly1305_init(struct shash_desc *desc)
|
||||
{
|
||||
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
|
||||
@@ -30,7 +30,6 @@ int crypto_poly1305_init(struct shash_de
|
||||
|
||||
return 0;
|
||||
}
|
||||
-EXPORT_SYMBOL_GPL(crypto_poly1305_init);
|
||||
|
||||
static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
|
||||
unsigned int srclen)
|
||||
@@ -47,8 +46,8 @@ static void poly1305_blocks(struct poly1
|
||||
srclen / POLY1305_BLOCK_SIZE, 1);
|
||||
}
|
||||
|
||||
-int crypto_poly1305_update(struct shash_desc *desc,
|
||||
- const u8 *src, unsigned int srclen)
|
||||
+static int crypto_poly1305_update(struct shash_desc *desc,
|
||||
+ const u8 *src, unsigned int srclen)
|
||||
{
|
||||
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
unsigned int bytes;
|
||||
@@ -80,9 +79,8 @@ int crypto_poly1305_update(struct shash_
|
||||
|
||||
return 0;
|
||||
}
|
||||
-EXPORT_SYMBOL_GPL(crypto_poly1305_update);
|
||||
|
||||
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
|
||||
+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
|
||||
{
|
||||
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
|
||||
@@ -92,7 +90,6 @@ int crypto_poly1305_final(struct shash_d
|
||||
poly1305_final_generic(dctx, dst);
|
||||
return 0;
|
||||
}
|
||||
-EXPORT_SYMBOL_GPL(crypto_poly1305_final);
|
||||
|
||||
static struct shash_alg poly1305_alg = {
|
||||
.digestsize = POLY1305_DIGEST_SIZE,
|
||||
--- a/include/crypto/internal/poly1305.h
|
||||
+++ b/include/crypto/internal/poly1305.h
|
||||
@@ -10,8 +10,6 @@
|
||||
#include <linux/types.h>
|
||||
#include <crypto/poly1305.h>
|
||||
|
||||
-struct shash_desc;
|
||||
-
|
||||
/*
|
||||
* Poly1305 core functions. These implement the ε-almost-∆-universal hash
|
||||
* function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
|
||||
@@ -28,13 +26,6 @@ void poly1305_core_blocks(struct poly130
|
||||
unsigned int nblocks, u32 hibit);
|
||||
void poly1305_core_emit(const struct poly1305_state *state, void *dst);
|
||||
|
||||
-/* Crypto API helper functions for the Poly1305 MAC */
|
||||
-int crypto_poly1305_init(struct shash_desc *desc);
|
||||
-
|
||||
-int crypto_poly1305_update(struct shash_desc *desc,
|
||||
- const u8 *src, unsigned int srclen);
|
||||
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
|
||||
-
|
||||
/*
|
||||
* Poly1305 requires a unique key for each tag, which implies that we can't set
|
||||
* it on the tfm that gets accessed by multiple users simultaneously. Instead we
|
@ -0,0 +1,163 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:23 +0100
|
||||
Subject: [PATCH] crypto: x86/poly1305 - expose existing driver as poly1305
|
||||
library
|
||||
|
||||
commit f0e89bcfbb894e5844cd1bbf6b3cf7c63cb0f5ac upstream.
|
||||
|
||||
Implement the arch init/update/final Poly1305 library routines in the
|
||||
accelerated SIMD driver for x86 so they are accessible to users of
|
||||
the Poly1305 library interface as well.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/poly1305_glue.c | 57 ++++++++++++++++++++++++---------
|
||||
crypto/Kconfig | 1 +
|
||||
lib/crypto/Kconfig | 1 +
|
||||
3 files changed, 43 insertions(+), 16 deletions(-)
|
||||
|
||||
--- a/arch/x86/crypto/poly1305_glue.c
|
||||
+++ b/arch/x86/crypto/poly1305_glue.c
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <crypto/internal/poly1305.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <linux/crypto.h>
|
||||
+#include <linux/jump_label.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/simd.h>
|
||||
@@ -21,7 +22,8 @@ asmlinkage void poly1305_2block_sse2(u32
|
||||
asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
|
||||
unsigned int blocks, const u32 *u);
|
||||
|
||||
-static bool poly1305_use_avx2 __ro_after_init;
|
||||
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd);
|
||||
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
|
||||
|
||||
static void poly1305_simd_mult(u32 *a, const u32 *b)
|
||||
{
|
||||
@@ -64,7 +66,7 @@ static unsigned int poly1305_simd_blocks
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_AS_AVX2) &&
|
||||
- poly1305_use_avx2 &&
|
||||
+ static_branch_likely(&poly1305_use_avx2) &&
|
||||
srclen >= POLY1305_BLOCK_SIZE * 4) {
|
||||
if (unlikely(dctx->rset < 4)) {
|
||||
if (dctx->rset < 2) {
|
||||
@@ -103,10 +105,15 @@ static unsigned int poly1305_simd_blocks
|
||||
return srclen;
|
||||
}
|
||||
|
||||
-static int poly1305_simd_update(struct shash_desc *desc,
|
||||
- const u8 *src, unsigned int srclen)
|
||||
+void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
|
||||
+{
|
||||
+ poly1305_init_generic(desc, key);
|
||||
+}
|
||||
+EXPORT_SYMBOL(poly1305_init_arch);
|
||||
+
|
||||
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
|
||||
+ unsigned int srclen)
|
||||
{
|
||||
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
unsigned int bytes;
|
||||
|
||||
if (unlikely(dctx->buflen)) {
|
||||
@@ -117,7 +124,8 @@ static int poly1305_simd_update(struct s
|
||||
dctx->buflen += bytes;
|
||||
|
||||
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
|
||||
- if (likely(crypto_simd_usable())) {
|
||||
+ if (static_branch_likely(&poly1305_use_simd) &&
|
||||
+ likely(crypto_simd_usable())) {
|
||||
kernel_fpu_begin();
|
||||
poly1305_simd_blocks(dctx, dctx->buf,
|
||||
POLY1305_BLOCK_SIZE);
|
||||
@@ -131,7 +139,8 @@ static int poly1305_simd_update(struct s
|
||||
}
|
||||
|
||||
if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
|
||||
- if (likely(crypto_simd_usable())) {
|
||||
+ if (static_branch_likely(&poly1305_use_simd) &&
|
||||
+ likely(crypto_simd_usable())) {
|
||||
kernel_fpu_begin();
|
||||
bytes = poly1305_simd_blocks(dctx, src, srclen);
|
||||
kernel_fpu_end();
|
||||
@@ -147,6 +156,13 @@ static int poly1305_simd_update(struct s
|
||||
memcpy(dctx->buf, src, srclen);
|
||||
}
|
||||
}
|
||||
+EXPORT_SYMBOL(poly1305_update_arch);
|
||||
+
|
||||
+void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest)
|
||||
+{
|
||||
+ poly1305_final_generic(desc, digest);
|
||||
+}
|
||||
+EXPORT_SYMBOL(poly1305_final_arch);
|
||||
|
||||
static int crypto_poly1305_init(struct shash_desc *desc)
|
||||
{
|
||||
@@ -171,6 +187,15 @@ static int crypto_poly1305_final(struct
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int poly1305_simd_update(struct shash_desc *desc,
|
||||
+ const u8 *src, unsigned int srclen)
|
||||
+{
|
||||
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
+
|
||||
+ poly1305_update_arch(dctx, src, srclen);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static struct shash_alg alg = {
|
||||
.digestsize = POLY1305_DIGEST_SIZE,
|
||||
.init = crypto_poly1305_init,
|
||||
@@ -189,15 +214,15 @@ static struct shash_alg alg = {
|
||||
static int __init poly1305_simd_mod_init(void)
|
||||
{
|
||||
if (!boot_cpu_has(X86_FEATURE_XMM2))
|
||||
- return -ENODEV;
|
||||
+ return 0;
|
||||
|
||||
- poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) &&
|
||||
- boot_cpu_has(X86_FEATURE_AVX) &&
|
||||
- boot_cpu_has(X86_FEATURE_AVX2) &&
|
||||
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
|
||||
- alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32);
|
||||
- if (poly1305_use_avx2)
|
||||
- alg.descsize += 10 * sizeof(u32);
|
||||
+ static_branch_enable(&poly1305_use_simd);
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
|
||||
+ boot_cpu_has(X86_FEATURE_AVX) &&
|
||||
+ boot_cpu_has(X86_FEATURE_AVX2) &&
|
||||
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
|
||||
+ static_branch_enable(&poly1305_use_avx2);
|
||||
|
||||
return crypto_register_shash(&alg);
|
||||
}
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -698,6 +698,7 @@ config CRYPTO_POLY1305_X86_64
|
||||
tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_LIB_POLY1305_GENERIC
|
||||
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
|
||||
help
|
||||
Poly1305 authenticator algorithm, RFC7539.
|
||||
|
||||
--- a/lib/crypto/Kconfig
|
||||
+++ b/lib/crypto/Kconfig
|
||||
@@ -39,6 +39,7 @@ config CRYPTO_LIB_DES
|
||||
|
||||
config CRYPTO_LIB_POLY1305_RSIZE
|
||||
int
|
||||
+ default 4 if X86_64
|
||||
default 1
|
||||
|
||||
config CRYPTO_ARCH_HAVE_LIB_POLY1305
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,322 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:29 +0100
|
||||
Subject: [PATCH] crypto: testmgr - add test cases for Blake2s
|
||||
|
||||
commit 17e1df67023a5c9ccaeb5de8bf5b88f63127ecf7 upstream.
|
||||
|
||||
As suggested by Eric for the Blake2b implementation contributed by
|
||||
David, introduce a set of test vectors for Blake2s covering different
|
||||
digest and key sizes.
|
||||
|
||||
blake2s-128 blake2s-160 blake2s-224 blake2s-256
|
||||
---------------------------------------------------
|
||||
len=0 | klen=0 klen=1 klen=16 klen=32
|
||||
len=1 | klen=16 klen=32 klen=0 klen=1
|
||||
len=7 | klen=32 klen=0 klen=1 klen=16
|
||||
len=15 | klen=1 klen=16 klen=32 klen=0
|
||||
len=64 | klen=0 klen=1 klen=16 klen=32
|
||||
len=247 | klen=16 klen=32 klen=0 klen=1
|
||||
len=256 | klen=32 klen=0 klen=1 klen=16
|
||||
|
||||
Cc: David Sterba <dsterba@suse.com>
|
||||
Cc: Eric Biggers <ebiggers@google.com>
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
crypto/testmgr.c | 24 +++++
|
||||
crypto/testmgr.h | 251 +++++++++++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 275 insertions(+)
|
||||
|
||||
--- a/crypto/testmgr.c
|
||||
+++ b/crypto/testmgr.c
|
||||
@@ -4035,6 +4035,30 @@ static const struct alg_test_desc alg_te
|
||||
.test = alg_test_null,
|
||||
.fips_allowed = 1,
|
||||
}, {
|
||||
+ .alg = "blake2s-128",
|
||||
+ .test = alg_test_hash,
|
||||
+ .suite = {
|
||||
+ .hash = __VECS(blakes2s_128_tv_template)
|
||||
+ }
|
||||
+ }, {
|
||||
+ .alg = "blake2s-160",
|
||||
+ .test = alg_test_hash,
|
||||
+ .suite = {
|
||||
+ .hash = __VECS(blakes2s_160_tv_template)
|
||||
+ }
|
||||
+ }, {
|
||||
+ .alg = "blake2s-224",
|
||||
+ .test = alg_test_hash,
|
||||
+ .suite = {
|
||||
+ .hash = __VECS(blakes2s_224_tv_template)
|
||||
+ }
|
||||
+ }, {
|
||||
+ .alg = "blake2s-256",
|
||||
+ .test = alg_test_hash,
|
||||
+ .suite = {
|
||||
+ .hash = __VECS(blakes2s_256_tv_template)
|
||||
+ }
|
||||
+ }, {
|
||||
.alg = "cbc(aes)",
|
||||
.test = alg_test_skcipher,
|
||||
.fips_allowed = 1,
|
||||
--- a/crypto/testmgr.h
|
||||
+++ b/crypto/testmgr.h
|
||||
@@ -31567,4 +31567,255 @@ static const struct aead_testvec essiv_h
|
||||
},
|
||||
};
|
||||
|
||||
+static const char blake2_ordered_sequence[] =
|
||||
+ "\x00\x01\x02\x03\x04\x05\x06\x07"
|
||||
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
|
||||
+ "\x10\x11\x12\x13\x14\x15\x16\x17"
|
||||
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
|
||||
+ "\x20\x21\x22\x23\x24\x25\x26\x27"
|
||||
+ "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
|
||||
+ "\x30\x31\x32\x33\x34\x35\x36\x37"
|
||||
+ "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
|
||||
+ "\x40\x41\x42\x43\x44\x45\x46\x47"
|
||||
+ "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
|
||||
+ "\x50\x51\x52\x53\x54\x55\x56\x57"
|
||||
+ "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
|
||||
+ "\x60\x61\x62\x63\x64\x65\x66\x67"
|
||||
+ "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
|
||||
+ "\x70\x71\x72\x73\x74\x75\x76\x77"
|
||||
+ "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
|
||||
+ "\x80\x81\x82\x83\x84\x85\x86\x87"
|
||||
+ "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
|
||||
+ "\x90\x91\x92\x93\x94\x95\x96\x97"
|
||||
+ "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
|
||||
+ "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
|
||||
+ "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
|
||||
+ "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
|
||||
+ "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
|
||||
+ "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
|
||||
+ "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
|
||||
+ "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
|
||||
+ "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
|
||||
+ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
|
||||
+ "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
|
||||
+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
|
||||
+ "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
|
||||
+
|
||||
+static const struct hash_testvec blakes2s_128_tv_template[] = {{
|
||||
+ .digest = (u8[]){ 0x64, 0x55, 0x0d, 0x6f, 0xfe, 0x2c, 0x0a, 0x01,
|
||||
+ 0xa1, 0x4a, 0xba, 0x1e, 0xad, 0xe0, 0x20, 0x0c, },
|
||||
+}, {
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 64,
|
||||
+ .digest = (u8[]){ 0xdc, 0x66, 0xca, 0x8f, 0x03, 0x86, 0x58, 0x01,
|
||||
+ 0xb0, 0xff, 0xe0, 0x6e, 0xd8, 0xa1, 0xa9, 0x0e, },
|
||||
+}, {
|
||||
+ .ksize = 16,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 1,
|
||||
+ .digest = (u8[]){ 0x88, 0x1e, 0x42, 0xe7, 0xbb, 0x35, 0x80, 0x82,
|
||||
+ 0x63, 0x7c, 0x0a, 0x0f, 0xd7, 0xec, 0x6c, 0x2f, },
|
||||
+}, {
|
||||
+ .ksize = 32,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 7,
|
||||
+ .digest = (u8[]){ 0xcf, 0x9e, 0x07, 0x2a, 0xd5, 0x22, 0xf2, 0xcd,
|
||||
+ 0xa2, 0xd8, 0x25, 0x21, 0x80, 0x86, 0x73, 0x1c, },
|
||||
+}, {
|
||||
+ .ksize = 1,
|
||||
+ .key = "B",
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 15,
|
||||
+ .digest = (u8[]){ 0xf6, 0x33, 0x5a, 0x2c, 0x22, 0xa0, 0x64, 0xb2,
|
||||
+ 0xb6, 0x3f, 0xeb, 0xbc, 0xd1, 0xc3, 0xe5, 0xb2, },
|
||||
+}, {
|
||||
+ .ksize = 16,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 247,
|
||||
+ .digest = (u8[]){ 0x72, 0x66, 0x49, 0x60, 0xf9, 0x4a, 0xea, 0xbe,
|
||||
+ 0x1f, 0xf4, 0x60, 0xce, 0xb7, 0x81, 0xcb, 0x09, },
|
||||
+}, {
|
||||
+ .ksize = 32,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 256,
|
||||
+ .digest = (u8[]){ 0xd5, 0xa4, 0x0e, 0xc3, 0x16, 0xc7, 0x51, 0xa6,
|
||||
+ 0x3c, 0xd0, 0xd9, 0x11, 0x57, 0xfa, 0x1e, 0xbb, },
|
||||
+}};
|
||||
+
|
||||
+static const struct hash_testvec blakes2s_160_tv_template[] = {{
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 7,
|
||||
+ .digest = (u8[]){ 0xb4, 0xf2, 0x03, 0x49, 0x37, 0xed, 0xb1, 0x3e,
|
||||
+ 0x5b, 0x2a, 0xca, 0x64, 0x82, 0x74, 0xf6, 0x62,
|
||||
+ 0xe3, 0xf2, 0x84, 0xff, },
|
||||
+}, {
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 256,
|
||||
+ .digest = (u8[]){ 0xaa, 0x56, 0x9b, 0xdc, 0x98, 0x17, 0x75, 0xf2,
|
||||
+ 0xb3, 0x68, 0x83, 0xb7, 0x9b, 0x8d, 0x48, 0xb1,
|
||||
+ 0x9b, 0x2d, 0x35, 0x05, },
|
||||
+}, {
|
||||
+ .ksize = 1,
|
||||
+ .key = "B",
|
||||
+ .digest = (u8[]){ 0x50, 0x16, 0xe7, 0x0c, 0x01, 0xd0, 0xd3, 0xc3,
|
||||
+ 0xf4, 0x3e, 0xb1, 0x6e, 0x97, 0xa9, 0x4e, 0xd1,
|
||||
+ 0x79, 0x65, 0x32, 0x93, },
|
||||
+}, {
|
||||
+ .ksize = 32,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 1,
|
||||
+ .digest = (u8[]){ 0x1c, 0x2b, 0xcd, 0x9a, 0x68, 0xca, 0x8c, 0x71,
|
||||
+ 0x90, 0x29, 0x6c, 0x54, 0xfa, 0x56, 0x4a, 0xef,
|
||||
+ 0xa2, 0x3a, 0x56, 0x9c, },
|
||||
+}, {
|
||||
+ .ksize = 16,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 15,
|
||||
+ .digest = (u8[]){ 0x36, 0xc3, 0x5f, 0x9a, 0xdc, 0x7e, 0xbf, 0x19,
|
||||
+ 0x68, 0xaa, 0xca, 0xd8, 0x81, 0xbf, 0x09, 0x34,
|
||||
+ 0x83, 0x39, 0x0f, 0x30, },
|
||||
+}, {
|
||||
+ .ksize = 1,
|
||||
+ .key = "B",
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 64,
|
||||
+ .digest = (u8[]){ 0x86, 0x80, 0x78, 0xa4, 0x14, 0xec, 0x03, 0xe5,
|
||||
+ 0xb6, 0x9a, 0x52, 0x0e, 0x42, 0xee, 0x39, 0x9d,
|
||||
+ 0xac, 0xa6, 0x81, 0x63, },
|
||||
+}, {
|
||||
+ .ksize = 32,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 247,
|
||||
+ .digest = (u8[]){ 0x2d, 0xd8, 0xd2, 0x53, 0x66, 0xfa, 0xa9, 0x01,
|
||||
+ 0x1c, 0x9c, 0xaf, 0xa3, 0xe2, 0x9d, 0x9b, 0x10,
|
||||
+ 0x0a, 0xf6, 0x73, 0xe8, },
|
||||
+}};
|
||||
+
|
||||
+static const struct hash_testvec blakes2s_224_tv_template[] = {{
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 1,
|
||||
+ .digest = (u8[]){ 0x61, 0xb9, 0x4e, 0xc9, 0x46, 0x22, 0xa3, 0x91,
|
||||
+ 0xd2, 0xae, 0x42, 0xe6, 0x45, 0x6c, 0x90, 0x12,
|
||||
+ 0xd5, 0x80, 0x07, 0x97, 0xb8, 0x86, 0x5a, 0xfc,
|
||||
+ 0x48, 0x21, 0x97, 0xbb, },
|
||||
+}, {
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 247,
|
||||
+ .digest = (u8[]){ 0x9e, 0xda, 0xc7, 0x20, 0x2c, 0xd8, 0x48, 0x2e,
|
||||
+ 0x31, 0x94, 0xab, 0x46, 0x6d, 0x94, 0xd8, 0xb4,
|
||||
+ 0x69, 0xcd, 0xae, 0x19, 0x6d, 0x9e, 0x41, 0xcc,
|
||||
+ 0x2b, 0xa4, 0xd5, 0xf6, },
|
||||
+}, {
|
||||
+ .ksize = 16,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .digest = (u8[]){ 0x32, 0xc0, 0xac, 0xf4, 0x3b, 0xd3, 0x07, 0x9f,
|
||||
+ 0xbe, 0xfb, 0xfa, 0x4d, 0x6b, 0x4e, 0x56, 0xb3,
|
||||
+ 0xaa, 0xd3, 0x27, 0xf6, 0x14, 0xbf, 0xb9, 0x32,
|
||||
+ 0xa7, 0x19, 0xfc, 0xb8, },
|
||||
+}, {
|
||||
+ .ksize = 1,
|
||||
+ .key = "B",
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 7,
|
||||
+ .digest = (u8[]){ 0x73, 0xad, 0x5e, 0x6d, 0xb9, 0x02, 0x8e, 0x76,
|
||||
+ 0xf2, 0x66, 0x42, 0x4b, 0x4c, 0xfa, 0x1f, 0xe6,
|
||||
+ 0x2e, 0x56, 0x40, 0xe5, 0xa2, 0xb0, 0x3c, 0xe8,
|
||||
+ 0x7b, 0x45, 0xfe, 0x05, },
|
||||
+}, {
|
||||
+ .ksize = 32,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 15,
|
||||
+ .digest = (u8[]){ 0x16, 0x60, 0xfb, 0x92, 0x54, 0xb3, 0x6e, 0x36,
|
||||
+ 0x81, 0xf4, 0x16, 0x41, 0xc3, 0x3d, 0xd3, 0x43,
|
||||
+ 0x84, 0xed, 0x10, 0x6f, 0x65, 0x80, 0x7a, 0x3e,
|
||||
+ 0x25, 0xab, 0xc5, 0x02, },
|
||||
+}, {
|
||||
+ .ksize = 16,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 64,
|
||||
+ .digest = (u8[]){ 0xca, 0xaa, 0x39, 0x67, 0x9c, 0xf7, 0x6b, 0xc7,
|
||||
+ 0xb6, 0x82, 0xca, 0x0e, 0x65, 0x36, 0x5b, 0x7c,
|
||||
+ 0x24, 0x00, 0xfa, 0x5f, 0xda, 0x06, 0x91, 0x93,
|
||||
+ 0x6a, 0x31, 0x83, 0xb5, },
|
||||
+}, {
|
||||
+ .ksize = 1,
|
||||
+ .key = "B",
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 256,
|
||||
+ .digest = (u8[]){ 0x90, 0x02, 0x26, 0xb5, 0x06, 0x9c, 0x36, 0x86,
|
||||
+ 0x94, 0x91, 0x90, 0x1e, 0x7d, 0x2a, 0x71, 0xb2,
|
||||
+ 0x48, 0xb5, 0xe8, 0x16, 0xfd, 0x64, 0x33, 0x45,
|
||||
+ 0xb3, 0xd7, 0xec, 0xcc, },
|
||||
+}};
|
||||
+
|
||||
+static const struct hash_testvec blakes2s_256_tv_template[] = {{
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 15,
|
||||
+ .digest = (u8[]){ 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21,
|
||||
+ 0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67,
|
||||
+ 0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04,
|
||||
+ 0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d, },
|
||||
+}, {
|
||||
+ .ksize = 32,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .digest = (u8[]){ 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b,
|
||||
+ 0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b,
|
||||
+ 0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a,
|
||||
+ 0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49, },
|
||||
+}, {
|
||||
+ .ksize = 1,
|
||||
+ .key = "B",
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 1,
|
||||
+ .digest = (u8[]){ 0x22, 0x27, 0xae, 0xaa, 0x6e, 0x81, 0x56, 0x03,
|
||||
+ 0xa7, 0xe3, 0xa1, 0x18, 0xa5, 0x9a, 0x2c, 0x18,
|
||||
+ 0xf4, 0x63, 0xbc, 0x16, 0x70, 0xf1, 0xe7, 0x4b,
|
||||
+ 0x00, 0x6d, 0x66, 0x16, 0xae, 0x9e, 0x74, 0x4e, },
|
||||
+}, {
|
||||
+ .ksize = 16,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 7,
|
||||
+ .digest = (u8[]){ 0x58, 0x5d, 0xa8, 0x60, 0x1c, 0xa4, 0xd8, 0x03,
|
||||
+ 0x86, 0x86, 0x84, 0x64, 0xd7, 0xa0, 0x8e, 0x15,
|
||||
+ 0x2f, 0x05, 0xa2, 0x1b, 0xbc, 0xef, 0x7a, 0x34,
|
||||
+ 0xb3, 0xc5, 0xbc, 0x4b, 0xf0, 0x32, 0xeb, 0x12, },
|
||||
+}, {
|
||||
+ .ksize = 32,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 64,
|
||||
+ .digest = (u8[]){ 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66,
|
||||
+ 0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26,
|
||||
+ 0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab,
|
||||
+ 0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4, },
|
||||
+}, {
|
||||
+ .ksize = 1,
|
||||
+ .key = "B",
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 247,
|
||||
+ .digest = (u8[]){ 0x2e, 0x74, 0x1c, 0x1d, 0x03, 0xf4, 0x9d, 0x84,
|
||||
+ 0x6f, 0xfc, 0x86, 0x32, 0x92, 0x49, 0x7e, 0x66,
|
||||
+ 0xd7, 0xc3, 0x10, 0x88, 0xfe, 0x28, 0xb3, 0xe0,
|
||||
+ 0xbf, 0x50, 0x75, 0xad, 0x8e, 0xa4, 0xe6, 0xb2, },
|
||||
+}, {
|
||||
+ .ksize = 16,
|
||||
+ .key = blake2_ordered_sequence,
|
||||
+ .plaintext = blake2_ordered_sequence,
|
||||
+ .psize = 256,
|
||||
+ .digest = (u8[]){ 0xb9, 0xd2, 0x81, 0x0e, 0x3a, 0xb1, 0x62, 0x9b,
|
||||
+ 0xad, 0x44, 0x05, 0xf4, 0x92, 0x2e, 0x99, 0xc1,
|
||||
+ 0x4a, 0x47, 0xbb, 0x5b, 0x6f, 0xb2, 0x96, 0xed,
|
||||
+ 0xd5, 0x06, 0xb5, 0x3a, 0x7c, 0x7a, 0x65, 0x1d, },
|
||||
+}};
|
||||
+
|
||||
#endif /* _CRYPTO_TESTMGR_H */
|
@ -0,0 +1,245 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:30 +0100
|
||||
Subject: [PATCH] crypto: blake2s - implement generic shash driver
|
||||
|
||||
commit 7f9b0880925f1f9d7d59504ea0892d2ae9cfc233 upstream.
|
||||
|
||||
Wire up our newly added Blake2s implementation via the shash API.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
crypto/Kconfig | 18 ++++
|
||||
crypto/Makefile | 1 +
|
||||
crypto/blake2s_generic.c | 171 ++++++++++++++++++++++++++++++
|
||||
include/crypto/internal/blake2s.h | 5 +
|
||||
4 files changed, 195 insertions(+)
|
||||
create mode 100644 crypto/blake2s_generic.c
|
||||
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -639,6 +639,24 @@ config CRYPTO_XXHASH
|
||||
xxHash non-cryptographic hash algorithm. Extremely fast, working at
|
||||
speeds close to RAM limits.
|
||||
|
||||
+config CRYPTO_BLAKE2S
|
||||
+ tristate "BLAKE2s digest algorithm"
|
||||
+ select CRYPTO_LIB_BLAKE2S_GENERIC
|
||||
+ select CRYPTO_HASH
|
||||
+ help
|
||||
+ Implementation of cryptographic hash function BLAKE2s
|
||||
+ optimized for 8-32bit platforms and can produce digests of any size
|
||||
+ between 1 to 32. The keyed hash is also implemented.
|
||||
+
|
||||
+ This module provides the following algorithms:
|
||||
+
|
||||
+ - blake2s-128
|
||||
+ - blake2s-160
|
||||
+ - blake2s-224
|
||||
+ - blake2s-256
|
||||
+
|
||||
+ See https://blake2.net for further information.
|
||||
+
|
||||
config CRYPTO_CRCT10DIF
|
||||
tristate "CRCT10DIF algorithm"
|
||||
select CRYPTO_HASH
|
||||
--- a/crypto/Makefile
|
||||
+++ b/crypto/Makefile
|
||||
@@ -74,6 +74,7 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebo
|
||||
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
|
||||
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
|
||||
obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
|
||||
+obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o
|
||||
obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
|
||||
obj-$(CONFIG_CRYPTO_ECB) += ecb.o
|
||||
obj-$(CONFIG_CRYPTO_CBC) += cbc.o
|
||||
--- /dev/null
|
||||
+++ b/crypto/blake2s_generic.c
|
||||
@@ -0,0 +1,171 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
+/*
|
||||
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
+ */
|
||||
+
|
||||
+#include <crypto/internal/blake2s.h>
|
||||
+#include <crypto/internal/simd.h>
|
||||
+#include <crypto/internal/hash.h>
|
||||
+
|
||||
+#include <linux/types.h>
|
||||
+#include <linux/jump_label.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/module.h>
|
||||
+
|
||||
+static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key,
|
||||
+ unsigned int keylen)
|
||||
+{
|
||||
+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
|
||||
+
|
||||
+ if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) {
|
||||
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ memcpy(tctx->key, key, keylen);
|
||||
+ tctx->keylen = keylen;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int crypto_blake2s_init(struct shash_desc *desc)
|
||||
+{
|
||||
+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
|
||||
+ struct blake2s_state *state = shash_desc_ctx(desc);
|
||||
+ const int outlen = crypto_shash_digestsize(desc->tfm);
|
||||
+
|
||||
+ if (tctx->keylen)
|
||||
+ blake2s_init_key(state, outlen, tctx->key, tctx->keylen);
|
||||
+ else
|
||||
+ blake2s_init(state, outlen);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in,
|
||||
+ unsigned int inlen)
|
||||
+{
|
||||
+ struct blake2s_state *state = shash_desc_ctx(desc);
|
||||
+ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
|
||||
+
|
||||
+ if (unlikely(!inlen))
|
||||
+ return 0;
|
||||
+ if (inlen > fill) {
|
||||
+ memcpy(state->buf + state->buflen, in, fill);
|
||||
+ blake2s_compress_generic(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
|
||||
+ state->buflen = 0;
|
||||
+ in += fill;
|
||||
+ inlen -= fill;
|
||||
+ }
|
||||
+ if (inlen > BLAKE2S_BLOCK_SIZE) {
|
||||
+ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
|
||||
+ /* Hash one less (full) block than strictly possible */
|
||||
+ blake2s_compress_generic(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
|
||||
+ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
|
||||
+ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
|
||||
+ }
|
||||
+ memcpy(state->buf + state->buflen, in, inlen);
|
||||
+ state->buflen += inlen;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int crypto_blake2s_final(struct shash_desc *desc, u8 *out)
|
||||
+{
|
||||
+ struct blake2s_state *state = shash_desc_ctx(desc);
|
||||
+
|
||||
+ blake2s_set_lastblock(state);
|
||||
+ memset(state->buf + state->buflen, 0,
|
||||
+ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
|
||||
+ blake2s_compress_generic(state, state->buf, 1, state->buflen);
|
||||
+ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
|
||||
+ memcpy(out, state->h, state->outlen);
|
||||
+ memzero_explicit(state, sizeof(*state));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static struct shash_alg blake2s_algs[] = {{
|
||||
+ .base.cra_name = "blake2s-128",
|
||||
+ .base.cra_driver_name = "blake2s-128-generic",
|
||||
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
|
||||
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .digestsize = BLAKE2S_128_HASH_SIZE,
|
||||
+ .setkey = crypto_blake2s_setkey,
|
||||
+ .init = crypto_blake2s_init,
|
||||
+ .update = crypto_blake2s_update,
|
||||
+ .final = crypto_blake2s_final,
|
||||
+ .descsize = sizeof(struct blake2s_state),
|
||||
+}, {
|
||||
+ .base.cra_name = "blake2s-160",
|
||||
+ .base.cra_driver_name = "blake2s-160-generic",
|
||||
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
|
||||
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .digestsize = BLAKE2S_160_HASH_SIZE,
|
||||
+ .setkey = crypto_blake2s_setkey,
|
||||
+ .init = crypto_blake2s_init,
|
||||
+ .update = crypto_blake2s_update,
|
||||
+ .final = crypto_blake2s_final,
|
||||
+ .descsize = sizeof(struct blake2s_state),
|
||||
+}, {
|
||||
+ .base.cra_name = "blake2s-224",
|
||||
+ .base.cra_driver_name = "blake2s-224-generic",
|
||||
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
|
||||
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .digestsize = BLAKE2S_224_HASH_SIZE,
|
||||
+ .setkey = crypto_blake2s_setkey,
|
||||
+ .init = crypto_blake2s_init,
|
||||
+ .update = crypto_blake2s_update,
|
||||
+ .final = crypto_blake2s_final,
|
||||
+ .descsize = sizeof(struct blake2s_state),
|
||||
+}, {
|
||||
+ .base.cra_name = "blake2s-256",
|
||||
+ .base.cra_driver_name = "blake2s-256-generic",
|
||||
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
|
||||
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .digestsize = BLAKE2S_256_HASH_SIZE,
|
||||
+ .setkey = crypto_blake2s_setkey,
|
||||
+ .init = crypto_blake2s_init,
|
||||
+ .update = crypto_blake2s_update,
|
||||
+ .final = crypto_blake2s_final,
|
||||
+ .descsize = sizeof(struct blake2s_state),
|
||||
+}};
|
||||
+
|
||||
+static int __init blake2s_mod_init(void)
|
||||
+{
|
||||
+ return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
|
||||
+}
|
||||
+
|
||||
+static void __exit blake2s_mod_exit(void)
|
||||
+{
|
||||
+ crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
|
||||
+}
|
||||
+
|
||||
+subsys_initcall(blake2s_mod_init);
|
||||
+module_exit(blake2s_mod_exit);
|
||||
+
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-128");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-128-generic");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-160");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-160-generic");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-224");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-224-generic");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-256");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-256-generic");
|
||||
+MODULE_LICENSE("GPL v2");
|
||||
--- a/include/crypto/internal/blake2s.h
|
||||
+++ b/include/crypto/internal/blake2s.h
|
||||
@@ -5,6 +5,11 @@
|
||||
|
||||
#include <crypto/blake2s.h>
|
||||
|
||||
+struct blake2s_tfm_ctx {
|
||||
+ u8 key[BLAKE2S_KEY_SIZE];
|
||||
+ unsigned int keylen;
|
||||
+};
|
||||
+
|
||||
void blake2s_compress_generic(struct blake2s_state *state,const u8 *block,
|
||||
size_t nblocks, const u32 inc);
|
||||
|
@ -0,0 +1,557 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Fri, 8 Nov 2019 13:22:31 +0100
|
||||
Subject: [PATCH] crypto: blake2s - x86_64 SIMD implementation
|
||||
|
||||
commit ed0356eda153f6a95649e11feb7b07083caf9e20 upstream.
|
||||
|
||||
These implementations from Samuel Neves support AVX and AVX-512VL.
|
||||
Originally this used AVX-512F, but Skylake thermal throttling made
|
||||
AVX-512VL more attractive and possible to do with negligible difference.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
|
||||
Co-developed-by: Samuel Neves <sneves@dei.uc.pt>
|
||||
[ardb: move to arch/x86/crypto, wire into lib/crypto framework]
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/Makefile | 2 +
|
||||
arch/x86/crypto/blake2s-core.S | 258 +++++++++++++++++++++++++++++++++
|
||||
arch/x86/crypto/blake2s-glue.c | 233 +++++++++++++++++++++++++++++
|
||||
crypto/Kconfig | 6 +
|
||||
4 files changed, 499 insertions(+)
|
||||
create mode 100644 arch/x86/crypto/blake2s-core.S
|
||||
create mode 100644 arch/x86/crypto/blake2s-glue.c
|
||||
|
||||
--- a/arch/x86/crypto/Makefile
|
||||
+++ b/arch/x86/crypto/Makefile
|
||||
@@ -48,6 +48,7 @@ ifeq ($(avx_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
|
||||
+ obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
|
||||
endif
|
||||
|
||||
# These modules require assembler to support AVX2.
|
||||
@@ -70,6 +71,7 @@ serpent-sse2-x86_64-y := serpent-sse2-x8
|
||||
aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
|
||||
|
||||
nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
|
||||
+blake2s-x86_64-y := blake2s-core.o blake2s-glue.o
|
||||
|
||||
ifeq ($(avx_supported),yes)
|
||||
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
|
||||
--- /dev/null
|
||||
+++ b/arch/x86/crypto/blake2s-core.S
|
||||
@@ -0,0 +1,258 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
+/*
|
||||
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
+ * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
|
||||
+ */
|
||||
+
|
||||
+#include <linux/linkage.h>
|
||||
+
|
||||
+.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
|
||||
+.align 32
|
||||
+IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667
|
||||
+ .octa 0x5BE0CD191F83D9AB9B05688C510E527F
|
||||
+.section .rodata.cst16.ROT16, "aM", @progbits, 16
|
||||
+.align 16
|
||||
+ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302
|
||||
+.section .rodata.cst16.ROR328, "aM", @progbits, 16
|
||||
+.align 16
|
||||
+ROR328: .octa 0x0C0F0E0D080B0A090407060500030201
|
||||
+.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160
|
||||
+.align 64
|
||||
+SIGMA:
|
||||
+.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
|
||||
+.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7
|
||||
+.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1
|
||||
+.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0
|
||||
+.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8
|
||||
+.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14
|
||||
+.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2
|
||||
+.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6
|
||||
+.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4
|
||||
+.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12
|
||||
+#ifdef CONFIG_AS_AVX512
|
||||
+.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640
|
||||
+.align 64
|
||||
+SIGMA2:
|
||||
+.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
|
||||
+.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7
|
||||
+.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9
|
||||
+.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5
|
||||
+.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12
|
||||
+.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9
|
||||
+.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0
|
||||
+.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10
|
||||
+.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14
|
||||
+.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9
|
||||
+#endif /* CONFIG_AS_AVX512 */
|
||||
+
|
||||
+.text
|
||||
+#ifdef CONFIG_AS_SSSE3
|
||||
+ENTRY(blake2s_compress_ssse3)
|
||||
+ testq %rdx,%rdx
|
||||
+ je .Lendofloop
|
||||
+ movdqu (%rdi),%xmm0
|
||||
+ movdqu 0x10(%rdi),%xmm1
|
||||
+ movdqa ROT16(%rip),%xmm12
|
||||
+ movdqa ROR328(%rip),%xmm13
|
||||
+ movdqu 0x20(%rdi),%xmm14
|
||||
+ movq %rcx,%xmm15
|
||||
+ leaq SIGMA+0xa0(%rip),%r8
|
||||
+ jmp .Lbeginofloop
|
||||
+ .align 32
|
||||
+.Lbeginofloop:
|
||||
+ movdqa %xmm0,%xmm10
|
||||
+ movdqa %xmm1,%xmm11
|
||||
+ paddq %xmm15,%xmm14
|
||||
+ movdqa IV(%rip),%xmm2
|
||||
+ movdqa %xmm14,%xmm3
|
||||
+ pxor IV+0x10(%rip),%xmm3
|
||||
+ leaq SIGMA(%rip),%rcx
|
||||
+.Lroundloop:
|
||||
+ movzbl (%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm4
|
||||
+ movzbl 0x1(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm5
|
||||
+ movzbl 0x2(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm6
|
||||
+ movzbl 0x3(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm7
|
||||
+ punpckldq %xmm5,%xmm4
|
||||
+ punpckldq %xmm7,%xmm6
|
||||
+ punpcklqdq %xmm6,%xmm4
|
||||
+ paddd %xmm4,%xmm0
|
||||
+ paddd %xmm1,%xmm0
|
||||
+ pxor %xmm0,%xmm3
|
||||
+ pshufb %xmm12,%xmm3
|
||||
+ paddd %xmm3,%xmm2
|
||||
+ pxor %xmm2,%xmm1
|
||||
+ movdqa %xmm1,%xmm8
|
||||
+ psrld $0xc,%xmm1
|
||||
+ pslld $0x14,%xmm8
|
||||
+ por %xmm8,%xmm1
|
||||
+ movzbl 0x4(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm5
|
||||
+ movzbl 0x5(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm6
|
||||
+ movzbl 0x6(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm7
|
||||
+ movzbl 0x7(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm4
|
||||
+ punpckldq %xmm6,%xmm5
|
||||
+ punpckldq %xmm4,%xmm7
|
||||
+ punpcklqdq %xmm7,%xmm5
|
||||
+ paddd %xmm5,%xmm0
|
||||
+ paddd %xmm1,%xmm0
|
||||
+ pxor %xmm0,%xmm3
|
||||
+ pshufb %xmm13,%xmm3
|
||||
+ paddd %xmm3,%xmm2
|
||||
+ pxor %xmm2,%xmm1
|
||||
+ movdqa %xmm1,%xmm8
|
||||
+ psrld $0x7,%xmm1
|
||||
+ pslld $0x19,%xmm8
|
||||
+ por %xmm8,%xmm1
|
||||
+ pshufd $0x93,%xmm0,%xmm0
|
||||
+ pshufd $0x4e,%xmm3,%xmm3
|
||||
+ pshufd $0x39,%xmm2,%xmm2
|
||||
+ movzbl 0x8(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm6
|
||||
+ movzbl 0x9(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm7
|
||||
+ movzbl 0xa(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm4
|
||||
+ movzbl 0xb(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm5
|
||||
+ punpckldq %xmm7,%xmm6
|
||||
+ punpckldq %xmm5,%xmm4
|
||||
+ punpcklqdq %xmm4,%xmm6
|
||||
+ paddd %xmm6,%xmm0
|
||||
+ paddd %xmm1,%xmm0
|
||||
+ pxor %xmm0,%xmm3
|
||||
+ pshufb %xmm12,%xmm3
|
||||
+ paddd %xmm3,%xmm2
|
||||
+ pxor %xmm2,%xmm1
|
||||
+ movdqa %xmm1,%xmm8
|
||||
+ psrld $0xc,%xmm1
|
||||
+ pslld $0x14,%xmm8
|
||||
+ por %xmm8,%xmm1
|
||||
+ movzbl 0xc(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm7
|
||||
+ movzbl 0xd(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm4
|
||||
+ movzbl 0xe(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm5
|
||||
+ movzbl 0xf(%rcx),%eax
|
||||
+ movd (%rsi,%rax,4),%xmm6
|
||||
+ punpckldq %xmm4,%xmm7
|
||||
+ punpckldq %xmm6,%xmm5
|
||||
+ punpcklqdq %xmm5,%xmm7
|
||||
+ paddd %xmm7,%xmm0
|
||||
+ paddd %xmm1,%xmm0
|
||||
+ pxor %xmm0,%xmm3
|
||||
+ pshufb %xmm13,%xmm3
|
||||
+ paddd %xmm3,%xmm2
|
||||
+ pxor %xmm2,%xmm1
|
||||
+ movdqa %xmm1,%xmm8
|
||||
+ psrld $0x7,%xmm1
|
||||
+ pslld $0x19,%xmm8
|
||||
+ por %xmm8,%xmm1
|
||||
+ pshufd $0x39,%xmm0,%xmm0
|
||||
+ pshufd $0x4e,%xmm3,%xmm3
|
||||
+ pshufd $0x93,%xmm2,%xmm2
|
||||
+ addq $0x10,%rcx
|
||||
+ cmpq %r8,%rcx
|
||||
+ jnz .Lroundloop
|
||||
+ pxor %xmm2,%xmm0
|
||||
+ pxor %xmm3,%xmm1
|
||||
+ pxor %xmm10,%xmm0
|
||||
+ pxor %xmm11,%xmm1
|
||||
+ addq $0x40,%rsi
|
||||
+ decq %rdx
|
||||
+ jnz .Lbeginofloop
|
||||
+ movdqu %xmm0,(%rdi)
|
||||
+ movdqu %xmm1,0x10(%rdi)
|
||||
+ movdqu %xmm14,0x20(%rdi)
|
||||
+.Lendofloop:
|
||||
+ ret
|
||||
+ENDPROC(blake2s_compress_ssse3)
|
||||
+#endif /* CONFIG_AS_SSSE3 */
|
||||
+
|
||||
+#ifdef CONFIG_AS_AVX512
|
||||
+ENTRY(blake2s_compress_avx512)
|
||||
+ vmovdqu (%rdi),%xmm0
|
||||
+ vmovdqu 0x10(%rdi),%xmm1
|
||||
+ vmovdqu 0x20(%rdi),%xmm4
|
||||
+ vmovq %rcx,%xmm5
|
||||
+ vmovdqa IV(%rip),%xmm14
|
||||
+ vmovdqa IV+16(%rip),%xmm15
|
||||
+ jmp .Lblake2s_compress_avx512_mainloop
|
||||
+.align 32
|
||||
+.Lblake2s_compress_avx512_mainloop:
|
||||
+ vmovdqa %xmm0,%xmm10
|
||||
+ vmovdqa %xmm1,%xmm11
|
||||
+ vpaddq %xmm5,%xmm4,%xmm4
|
||||
+ vmovdqa %xmm14,%xmm2
|
||||
+ vpxor %xmm15,%xmm4,%xmm3
|
||||
+ vmovdqu (%rsi),%ymm6
|
||||
+ vmovdqu 0x20(%rsi),%ymm7
|
||||
+ addq $0x40,%rsi
|
||||
+ leaq SIGMA2(%rip),%rax
|
||||
+ movb $0xa,%cl
|
||||
+.Lblake2s_compress_avx512_roundloop:
|
||||
+ addq $0x40,%rax
|
||||
+ vmovdqa -0x40(%rax),%ymm8
|
||||
+ vmovdqa -0x20(%rax),%ymm9
|
||||
+ vpermi2d %ymm7,%ymm6,%ymm8
|
||||
+ vpermi2d %ymm7,%ymm6,%ymm9
|
||||
+ vmovdqa %ymm8,%ymm6
|
||||
+ vmovdqa %ymm9,%ymm7
|
||||
+ vpaddd %xmm8,%xmm0,%xmm0
|
||||
+ vpaddd %xmm1,%xmm0,%xmm0
|
||||
+ vpxor %xmm0,%xmm3,%xmm3
|
||||
+ vprord $0x10,%xmm3,%xmm3
|
||||
+ vpaddd %xmm3,%xmm2,%xmm2
|
||||
+ vpxor %xmm2,%xmm1,%xmm1
|
||||
+ vprord $0xc,%xmm1,%xmm1
|
||||
+ vextracti128 $0x1,%ymm8,%xmm8
|
||||
+ vpaddd %xmm8,%xmm0,%xmm0
|
||||
+ vpaddd %xmm1,%xmm0,%xmm0
|
||||
+ vpxor %xmm0,%xmm3,%xmm3
|
||||
+ vprord $0x8,%xmm3,%xmm3
|
||||
+ vpaddd %xmm3,%xmm2,%xmm2
|
||||
+ vpxor %xmm2,%xmm1,%xmm1
|
||||
+ vprord $0x7,%xmm1,%xmm1
|
||||
+ vpshufd $0x93,%xmm0,%xmm0
|
||||
+ vpshufd $0x4e,%xmm3,%xmm3
|
||||
+ vpshufd $0x39,%xmm2,%xmm2
|
||||
+ vpaddd %xmm9,%xmm0,%xmm0
|
||||
+ vpaddd %xmm1,%xmm0,%xmm0
|
||||
+ vpxor %xmm0,%xmm3,%xmm3
|
||||
+ vprord $0x10,%xmm3,%xmm3
|
||||
+ vpaddd %xmm3,%xmm2,%xmm2
|
||||
+ vpxor %xmm2,%xmm1,%xmm1
|
||||
+ vprord $0xc,%xmm1,%xmm1
|
||||
+ vextracti128 $0x1,%ymm9,%xmm9
|
||||
+ vpaddd %xmm9,%xmm0,%xmm0
|
||||
+ vpaddd %xmm1,%xmm0,%xmm0
|
||||
+ vpxor %xmm0,%xmm3,%xmm3
|
||||
+ vprord $0x8,%xmm3,%xmm3
|
||||
+ vpaddd %xmm3,%xmm2,%xmm2
|
||||
+ vpxor %xmm2,%xmm1,%xmm1
|
||||
+ vprord $0x7,%xmm1,%xmm1
|
||||
+ vpshufd $0x39,%xmm0,%xmm0
|
||||
+ vpshufd $0x4e,%xmm3,%xmm3
|
||||
+ vpshufd $0x93,%xmm2,%xmm2
|
||||
+ decb %cl
|
||||
+ jne .Lblake2s_compress_avx512_roundloop
|
||||
+ vpxor %xmm10,%xmm0,%xmm0
|
||||
+ vpxor %xmm11,%xmm1,%xmm1
|
||||
+ vpxor %xmm2,%xmm0,%xmm0
|
||||
+ vpxor %xmm3,%xmm1,%xmm1
|
||||
+ decq %rdx
|
||||
+ jne .Lblake2s_compress_avx512_mainloop
|
||||
+ vmovdqu %xmm0,(%rdi)
|
||||
+ vmovdqu %xmm1,0x10(%rdi)
|
||||
+ vmovdqu %xmm4,0x20(%rdi)
|
||||
+ vzeroupper
|
||||
+ retq
|
||||
+ENDPROC(blake2s_compress_avx512)
|
||||
+#endif /* CONFIG_AS_AVX512 */
|
||||
--- /dev/null
|
||||
+++ b/arch/x86/crypto/blake2s-glue.c
|
||||
@@ -0,0 +1,233 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
+/*
|
||||
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
+ */
|
||||
+
|
||||
+#include <crypto/internal/blake2s.h>
|
||||
+#include <crypto/internal/simd.h>
|
||||
+#include <crypto/internal/hash.h>
|
||||
+
|
||||
+#include <linux/types.h>
|
||||
+#include <linux/jump_label.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/module.h>
|
||||
+
|
||||
+#include <asm/cpufeature.h>
|
||||
+#include <asm/fpu/api.h>
|
||||
+#include <asm/processor.h>
|
||||
+#include <asm/simd.h>
|
||||
+
|
||||
+asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
|
||||
+ const u8 *block, const size_t nblocks,
|
||||
+ const u32 inc);
|
||||
+asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
|
||||
+ const u8 *block, const size_t nblocks,
|
||||
+ const u32 inc);
|
||||
+
|
||||
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
|
||||
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
|
||||
+
|
||||
+void blake2s_compress_arch(struct blake2s_state *state,
|
||||
+ const u8 *block, size_t nblocks,
|
||||
+ const u32 inc)
|
||||
+{
|
||||
+ /* SIMD disables preemption, so relax after processing each page. */
|
||||
+ BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
|
||||
+
|
||||
+ if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
|
||||
+ blake2s_compress_generic(state, block, nblocks, inc);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ for (;;) {
|
||||
+ const size_t blocks = min_t(size_t, nblocks,
|
||||
+ PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
|
||||
+
|
||||
+ kernel_fpu_begin();
|
||||
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
|
||||
+ static_branch_likely(&blake2s_use_avx512))
|
||||
+ blake2s_compress_avx512(state, block, blocks, inc);
|
||||
+ else
|
||||
+ blake2s_compress_ssse3(state, block, blocks, inc);
|
||||
+ kernel_fpu_end();
|
||||
+
|
||||
+ nblocks -= blocks;
|
||||
+ if (!nblocks)
|
||||
+ break;
|
||||
+ block += blocks * BLAKE2S_BLOCK_SIZE;
|
||||
+ }
|
||||
+}
|
||||
+EXPORT_SYMBOL(blake2s_compress_arch);
|
||||
+
|
||||
+static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key,
|
||||
+ unsigned int keylen)
|
||||
+{
|
||||
+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
|
||||
+
|
||||
+ if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) {
|
||||
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ memcpy(tctx->key, key, keylen);
|
||||
+ tctx->keylen = keylen;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int crypto_blake2s_init(struct shash_desc *desc)
|
||||
+{
|
||||
+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
|
||||
+ struct blake2s_state *state = shash_desc_ctx(desc);
|
||||
+ const int outlen = crypto_shash_digestsize(desc->tfm);
|
||||
+
|
||||
+ if (tctx->keylen)
|
||||
+ blake2s_init_key(state, outlen, tctx->key, tctx->keylen);
|
||||
+ else
|
||||
+ blake2s_init(state, outlen);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in,
|
||||
+ unsigned int inlen)
|
||||
+{
|
||||
+ struct blake2s_state *state = shash_desc_ctx(desc);
|
||||
+ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
|
||||
+
|
||||
+ if (unlikely(!inlen))
|
||||
+ return 0;
|
||||
+ if (inlen > fill) {
|
||||
+ memcpy(state->buf + state->buflen, in, fill);
|
||||
+ blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
|
||||
+ state->buflen = 0;
|
||||
+ in += fill;
|
||||
+ inlen -= fill;
|
||||
+ }
|
||||
+ if (inlen > BLAKE2S_BLOCK_SIZE) {
|
||||
+ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
|
||||
+ /* Hash one less (full) block than strictly possible */
|
||||
+ blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
|
||||
+ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
|
||||
+ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
|
||||
+ }
|
||||
+ memcpy(state->buf + state->buflen, in, inlen);
|
||||
+ state->buflen += inlen;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int crypto_blake2s_final(struct shash_desc *desc, u8 *out)
|
||||
+{
|
||||
+ struct blake2s_state *state = shash_desc_ctx(desc);
|
||||
+
|
||||
+ blake2s_set_lastblock(state);
|
||||
+ memset(state->buf + state->buflen, 0,
|
||||
+ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
|
||||
+ blake2s_compress_arch(state, state->buf, 1, state->buflen);
|
||||
+ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
|
||||
+ memcpy(out, state->h, state->outlen);
|
||||
+ memzero_explicit(state, sizeof(*state));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static struct shash_alg blake2s_algs[] = {{
|
||||
+ .base.cra_name = "blake2s-128",
|
||||
+ .base.cra_driver_name = "blake2s-128-x86",
|
||||
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
|
||||
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .digestsize = BLAKE2S_128_HASH_SIZE,
|
||||
+ .setkey = crypto_blake2s_setkey,
|
||||
+ .init = crypto_blake2s_init,
|
||||
+ .update = crypto_blake2s_update,
|
||||
+ .final = crypto_blake2s_final,
|
||||
+ .descsize = sizeof(struct blake2s_state),
|
||||
+}, {
|
||||
+ .base.cra_name = "blake2s-160",
|
||||
+ .base.cra_driver_name = "blake2s-160-x86",
|
||||
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
|
||||
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .digestsize = BLAKE2S_160_HASH_SIZE,
|
||||
+ .setkey = crypto_blake2s_setkey,
|
||||
+ .init = crypto_blake2s_init,
|
||||
+ .update = crypto_blake2s_update,
|
||||
+ .final = crypto_blake2s_final,
|
||||
+ .descsize = sizeof(struct blake2s_state),
|
||||
+}, {
|
||||
+ .base.cra_name = "blake2s-224",
|
||||
+ .base.cra_driver_name = "blake2s-224-x86",
|
||||
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
|
||||
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .digestsize = BLAKE2S_224_HASH_SIZE,
|
||||
+ .setkey = crypto_blake2s_setkey,
|
||||
+ .init = crypto_blake2s_init,
|
||||
+ .update = crypto_blake2s_update,
|
||||
+ .final = crypto_blake2s_final,
|
||||
+ .descsize = sizeof(struct blake2s_state),
|
||||
+}, {
|
||||
+ .base.cra_name = "blake2s-256",
|
||||
+ .base.cra_driver_name = "blake2s-256-x86",
|
||||
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
|
||||
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
|
||||
+ .base.cra_priority = 200,
|
||||
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+
|
||||
+ .digestsize = BLAKE2S_256_HASH_SIZE,
|
||||
+ .setkey = crypto_blake2s_setkey,
|
||||
+ .init = crypto_blake2s_init,
|
||||
+ .update = crypto_blake2s_update,
|
||||
+ .final = crypto_blake2s_final,
|
||||
+ .descsize = sizeof(struct blake2s_state),
|
||||
+}};
|
||||
+
|
||||
+static int __init blake2s_mod_init(void)
|
||||
+{
|
||||
+ if (!boot_cpu_has(X86_FEATURE_SSSE3))
|
||||
+ return 0;
|
||||
+
|
||||
+ static_branch_enable(&blake2s_use_ssse3);
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
|
||||
+ boot_cpu_has(X86_FEATURE_AVX) &&
|
||||
+ boot_cpu_has(X86_FEATURE_AVX2) &&
|
||||
+ boot_cpu_has(X86_FEATURE_AVX512F) &&
|
||||
+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
|
||||
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
|
||||
+ XFEATURE_MASK_AVX512, NULL))
|
||||
+ static_branch_enable(&blake2s_use_avx512);
|
||||
+
|
||||
+ return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
|
||||
+}
|
||||
+
|
||||
+static void __exit blake2s_mod_exit(void)
|
||||
+{
|
||||
+ if (boot_cpu_has(X86_FEATURE_SSSE3))
|
||||
+ crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
|
||||
+}
|
||||
+
|
||||
+module_init(blake2s_mod_init);
|
||||
+module_exit(blake2s_mod_exit);
|
||||
+
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-128");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-128-x86");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-160");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-160-x86");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-224");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-224-x86");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-256");
|
||||
+MODULE_ALIAS_CRYPTO("blake2s-256-x86");
|
||||
+MODULE_LICENSE("GPL v2");
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -657,6 +657,12 @@ config CRYPTO_BLAKE2S
|
||||
|
||||
See https://blake2.net for further information.
|
||||
|
||||
+config CRYPTO_BLAKE2S_X86
|
||||
+ tristate "BLAKE2s digest algorithm (x86 accelerated version)"
|
||||
+ depends on X86 && 64BIT
|
||||
+ select CRYPTO_LIB_BLAKE2S_GENERIC
|
||||
+ select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
|
||||
+
|
||||
config CRYPTO_CRCT10DIF
|
||||
tristate "CRCT10DIF algorithm"
|
||||
select CRYPTO_HASH
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,136 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:34 +0100
|
||||
Subject: [PATCH] crypto: curve25519 - implement generic KPP driver
|
||||
|
||||
commit ee772cb641135739c1530647391d5a04c39db192 upstream.
|
||||
|
||||
Expose the generic Curve25519 library via the crypto API KPP interface.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
crypto/Kconfig | 5 +++
|
||||
crypto/Makefile | 1 +
|
||||
crypto/curve25519-generic.c | 90 +++++++++++++++++++++++++++++++++++++
|
||||
3 files changed, 96 insertions(+)
|
||||
create mode 100644 crypto/curve25519-generic.c
|
||||
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -264,6 +264,11 @@ config CRYPTO_ECRDSA
|
||||
standard algorithms (called GOST algorithms). Only signature verification
|
||||
is implemented.
|
||||
|
||||
+config CRYPTO_CURVE25519
|
||||
+ tristate "Curve25519 algorithm"
|
||||
+ select CRYPTO_KPP
|
||||
+ select CRYPTO_LIB_CURVE25519_GENERIC
|
||||
+
|
||||
comment "Authenticated Encryption with Associated Data"
|
||||
|
||||
config CRYPTO_CCM
|
||||
--- a/crypto/Makefile
|
||||
+++ b/crypto/Makefile
|
||||
@@ -167,6 +167,7 @@ obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
|
||||
obj-$(CONFIG_CRYPTO_OFB) += ofb.o
|
||||
obj-$(CONFIG_CRYPTO_ECC) += ecc.o
|
||||
obj-$(CONFIG_CRYPTO_ESSIV) += essiv.o
|
||||
+obj-$(CONFIG_CRYPTO_CURVE25519) += curve25519-generic.o
|
||||
|
||||
ecdh_generic-y += ecdh.o
|
||||
ecdh_generic-y += ecdh_helper.o
|
||||
--- /dev/null
|
||||
+++ b/crypto/curve25519-generic.c
|
||||
@@ -0,0 +1,90 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
+
|
||||
+#include <crypto/curve25519.h>
|
||||
+#include <crypto/internal/kpp.h>
|
||||
+#include <crypto/kpp.h>
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/scatterlist.h>
|
||||
+
|
||||
+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
|
||||
+ unsigned int len)
|
||||
+{
|
||||
+ u8 *secret = kpp_tfm_ctx(tfm);
|
||||
+
|
||||
+ if (!len)
|
||||
+ curve25519_generate_secret(secret);
|
||||
+ else if (len == CURVE25519_KEY_SIZE &&
|
||||
+ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
|
||||
+ memcpy(secret, buf, CURVE25519_KEY_SIZE);
|
||||
+ else
|
||||
+ return -EINVAL;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int curve25519_compute_value(struct kpp_request *req)
|
||||
+{
|
||||
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
|
||||
+ const u8 *secret = kpp_tfm_ctx(tfm);
|
||||
+ u8 public_key[CURVE25519_KEY_SIZE];
|
||||
+ u8 buf[CURVE25519_KEY_SIZE];
|
||||
+ int copied, nbytes;
|
||||
+ u8 const *bp;
|
||||
+
|
||||
+ if (req->src) {
|
||||
+ copied = sg_copy_to_buffer(req->src,
|
||||
+ sg_nents_for_len(req->src,
|
||||
+ CURVE25519_KEY_SIZE),
|
||||
+ public_key, CURVE25519_KEY_SIZE);
|
||||
+ if (copied != CURVE25519_KEY_SIZE)
|
||||
+ return -EINVAL;
|
||||
+ bp = public_key;
|
||||
+ } else {
|
||||
+ bp = curve25519_base_point;
|
||||
+ }
|
||||
+
|
||||
+ curve25519_generic(buf, secret, bp);
|
||||
+
|
||||
+ /* might want less than we've got */
|
||||
+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
|
||||
+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
|
||||
+ nbytes),
|
||||
+ buf, nbytes);
|
||||
+ if (copied != nbytes)
|
||||
+ return -EINVAL;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
|
||||
+{
|
||||
+ return CURVE25519_KEY_SIZE;
|
||||
+}
|
||||
+
|
||||
+static struct kpp_alg curve25519_alg = {
|
||||
+ .base.cra_name = "curve25519",
|
||||
+ .base.cra_driver_name = "curve25519-generic",
|
||||
+ .base.cra_priority = 100,
|
||||
+ .base.cra_module = THIS_MODULE,
|
||||
+ .base.cra_ctxsize = CURVE25519_KEY_SIZE,
|
||||
+
|
||||
+ .set_secret = curve25519_set_secret,
|
||||
+ .generate_public_key = curve25519_compute_value,
|
||||
+ .compute_shared_secret = curve25519_compute_value,
|
||||
+ .max_size = curve25519_max_size,
|
||||
+};
|
||||
+
|
||||
+static int curve25519_init(void)
|
||||
+{
|
||||
+ return crypto_register_kpp(&curve25519_alg);
|
||||
+}
|
||||
+
|
||||
+static void curve25519_exit(void)
|
||||
+{
|
||||
+ crypto_unregister_kpp(&curve25519_alg);
|
||||
+}
|
||||
+
|
||||
+subsys_initcall(curve25519_init);
|
||||
+module_exit(curve25519_exit);
|
||||
+
|
||||
+MODULE_ALIAS_CRYPTO("curve25519");
|
||||
+MODULE_ALIAS_CRYPTO("curve25519-generic");
|
||||
+MODULE_LICENSE("GPL");
|
@ -0,0 +1,75 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:35 +0100
|
||||
Subject: [PATCH] crypto: lib/curve25519 - work around Clang stack spilling
|
||||
issue
|
||||
|
||||
commit 660bb8e1f833ea63185fe80fde847e3e42f18e3b upstream.
|
||||
|
||||
Arnd reports that the 32-bit generic library code for Curve25519 ends
|
||||
up using an excessive amount of stack space when built with Clang:
|
||||
|
||||
lib/crypto/curve25519-fiat32.c:756:6: error: stack frame size
|
||||
of 1384 bytes in function 'curve25519_generic'
|
||||
[-Werror,-Wframe-larger-than=]
|
||||
|
||||
Let's give some hints to the compiler regarding which routines should
|
||||
not be inlined, to prevent it from running out of registers and spilling
|
||||
to the stack. The resulting code performs identically under both GCC
|
||||
and Clang, and makes the warning go away.
|
||||
|
||||
Suggested-by: Arnd Bergmann <arnd@arndb.de>
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
lib/crypto/curve25519-fiat32.c | 10 +++++-----
|
||||
1 file changed, 5 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/lib/crypto/curve25519-fiat32.c
|
||||
+++ b/lib/crypto/curve25519-fiat32.c
|
||||
@@ -223,7 +223,7 @@ static __always_inline void fe_1(fe *h)
|
||||
h->v[0] = 1;
|
||||
}
|
||||
|
||||
-static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
|
||||
+static noinline void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
|
||||
{
|
||||
{ const u32 x20 = in1[9];
|
||||
{ const u32 x21 = in1[8];
|
||||
@@ -266,7 +266,7 @@ static __always_inline void fe_add(fe_lo
|
||||
fe_add_impl(h->v, f->v, g->v);
|
||||
}
|
||||
|
||||
-static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
|
||||
+static noinline void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
|
||||
{
|
||||
{ const u32 x20 = in1[9];
|
||||
{ const u32 x21 = in1[8];
|
||||
@@ -309,7 +309,7 @@ static __always_inline void fe_sub(fe_lo
|
||||
fe_sub_impl(h->v, f->v, g->v);
|
||||
}
|
||||
|
||||
-static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
|
||||
+static noinline void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
|
||||
{
|
||||
{ const u32 x20 = in1[9];
|
||||
{ const u32 x21 = in1[8];
|
||||
@@ -441,7 +441,7 @@ fe_mul_tll(fe *h, const fe_loose *f, con
|
||||
fe_mul_impl(h->v, f->v, g->v);
|
||||
}
|
||||
|
||||
-static void fe_sqr_impl(u32 out[10], const u32 in1[10])
|
||||
+static noinline void fe_sqr_impl(u32 out[10], const u32 in1[10])
|
||||
{
|
||||
{ const u32 x17 = in1[9];
|
||||
{ const u32 x18 = in1[8];
|
||||
@@ -619,7 +619,7 @@ static __always_inline void fe_invert(fe
|
||||
*
|
||||
* Preconditions: b in {0,1}
|
||||
*/
|
||||
-static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b)
|
||||
+static noinline void fe_cswap(fe *f, fe *g, unsigned int b)
|
||||
{
|
||||
unsigned i;
|
||||
b = 0 - b;
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,295 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 8 Nov 2019 13:22:40 +0100
|
||||
Subject: [PATCH] crypto: lib/chacha20poly1305 - reimplement crypt_from_sg()
|
||||
routine
|
||||
|
||||
commit d95312a3ccc0cd544d374be2fc45aeaa803e5fd9 upstream.
|
||||
|
||||
Reimplement the library routines to perform chacha20poly1305 en/decryption
|
||||
on scatterlists, without [ab]using the [deprecated] blkcipher interface,
|
||||
which is rather heavyweight and does things we don't really need.
|
||||
|
||||
Instead, we use the sg_miter API in a novel and clever way, to iterate
|
||||
over the scatterlist in-place (i.e., source == destination, which is the
|
||||
only way this library is expected to be used). That way, we don't have to
|
||||
iterate over two scatterlists in parallel.
|
||||
|
||||
Another optimization is that, instead of relying on the blkcipher walker
|
||||
to present the input in suitable chunks, we recognize that ChaCha is a
|
||||
streamcipher, and so we can simply deal with partial blocks by keeping a
|
||||
block of cipherstream on the stack and use crypto_xor() to mix it with
|
||||
the in/output.
|
||||
|
||||
Finally, we omit the scatterwalk_map_and_copy() call if the last element of
|
||||
the scatterlist covers the MAC as well (which is the common case),
|
||||
avoiding the need to walk the scatterlist and kmap() the page twice.
|
||||
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
include/crypto/chacha20poly1305.h | 11 ++
|
||||
lib/crypto/chacha20poly1305-selftest.c | 45 ++++++++
|
||||
lib/crypto/chacha20poly1305.c | 150 +++++++++++++++++++++++++
|
||||
3 files changed, 206 insertions(+)
|
||||
|
||||
--- a/include/crypto/chacha20poly1305.h
|
||||
+++ b/include/crypto/chacha20poly1305.h
|
||||
@@ -7,6 +7,7 @@
|
||||
#define __CHACHA20POLY1305_H
|
||||
|
||||
#include <linux/types.h>
|
||||
+#include <linux/scatterlist.h>
|
||||
|
||||
enum chacha20poly1305_lengths {
|
||||
XCHACHA20POLY1305_NONCE_SIZE = 24,
|
||||
@@ -34,4 +35,14 @@ bool __must_check xchacha20poly1305_decr
|
||||
const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE]);
|
||||
|
||||
+bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, size_t src_len,
|
||||
+ const u8 *ad, const size_t ad_len,
|
||||
+ const u64 nonce,
|
||||
+ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
|
||||
+
|
||||
+bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len,
|
||||
+ const u8 *ad, const size_t ad_len,
|
||||
+ const u64 nonce,
|
||||
+ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
|
||||
+
|
||||
#endif /* __CHACHA20POLY1305_H */
|
||||
--- a/lib/crypto/chacha20poly1305-selftest.c
|
||||
+++ b/lib/crypto/chacha20poly1305-selftest.c
|
||||
@@ -7250,6 +7250,7 @@ bool __init chacha20poly1305_selftest(vo
|
||||
enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 };
|
||||
size_t i;
|
||||
u8 *computed_output = NULL, *heap_src = NULL;
|
||||
+ struct scatterlist sg_src;
|
||||
bool success = true, ret;
|
||||
|
||||
heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
|
||||
@@ -7280,6 +7281,29 @@ bool __init chacha20poly1305_selftest(vo
|
||||
}
|
||||
}
|
||||
|
||||
+ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {
|
||||
+ if (chacha20poly1305_enc_vectors[i].nlen != 8)
|
||||
+ continue;
|
||||
+ memcpy(heap_src, chacha20poly1305_enc_vectors[i].input,
|
||||
+ chacha20poly1305_enc_vectors[i].ilen);
|
||||
+ sg_init_one(&sg_src, heap_src,
|
||||
+ chacha20poly1305_enc_vectors[i].ilen + POLY1305_DIGEST_SIZE);
|
||||
+ chacha20poly1305_encrypt_sg_inplace(&sg_src,
|
||||
+ chacha20poly1305_enc_vectors[i].ilen,
|
||||
+ chacha20poly1305_enc_vectors[i].assoc,
|
||||
+ chacha20poly1305_enc_vectors[i].alen,
|
||||
+ get_unaligned_le64(chacha20poly1305_enc_vectors[i].nonce),
|
||||
+ chacha20poly1305_enc_vectors[i].key);
|
||||
+ if (memcmp(heap_src,
|
||||
+ chacha20poly1305_enc_vectors[i].output,
|
||||
+ chacha20poly1305_enc_vectors[i].ilen +
|
||||
+ POLY1305_DIGEST_SIZE)) {
|
||||
+ pr_err("chacha20poly1305 sg encryption self-test %zu: FAIL\n",
|
||||
+ i + 1);
|
||||
+ success = false;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
|
||||
memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
|
||||
ret = chacha20poly1305_decrypt(computed_output,
|
||||
@@ -7301,6 +7325,27 @@ bool __init chacha20poly1305_selftest(vo
|
||||
}
|
||||
}
|
||||
|
||||
+ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
|
||||
+ memcpy(heap_src, chacha20poly1305_dec_vectors[i].input,
|
||||
+ chacha20poly1305_dec_vectors[i].ilen);
|
||||
+ sg_init_one(&sg_src, heap_src,
|
||||
+ chacha20poly1305_dec_vectors[i].ilen);
|
||||
+ ret = chacha20poly1305_decrypt_sg_inplace(&sg_src,
|
||||
+ chacha20poly1305_dec_vectors[i].ilen,
|
||||
+ chacha20poly1305_dec_vectors[i].assoc,
|
||||
+ chacha20poly1305_dec_vectors[i].alen,
|
||||
+ get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce),
|
||||
+ chacha20poly1305_dec_vectors[i].key);
|
||||
+ if (!decryption_success(ret,
|
||||
+ chacha20poly1305_dec_vectors[i].failure,
|
||||
+ memcmp(heap_src, chacha20poly1305_dec_vectors[i].output,
|
||||
+ chacha20poly1305_dec_vectors[i].ilen -
|
||||
+ POLY1305_DIGEST_SIZE))) {
|
||||
+ pr_err("chacha20poly1305 sg decryption self-test %zu: FAIL\n",
|
||||
+ i + 1);
|
||||
+ success = false;
|
||||
+ }
|
||||
+ }
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) {
|
||||
memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
|
||||
--- a/lib/crypto/chacha20poly1305.c
|
||||
+++ b/lib/crypto/chacha20poly1305.c
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <crypto/chacha20poly1305.h>
|
||||
#include <crypto/chacha.h>
|
||||
#include <crypto/poly1305.h>
|
||||
+#include <crypto/scatterwalk.h>
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
#include <linux/kernel.h>
|
||||
@@ -205,6 +206,155 @@ bool xchacha20poly1305_decrypt(u8 *dst,
|
||||
}
|
||||
EXPORT_SYMBOL(xchacha20poly1305_decrypt);
|
||||
|
||||
+static
|
||||
+bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
|
||||
+ const size_t src_len,
|
||||
+ const u8 *ad, const size_t ad_len,
|
||||
+ const u64 nonce,
|
||||
+ const u8 key[CHACHA20POLY1305_KEY_SIZE],
|
||||
+ int encrypt)
|
||||
+{
|
||||
+ const u8 *pad0 = page_address(ZERO_PAGE(0));
|
||||
+ struct poly1305_desc_ctx poly1305_state;
|
||||
+ u32 chacha_state[CHACHA_STATE_WORDS];
|
||||
+ struct sg_mapping_iter miter;
|
||||
+ size_t partial = 0;
|
||||
+ unsigned int flags;
|
||||
+ bool ret = true;
|
||||
+ int sl;
|
||||
+ union {
|
||||
+ struct {
|
||||
+ u32 k[CHACHA_KEY_WORDS];
|
||||
+ __le64 iv[2];
|
||||
+ };
|
||||
+ u8 block0[POLY1305_KEY_SIZE];
|
||||
+ u8 chacha_stream[CHACHA_BLOCK_SIZE];
|
||||
+ struct {
|
||||
+ u8 mac[2][POLY1305_DIGEST_SIZE];
|
||||
+ };
|
||||
+ __le64 lens[2];
|
||||
+ } b __aligned(16);
|
||||
+
|
||||
+ chacha_load_key(b.k, key);
|
||||
+
|
||||
+ b.iv[0] = 0;
|
||||
+ b.iv[1] = cpu_to_le64(nonce);
|
||||
+
|
||||
+ chacha_init(chacha_state, b.k, (u8 *)b.iv);
|
||||
+ chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
|
||||
+ poly1305_init(&poly1305_state, b.block0);
|
||||
+
|
||||
+ if (unlikely(ad_len)) {
|
||||
+ poly1305_update(&poly1305_state, ad, ad_len);
|
||||
+ if (ad_len & 0xf)
|
||||
+ poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf));
|
||||
+ }
|
||||
+
|
||||
+ flags = SG_MITER_TO_SG;
|
||||
+ if (!preemptible())
|
||||
+ flags |= SG_MITER_ATOMIC;
|
||||
+
|
||||
+ sg_miter_start(&miter, src, sg_nents(src), flags);
|
||||
+
|
||||
+ for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) {
|
||||
+ u8 *addr = miter.addr;
|
||||
+ size_t length = min_t(size_t, sl, miter.length);
|
||||
+
|
||||
+ if (!encrypt)
|
||||
+ poly1305_update(&poly1305_state, addr, length);
|
||||
+
|
||||
+ if (unlikely(partial)) {
|
||||
+ size_t l = min(length, CHACHA_BLOCK_SIZE - partial);
|
||||
+
|
||||
+ crypto_xor(addr, b.chacha_stream + partial, l);
|
||||
+ partial = (partial + l) & (CHACHA_BLOCK_SIZE - 1);
|
||||
+
|
||||
+ addr += l;
|
||||
+ length -= l;
|
||||
+ }
|
||||
+
|
||||
+ if (likely(length >= CHACHA_BLOCK_SIZE || length == sl)) {
|
||||
+ size_t l = length;
|
||||
+
|
||||
+ if (unlikely(length < sl))
|
||||
+ l &= ~(CHACHA_BLOCK_SIZE - 1);
|
||||
+ chacha_crypt(chacha_state, addr, addr, l, 20);
|
||||
+ addr += l;
|
||||
+ length -= l;
|
||||
+ }
|
||||
+
|
||||
+ if (unlikely(length > 0)) {
|
||||
+ chacha_crypt(chacha_state, b.chacha_stream, pad0,
|
||||
+ CHACHA_BLOCK_SIZE, 20);
|
||||
+ crypto_xor(addr, b.chacha_stream, length);
|
||||
+ partial = length;
|
||||
+ }
|
||||
+
|
||||
+ if (encrypt)
|
||||
+ poly1305_update(&poly1305_state, miter.addr,
|
||||
+ min_t(size_t, sl, miter.length));
|
||||
+ }
|
||||
+
|
||||
+ if (src_len & 0xf)
|
||||
+ poly1305_update(&poly1305_state, pad0, 0x10 - (src_len & 0xf));
|
||||
+
|
||||
+ b.lens[0] = cpu_to_le64(ad_len);
|
||||
+ b.lens[1] = cpu_to_le64(src_len);
|
||||
+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens));
|
||||
+
|
||||
+ if (likely(sl <= -POLY1305_DIGEST_SIZE)) {
|
||||
+ if (encrypt) {
|
||||
+ poly1305_final(&poly1305_state,
|
||||
+ miter.addr + miter.length + sl);
|
||||
+ ret = true;
|
||||
+ } else {
|
||||
+ poly1305_final(&poly1305_state, b.mac[0]);
|
||||
+ ret = !crypto_memneq(b.mac[0],
|
||||
+ miter.addr + miter.length + sl,
|
||||
+ POLY1305_DIGEST_SIZE);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ sg_miter_stop(&miter);
|
||||
+
|
||||
+ if (unlikely(sl > -POLY1305_DIGEST_SIZE)) {
|
||||
+ poly1305_final(&poly1305_state, b.mac[1]);
|
||||
+ scatterwalk_map_and_copy(b.mac[encrypt], src, src_len,
|
||||
+ sizeof(b.mac[1]), encrypt);
|
||||
+ ret = encrypt ||
|
||||
+ !crypto_memneq(b.mac[0], b.mac[1], POLY1305_DIGEST_SIZE);
|
||||
+ }
|
||||
+
|
||||
+ memzero_explicit(chacha_state, sizeof(chacha_state));
|
||||
+ memzero_explicit(&b, sizeof(b));
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, size_t src_len,
|
||||
+ const u8 *ad, const size_t ad_len,
|
||||
+ const u64 nonce,
|
||||
+ const u8 key[CHACHA20POLY1305_KEY_SIZE])
|
||||
+{
|
||||
+ return chacha20poly1305_crypt_sg_inplace(src, src_len, ad, ad_len,
|
||||
+ nonce, key, 1);
|
||||
+}
|
||||
+EXPORT_SYMBOL(chacha20poly1305_encrypt_sg_inplace);
|
||||
+
|
||||
+bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len,
|
||||
+ const u8 *ad, const size_t ad_len,
|
||||
+ const u64 nonce,
|
||||
+ const u8 key[CHACHA20POLY1305_KEY_SIZE])
|
||||
+{
|
||||
+ if (unlikely(src_len < POLY1305_DIGEST_SIZE))
|
||||
+ return false;
|
||||
+
|
||||
+ return chacha20poly1305_crypt_sg_inplace(src,
|
||||
+ src_len - POLY1305_DIGEST_SIZE,
|
||||
+ ad, ad_len, nonce, key, 0);
|
||||
+}
|
||||
+EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace);
|
||||
+
|
||||
static int __init mod_init(void)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
|
@ -0,0 +1,68 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Biggers <ebiggers@google.com>
|
||||
Date: Sun, 17 Nov 2019 23:21:29 -0800
|
||||
Subject: [PATCH] crypto: chacha_generic - remove unnecessary setkey()
|
||||
functions
|
||||
|
||||
commit 2043323a799a660bc84bbee404cf7a2617ec6157 upstream.
|
||||
|
||||
Use chacha20_setkey() and chacha12_setkey() from
|
||||
<crypto/internal/chacha.h> instead of defining them again in
|
||||
chacha_generic.c.
|
||||
|
||||
Signed-off-by: Eric Biggers <ebiggers@google.com>
|
||||
Acked-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
crypto/chacha_generic.c | 18 +++---------------
|
||||
1 file changed, 3 insertions(+), 15 deletions(-)
|
||||
|
||||
--- a/crypto/chacha_generic.c
|
||||
+++ b/crypto/chacha_generic.c
|
||||
@@ -37,18 +37,6 @@ static int chacha_stream_xor(struct skci
|
||||
return err;
|
||||
}
|
||||
|
||||
-static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
- unsigned int keysize)
|
||||
-{
|
||||
- return chacha_setkey(tfm, key, keysize, 20);
|
||||
-}
|
||||
-
|
||||
-static int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
- unsigned int keysize)
|
||||
-{
|
||||
- return chacha_setkey(tfm, key, keysize, 12);
|
||||
-}
|
||||
-
|
||||
static int crypto_chacha_crypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
@@ -91,7 +79,7 @@ static struct skcipher_alg algs[] = {
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = CHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
- .setkey = crypto_chacha20_setkey,
|
||||
+ .setkey = chacha20_setkey,
|
||||
.encrypt = crypto_chacha_crypt,
|
||||
.decrypt = crypto_chacha_crypt,
|
||||
}, {
|
||||
@@ -106,7 +94,7 @@ static struct skcipher_alg algs[] = {
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = XCHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
- .setkey = crypto_chacha20_setkey,
|
||||
+ .setkey = chacha20_setkey,
|
||||
.encrypt = crypto_xchacha_crypt,
|
||||
.decrypt = crypto_xchacha_crypt,
|
||||
}, {
|
||||
@@ -121,7 +109,7 @@ static struct skcipher_alg algs[] = {
|
||||
.max_keysize = CHACHA_KEY_SIZE,
|
||||
.ivsize = XCHACHA_IV_SIZE,
|
||||
.chunksize = CHACHA_BLOCK_SIZE,
|
||||
- .setkey = crypto_chacha12_setkey,
|
||||
+ .setkey = chacha12_setkey,
|
||||
.encrypt = crypto_xchacha_crypt,
|
||||
.decrypt = crypto_xchacha_crypt,
|
||||
}
|
@ -0,0 +1,31 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Biggers <ebiggers@google.com>
|
||||
Date: Sun, 17 Nov 2019 23:21:58 -0800
|
||||
Subject: [PATCH] crypto: x86/chacha - only unregister algorithms if registered
|
||||
|
||||
commit b62755aed3a3f5ca9edd2718339ccea3b6bbbe57 upstream.
|
||||
|
||||
It's not valid to call crypto_unregister_skciphers() without a prior
|
||||
call to crypto_register_skciphers().
|
||||
|
||||
Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function")
|
||||
Signed-off-by: Eric Biggers <ebiggers@google.com>
|
||||
Acked-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/chacha_glue.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/x86/crypto/chacha_glue.c
|
||||
+++ b/arch/x86/crypto/chacha_glue.c
|
||||
@@ -304,7 +304,8 @@ static int __init chacha_simd_mod_init(v
|
||||
|
||||
static void __exit chacha_simd_mod_fini(void)
|
||||
{
|
||||
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
|
||||
+ if (boot_cpu_has(X86_FEATURE_SSSE3))
|
||||
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
module_init(chacha_simd_mod_init);
|
@ -0,0 +1,83 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Biggers <ebiggers@google.com>
|
||||
Date: Sun, 17 Nov 2019 23:22:16 -0800
|
||||
Subject: [PATCH] crypto: lib/chacha20poly1305 - use chacha20_crypt()
|
||||
|
||||
commit 413808b71e6204b0cc1eeaa77960f7c3cd381d33 upstream.
|
||||
|
||||
Use chacha20_crypt() instead of chacha_crypt(), since it's not really
|
||||
appropriate for users of the ChaCha library API to be passing the number
|
||||
of rounds as an argument.
|
||||
|
||||
Signed-off-by: Eric Biggers <ebiggers@google.com>
|
||||
Acked-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
lib/crypto/chacha20poly1305.c | 16 ++++++++--------
|
||||
1 file changed, 8 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/lib/crypto/chacha20poly1305.c
|
||||
+++ b/lib/crypto/chacha20poly1305.c
|
||||
@@ -66,14 +66,14 @@ __chacha20poly1305_encrypt(u8 *dst, cons
|
||||
__le64 lens[2];
|
||||
} b;
|
||||
|
||||
- chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
|
||||
+ chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0));
|
||||
poly1305_init(&poly1305_state, b.block0);
|
||||
|
||||
poly1305_update(&poly1305_state, ad, ad_len);
|
||||
if (ad_len & 0xf)
|
||||
poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf));
|
||||
|
||||
- chacha_crypt(chacha_state, dst, src, src_len, 20);
|
||||
+ chacha20_crypt(chacha_state, dst, src, src_len);
|
||||
|
||||
poly1305_update(&poly1305_state, dst, src_len);
|
||||
if (src_len & 0xf)
|
||||
@@ -140,7 +140,7 @@ __chacha20poly1305_decrypt(u8 *dst, cons
|
||||
if (unlikely(src_len < POLY1305_DIGEST_SIZE))
|
||||
return false;
|
||||
|
||||
- chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
|
||||
+ chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0));
|
||||
poly1305_init(&poly1305_state, b.block0);
|
||||
|
||||
poly1305_update(&poly1305_state, ad, ad_len);
|
||||
@@ -160,7 +160,7 @@ __chacha20poly1305_decrypt(u8 *dst, cons
|
||||
|
||||
ret = crypto_memneq(b.mac, src + dst_len, POLY1305_DIGEST_SIZE);
|
||||
if (likely(!ret))
|
||||
- chacha_crypt(chacha_state, dst, src, dst_len, 20);
|
||||
+ chacha20_crypt(chacha_state, dst, src, dst_len);
|
||||
|
||||
memzero_explicit(&b, sizeof(b));
|
||||
|
||||
@@ -241,7 +241,7 @@ bool chacha20poly1305_crypt_sg_inplace(s
|
||||
b.iv[1] = cpu_to_le64(nonce);
|
||||
|
||||
chacha_init(chacha_state, b.k, (u8 *)b.iv);
|
||||
- chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
|
||||
+ chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0));
|
||||
poly1305_init(&poly1305_state, b.block0);
|
||||
|
||||
if (unlikely(ad_len)) {
|
||||
@@ -278,14 +278,14 @@ bool chacha20poly1305_crypt_sg_inplace(s
|
||||
|
||||
if (unlikely(length < sl))
|
||||
l &= ~(CHACHA_BLOCK_SIZE - 1);
|
||||
- chacha_crypt(chacha_state, addr, addr, l, 20);
|
||||
+ chacha20_crypt(chacha_state, addr, addr, l);
|
||||
addr += l;
|
||||
length -= l;
|
||||
}
|
||||
|
||||
if (unlikely(length > 0)) {
|
||||
- chacha_crypt(chacha_state, b.chacha_stream, pad0,
|
||||
- CHACHA_BLOCK_SIZE, 20);
|
||||
+ chacha20_crypt(chacha_state, b.chacha_stream, pad0,
|
||||
+ CHACHA_BLOCK_SIZE);
|
||||
crypto_xor(addr, b.chacha_stream, length);
|
||||
partial = length;
|
||||
}
|
@ -0,0 +1,275 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Mon, 25 Nov 2019 11:31:12 +0100
|
||||
Subject: [PATCH] crypto: arch - conditionalize crypto api in arch glue for lib
|
||||
code
|
||||
|
||||
commit 8394bfec51e0e565556101bcc4e2fe7551104cd8 upstream.
|
||||
|
||||
For glue code that's used by Zinc, the actual Crypto API functions might
|
||||
not necessarily exist, and don't need to exist either. Before this
|
||||
patch, there are valid build configurations that lead to an unbuildable
|
||||
kernel. This fixes it to conditionalize those symbols on the existence
|
||||
of the proper config entry.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Acked-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/chacha-glue.c | 26 ++++++++++++++++----------
|
||||
arch/arm/crypto/curve25519-glue.c | 5 +++--
|
||||
arch/arm/crypto/poly1305-glue.c | 9 ++++++---
|
||||
arch/arm64/crypto/chacha-neon-glue.c | 5 +++--
|
||||
arch/arm64/crypto/poly1305-glue.c | 5 +++--
|
||||
arch/mips/crypto/chacha-glue.c | 6 ++++--
|
||||
arch/mips/crypto/poly1305-glue.c | 6 ++++--
|
||||
arch/x86/crypto/blake2s-glue.c | 6 ++++--
|
||||
arch/x86/crypto/chacha_glue.c | 5 +++--
|
||||
arch/x86/crypto/curve25519-x86_64.c | 7 ++++---
|
||||
arch/x86/crypto/poly1305_glue.c | 5 +++--
|
||||
11 files changed, 53 insertions(+), 32 deletions(-)
|
||||
|
||||
--- a/arch/arm/crypto/chacha-glue.c
|
||||
+++ b/arch/arm/crypto/chacha-glue.c
|
||||
@@ -286,11 +286,13 @@ static struct skcipher_alg neon_algs[] =
|
||||
|
||||
static int __init chacha_simd_mod_init(void)
|
||||
{
|
||||
- int err;
|
||||
+ int err = 0;
|
||||
|
||||
- err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
|
||||
- if (err)
|
||||
- return err;
|
||||
+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) {
|
||||
+ err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+ }
|
||||
|
||||
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
|
||||
int i;
|
||||
@@ -310,18 +312,22 @@ static int __init chacha_simd_mod_init(v
|
||||
static_branch_enable(&use_neon);
|
||||
}
|
||||
|
||||
- err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
|
||||
- if (err)
|
||||
- crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
|
||||
+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) {
|
||||
+ err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
|
||||
+ if (err)
|
||||
+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
|
||||
+ }
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static void __exit chacha_simd_mod_fini(void)
|
||||
{
|
||||
- crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
|
||||
- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
|
||||
- crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
|
||||
+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) {
|
||||
+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
|
||||
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
|
||||
+ crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
|
||||
+ }
|
||||
}
|
||||
|
||||
module_init(chacha_simd_mod_init);
|
||||
--- a/arch/arm/crypto/curve25519-glue.c
|
||||
+++ b/arch/arm/crypto/curve25519-glue.c
|
||||
@@ -108,14 +108,15 @@ static int __init mod_init(void)
|
||||
{
|
||||
if (elf_hwcap & HWCAP_NEON) {
|
||||
static_branch_enable(&have_neon);
|
||||
- return crypto_register_kpp(&curve25519_alg);
|
||||
+ return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
|
||||
+ crypto_register_kpp(&curve25519_alg) : 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit mod_exit(void)
|
||||
{
|
||||
- if (elf_hwcap & HWCAP_NEON)
|
||||
+ if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON)
|
||||
crypto_unregister_kpp(&curve25519_alg);
|
||||
}
|
||||
|
||||
--- a/arch/arm/crypto/poly1305-glue.c
|
||||
+++ b/arch/arm/crypto/poly1305-glue.c
|
||||
@@ -249,16 +249,19 @@ static int __init arm_poly1305_mod_init(
|
||||
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
|
||||
(elf_hwcap & HWCAP_NEON))
|
||||
static_branch_enable(&have_neon);
|
||||
- else
|
||||
+ else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
|
||||
/* register only the first entry */
|
||||
return crypto_register_shash(&arm_poly1305_algs[0]);
|
||||
|
||||
- return crypto_register_shashes(arm_poly1305_algs,
|
||||
- ARRAY_SIZE(arm_poly1305_algs));
|
||||
+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
|
||||
+ crypto_register_shashes(arm_poly1305_algs,
|
||||
+ ARRAY_SIZE(arm_poly1305_algs)) : 0;
|
||||
}
|
||||
|
||||
static void __exit arm_poly1305_mod_exit(void)
|
||||
{
|
||||
+ if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
|
||||
+ return;
|
||||
if (!static_branch_likely(&have_neon)) {
|
||||
crypto_unregister_shash(&arm_poly1305_algs[0]);
|
||||
return;
|
||||
--- a/arch/arm64/crypto/chacha-neon-glue.c
|
||||
+++ b/arch/arm64/crypto/chacha-neon-glue.c
|
||||
@@ -211,12 +211,13 @@ static int __init chacha_simd_mod_init(v
|
||||
|
||||
static_branch_enable(&have_neon);
|
||||
|
||||
- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
|
||||
+ return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ?
|
||||
+ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
|
||||
}
|
||||
|
||||
static void __exit chacha_simd_mod_fini(void)
|
||||
{
|
||||
- if (cpu_have_named_feature(ASIMD))
|
||||
+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) && cpu_have_named_feature(ASIMD))
|
||||
crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
--- a/arch/arm64/crypto/poly1305-glue.c
|
||||
+++ b/arch/arm64/crypto/poly1305-glue.c
|
||||
@@ -220,12 +220,13 @@ static int __init neon_poly1305_mod_init
|
||||
|
||||
static_branch_enable(&have_neon);
|
||||
|
||||
- return crypto_register_shash(&neon_poly1305_alg);
|
||||
+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
|
||||
+ crypto_register_shash(&neon_poly1305_alg) : 0;
|
||||
}
|
||||
|
||||
static void __exit neon_poly1305_mod_exit(void)
|
||||
{
|
||||
- if (cpu_have_named_feature(ASIMD))
|
||||
+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
|
||||
crypto_unregister_shash(&neon_poly1305_alg);
|
||||
}
|
||||
|
||||
--- a/arch/mips/crypto/chacha-glue.c
|
||||
+++ b/arch/mips/crypto/chacha-glue.c
|
||||
@@ -128,12 +128,14 @@ static struct skcipher_alg algs[] = {
|
||||
|
||||
static int __init chacha_simd_mod_init(void)
|
||||
{
|
||||
- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
|
||||
+ return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ?
|
||||
+ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
|
||||
}
|
||||
|
||||
static void __exit chacha_simd_mod_fini(void)
|
||||
{
|
||||
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
|
||||
+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER))
|
||||
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
module_init(chacha_simd_mod_init);
|
||||
--- a/arch/mips/crypto/poly1305-glue.c
|
||||
+++ b/arch/mips/crypto/poly1305-glue.c
|
||||
@@ -187,12 +187,14 @@ static struct shash_alg mips_poly1305_al
|
||||
|
||||
static int __init mips_poly1305_mod_init(void)
|
||||
{
|
||||
- return crypto_register_shash(&mips_poly1305_alg);
|
||||
+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
|
||||
+ crypto_register_shash(&mips_poly1305_alg) : 0;
|
||||
}
|
||||
|
||||
static void __exit mips_poly1305_mod_exit(void)
|
||||
{
|
||||
- crypto_unregister_shash(&mips_poly1305_alg);
|
||||
+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
|
||||
+ crypto_unregister_shash(&mips_poly1305_alg);
|
||||
}
|
||||
|
||||
module_init(mips_poly1305_mod_init);
|
||||
--- a/arch/x86/crypto/blake2s-glue.c
|
||||
+++ b/arch/x86/crypto/blake2s-glue.c
|
||||
@@ -210,12 +210,14 @@ static int __init blake2s_mod_init(void)
|
||||
XFEATURE_MASK_AVX512, NULL))
|
||||
static_branch_enable(&blake2s_use_avx512);
|
||||
|
||||
- return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
|
||||
+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
|
||||
+ crypto_register_shashes(blake2s_algs,
|
||||
+ ARRAY_SIZE(blake2s_algs)) : 0;
|
||||
}
|
||||
|
||||
static void __exit blake2s_mod_exit(void)
|
||||
{
|
||||
- if (boot_cpu_has(X86_FEATURE_SSSE3))
|
||||
+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
|
||||
crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
|
||||
}
|
||||
|
||||
--- a/arch/x86/crypto/chacha_glue.c
|
||||
+++ b/arch/x86/crypto/chacha_glue.c
|
||||
@@ -299,12 +299,13 @@ static int __init chacha_simd_mod_init(v
|
||||
boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
|
||||
static_branch_enable(&chacha_use_avx512vl);
|
||||
}
|
||||
- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
|
||||
+ return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ?
|
||||
+ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
|
||||
}
|
||||
|
||||
static void __exit chacha_simd_mod_fini(void)
|
||||
{
|
||||
- if (boot_cpu_has(X86_FEATURE_SSSE3))
|
||||
+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) && boot_cpu_has(X86_FEATURE_SSSE3))
|
||||
crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
--- a/arch/x86/crypto/curve25519-x86_64.c
|
||||
+++ b/arch/x86/crypto/curve25519-x86_64.c
|
||||
@@ -2457,13 +2457,14 @@ static int __init curve25519_mod_init(vo
|
||||
static_branch_enable(&curve25519_use_adx);
|
||||
else
|
||||
return 0;
|
||||
- return crypto_register_kpp(&curve25519_alg);
|
||||
+ return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
|
||||
+ crypto_register_kpp(&curve25519_alg) : 0;
|
||||
}
|
||||
|
||||
static void __exit curve25519_mod_exit(void)
|
||||
{
|
||||
- if (boot_cpu_has(X86_FEATURE_BMI2) ||
|
||||
- boot_cpu_has(X86_FEATURE_ADX))
|
||||
+ if (IS_REACHABLE(CONFIG_CRYPTO_KPP) &&
|
||||
+ (boot_cpu_has(X86_FEATURE_BMI2) || boot_cpu_has(X86_FEATURE_ADX)))
|
||||
crypto_unregister_kpp(&curve25519_alg);
|
||||
}
|
||||
|
||||
--- a/arch/x86/crypto/poly1305_glue.c
|
||||
+++ b/arch/x86/crypto/poly1305_glue.c
|
||||
@@ -224,12 +224,13 @@ static int __init poly1305_simd_mod_init
|
||||
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
|
||||
static_branch_enable(&poly1305_use_avx2);
|
||||
|
||||
- return crypto_register_shash(&alg);
|
||||
+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0;
|
||||
}
|
||||
|
||||
static void __exit poly1305_simd_mod_exit(void)
|
||||
{
|
||||
- crypto_unregister_shash(&alg);
|
||||
+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
|
||||
+ crypto_unregister_shash(&alg);
|
||||
}
|
||||
|
||||
module_init(poly1305_simd_mod_init);
|
@ -0,0 +1,35 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= <valdis.kletnieks@vt.edu>
|
||||
Date: Thu, 5 Dec 2019 20:58:36 -0500
|
||||
Subject: [PATCH] crypto: chacha - fix warning message in header file
|
||||
|
||||
commit 579d705cd64e44f3fcda1a6cfd5f37468a5ddf63 upstream.
|
||||
|
||||
Building with W=1 causes a warning:
|
||||
|
||||
CC [M] arch/x86/crypto/chacha_glue.o
|
||||
In file included from arch/x86/crypto/chacha_glue.c:10:
|
||||
./include/crypto/internal/chacha.h:37:1: warning: 'inline' is not at beginning of declaration [-Wold-style-declaration]
|
||||
37 | static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
| ^~~~~~
|
||||
|
||||
Straighten out the order to match the rest of the header file.
|
||||
|
||||
Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
include/crypto/internal/chacha.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/include/crypto/internal/chacha.h
|
||||
+++ b/include/crypto/internal/chacha.h
|
||||
@@ -34,7 +34,7 @@ static inline int chacha20_setkey(struct
|
||||
return chacha_setkey(tfm, key, keysize, 20);
|
||||
}
|
||||
|
||||
-static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
+static inline int chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
unsigned int keysize)
|
||||
{
|
||||
return chacha_setkey(tfm, key, keysize, 12);
|
@ -0,0 +1,38 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Wed, 11 Dec 2019 10:26:39 +0100
|
||||
Subject: [PATCH] crypto: arm/curve25519 - add arch-specific key generation
|
||||
function
|
||||
|
||||
commit 84faa307249b341f6ad8de3e1869d77a65e26669 upstream.
|
||||
|
||||
Somehow this was forgotten when Zinc was being split into oddly shaped
|
||||
pieces, resulting in linker errors. The x86_64 glue has a specific key
|
||||
generation implementation, but the Arm one does not. However, it can
|
||||
still receive the NEON speedups by calling the ordinary DH function
|
||||
using the base point.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Acked-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/curve25519-glue.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
--- a/arch/arm/crypto/curve25519-glue.c
|
||||
+++ b/arch/arm/crypto/curve25519-glue.c
|
||||
@@ -38,6 +38,13 @@ void curve25519_arch(u8 out[CURVE25519_K
|
||||
}
|
||||
EXPORT_SYMBOL(curve25519_arch);
|
||||
|
||||
+void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
|
||||
+ const u8 secret[CURVE25519_KEY_SIZE])
|
||||
+{
|
||||
+ return curve25519_arch(pub, secret, curve25519_base_point);
|
||||
+}
|
||||
+EXPORT_SYMBOL(curve25519_base_arch);
|
||||
+
|
||||
static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
|
||||
unsigned int len)
|
||||
{
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,171 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Sun, 5 Jan 2020 22:40:49 -0500
|
||||
Subject: [PATCH] crypto: {arm,arm64,mips}/poly1305 - remove redundant
|
||||
non-reduction from emit
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
commit 31899908a0d248b030b4464425b86c717e0007d4 upstream.
|
||||
|
||||
This appears to be some kind of copy and paste error, and is actually
|
||||
dead code.
|
||||
|
||||
Pre: f = 0 ⇒ (f >> 32) = 0
|
||||
f = (f >> 32) + le32_to_cpu(digest[0]);
|
||||
Post: 0 ≤ f < 2³²
|
||||
put_unaligned_le32(f, dst);
|
||||
|
||||
Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0
|
||||
f = (f >> 32) + le32_to_cpu(digest[1]);
|
||||
Post: 0 ≤ f < 2³²
|
||||
put_unaligned_le32(f, dst + 4);
|
||||
|
||||
Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0
|
||||
f = (f >> 32) + le32_to_cpu(digest[2]);
|
||||
Post: 0 ≤ f < 2³²
|
||||
put_unaligned_le32(f, dst + 8);
|
||||
|
||||
Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0
|
||||
f = (f >> 32) + le32_to_cpu(digest[3]);
|
||||
Post: 0 ≤ f < 2³²
|
||||
put_unaligned_le32(f, dst + 12);
|
||||
|
||||
Therefore this sequence is redundant. And Andy's code appears to handle
|
||||
misalignment acceptably.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Tested-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/poly1305-glue.c | 18 ++----------------
|
||||
arch/arm64/crypto/poly1305-glue.c | 18 ++----------------
|
||||
arch/mips/crypto/poly1305-glue.c | 18 ++----------------
|
||||
3 files changed, 6 insertions(+), 48 deletions(-)
|
||||
|
||||
--- a/arch/arm/crypto/poly1305-glue.c
|
||||
+++ b/arch/arm/crypto/poly1305-glue.c
|
||||
@@ -20,7 +20,7 @@
|
||||
|
||||
void poly1305_init_arm(void *state, const u8 *key);
|
||||
void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
|
||||
-void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce);
|
||||
+void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);
|
||||
|
||||
void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
|
||||
{
|
||||
@@ -179,9 +179,6 @@ EXPORT_SYMBOL(poly1305_update_arch);
|
||||
|
||||
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
|
||||
{
|
||||
- __le32 digest[4];
|
||||
- u64 f = 0;
|
||||
-
|
||||
if (unlikely(dctx->buflen)) {
|
||||
dctx->buf[dctx->buflen++] = 1;
|
||||
memset(dctx->buf + dctx->buflen, 0,
|
||||
@@ -189,18 +186,7 @@ void poly1305_final_arch(struct poly1305
|
||||
poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
|
||||
}
|
||||
|
||||
- poly1305_emit_arm(&dctx->h, digest, dctx->s);
|
||||
-
|
||||
- /* mac = (h + s) % (2^128) */
|
||||
- f = (f >> 32) + le32_to_cpu(digest[0]);
|
||||
- put_unaligned_le32(f, dst);
|
||||
- f = (f >> 32) + le32_to_cpu(digest[1]);
|
||||
- put_unaligned_le32(f, dst + 4);
|
||||
- f = (f >> 32) + le32_to_cpu(digest[2]);
|
||||
- put_unaligned_le32(f, dst + 8);
|
||||
- f = (f >> 32) + le32_to_cpu(digest[3]);
|
||||
- put_unaligned_le32(f, dst + 12);
|
||||
-
|
||||
+ poly1305_emit_arm(&dctx->h, dst, dctx->s);
|
||||
*dctx = (struct poly1305_desc_ctx){};
|
||||
}
|
||||
EXPORT_SYMBOL(poly1305_final_arch);
|
||||
--- a/arch/arm64/crypto/poly1305-glue.c
|
||||
+++ b/arch/arm64/crypto/poly1305-glue.c
|
||||
@@ -21,7 +21,7 @@
|
||||
asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
|
||||
asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
|
||||
asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
|
||||
-asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce);
|
||||
+asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
|
||||
|
||||
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
|
||||
|
||||
@@ -162,9 +162,6 @@ EXPORT_SYMBOL(poly1305_update_arch);
|
||||
|
||||
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
|
||||
{
|
||||
- __le32 digest[4];
|
||||
- u64 f = 0;
|
||||
-
|
||||
if (unlikely(dctx->buflen)) {
|
||||
dctx->buf[dctx->buflen++] = 1;
|
||||
memset(dctx->buf + dctx->buflen, 0,
|
||||
@@ -172,18 +169,7 @@ void poly1305_final_arch(struct poly1305
|
||||
poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
|
||||
}
|
||||
|
||||
- poly1305_emit(&dctx->h, digest, dctx->s);
|
||||
-
|
||||
- /* mac = (h + s) % (2^128) */
|
||||
- f = (f >> 32) + le32_to_cpu(digest[0]);
|
||||
- put_unaligned_le32(f, dst);
|
||||
- f = (f >> 32) + le32_to_cpu(digest[1]);
|
||||
- put_unaligned_le32(f, dst + 4);
|
||||
- f = (f >> 32) + le32_to_cpu(digest[2]);
|
||||
- put_unaligned_le32(f, dst + 8);
|
||||
- f = (f >> 32) + le32_to_cpu(digest[3]);
|
||||
- put_unaligned_le32(f, dst + 12);
|
||||
-
|
||||
+ poly1305_emit(&dctx->h, dst, dctx->s);
|
||||
*dctx = (struct poly1305_desc_ctx){};
|
||||
}
|
||||
EXPORT_SYMBOL(poly1305_final_arch);
|
||||
--- a/arch/mips/crypto/poly1305-glue.c
|
||||
+++ b/arch/mips/crypto/poly1305-glue.c
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
asmlinkage void poly1305_init_mips(void *state, const u8 *key);
|
||||
asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
|
||||
-asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce);
|
||||
+asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce);
|
||||
|
||||
void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
|
||||
{
|
||||
@@ -134,9 +134,6 @@ EXPORT_SYMBOL(poly1305_update_arch);
|
||||
|
||||
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
|
||||
{
|
||||
- __le32 digest[4];
|
||||
- u64 f = 0;
|
||||
-
|
||||
if (unlikely(dctx->buflen)) {
|
||||
dctx->buf[dctx->buflen++] = 1;
|
||||
memset(dctx->buf + dctx->buflen, 0,
|
||||
@@ -144,18 +141,7 @@ void poly1305_final_arch(struct poly1305
|
||||
poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
|
||||
}
|
||||
|
||||
- poly1305_emit_mips(&dctx->h, digest, dctx->s);
|
||||
-
|
||||
- /* mac = (h + s) % (2^128) */
|
||||
- f = (f >> 32) + le32_to_cpu(digest[0]);
|
||||
- put_unaligned_le32(f, dst);
|
||||
- f = (f >> 32) + le32_to_cpu(digest[1]);
|
||||
- put_unaligned_le32(f, dst + 4);
|
||||
- f = (f >> 32) + le32_to_cpu(digest[2]);
|
||||
- put_unaligned_le32(f, dst + 8);
|
||||
- f = (f >> 32) + le32_to_cpu(digest[3]);
|
||||
- put_unaligned_le32(f, dst + 12);
|
||||
-
|
||||
+ poly1305_emit_mips(&dctx->h, dst, dctx->s);
|
||||
*dctx = (struct poly1305_desc_ctx){};
|
||||
}
|
||||
EXPORT_SYMBOL(poly1305_final_arch);
|
@ -0,0 +1,102 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Date: Wed, 8 Jan 2020 12:37:35 +0800
|
||||
Subject: [PATCH] crypto: curve25519 - Fix selftest build error
|
||||
|
||||
commit a8bdf2c42ee4d1ee42af1f3601f85de94e70a421 upstream.
|
||||
|
||||
If CRYPTO_CURVE25519 is y, CRYPTO_LIB_CURVE25519_GENERIC will be
|
||||
y, but CRYPTO_LIB_CURVE25519 may be set to m, this causes build
|
||||
errors:
|
||||
|
||||
lib/crypto/curve25519-selftest.o: In function `curve25519':
|
||||
curve25519-selftest.c:(.text.unlikely+0xc): undefined reference to `curve25519_arch'
|
||||
lib/crypto/curve25519-selftest.o: In function `curve25519_selftest':
|
||||
curve25519-selftest.c:(.init.text+0x17e): undefined reference to `curve25519_base_arch'
|
||||
|
||||
This is because the curve25519 self-test code is being controlled
|
||||
by the GENERIC option rather than the overall CURVE25519 option,
|
||||
as is the case with blake2s. To recap, the GENERIC and ARCH options
|
||||
for CURVE25519 are internal only and selected by users such as
|
||||
the Crypto API, or the externally visible CURVE25519 option which
|
||||
in turn is selected by wireguard. The self-test is specific to the
|
||||
external CURVE25519 option and should not be enabled by the
|
||||
Crypto API.
|
||||
|
||||
This patch fixes this by splitting the GENERIC module from the
|
||||
CURVE25519 module with the latter now containing just the self-test.
|
||||
|
||||
Reported-by: Hulk Robot <hulkci@huawei.com>
|
||||
Fixes: aa127963f1ca ("crypto: lib/curve25519 - re-add selftests")
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
lib/crypto/Makefile | 9 ++++++---
|
||||
lib/crypto/curve25519-generic.c | 24 ++++++++++++++++++++++++
|
||||
lib/crypto/curve25519.c | 7 -------
|
||||
3 files changed, 30 insertions(+), 10 deletions(-)
|
||||
create mode 100644 lib/crypto/curve25519-generic.c
|
||||
|
||||
--- a/lib/crypto/Makefile
|
||||
+++ b/lib/crypto/Makefile
|
||||
@@ -19,9 +19,12 @@ libblake2s-y += blake2s.o
|
||||
obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o
|
||||
libchacha20poly1305-y += chacha20poly1305.o
|
||||
|
||||
-obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519.o
|
||||
-libcurve25519-y := curve25519-fiat32.o
|
||||
-libcurve25519-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o
|
||||
+obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519-generic.o
|
||||
+libcurve25519-generic-y := curve25519-fiat32.o
|
||||
+libcurve25519-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o
|
||||
+libcurve25519-generic-y += curve25519-generic.o
|
||||
+
|
||||
+obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o
|
||||
libcurve25519-y += curve25519.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
|
||||
--- /dev/null
|
||||
+++ b/lib/crypto/curve25519-generic.c
|
||||
@@ -0,0 +1,24 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
+/*
|
||||
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
+ *
|
||||
+ * This is an implementation of the Curve25519 ECDH algorithm, using either
|
||||
+ * a 32-bit implementation or a 64-bit implementation with 128-bit integers,
|
||||
+ * depending on what is supported by the target compiler.
|
||||
+ *
|
||||
+ * Information: https://cr.yp.to/ecdh.html
|
||||
+ */
|
||||
+
|
||||
+#include <crypto/curve25519.h>
|
||||
+#include <linux/module.h>
|
||||
+
|
||||
+const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 };
|
||||
+const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };
|
||||
+
|
||||
+EXPORT_SYMBOL(curve25519_null_point);
|
||||
+EXPORT_SYMBOL(curve25519_base_point);
|
||||
+EXPORT_SYMBOL(curve25519_generic);
|
||||
+
|
||||
+MODULE_LICENSE("GPL v2");
|
||||
+MODULE_DESCRIPTION("Curve25519 scalar multiplication");
|
||||
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
|
||||
--- a/lib/crypto/curve25519.c
|
||||
+++ b/lib/crypto/curve25519.c
|
||||
@@ -15,13 +15,6 @@
|
||||
|
||||
bool curve25519_selftest(void);
|
||||
|
||||
-const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 };
|
||||
-const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };
|
||||
-
|
||||
-EXPORT_SYMBOL(curve25519_null_point);
|
||||
-EXPORT_SYMBOL(curve25519_base_point);
|
||||
-EXPORT_SYMBOL(curve25519_generic);
|
||||
-
|
||||
static int __init mod_init(void)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
|
@ -0,0 +1,23 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Thu, 16 Jan 2020 18:23:55 +0100
|
||||
Subject: [PATCH] crypto: x86/poly1305 - fix .gitignore typo
|
||||
|
||||
commit 1f6868995326cc82102049e349d8dbd116bdb656 upstream.
|
||||
|
||||
Amidst the kbuild robot induced changes, the .gitignore file for the
|
||||
generated file wasn't updated with the non-clashing filename. This
|
||||
commit adjusts that.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/.gitignore | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/x86/crypto/.gitignore
|
||||
+++ b/arch/x86/crypto/.gitignore
|
||||
@@ -1 +1 @@
|
||||
-poly1305-x86_64.S
|
||||
+poly1305-x86_64-cryptogams.S
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,36 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Fri, 17 Jan 2020 11:42:22 +0100
|
||||
Subject: [PATCH] crypto: x86/poly1305 - emit does base conversion itself
|
||||
|
||||
commit f9e7fe32a792726186301423ff63a465d63386e1 upstream.
|
||||
|
||||
The emit code does optional base conversion itself in assembly, so we
|
||||
don't need to do that here. Also, neither one of these functions uses
|
||||
simd instructions, so checking for that doesn't make sense either.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/poly1305_glue.c | 8 ++------
|
||||
1 file changed, 2 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/arch/x86/crypto/poly1305_glue.c
|
||||
+++ b/arch/x86/crypto/poly1305_glue.c
|
||||
@@ -123,13 +123,9 @@ static void poly1305_simd_blocks(void *c
|
||||
static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
|
||||
const u32 nonce[4])
|
||||
{
|
||||
- struct poly1305_arch_internal *state = ctx;
|
||||
-
|
||||
- if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) ||
|
||||
- !state->is_base2_26 || !crypto_simd_usable()) {
|
||||
- convert_to_base2_64(ctx);
|
||||
+ if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx))
|
||||
poly1305_emit_x86_64(ctx, mac, nonce);
|
||||
- } else
|
||||
+ else
|
||||
poly1305_emit_avx(ctx, mac, nonce);
|
||||
}
|
||||
|
@ -0,0 +1,58 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 17 Jan 2020 17:43:18 +0100
|
||||
Subject: [PATCH] crypto: arm/chacha - fix build failure when kernel mode NEON
|
||||
is disabled
|
||||
|
||||
commit 0bc81767c5bd9d005fae1099fb39eb3688370cb1 upstream.
|
||||
|
||||
When the ARM accelerated ChaCha driver is built as part of a configuration
|
||||
that has kernel mode NEON disabled, we expect the compiler to propagate
|
||||
the build time constant expression IS_ENABLED(CONFIG_KERNEL_MODE_NEON) in
|
||||
a way that eliminates all the cross-object references to the actual NEON
|
||||
routines, which allows the chacha-neon-core.o object to be omitted from
|
||||
the build entirely.
|
||||
|
||||
Unfortunately, this fails to work as expected in some cases, and we may
|
||||
end up with a build error such as
|
||||
|
||||
chacha-glue.c:(.text+0xc0): undefined reference to `chacha_4block_xor_neon'
|
||||
|
||||
caused by the fact that chacha_doneon() has not been eliminated from the
|
||||
object code, even though it will never be called in practice.
|
||||
|
||||
Let's fix this by adding some IS_ENABLED(CONFIG_KERNEL_MODE_NEON) tests
|
||||
that are not strictly needed from a logical point of view, but should
|
||||
help the compiler infer that the NEON code paths are unreachable in
|
||||
those cases.
|
||||
|
||||
Fixes: b36d8c09e710c71f ("crypto: arm/chacha - remove dependency on generic ...")
|
||||
Reported-by: Russell King <linux@armlinux.org.uk>
|
||||
Cc: Arnd Bergmann <arnd@arndb.de>
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/chacha-glue.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/arch/arm/crypto/chacha-glue.c
|
||||
+++ b/arch/arm/crypto/chacha-glue.c
|
||||
@@ -115,7 +115,7 @@ static int chacha_stream_xor(struct skci
|
||||
if (nbytes < walk.total)
|
||||
nbytes = round_down(nbytes, walk.stride);
|
||||
|
||||
- if (!neon) {
|
||||
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
|
||||
chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
nbytes, state, ctx->nrounds);
|
||||
state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
|
||||
@@ -159,7 +159,7 @@ static int do_xchacha(struct skcipher_re
|
||||
|
||||
chacha_init_generic(state, ctx->key, req->iv);
|
||||
|
||||
- if (!neon) {
|
||||
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
|
||||
hchacha_block_arm(state, subctx.key, ctx->nrounds);
|
||||
} else {
|
||||
kernel_neon_begin();
|
@ -0,0 +1,40 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Fri, 17 Jan 2020 12:01:36 +0100
|
||||
Subject: [PATCH] crypto: Kconfig - allow tests to be disabled when manager is
|
||||
disabled
|
||||
|
||||
commit 2343d1529aff8b552589f622c23932035ed7a05d upstream.
|
||||
|
||||
The library code uses CRYPTO_MANAGER_DISABLE_TESTS to conditionalize its
|
||||
tests, but the library code can also exist without CRYPTO_MANAGER. That
|
||||
means on minimal configs, the test code winds up being built with no way
|
||||
to disable it.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
crypto/Kconfig | 4 ----
|
||||
1 file changed, 4 deletions(-)
|
||||
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -136,8 +136,6 @@ config CRYPTO_USER
|
||||
Userspace configuration for cryptographic instantiations such as
|
||||
cbc(aes).
|
||||
|
||||
-if CRYPTO_MANAGER2
|
||||
-
|
||||
config CRYPTO_MANAGER_DISABLE_TESTS
|
||||
bool "Disable run-time self tests"
|
||||
default y
|
||||
@@ -155,8 +153,6 @@ config CRYPTO_MANAGER_EXTRA_TESTS
|
||||
This is intended for developer use only, as these tests take much
|
||||
longer to run than the normal self tests.
|
||||
|
||||
-endif # if CRYPTO_MANAGER2
|
||||
-
|
||||
config CRYPTO_GF128MUL
|
||||
tristate
|
||||
|
@ -0,0 +1,40 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Thu, 6 Feb 2020 12:42:01 +0100
|
||||
Subject: [PATCH] crypto: chacha20poly1305 - prevent integer overflow on large
|
||||
input
|
||||
|
||||
commit c9cc0517bba9f0213f1e55172feceb99e5512daf upstream.
|
||||
|
||||
This code assigns src_len (size_t) to sl (int), which causes problems
|
||||
when src_len is very large. Probably nobody in the kernel should be
|
||||
passing this much data to chacha20poly1305 all in one go anyway, so I
|
||||
don't think we need to change the algorithm or introduce larger types
|
||||
or anything. But we should at least error out early in this case and
|
||||
print a warning so that we get reports if this does happen and can look
|
||||
into why anybody is possibly passing it that much data or if they're
|
||||
accidentally passing -1 or similar.
|
||||
|
||||
Fixes: d95312a3ccc0 ("crypto: lib/chacha20poly1305 - reimplement crypt_from_sg() routine")
|
||||
Cc: Ard Biesheuvel <ardb@kernel.org>
|
||||
Cc: stable@vger.kernel.org # 5.5+
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Acked-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
lib/crypto/chacha20poly1305.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
--- a/lib/crypto/chacha20poly1305.c
|
||||
+++ b/lib/crypto/chacha20poly1305.c
|
||||
@@ -235,6 +235,9 @@ bool chacha20poly1305_crypt_sg_inplace(s
|
||||
__le64 lens[2];
|
||||
} b __aligned(16);
|
||||
|
||||
+ if (WARN_ON(src_len > INT_MAX))
|
||||
+ return false;
|
||||
+
|
||||
chacha_load_key(b.k, key);
|
||||
|
||||
b.iv[0] = 0;
|
@ -0,0 +1,84 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Sun, 1 Mar 2020 22:52:35 +0800
|
||||
Subject: [PATCH] crypto: x86/curve25519 - support assemblers with no adx
|
||||
support
|
||||
|
||||
commit 1579f1bc3b753d17a44de3457d5c6f4a5b14c752 upstream.
|
||||
|
||||
Some older versions of GAS do not support the ADX instructions, similarly
|
||||
to how they also don't support AVX and such. This commit adds the same
|
||||
build-time detection mechanisms we use for AVX and others for ADX, and
|
||||
then makes sure that the curve25519 library dispatcher calls the right
|
||||
functions.
|
||||
|
||||
Reported-by: Willy Tarreau <w@1wt.eu>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/Makefile | 5 +++--
|
||||
arch/x86/crypto/Makefile | 7 ++++++-
|
||||
include/crypto/curve25519.h | 6 ++++--
|
||||
3 files changed, 13 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/arch/x86/Makefile
|
||||
+++ b/arch/x86/Makefile
|
||||
@@ -197,9 +197,10 @@ avx2_instr :=$(call as-instr,vpbroadcast
|
||||
avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
|
||||
sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1)
|
||||
sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1)
|
||||
+adx_instr := $(call as-instr,adox %r10$(comma)%r10,-DCONFIG_AS_ADX=1)
|
||||
|
||||
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
|
||||
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
|
||||
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr)
|
||||
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr)
|
||||
|
||||
KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
|
||||
|
||||
--- a/arch/x86/crypto/Makefile
|
||||
+++ b/arch/x86/crypto/Makefile
|
||||
@@ -11,6 +11,7 @@ avx2_supported := $(call as-instr,vpgath
|
||||
avx512_supported :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,yes,no)
|
||||
sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no)
|
||||
sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no)
|
||||
+adx_supported := $(call as-instr,adox %r10$(comma)%r10,yes,no)
|
||||
|
||||
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
|
||||
|
||||
@@ -39,7 +40,11 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2)
|
||||
|
||||
obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
|
||||
obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
|
||||
-obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
|
||||
+
|
||||
+# These modules require the assembler to support ADX.
|
||||
+ifeq ($(adx_supported),yes)
|
||||
+ obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
|
||||
+endif
|
||||
|
||||
# These modules require assembler to support AVX.
|
||||
ifeq ($(avx_supported),yes)
|
||||
--- a/include/crypto/curve25519.h
|
||||
+++ b/include/crypto/curve25519.h
|
||||
@@ -33,7 +33,8 @@ bool __must_check curve25519(u8 mypublic
|
||||
const u8 secret[CURVE25519_KEY_SIZE],
|
||||
const u8 basepoint[CURVE25519_KEY_SIZE])
|
||||
{
|
||||
- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519))
|
||||
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) &&
|
||||
+ (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX)))
|
||||
curve25519_arch(mypublic, secret, basepoint);
|
||||
else
|
||||
curve25519_generic(mypublic, secret, basepoint);
|
||||
@@ -49,7 +50,8 @@ __must_check curve25519_generate_public(
|
||||
CURVE25519_KEY_SIZE)))
|
||||
return false;
|
||||
|
||||
- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519))
|
||||
+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) &&
|
||||
+ (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX)))
|
||||
curve25519_base_arch(pub, secret);
|
||||
else
|
||||
curve25519_generic(pub, secret, curve25519_base_point);
|
@ -0,0 +1,68 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Wed, 18 Mar 2020 20:27:32 -0600
|
||||
Subject: [PATCH] crypto: arm64/chacha - correctly walk through blocks
|
||||
|
||||
commit c8cfcb78c65877313cda7bcbace624d3dbd1f3b3 upstream.
|
||||
|
||||
Prior, passing in chunks of 2, 3, or 4, followed by any additional
|
||||
chunks would result in the chacha state counter getting out of sync,
|
||||
resulting in incorrect encryption/decryption, which is a pretty nasty
|
||||
crypto vuln: "why do images look weird on webpages?" WireGuard users
|
||||
never experienced this prior, because we have always, out of tree, used
|
||||
a different crypto library, until the recent Frankenzinc addition. This
|
||||
commit fixes the issue by advancing the pointers and state counter by
|
||||
the actual size processed. It also fixes up a bug in the (optional,
|
||||
costly) stride test that prevented it from running on arm64.
|
||||
|
||||
Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function")
|
||||
Reported-and-tested-by: Emil Renner Berthing <kernel@esmil.dk>
|
||||
Cc: Ard Biesheuvel <ardb@kernel.org>
|
||||
Cc: stable@vger.kernel.org # v5.5+
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Reviewed-by: Eric Biggers <ebiggers@google.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm64/crypto/chacha-neon-glue.c | 8 ++++----
|
||||
lib/crypto/chacha20poly1305-selftest.c | 11 ++++++++---
|
||||
2 files changed, 12 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/arch/arm64/crypto/chacha-neon-glue.c
|
||||
+++ b/arch/arm64/crypto/chacha-neon-glue.c
|
||||
@@ -55,10 +55,10 @@ static void chacha_doneon(u32 *state, u8
|
||||
break;
|
||||
}
|
||||
chacha_4block_xor_neon(state, dst, src, nrounds, l);
|
||||
- bytes -= CHACHA_BLOCK_SIZE * 5;
|
||||
- src += CHACHA_BLOCK_SIZE * 5;
|
||||
- dst += CHACHA_BLOCK_SIZE * 5;
|
||||
- state[12] += 5;
|
||||
+ bytes -= l;
|
||||
+ src += l;
|
||||
+ dst += l;
|
||||
+ state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
--- a/lib/crypto/chacha20poly1305-selftest.c
|
||||
+++ b/lib/crypto/chacha20poly1305-selftest.c
|
||||
@@ -9028,10 +9028,15 @@ bool __init chacha20poly1305_selftest(vo
|
||||
&& total_len <= 1 << 10; ++total_len) {
|
||||
for (i = 0; i <= total_len; ++i) {
|
||||
for (j = i; j <= total_len; ++j) {
|
||||
+ k = 0;
|
||||
sg_init_table(sg_src, 3);
|
||||
- sg_set_buf(&sg_src[0], input, i);
|
||||
- sg_set_buf(&sg_src[1], input + i, j - i);
|
||||
- sg_set_buf(&sg_src[2], input + j, total_len - j);
|
||||
+ if (i)
|
||||
+ sg_set_buf(&sg_src[k++], input, i);
|
||||
+ if (j - i)
|
||||
+ sg_set_buf(&sg_src[k++], input + i, j - i);
|
||||
+ if (total_len - j)
|
||||
+ sg_set_buf(&sg_src[k++], input + j, total_len - j);
|
||||
+ sg_init_marker(sg_src, k);
|
||||
memset(computed_output, 0, total_len);
|
||||
memset(input, 0, total_len);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,376 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Sun, 1 Mar 2020 16:06:56 +0800
|
||||
Subject: [PATCH] crypto: x86/curve25519 - leave r12 as spare register
|
||||
|
||||
commit dc7fc3a53ae158263196b1892b672aedf67796c5 upstream.
|
||||
|
||||
This updates to the newer register selection proved by HACL*, which
|
||||
leads to a more compact instruction encoding, and saves around 100
|
||||
cycles.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/curve25519-x86_64.c | 110 ++++++++++++++--------------
|
||||
1 file changed, 55 insertions(+), 55 deletions(-)
|
||||
|
||||
--- a/arch/x86/crypto/curve25519-x86_64.c
|
||||
+++ b/arch/x86/crypto/curve25519-x86_64.c
|
||||
@@ -167,28 +167,28 @@ static inline void fmul(u64 *out, const
|
||||
" movq 0(%1), %%rdx;"
|
||||
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
|
||||
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
|
||||
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
|
||||
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;"
|
||||
/* Compute src1[1] * src2 */
|
||||
" movq 8(%1), %%rdx;"
|
||||
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
|
||||
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);"
|
||||
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
|
||||
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
|
||||
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
|
||||
/* Compute src1[2] * src2 */
|
||||
" movq 16(%1), %%rdx;"
|
||||
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
|
||||
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);"
|
||||
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
|
||||
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
|
||||
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
|
||||
/* Compute src1[3] * src2 */
|
||||
" movq 24(%1), %%rdx;"
|
||||
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
|
||||
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);"
|
||||
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;"
|
||||
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
|
||||
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
|
||||
/* Line up pointers */
|
||||
@@ -202,11 +202,11 @@ static inline void fmul(u64 *out, const
|
||||
" mulxq 32(%1), %%r8, %%r13;"
|
||||
" xor %3, %3;"
|
||||
" adoxq 0(%1), %%r8;"
|
||||
- " mulxq 40(%1), %%r9, %%r12;"
|
||||
+ " mulxq 40(%1), %%r9, %%rbx;"
|
||||
" adcx %%r13, %%r9;"
|
||||
" adoxq 8(%1), %%r9;"
|
||||
" mulxq 48(%1), %%r10, %%r13;"
|
||||
- " adcx %%r12, %%r10;"
|
||||
+ " adcx %%rbx, %%r10;"
|
||||
" adoxq 16(%1), %%r10;"
|
||||
" mulxq 56(%1), %%r11, %%rax;"
|
||||
" adcx %%r13, %%r11;"
|
||||
@@ -231,7 +231,7 @@ static inline void fmul(u64 *out, const
|
||||
" movq %%r8, 0(%0);"
|
||||
: "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
|
||||
:
|
||||
- : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc"
|
||||
+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -248,28 +248,28 @@ static inline void fmul2(u64 *out, const
|
||||
" movq 0(%1), %%rdx;"
|
||||
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
|
||||
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
|
||||
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
|
||||
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;"
|
||||
/* Compute src1[1] * src2 */
|
||||
" movq 8(%1), %%rdx;"
|
||||
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
|
||||
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);"
|
||||
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
|
||||
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
|
||||
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
|
||||
/* Compute src1[2] * src2 */
|
||||
" movq 16(%1), %%rdx;"
|
||||
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
|
||||
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);"
|
||||
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
|
||||
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
|
||||
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
|
||||
/* Compute src1[3] * src2 */
|
||||
" movq 24(%1), %%rdx;"
|
||||
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
|
||||
- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);"
|
||||
- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;"
|
||||
+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
|
||||
+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
|
||||
|
||||
@@ -279,28 +279,28 @@ static inline void fmul2(u64 *out, const
|
||||
" movq 32(%1), %%rdx;"
|
||||
" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);"
|
||||
" mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
|
||||
- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
|
||||
+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
|
||||
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;"
|
||||
/* Compute src1[1] * src2 */
|
||||
" movq 40(%1), %%rdx;"
|
||||
" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
|
||||
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 80(%0);"
|
||||
- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
|
||||
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);"
|
||||
+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
|
||||
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
|
||||
/* Compute src1[2] * src2 */
|
||||
" movq 48(%1), %%rdx;"
|
||||
" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
|
||||
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 88(%0);"
|
||||
- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
|
||||
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);"
|
||||
+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
|
||||
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
|
||||
/* Compute src1[3] * src2 */
|
||||
" movq 56(%1), %%rdx;"
|
||||
" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
|
||||
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 96(%0);"
|
||||
- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 104(%0);" " mov $0, %%r8;"
|
||||
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);"
|
||||
+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;"
|
||||
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);"
|
||||
/* Line up pointers */
|
||||
@@ -314,11 +314,11 @@ static inline void fmul2(u64 *out, const
|
||||
" mulxq 32(%1), %%r8, %%r13;"
|
||||
" xor %3, %3;"
|
||||
" adoxq 0(%1), %%r8;"
|
||||
- " mulxq 40(%1), %%r9, %%r12;"
|
||||
+ " mulxq 40(%1), %%r9, %%rbx;"
|
||||
" adcx %%r13, %%r9;"
|
||||
" adoxq 8(%1), %%r9;"
|
||||
" mulxq 48(%1), %%r10, %%r13;"
|
||||
- " adcx %%r12, %%r10;"
|
||||
+ " adcx %%rbx, %%r10;"
|
||||
" adoxq 16(%1), %%r10;"
|
||||
" mulxq 56(%1), %%r11, %%rax;"
|
||||
" adcx %%r13, %%r11;"
|
||||
@@ -347,11 +347,11 @@ static inline void fmul2(u64 *out, const
|
||||
" mulxq 96(%1), %%r8, %%r13;"
|
||||
" xor %3, %3;"
|
||||
" adoxq 64(%1), %%r8;"
|
||||
- " mulxq 104(%1), %%r9, %%r12;"
|
||||
+ " mulxq 104(%1), %%r9, %%rbx;"
|
||||
" adcx %%r13, %%r9;"
|
||||
" adoxq 72(%1), %%r9;"
|
||||
" mulxq 112(%1), %%r10, %%r13;"
|
||||
- " adcx %%r12, %%r10;"
|
||||
+ " adcx %%rbx, %%r10;"
|
||||
" adoxq 80(%1), %%r10;"
|
||||
" mulxq 120(%1), %%r11, %%rax;"
|
||||
" adcx %%r13, %%r11;"
|
||||
@@ -376,7 +376,7 @@ static inline void fmul2(u64 *out, const
|
||||
" movq %%r8, 32(%0);"
|
||||
: "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
|
||||
:
|
||||
- : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc"
|
||||
+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -388,11 +388,11 @@ static inline void fmul_scalar(u64 *out,
|
||||
asm volatile(
|
||||
/* Compute the raw multiplication of f1*f2 */
|
||||
" mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
|
||||
- " mulxq 8(%2), %%r9, %%r12;" /* f1[1]*f2 */
|
||||
+ " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */
|
||||
" add %%rcx, %%r9;"
|
||||
" mov $0, %%rcx;"
|
||||
" mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
|
||||
- " adcx %%r12, %%r10;"
|
||||
+ " adcx %%rbx, %%r10;"
|
||||
" mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
|
||||
" adcx %%r13, %%r11;"
|
||||
" adcx %%rcx, %%rax;"
|
||||
@@ -419,7 +419,7 @@ static inline void fmul_scalar(u64 *out,
|
||||
" movq %%r8, 0(%1);"
|
||||
: "+&r" (f2_r)
|
||||
: "r" (out), "r" (f1)
|
||||
- : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "memory", "cc"
|
||||
+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "memory", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -520,8 +520,8 @@ static inline void fsqr(u64 *out, const
|
||||
" mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
|
||||
" mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
|
||||
" movq 24(%1), %%rdx;" /* f[3] */
|
||||
- " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
|
||||
- " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
|
||||
+ " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
|
||||
+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
|
||||
" movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
|
||||
" mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
|
||||
|
||||
@@ -531,12 +531,12 @@ static inline void fsqr(u64 *out, const
|
||||
" adcx %%r8, %%r8;"
|
||||
" adox %%rcx, %%r11;"
|
||||
" adcx %%r9, %%r9;"
|
||||
- " adox %%r15, %%r12;"
|
||||
+ " adox %%r15, %%rbx;"
|
||||
" adcx %%r10, %%r10;"
|
||||
" adox %%r15, %%r13;"
|
||||
" adcx %%r11, %%r11;"
|
||||
" adox %%r15, %%r14;"
|
||||
- " adcx %%r12, %%r12;"
|
||||
+ " adcx %%rbx, %%rbx;"
|
||||
" adcx %%r13, %%r13;"
|
||||
" adcx %%r14, %%r14;"
|
||||
|
||||
@@ -549,7 +549,7 @@ static inline void fsqr(u64 *out, const
|
||||
" adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
|
||||
" movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
|
||||
" adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
|
||||
- " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);"
|
||||
+ " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);"
|
||||
" movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
|
||||
" adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
|
||||
" adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
|
||||
@@ -565,11 +565,11 @@ static inline void fsqr(u64 *out, const
|
||||
" mulxq 32(%1), %%r8, %%r13;"
|
||||
" xor %%rcx, %%rcx;"
|
||||
" adoxq 0(%1), %%r8;"
|
||||
- " mulxq 40(%1), %%r9, %%r12;"
|
||||
+ " mulxq 40(%1), %%r9, %%rbx;"
|
||||
" adcx %%r13, %%r9;"
|
||||
" adoxq 8(%1), %%r9;"
|
||||
" mulxq 48(%1), %%r10, %%r13;"
|
||||
- " adcx %%r12, %%r10;"
|
||||
+ " adcx %%rbx, %%r10;"
|
||||
" adoxq 16(%1), %%r10;"
|
||||
" mulxq 56(%1), %%r11, %%rax;"
|
||||
" adcx %%r13, %%r11;"
|
||||
@@ -594,7 +594,7 @@ static inline void fsqr(u64 *out, const
|
||||
" movq %%r8, 0(%0);"
|
||||
: "+&r" (tmp), "+&r" (f), "+&r" (out)
|
||||
:
|
||||
- : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc"
|
||||
+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -611,8 +611,8 @@ static inline void fsqr2(u64 *out, const
|
||||
" mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
|
||||
" mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
|
||||
" movq 24(%1), %%rdx;" /* f[3] */
|
||||
- " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
|
||||
- " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
|
||||
+ " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
|
||||
+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
|
||||
" movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
|
||||
" mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
|
||||
|
||||
@@ -622,12 +622,12 @@ static inline void fsqr2(u64 *out, const
|
||||
" adcx %%r8, %%r8;"
|
||||
" adox %%rcx, %%r11;"
|
||||
" adcx %%r9, %%r9;"
|
||||
- " adox %%r15, %%r12;"
|
||||
+ " adox %%r15, %%rbx;"
|
||||
" adcx %%r10, %%r10;"
|
||||
" adox %%r15, %%r13;"
|
||||
" adcx %%r11, %%r11;"
|
||||
" adox %%r15, %%r14;"
|
||||
- " adcx %%r12, %%r12;"
|
||||
+ " adcx %%rbx, %%rbx;"
|
||||
" adcx %%r13, %%r13;"
|
||||
" adcx %%r14, %%r14;"
|
||||
|
||||
@@ -640,7 +640,7 @@ static inline void fsqr2(u64 *out, const
|
||||
" adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
|
||||
" movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
|
||||
" adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
|
||||
- " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);"
|
||||
+ " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);"
|
||||
" movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
|
||||
" adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
|
||||
" adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
|
||||
@@ -651,8 +651,8 @@ static inline void fsqr2(u64 *out, const
|
||||
" mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
|
||||
" mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
|
||||
" movq 56(%1), %%rdx;" /* f[3] */
|
||||
- " mulxq 40(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
|
||||
- " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
|
||||
+ " mulxq 40(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
|
||||
+ " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
|
||||
" movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
|
||||
" mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
|
||||
|
||||
@@ -662,12 +662,12 @@ static inline void fsqr2(u64 *out, const
|
||||
" adcx %%r8, %%r8;"
|
||||
" adox %%rcx, %%r11;"
|
||||
" adcx %%r9, %%r9;"
|
||||
- " adox %%r15, %%r12;"
|
||||
+ " adox %%r15, %%rbx;"
|
||||
" adcx %%r10, %%r10;"
|
||||
" adox %%r15, %%r13;"
|
||||
" adcx %%r11, %%r11;"
|
||||
" adox %%r15, %%r14;"
|
||||
- " adcx %%r12, %%r12;"
|
||||
+ " adcx %%rbx, %%rbx;"
|
||||
" adcx %%r13, %%r13;"
|
||||
" adcx %%r14, %%r14;"
|
||||
|
||||
@@ -680,7 +680,7 @@ static inline void fsqr2(u64 *out, const
|
||||
" adcx %%rcx, %%r10;" " movq %%r10, 88(%0);"
|
||||
" movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
|
||||
" adcx %%rax, %%r11;" " movq %%r11, 96(%0);"
|
||||
- " adcx %%rcx, %%r12;" " movq %%r12, 104(%0);"
|
||||
+ " adcx %%rcx, %%rbx;" " movq %%rbx, 104(%0);"
|
||||
" movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
|
||||
" adcx %%rax, %%r13;" " movq %%r13, 112(%0);"
|
||||
" adcx %%rcx, %%r14;" " movq %%r14, 120(%0);"
|
||||
@@ -694,11 +694,11 @@ static inline void fsqr2(u64 *out, const
|
||||
" mulxq 32(%1), %%r8, %%r13;"
|
||||
" xor %%rcx, %%rcx;"
|
||||
" adoxq 0(%1), %%r8;"
|
||||
- " mulxq 40(%1), %%r9, %%r12;"
|
||||
+ " mulxq 40(%1), %%r9, %%rbx;"
|
||||
" adcx %%r13, %%r9;"
|
||||
" adoxq 8(%1), %%r9;"
|
||||
" mulxq 48(%1), %%r10, %%r13;"
|
||||
- " adcx %%r12, %%r10;"
|
||||
+ " adcx %%rbx, %%r10;"
|
||||
" adoxq 16(%1), %%r10;"
|
||||
" mulxq 56(%1), %%r11, %%rax;"
|
||||
" adcx %%r13, %%r11;"
|
||||
@@ -727,11 +727,11 @@ static inline void fsqr2(u64 *out, const
|
||||
" mulxq 96(%1), %%r8, %%r13;"
|
||||
" xor %%rcx, %%rcx;"
|
||||
" adoxq 64(%1), %%r8;"
|
||||
- " mulxq 104(%1), %%r9, %%r12;"
|
||||
+ " mulxq 104(%1), %%r9, %%rbx;"
|
||||
" adcx %%r13, %%r9;"
|
||||
" adoxq 72(%1), %%r9;"
|
||||
" mulxq 112(%1), %%r10, %%r13;"
|
||||
- " adcx %%r12, %%r10;"
|
||||
+ " adcx %%rbx, %%r10;"
|
||||
" adoxq 80(%1), %%r10;"
|
||||
" mulxq 120(%1), %%r11, %%rax;"
|
||||
" adcx %%r13, %%r11;"
|
||||
@@ -756,7 +756,7 @@ static inline void fsqr2(u64 *out, const
|
||||
" movq %%r8, 32(%0);"
|
||||
: "+&r" (tmp), "+&r" (f), "+&r" (out)
|
||||
:
|
||||
- : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc"
|
||||
+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
|
||||
);
|
||||
}
|
||||
|
@ -0,0 +1,35 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Thu, 19 Mar 2020 11:56:17 -0600
|
||||
Subject: [PATCH] crypto: arm[64]/poly1305 - add artifact to .gitignore files
|
||||
|
||||
commit 6e4e00d8b68ca7eb30d08afb740033e0d36abe55 upstream.
|
||||
|
||||
The .S_shipped yields a .S, and the pattern in these directories is to
|
||||
add that to .gitignore so that git-status doesn't raise a fuss.
|
||||
|
||||
Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
|
||||
Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
|
||||
Reported-by: Emil Renner Berthing <kernel@esmil.dk>
|
||||
Cc: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/.gitignore | 1 +
|
||||
arch/arm64/crypto/.gitignore | 1 +
|
||||
2 files changed, 2 insertions(+)
|
||||
|
||||
--- a/arch/arm/crypto/.gitignore
|
||||
+++ b/arch/arm/crypto/.gitignore
|
||||
@@ -1,3 +1,4 @@
|
||||
aesbs-core.S
|
||||
sha256-core.S
|
||||
sha512-core.S
|
||||
+poly1305-core.S
|
||||
--- a/arch/arm64/crypto/.gitignore
|
||||
+++ b/arch/arm64/crypto/.gitignore
|
||||
@@ -1,2 +1,3 @@
|
||||
sha256-core.S
|
||||
sha512-core.S
|
||||
+poly1305-core.S
|
@ -0,0 +1,243 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Thu, 23 Apr 2020 15:54:04 -0600
|
||||
Subject: [PATCH] crypto: arch/lib - limit simd usage to 4k chunks
|
||||
|
||||
commit 706024a52c614b478b63f7728d202532ce6591a9 upstream.
|
||||
|
||||
The initial Zinc patchset, after some mailing list discussion, contained
|
||||
code to ensure that kernel_fpu_enable would not be kept on for more than
|
||||
a 4k chunk, since it disables preemption. The choice of 4k isn't totally
|
||||
scientific, but it's not a bad guess either, and it's what's used in
|
||||
both the x86 poly1305, blake2s, and nhpoly1305 code already (in the form
|
||||
of PAGE_SIZE, which this commit corrects to be explicitly 4k for the
|
||||
former two).
|
||||
|
||||
Ard did some back of the envelope calculations and found that
|
||||
at 5 cycles/byte (overestimate) on a 1ghz processor (pretty slow), 4k
|
||||
means we have a maximum preemption disabling of 20us, which Sebastian
|
||||
confirmed was probably a good limit.
|
||||
|
||||
Unfortunately the chunking appears to have been left out of the final
|
||||
patchset that added the glue code. So, this commit adds it back in.
|
||||
|
||||
Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function")
|
||||
Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function")
|
||||
Fixes: a44a3430d71b ("crypto: arm/chacha - expose ARM ChaCha routine as library function")
|
||||
Fixes: d7d7b8535662 ("crypto: x86/poly1305 - wire up faster implementations for kernel")
|
||||
Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
|
||||
Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
|
||||
Fixes: ed0356eda153 ("crypto: blake2s - x86_64 SIMD implementation")
|
||||
Cc: Eric Biggers <ebiggers@google.com>
|
||||
Cc: Ard Biesheuvel <ardb@kernel.org>
|
||||
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Cc: stable@vger.kernel.org
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/chacha-glue.c | 14 +++++++++++---
|
||||
arch/arm/crypto/poly1305-glue.c | 15 +++++++++++----
|
||||
arch/arm64/crypto/chacha-neon-glue.c | 14 +++++++++++---
|
||||
arch/arm64/crypto/poly1305-glue.c | 15 +++++++++++----
|
||||
arch/x86/crypto/blake2s-glue.c | 10 ++++------
|
||||
arch/x86/crypto/chacha_glue.c | 14 +++++++++++---
|
||||
arch/x86/crypto/poly1305_glue.c | 13 ++++++-------
|
||||
7 files changed, 65 insertions(+), 30 deletions(-)
|
||||
|
||||
--- a/arch/arm/crypto/chacha-glue.c
|
||||
+++ b/arch/arm/crypto/chacha-glue.c
|
||||
@@ -91,9 +91,17 @@ void chacha_crypt_arch(u32 *state, u8 *d
|
||||
return;
|
||||
}
|
||||
|
||||
- kernel_neon_begin();
|
||||
- chacha_doneon(state, dst, src, bytes, nrounds);
|
||||
- kernel_neon_end();
|
||||
+ do {
|
||||
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
|
||||
+
|
||||
+ kernel_neon_begin();
|
||||
+ chacha_doneon(state, dst, src, todo, nrounds);
|
||||
+ kernel_neon_end();
|
||||
+
|
||||
+ bytes -= todo;
|
||||
+ src += todo;
|
||||
+ dst += todo;
|
||||
+ } while (bytes);
|
||||
}
|
||||
EXPORT_SYMBOL(chacha_crypt_arch);
|
||||
|
||||
--- a/arch/arm/crypto/poly1305-glue.c
|
||||
+++ b/arch/arm/crypto/poly1305-glue.c
|
||||
@@ -160,13 +160,20 @@ void poly1305_update_arch(struct poly130
|
||||
unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
|
||||
|
||||
if (static_branch_likely(&have_neon) && do_neon) {
|
||||
- kernel_neon_begin();
|
||||
- poly1305_blocks_neon(&dctx->h, src, len, 1);
|
||||
- kernel_neon_end();
|
||||
+ do {
|
||||
+ unsigned int todo = min_t(unsigned int, len, SZ_4K);
|
||||
+
|
||||
+ kernel_neon_begin();
|
||||
+ poly1305_blocks_neon(&dctx->h, src, todo, 1);
|
||||
+ kernel_neon_end();
|
||||
+
|
||||
+ len -= todo;
|
||||
+ src += todo;
|
||||
+ } while (len);
|
||||
} else {
|
||||
poly1305_blocks_arm(&dctx->h, src, len, 1);
|
||||
+ src += len;
|
||||
}
|
||||
- src += len;
|
||||
nbytes %= POLY1305_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
--- a/arch/arm64/crypto/chacha-neon-glue.c
|
||||
+++ b/arch/arm64/crypto/chacha-neon-glue.c
|
||||
@@ -87,9 +87,17 @@ void chacha_crypt_arch(u32 *state, u8 *d
|
||||
!crypto_simd_usable())
|
||||
return chacha_crypt_generic(state, dst, src, bytes, nrounds);
|
||||
|
||||
- kernel_neon_begin();
|
||||
- chacha_doneon(state, dst, src, bytes, nrounds);
|
||||
- kernel_neon_end();
|
||||
+ do {
|
||||
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
|
||||
+
|
||||
+ kernel_neon_begin();
|
||||
+ chacha_doneon(state, dst, src, todo, nrounds);
|
||||
+ kernel_neon_end();
|
||||
+
|
||||
+ bytes -= todo;
|
||||
+ src += todo;
|
||||
+ dst += todo;
|
||||
+ } while (bytes);
|
||||
}
|
||||
EXPORT_SYMBOL(chacha_crypt_arch);
|
||||
|
||||
--- a/arch/arm64/crypto/poly1305-glue.c
|
||||
+++ b/arch/arm64/crypto/poly1305-glue.c
|
||||
@@ -143,13 +143,20 @@ void poly1305_update_arch(struct poly130
|
||||
unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
|
||||
|
||||
if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
|
||||
- kernel_neon_begin();
|
||||
- poly1305_blocks_neon(&dctx->h, src, len, 1);
|
||||
- kernel_neon_end();
|
||||
+ do {
|
||||
+ unsigned int todo = min_t(unsigned int, len, SZ_4K);
|
||||
+
|
||||
+ kernel_neon_begin();
|
||||
+ poly1305_blocks_neon(&dctx->h, src, todo, 1);
|
||||
+ kernel_neon_end();
|
||||
+
|
||||
+ len -= todo;
|
||||
+ src += todo;
|
||||
+ } while (len);
|
||||
} else {
|
||||
poly1305_blocks(&dctx->h, src, len, 1);
|
||||
+ src += len;
|
||||
}
|
||||
- src += len;
|
||||
nbytes %= POLY1305_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
--- a/arch/x86/crypto/blake2s-glue.c
|
||||
+++ b/arch/x86/crypto/blake2s-glue.c
|
||||
@@ -32,16 +32,16 @@ void blake2s_compress_arch(struct blake2
|
||||
const u32 inc)
|
||||
{
|
||||
/* SIMD disables preemption, so relax after processing each page. */
|
||||
- BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
|
||||
+ BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
|
||||
|
||||
if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
|
||||
blake2s_compress_generic(state, block, nblocks, inc);
|
||||
return;
|
||||
}
|
||||
|
||||
- for (;;) {
|
||||
+ do {
|
||||
const size_t blocks = min_t(size_t, nblocks,
|
||||
- PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
|
||||
+ SZ_4K / BLAKE2S_BLOCK_SIZE);
|
||||
|
||||
kernel_fpu_begin();
|
||||
if (IS_ENABLED(CONFIG_AS_AVX512) &&
|
||||
@@ -52,10 +52,8 @@ void blake2s_compress_arch(struct blake2
|
||||
kernel_fpu_end();
|
||||
|
||||
nblocks -= blocks;
|
||||
- if (!nblocks)
|
||||
- break;
|
||||
block += blocks * BLAKE2S_BLOCK_SIZE;
|
||||
- }
|
||||
+ } while (nblocks);
|
||||
}
|
||||
EXPORT_SYMBOL(blake2s_compress_arch);
|
||||
|
||||
--- a/arch/x86/crypto/chacha_glue.c
|
||||
+++ b/arch/x86/crypto/chacha_glue.c
|
||||
@@ -154,9 +154,17 @@ void chacha_crypt_arch(u32 *state, u8 *d
|
||||
bytes <= CHACHA_BLOCK_SIZE)
|
||||
return chacha_crypt_generic(state, dst, src, bytes, nrounds);
|
||||
|
||||
- kernel_fpu_begin();
|
||||
- chacha_dosimd(state, dst, src, bytes, nrounds);
|
||||
- kernel_fpu_end();
|
||||
+ do {
|
||||
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
|
||||
+
|
||||
+ kernel_fpu_begin();
|
||||
+ chacha_dosimd(state, dst, src, todo, nrounds);
|
||||
+ kernel_fpu_end();
|
||||
+
|
||||
+ bytes -= todo;
|
||||
+ src += todo;
|
||||
+ dst += todo;
|
||||
+ } while (bytes);
|
||||
}
|
||||
EXPORT_SYMBOL(chacha_crypt_arch);
|
||||
|
||||
--- a/arch/x86/crypto/poly1305_glue.c
|
||||
+++ b/arch/x86/crypto/poly1305_glue.c
|
||||
@@ -91,8 +91,8 @@ static void poly1305_simd_blocks(void *c
|
||||
struct poly1305_arch_internal *state = ctx;
|
||||
|
||||
/* SIMD disables preemption, so relax after processing each page. */
|
||||
- BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
|
||||
- PAGE_SIZE % POLY1305_BLOCK_SIZE);
|
||||
+ BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
|
||||
+ SZ_4K % POLY1305_BLOCK_SIZE);
|
||||
|
||||
if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) ||
|
||||
(len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
|
||||
@@ -102,8 +102,8 @@ static void poly1305_simd_blocks(void *c
|
||||
return;
|
||||
}
|
||||
|
||||
- for (;;) {
|
||||
- const size_t bytes = min_t(size_t, len, PAGE_SIZE);
|
||||
+ do {
|
||||
+ const size_t bytes = min_t(size_t, len, SZ_4K);
|
||||
|
||||
kernel_fpu_begin();
|
||||
if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
|
||||
@@ -113,11 +113,10 @@ static void poly1305_simd_blocks(void *c
|
||||
else
|
||||
poly1305_blocks_avx(ctx, inp, bytes, padbit);
|
||||
kernel_fpu_end();
|
||||
+
|
||||
len -= bytes;
|
||||
- if (!len)
|
||||
- break;
|
||||
inp += bytes;
|
||||
- }
|
||||
+ } while (len);
|
||||
}
|
||||
|
||||
static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
|
@ -0,0 +1,38 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Date: Wed, 8 Jul 2020 12:41:13 +1000
|
||||
Subject: [PATCH] crypto: lib/chacha20poly1305 - Add missing function
|
||||
declaration
|
||||
|
||||
commit 06cc2afbbdf9a9e8df3e2f8db724997dd6e1b4ac upstream.
|
||||
|
||||
This patch adds a declaration for chacha20poly1305_selftest to
|
||||
silence a sparse warning.
|
||||
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
include/crypto/chacha20poly1305.h | 2 ++
|
||||
lib/crypto/chacha20poly1305.c | 2 --
|
||||
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/include/crypto/chacha20poly1305.h
|
||||
+++ b/include/crypto/chacha20poly1305.h
|
||||
@@ -45,4 +45,6 @@ bool chacha20poly1305_decrypt_sg_inplace
|
||||
const u64 nonce,
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE]);
|
||||
|
||||
+bool chacha20poly1305_selftest(void);
|
||||
+
|
||||
#endif /* __CHACHA20POLY1305_H */
|
||||
--- a/lib/crypto/chacha20poly1305.c
|
||||
+++ b/lib/crypto/chacha20poly1305.c
|
||||
@@ -21,8 +21,6 @@
|
||||
|
||||
#define CHACHA_KEY_WORDS (CHACHA_KEY_SIZE / sizeof(u32))
|
||||
|
||||
-bool __init chacha20poly1305_selftest(void);
|
||||
-
|
||||
static void chacha_load_key(u32 *k, const u8 *in)
|
||||
{
|
||||
k[0] = get_unaligned_le32(in);
|
@ -0,0 +1,147 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Wed, 8 Jul 2020 12:11:18 +0300
|
||||
Subject: [PATCH] crypto: x86/chacha-sse3 - use unaligned loads for state array
|
||||
|
||||
commit e79a31715193686e92dadb4caedfbb1f5de3659c upstream.
|
||||
|
||||
Due to the fact that the x86 port does not support allocating objects
|
||||
on the stack with an alignment that exceeds 8 bytes, we have a rather
|
||||
ugly hack in the x86 code for ChaCha to ensure that the state array is
|
||||
aligned to 16 bytes, allowing the SSE3 implementation of the algorithm
|
||||
to use aligned loads.
|
||||
|
||||
Given that the performance benefit of using aligned loads appears to
|
||||
be limited (~0.25% for 1k blocks using tcrypt on a Corei7-8650U), and
|
||||
the fact that this hack has leaked into generic ChaCha code, let's just
|
||||
remove it.
|
||||
|
||||
Cc: Martin Willi <martin@strongswan.org>
|
||||
Cc: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Cc: Eric Biggers <ebiggers@kernel.org>
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Reviewed-by: Martin Willi <martin@strongswan.org>
|
||||
Reviewed-by: Eric Biggers <ebiggers@google.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/chacha-ssse3-x86_64.S | 16 ++++++++--------
|
||||
arch/x86/crypto/chacha_glue.c | 17 ++---------------
|
||||
include/crypto/chacha.h | 4 ----
|
||||
3 files changed, 10 insertions(+), 27 deletions(-)
|
||||
|
||||
--- a/arch/x86/crypto/chacha-ssse3-x86_64.S
|
||||
+++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
|
||||
@@ -120,10 +120,10 @@ ENTRY(chacha_block_xor_ssse3)
|
||||
FRAME_BEGIN
|
||||
|
||||
# x0..3 = s0..3
|
||||
- movdqa 0x00(%rdi),%xmm0
|
||||
- movdqa 0x10(%rdi),%xmm1
|
||||
- movdqa 0x20(%rdi),%xmm2
|
||||
- movdqa 0x30(%rdi),%xmm3
|
||||
+ movdqu 0x00(%rdi),%xmm0
|
||||
+ movdqu 0x10(%rdi),%xmm1
|
||||
+ movdqu 0x20(%rdi),%xmm2
|
||||
+ movdqu 0x30(%rdi),%xmm3
|
||||
movdqa %xmm0,%xmm8
|
||||
movdqa %xmm1,%xmm9
|
||||
movdqa %xmm2,%xmm10
|
||||
@@ -205,10 +205,10 @@ ENTRY(hchacha_block_ssse3)
|
||||
# %edx: nrounds
|
||||
FRAME_BEGIN
|
||||
|
||||
- movdqa 0x00(%rdi),%xmm0
|
||||
- movdqa 0x10(%rdi),%xmm1
|
||||
- movdqa 0x20(%rdi),%xmm2
|
||||
- movdqa 0x30(%rdi),%xmm3
|
||||
+ movdqu 0x00(%rdi),%xmm0
|
||||
+ movdqu 0x10(%rdi),%xmm1
|
||||
+ movdqu 0x20(%rdi),%xmm2
|
||||
+ movdqu 0x30(%rdi),%xmm3
|
||||
|
||||
mov %edx,%r8d
|
||||
call chacha_permute
|
||||
--- a/arch/x86/crypto/chacha_glue.c
|
||||
+++ b/arch/x86/crypto/chacha_glue.c
|
||||
@@ -14,8 +14,6 @@
|
||||
#include <linux/module.h>
|
||||
#include <asm/simd.h>
|
||||
|
||||
-#define CHACHA_STATE_ALIGN 16
|
||||
-
|
||||
asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int len, int nrounds);
|
||||
asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
|
||||
@@ -125,8 +123,6 @@ static void chacha_dosimd(u32 *state, u8
|
||||
|
||||
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
|
||||
{
|
||||
- state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
|
||||
-
|
||||
if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
|
||||
hchacha_block_generic(state, stream, nrounds);
|
||||
} else {
|
||||
@@ -139,8 +135,6 @@ EXPORT_SYMBOL(hchacha_block_arch);
|
||||
|
||||
void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
|
||||
{
|
||||
- state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
|
||||
-
|
||||
chacha_init_generic(state, key, iv);
|
||||
}
|
||||
EXPORT_SYMBOL(chacha_init_arch);
|
||||
@@ -148,8 +142,6 @@ EXPORT_SYMBOL(chacha_init_arch);
|
||||
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
|
||||
int nrounds)
|
||||
{
|
||||
- state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
|
||||
-
|
||||
if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
|
||||
bytes <= CHACHA_BLOCK_SIZE)
|
||||
return chacha_crypt_generic(state, dst, src, bytes, nrounds);
|
||||
@@ -171,15 +163,12 @@ EXPORT_SYMBOL(chacha_crypt_arch);
|
||||
static int chacha_simd_stream_xor(struct skcipher_request *req,
|
||||
const struct chacha_ctx *ctx, const u8 *iv)
|
||||
{
|
||||
- u32 *state, state_buf[16 + 2] __aligned(8);
|
||||
+ u32 state[CHACHA_STATE_WORDS] __aligned(8);
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
- BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
|
||||
- state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
|
||||
-
|
||||
chacha_init_generic(state, ctx->key, iv);
|
||||
|
||||
while (walk.nbytes > 0) {
|
||||
@@ -218,12 +207,10 @@ static int xchacha_simd(struct skcipher_
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
- u32 *state, state_buf[16 + 2] __aligned(8);
|
||||
+ u32 state[CHACHA_STATE_WORDS] __aligned(8);
|
||||
struct chacha_ctx subctx;
|
||||
u8 real_iv[16];
|
||||
|
||||
- BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
|
||||
- state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
|
||||
chacha_init_generic(state, ctx->key, req->iv);
|
||||
|
||||
if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
|
||||
--- a/include/crypto/chacha.h
|
||||
+++ b/include/crypto/chacha.h
|
||||
@@ -25,11 +25,7 @@
|
||||
#define CHACHA_BLOCK_SIZE 64
|
||||
#define CHACHAPOLY_IV_SIZE 12
|
||||
|
||||
-#ifdef CONFIG_X86_64
|
||||
-#define CHACHA_STATE_WORDS ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32))
|
||||
-#else
|
||||
#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32))
|
||||
-#endif
|
||||
|
||||
/* 192-bit nonce, then 64-bit stream position */
|
||||
#define XCHACHA_IV_SIZE 32
|
@ -0,0 +1,46 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Date: Thu, 23 Jul 2020 17:50:48 +1000
|
||||
Subject: [PATCH] crypto: x86/curve25519 - Remove unused carry variables
|
||||
|
||||
commit 054a5540fb8f7268e2c79e9deab4242db15c8cba upstream.
|
||||
|
||||
The carry variables are assigned but never used, which upsets
|
||||
the compiler. This patch removes them.
|
||||
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Reviewed-by: Karthikeyan Bhargavan <karthik.bhargavan@gmail.com>
|
||||
Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/curve25519-x86_64.c | 6 ++----
|
||||
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/arch/x86/crypto/curve25519-x86_64.c
|
||||
+++ b/arch/x86/crypto/curve25519-x86_64.c
|
||||
@@ -948,10 +948,8 @@ static void store_felem(u64 *b, u64 *f)
|
||||
{
|
||||
u64 f30 = f[3U];
|
||||
u64 top_bit0 = f30 >> (u32)63U;
|
||||
- u64 carry0;
|
||||
u64 f31;
|
||||
u64 top_bit;
|
||||
- u64 carry;
|
||||
u64 f0;
|
||||
u64 f1;
|
||||
u64 f2;
|
||||
@@ -970,11 +968,11 @@ static void store_felem(u64 *b, u64 *f)
|
||||
u64 o2;
|
||||
u64 o3;
|
||||
f[3U] = f30 & (u64)0x7fffffffffffffffU;
|
||||
- carry0 = add_scalar(f, f, (u64)19U * top_bit0);
|
||||
+ add_scalar(f, f, (u64)19U * top_bit0);
|
||||
f31 = f[3U];
|
||||
top_bit = f31 >> (u32)63U;
|
||||
f[3U] = f31 & (u64)0x7fffffffffffffffU;
|
||||
- carry = add_scalar(f, f, (u64)19U * top_bit);
|
||||
+ add_scalar(f, f, (u64)19U * top_bit);
|
||||
f0 = f[0U];
|
||||
f1 = f[1U];
|
||||
f2 = f[2U];
|
@ -0,0 +1,36 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Fabio Estevam <festevam@gmail.com>
|
||||
Date: Mon, 24 Aug 2020 11:09:53 -0300
|
||||
Subject: [PATCH] crypto: arm/curve25519 - include <linux/scatterlist.h>
|
||||
|
||||
commit 6779d0e6b0fe193ab3010ea201782ca6f75a3862 upstream.
|
||||
|
||||
Building ARM allmodconfig leads to the following warnings:
|
||||
|
||||
arch/arm/crypto/curve25519-glue.c:73:12: error: implicit declaration of function 'sg_copy_to_buffer' [-Werror=implicit-function-declaration]
|
||||
arch/arm/crypto/curve25519-glue.c:74:9: error: implicit declaration of function 'sg_nents_for_len' [-Werror=implicit-function-declaration]
|
||||
arch/arm/crypto/curve25519-glue.c:88:11: error: implicit declaration of function 'sg_copy_from_buffer' [-Werror=implicit-function-declaration]
|
||||
|
||||
Include <linux/scatterlist.h> to fix such warnings
|
||||
|
||||
Reported-by: Olof's autobuilder <build@lixom.net>
|
||||
Fixes: 0c3dc787a62a ("crypto: algapi - Remove skbuff.h inclusion")
|
||||
Signed-off-by: Fabio Estevam <festevam@gmail.com>
|
||||
Acked-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/curve25519-glue.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/arch/arm/crypto/curve25519-glue.c
|
||||
+++ b/arch/arm/crypto/curve25519-glue.c
|
||||
@@ -16,6 +16,7 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/jump_label.h>
|
||||
+#include <linux/scatterlist.h>
|
||||
#include <crypto/curve25519.h>
|
||||
|
||||
asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE],
|
@ -0,0 +1,33 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Date: Tue, 25 Aug 2020 11:23:00 +1000
|
||||
Subject: [PATCH] crypto: arm/poly1305 - Add prototype for poly1305_blocks_neon
|
||||
|
||||
commit 51982ea02aef972132eb35c583d3e4c5b83166e5 upstream.
|
||||
|
||||
This patch adds a prototype for poly1305_blocks_neon to silence
|
||||
a compiler warning:
|
||||
|
||||
CC [M] arch/arm/crypto/poly1305-glue.o
|
||||
../arch/arm/crypto/poly1305-glue.c:25:13: warning: no previous prototype for `poly1305_blocks_neon' [-Wmissing-prototypes]
|
||||
void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
|
||||
^~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/poly1305-glue.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/arch/arm/crypto/poly1305-glue.c
|
||||
+++ b/arch/arm/crypto/poly1305-glue.c
|
||||
@@ -20,6 +20,7 @@
|
||||
|
||||
void poly1305_init_arm(void *state, const u8 *key);
|
||||
void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
|
||||
+void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
|
||||
void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);
|
||||
|
||||
void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
|
@ -0,0 +1,261 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Uros Bizjak <ubizjak@gmail.com>
|
||||
Date: Thu, 27 Aug 2020 19:30:58 +0200
|
||||
Subject: [PATCH] crypto: curve25519-x86_64 - Use XORL r32,r32
|
||||
|
||||
commit db719539fd3889836900bf912755aa30a5985e9a upstream.
|
||||
|
||||
x86_64 zero extends 32bit operations, so for 64bit operands,
|
||||
XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
|
||||
a REX prefix byte when legacy registers are used.
|
||||
|
||||
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
|
||||
Cc: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Cc: "David S. Miller" <davem@davemloft.net>
|
||||
Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/curve25519-x86_64.c | 68 ++++++++++++++---------------
|
||||
1 file changed, 34 insertions(+), 34 deletions(-)
|
||||
|
||||
--- a/arch/x86/crypto/curve25519-x86_64.c
|
||||
+++ b/arch/x86/crypto/curve25519-x86_64.c
|
||||
@@ -45,11 +45,11 @@ static inline u64 add_scalar(u64 *out, c
|
||||
|
||||
asm volatile(
|
||||
/* Clear registers to propagate the carry bit */
|
||||
- " xor %%r8, %%r8;"
|
||||
- " xor %%r9, %%r9;"
|
||||
- " xor %%r10, %%r10;"
|
||||
- " xor %%r11, %%r11;"
|
||||
- " xor %1, %1;"
|
||||
+ " xor %%r8d, %%r8d;"
|
||||
+ " xor %%r9d, %%r9d;"
|
||||
+ " xor %%r10d, %%r10d;"
|
||||
+ " xor %%r11d, %%r11d;"
|
||||
+ " xor %k1, %k1;"
|
||||
|
||||
/* Begin addition chain */
|
||||
" addq 0(%3), %0;"
|
||||
@@ -93,7 +93,7 @@ static inline void fadd(u64 *out, const
|
||||
" cmovc %0, %%rax;"
|
||||
|
||||
/* Step 2: Add carry*38 to the original sum */
|
||||
- " xor %%rcx, %%rcx;"
|
||||
+ " xor %%ecx, %%ecx;"
|
||||
" add %%rax, %%r8;"
|
||||
" adcx %%rcx, %%r9;"
|
||||
" movq %%r9, 8(%1);"
|
||||
@@ -165,28 +165,28 @@ static inline void fmul(u64 *out, const
|
||||
|
||||
/* Compute src1[0] * src2 */
|
||||
" movq 0(%1), %%rdx;"
|
||||
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
|
||||
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 0(%0);"
|
||||
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
|
||||
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;"
|
||||
/* Compute src1[1] * src2 */
|
||||
" movq 8(%1), %%rdx;"
|
||||
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
|
||||
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
|
||||
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
|
||||
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
|
||||
/* Compute src1[2] * src2 */
|
||||
" movq 16(%1), %%rdx;"
|
||||
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
|
||||
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
|
||||
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
|
||||
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
|
||||
/* Compute src1[3] * src2 */
|
||||
" movq 24(%1), %%rdx;"
|
||||
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
|
||||
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
|
||||
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
|
||||
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
|
||||
@@ -200,7 +200,7 @@ static inline void fmul(u64 *out, const
|
||||
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
|
||||
" mov $38, %%rdx;"
|
||||
" mulxq 32(%1), %%r8, %%r13;"
|
||||
- " xor %3, %3;"
|
||||
+ " xor %k3, %k3;"
|
||||
" adoxq 0(%1), %%r8;"
|
||||
" mulxq 40(%1), %%r9, %%rbx;"
|
||||
" adcx %%r13, %%r9;"
|
||||
@@ -246,28 +246,28 @@ static inline void fmul2(u64 *out, const
|
||||
|
||||
/* Compute src1[0] * src2 */
|
||||
" movq 0(%1), %%rdx;"
|
||||
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
|
||||
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 0(%0);"
|
||||
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
|
||||
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;"
|
||||
/* Compute src1[1] * src2 */
|
||||
" movq 8(%1), %%rdx;"
|
||||
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
|
||||
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
|
||||
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
|
||||
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
|
||||
/* Compute src1[2] * src2 */
|
||||
" movq 16(%1), %%rdx;"
|
||||
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
|
||||
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
|
||||
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
|
||||
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
|
||||
/* Compute src1[3] * src2 */
|
||||
" movq 24(%1), %%rdx;"
|
||||
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
|
||||
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
|
||||
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
|
||||
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
|
||||
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
|
||||
@@ -277,29 +277,29 @@ static inline void fmul2(u64 *out, const
|
||||
|
||||
/* Compute src1[0] * src2 */
|
||||
" movq 32(%1), %%rdx;"
|
||||
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);"
|
||||
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
|
||||
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 64(%0);"
|
||||
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
|
||||
" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
|
||||
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;"
|
||||
/* Compute src1[1] * src2 */
|
||||
" movq 40(%1), %%rdx;"
|
||||
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
|
||||
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);"
|
||||
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
|
||||
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);"
|
||||
" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
|
||||
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
|
||||
/* Compute src1[2] * src2 */
|
||||
" movq 48(%1), %%rdx;"
|
||||
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
|
||||
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);"
|
||||
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
|
||||
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);"
|
||||
" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
|
||||
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
|
||||
/* Compute src1[3] * src2 */
|
||||
" movq 56(%1), %%rdx;"
|
||||
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
|
||||
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);"
|
||||
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
|
||||
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);"
|
||||
" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;"
|
||||
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;"
|
||||
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);"
|
||||
@@ -312,7 +312,7 @@ static inline void fmul2(u64 *out, const
|
||||
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
|
||||
" mov $38, %%rdx;"
|
||||
" mulxq 32(%1), %%r8, %%r13;"
|
||||
- " xor %3, %3;"
|
||||
+ " xor %k3, %k3;"
|
||||
" adoxq 0(%1), %%r8;"
|
||||
" mulxq 40(%1), %%r9, %%rbx;"
|
||||
" adcx %%r13, %%r9;"
|
||||
@@ -345,7 +345,7 @@ static inline void fmul2(u64 *out, const
|
||||
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
|
||||
" mov $38, %%rdx;"
|
||||
" mulxq 96(%1), %%r8, %%r13;"
|
||||
- " xor %3, %3;"
|
||||
+ " xor %k3, %k3;"
|
||||
" adoxq 64(%1), %%r8;"
|
||||
" mulxq 104(%1), %%r9, %%rbx;"
|
||||
" adcx %%r13, %%r9;"
|
||||
@@ -516,7 +516,7 @@ static inline void fsqr(u64 *out, const
|
||||
|
||||
/* Step 1: Compute all partial products */
|
||||
" movq 0(%1), %%rdx;" /* f[0] */
|
||||
- " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
|
||||
+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
|
||||
" mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
|
||||
" mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
|
||||
" movq 24(%1), %%rdx;" /* f[3] */
|
||||
@@ -526,7 +526,7 @@ static inline void fsqr(u64 *out, const
|
||||
" mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
|
||||
|
||||
/* Step 2: Compute two parallel carry chains */
|
||||
- " xor %%r15, %%r15;"
|
||||
+ " xor %%r15d, %%r15d;"
|
||||
" adox %%rax, %%r10;"
|
||||
" adcx %%r8, %%r8;"
|
||||
" adox %%rcx, %%r11;"
|
||||
@@ -563,7 +563,7 @@ static inline void fsqr(u64 *out, const
|
||||
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
|
||||
" mov $38, %%rdx;"
|
||||
" mulxq 32(%1), %%r8, %%r13;"
|
||||
- " xor %%rcx, %%rcx;"
|
||||
+ " xor %%ecx, %%ecx;"
|
||||
" adoxq 0(%1), %%r8;"
|
||||
" mulxq 40(%1), %%r9, %%rbx;"
|
||||
" adcx %%r13, %%r9;"
|
||||
@@ -607,7 +607,7 @@ static inline void fsqr2(u64 *out, const
|
||||
asm volatile(
|
||||
/* Step 1: Compute all partial products */
|
||||
" movq 0(%1), %%rdx;" /* f[0] */
|
||||
- " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
|
||||
+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
|
||||
" mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
|
||||
" mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
|
||||
" movq 24(%1), %%rdx;" /* f[3] */
|
||||
@@ -617,7 +617,7 @@ static inline void fsqr2(u64 *out, const
|
||||
" mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
|
||||
|
||||
/* Step 2: Compute two parallel carry chains */
|
||||
- " xor %%r15, %%r15;"
|
||||
+ " xor %%r15d, %%r15d;"
|
||||
" adox %%rax, %%r10;"
|
||||
" adcx %%r8, %%r8;"
|
||||
" adox %%rcx, %%r11;"
|
||||
@@ -647,7 +647,7 @@ static inline void fsqr2(u64 *out, const
|
||||
|
||||
/* Step 1: Compute all partial products */
|
||||
" movq 32(%1), %%rdx;" /* f[0] */
|
||||
- " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
|
||||
+ " mulxq 40(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
|
||||
" mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
|
||||
" mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
|
||||
" movq 56(%1), %%rdx;" /* f[3] */
|
||||
@@ -657,7 +657,7 @@ static inline void fsqr2(u64 *out, const
|
||||
" mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
|
||||
|
||||
/* Step 2: Compute two parallel carry chains */
|
||||
- " xor %%r15, %%r15;"
|
||||
+ " xor %%r15d, %%r15d;"
|
||||
" adox %%rax, %%r10;"
|
||||
" adcx %%r8, %%r8;"
|
||||
" adox %%rcx, %%r11;"
|
||||
@@ -692,7 +692,7 @@ static inline void fsqr2(u64 *out, const
|
||||
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
|
||||
" mov $38, %%rdx;"
|
||||
" mulxq 32(%1), %%r8, %%r13;"
|
||||
- " xor %%rcx, %%rcx;"
|
||||
+ " xor %%ecx, %%ecx;"
|
||||
" adoxq 0(%1), %%r8;"
|
||||
" mulxq 40(%1), %%r9, %%rbx;"
|
||||
" adcx %%r13, %%r9;"
|
||||
@@ -725,7 +725,7 @@ static inline void fsqr2(u64 *out, const
|
||||
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
|
||||
" mov $38, %%rdx;"
|
||||
" mulxq 96(%1), %%r8, %%r13;"
|
||||
- " xor %%rcx, %%rcx;"
|
||||
+ " xor %%ecx, %%ecx;"
|
||||
" adoxq 64(%1), %%r8;"
|
||||
" mulxq 104(%1), %%r9, %%rbx;"
|
||||
" adcx %%r13, %%r9;"
|
@ -0,0 +1,59 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Uros Bizjak <ubizjak@gmail.com>
|
||||
Date: Thu, 27 Aug 2020 19:38:31 +0200
|
||||
Subject: [PATCH] crypto: poly1305-x86_64 - Use XORL r32,r32
|
||||
|
||||
commit 7dfd1e01b3dfc13431b1b25720cf2692a7e111ef upstream.
|
||||
|
||||
x86_64 zero extends 32bit operations, so for 64bit operands,
|
||||
XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
|
||||
a REX prefix byte when legacy registers are used.
|
||||
|
||||
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
|
||||
Cc: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Cc: "David S. Miller" <davem@davemloft.net>
|
||||
Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
|
||||
+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
|
||||
@@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
|
||||
___
|
||||
&declare_function("poly1305_init_x86_64", 32, 3);
|
||||
$code.=<<___;
|
||||
- xor %rax,%rax
|
||||
+ xor %eax,%eax
|
||||
mov %rax,0($ctx) # initialize hash value
|
||||
mov %rax,8($ctx)
|
||||
mov %rax,16($ctx)
|
||||
@@ -2869,7 +2869,7 @@ $code.=<<___;
|
||||
.type poly1305_init_base2_44,\@function,3
|
||||
.align 32
|
||||
poly1305_init_base2_44:
|
||||
- xor %rax,%rax
|
||||
+ xor %eax,%eax
|
||||
mov %rax,0($ctx) # initialize hash value
|
||||
mov %rax,8($ctx)
|
||||
mov %rax,16($ctx)
|
||||
@@ -3963,7 +3963,7 @@ xor128_decrypt_n_pad:
|
||||
mov \$16,$len
|
||||
sub %r10,$len
|
||||
xor %eax,%eax
|
||||
- xor %r11,%r11
|
||||
+ xor %r11d,%r11d
|
||||
.Loop_dec_byte:
|
||||
mov ($inp,$otp),%r11b
|
||||
mov ($otp),%al
|
||||
@@ -4101,7 +4101,7 @@ avx_handler:
|
||||
.long 0xa548f3fc # cld; rep movsq
|
||||
|
||||
mov $disp,%rsi
|
||||
- xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
|
||||
+ xor %ecx,%ecx # arg1, UNW_FLAG_NHANDLER
|
||||
mov 8(%rsi),%rdx # arg2, disp->ImageBase
|
||||
mov 0(%rsi),%r8 # arg3, disp->ControlPc
|
||||
mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
|
@ -0,0 +1,29 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Date: Thu, 24 Sep 2020 13:29:04 +1000
|
||||
Subject: [PATCH] crypto: x86/poly1305 - Remove assignments with no effect
|
||||
|
||||
commit 4a0c1de64bf9d9027a6f19adfba89fc27893db23 upstream.
|
||||
|
||||
This patch removes a few ineffectual assignments from the function
|
||||
crypto_poly1305_setdctxkey.
|
||||
|
||||
Reported-by: kernel test robot <lkp@intel.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/poly1305_glue.c | 3 ---
|
||||
1 file changed, 3 deletions(-)
|
||||
|
||||
--- a/arch/x86/crypto/poly1305_glue.c
|
||||
+++ b/arch/x86/crypto/poly1305_glue.c
|
||||
@@ -157,9 +157,6 @@ static unsigned int crypto_poly1305_setd
|
||||
dctx->s[1] = get_unaligned_le32(&inp[4]);
|
||||
dctx->s[2] = get_unaligned_le32(&inp[8]);
|
||||
dctx->s[3] = get_unaligned_le32(&inp[12]);
|
||||
- inp += POLY1305_BLOCK_SIZE;
|
||||
- len -= POLY1305_BLOCK_SIZE;
|
||||
- acc += POLY1305_BLOCK_SIZE;
|
||||
dctx->sset = true;
|
||||
}
|
||||
}
|
@ -0,0 +1,33 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Biggers <ebiggers@google.com>
|
||||
Date: Fri, 23 Oct 2020 15:27:48 -0700
|
||||
Subject: [PATCH] crypto: x86/poly1305 - add back a needed assignment
|
||||
|
||||
commit c3a98c3ad5c0dc60a1ac66bf91147a3f39cac96b upstream.
|
||||
|
||||
One of the assignments that was removed by commit 4a0c1de64bf9 ("crypto:
|
||||
x86/poly1305 - Remove assignments with no effect") is actually needed,
|
||||
since it affects the return value.
|
||||
|
||||
This fixes the following crypto self-test failure:
|
||||
|
||||
alg: shash: poly1305-simd test failed (wrong result) on test vector 2, cfg="init+update+final aligned buffer"
|
||||
|
||||
Fixes: 4a0c1de64bf9 ("crypto: x86/poly1305 - Remove assignments with no effect")
|
||||
Signed-off-by: Eric Biggers <ebiggers@google.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/x86/crypto/poly1305_glue.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/arch/x86/crypto/poly1305_glue.c
|
||||
+++ b/arch/x86/crypto/poly1305_glue.c
|
||||
@@ -157,6 +157,7 @@ static unsigned int crypto_poly1305_setd
|
||||
dctx->s[1] = get_unaligned_le32(&inp[4]);
|
||||
dctx->s[2] = get_unaligned_le32(&inp[8]);
|
||||
dctx->s[3] = get_unaligned_le32(&inp[12]);
|
||||
+ acc += POLY1305_BLOCK_SIZE;
|
||||
dctx->sset = true;
|
||||
}
|
||||
}
|
@ -0,0 +1,33 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Mon, 2 Nov 2020 14:48:15 +0100
|
||||
Subject: [PATCH] crypto: Kconfig - CRYPTO_MANAGER_EXTRA_TESTS requires the
|
||||
manager
|
||||
|
||||
commit 6569e3097f1c4a490bdf2b23d326855e04942dfd upstream.
|
||||
|
||||
The extra tests in the manager actually require the manager to be
|
||||
selected too. Otherwise the linker gives errors like:
|
||||
|
||||
ld: arch/x86/crypto/chacha_glue.o: in function `chacha_simd_stream_xor':
|
||||
chacha_glue.c:(.text+0x422): undefined reference to `crypto_simd_disabled_for_test'
|
||||
|
||||
Fixes: 2343d1529aff ("crypto: Kconfig - allow tests to be disabled when manager is disabled")
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
crypto/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -145,7 +145,7 @@ config CRYPTO_MANAGER_DISABLE_TESTS
|
||||
|
||||
config CRYPTO_MANAGER_EXTRA_TESTS
|
||||
bool "Enable extra run-time crypto self tests"
|
||||
- depends on DEBUG_KERNEL && !CRYPTO_MANAGER_DISABLE_TESTS
|
||||
+ depends on DEBUG_KERNEL && !CRYPTO_MANAGER_DISABLE_TESTS && CRYPTO_MANAGER
|
||||
help
|
||||
Enable extra run-time self tests of registered crypto algorithms,
|
||||
including randomized fuzz tests.
|
@ -0,0 +1,272 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Tue, 3 Nov 2020 17:28:09 +0100
|
||||
Subject: [PATCH] crypto: arm/chacha-neon - optimize for non-block size
|
||||
multiples
|
||||
|
||||
commit 86cd97ec4b943af35562a74688bc4e909b32c3d1 upstream.
|
||||
|
||||
The current NEON based ChaCha implementation for ARM is optimized for
|
||||
multiples of 4x the ChaCha block size (64 bytes). This makes sense for
|
||||
block encryption, but given that ChaCha is also often used in the
|
||||
context of networking, it makes sense to consider arbitrary length
|
||||
inputs as well.
|
||||
|
||||
For example, WireGuard typically uses 1420 byte packets, and performing
|
||||
ChaCha encryption involves 5 invocations of chacha_4block_xor_neon()
|
||||
and 3 invocations of chacha_block_xor_neon(), where the last one also
|
||||
involves a memcpy() using a buffer on the stack to process the final
|
||||
chunk of 1420 % 64 == 12 bytes.
|
||||
|
||||
Let's optimize for this case as well, by letting chacha_4block_xor_neon()
|
||||
deal with any input size between 64 and 256 bytes, using NEON permutation
|
||||
instructions and overlapping loads and stores. This way, the 140 byte
|
||||
tail of a 1420 byte input buffer can simply be processed in one go.
|
||||
|
||||
This results in the following performance improvements for 1420 byte
|
||||
blocks, without significant impact on power-of-2 input sizes. (Note
|
||||
that Raspberry Pi is widely used in combination with a 32-bit kernel,
|
||||
even though the core is 64-bit capable)
|
||||
|
||||
Cortex-A8 (BeagleBone) : 7%
|
||||
Cortex-A15 (Calxeda Midway) : 21%
|
||||
Cortex-A53 (Raspberry Pi 3) : 3%
|
||||
Cortex-A72 (Raspberry Pi 4) : 19%
|
||||
|
||||
Cc: Eric Biggers <ebiggers@google.com>
|
||||
Cc: "Jason A . Donenfeld" <Jason@zx2c4.com>
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/chacha-glue.c | 34 +++++------
|
||||
arch/arm/crypto/chacha-neon-core.S | 97 +++++++++++++++++++++++++++---
|
||||
2 files changed, 107 insertions(+), 24 deletions(-)
|
||||
|
||||
--- a/arch/arm/crypto/chacha-glue.c
|
||||
+++ b/arch/arm/crypto/chacha-glue.c
|
||||
@@ -23,7 +23,7 @@
|
||||
asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
int nrounds);
|
||||
asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
|
||||
- int nrounds);
|
||||
+ int nrounds, unsigned int nbytes);
|
||||
asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
|
||||
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
|
||||
|
||||
@@ -42,24 +42,24 @@ static void chacha_doneon(u32 *state, u8
|
||||
{
|
||||
u8 buf[CHACHA_BLOCK_SIZE];
|
||||
|
||||
- while (bytes >= CHACHA_BLOCK_SIZE * 4) {
|
||||
- chacha_4block_xor_neon(state, dst, src, nrounds);
|
||||
- bytes -= CHACHA_BLOCK_SIZE * 4;
|
||||
- src += CHACHA_BLOCK_SIZE * 4;
|
||||
- dst += CHACHA_BLOCK_SIZE * 4;
|
||||
- state[12] += 4;
|
||||
- }
|
||||
- while (bytes >= CHACHA_BLOCK_SIZE) {
|
||||
- chacha_block_xor_neon(state, dst, src, nrounds);
|
||||
- bytes -= CHACHA_BLOCK_SIZE;
|
||||
- src += CHACHA_BLOCK_SIZE;
|
||||
- dst += CHACHA_BLOCK_SIZE;
|
||||
- state[12]++;
|
||||
+ while (bytes > CHACHA_BLOCK_SIZE) {
|
||||
+ unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);
|
||||
+
|
||||
+ chacha_4block_xor_neon(state, dst, src, nrounds, l);
|
||||
+ bytes -= l;
|
||||
+ src += l;
|
||||
+ dst += l;
|
||||
+ state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
|
||||
}
|
||||
if (bytes) {
|
||||
- memcpy(buf, src, bytes);
|
||||
- chacha_block_xor_neon(state, buf, buf, nrounds);
|
||||
- memcpy(dst, buf, bytes);
|
||||
+ const u8 *s = src;
|
||||
+ u8 *d = dst;
|
||||
+
|
||||
+ if (bytes != CHACHA_BLOCK_SIZE)
|
||||
+ s = d = memcpy(buf, src, bytes);
|
||||
+ chacha_block_xor_neon(state, d, s, nrounds);
|
||||
+ if (d != dst)
|
||||
+ memcpy(dst, buf, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
--- a/arch/arm/crypto/chacha-neon-core.S
|
||||
+++ b/arch/arm/crypto/chacha-neon-core.S
|
||||
@@ -47,6 +47,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
+#include <asm/cache.h>
|
||||
|
||||
.text
|
||||
.fpu neon
|
||||
@@ -205,7 +206,7 @@ ENDPROC(hchacha_block_neon)
|
||||
|
||||
.align 5
|
||||
ENTRY(chacha_4block_xor_neon)
|
||||
- push {r4-r5}
|
||||
+ push {r4, lr}
|
||||
mov r4, sp // preserve the stack pointer
|
||||
sub ip, sp, #0x20 // allocate a 32 byte buffer
|
||||
bic ip, ip, #0x1f // aligned to 32 bytes
|
||||
@@ -229,10 +230,10 @@ ENTRY(chacha_4block_xor_neon)
|
||||
vld1.32 {q0-q1}, [r0]
|
||||
vld1.32 {q2-q3}, [ip]
|
||||
|
||||
- adr r5, .Lctrinc
|
||||
+ adr lr, .Lctrinc
|
||||
vdup.32 q15, d7[1]
|
||||
vdup.32 q14, d7[0]
|
||||
- vld1.32 {q4}, [r5, :128]
|
||||
+ vld1.32 {q4}, [lr, :128]
|
||||
vdup.32 q13, d6[1]
|
||||
vdup.32 q12, d6[0]
|
||||
vdup.32 q11, d5[1]
|
||||
@@ -455,7 +456,7 @@ ENTRY(chacha_4block_xor_neon)
|
||||
|
||||
// Re-interleave the words in the first two rows of each block (x0..7).
|
||||
// Also add the counter values 0-3 to x12[0-3].
|
||||
- vld1.32 {q8}, [r5, :128] // load counter values 0-3
|
||||
+ vld1.32 {q8}, [lr, :128] // load counter values 0-3
|
||||
vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1)
|
||||
vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3)
|
||||
vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5)
|
||||
@@ -493,6 +494,8 @@ ENTRY(chacha_4block_xor_neon)
|
||||
|
||||
// Re-interleave the words in the last two rows of each block (x8..15).
|
||||
vld1.32 {q8-q9}, [sp, :256]
|
||||
+ mov sp, r4 // restore original stack pointer
|
||||
+ ldr r4, [r4, #8] // load number of bytes
|
||||
vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13)
|
||||
vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15)
|
||||
vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9)
|
||||
@@ -520,41 +523,121 @@ ENTRY(chacha_4block_xor_neon)
|
||||
// XOR the rest of the data with the keystream
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #96
|
||||
veor q0, q0, q8
|
||||
veor q1, q1, q12
|
||||
+ ble .Lle96
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q2
|
||||
veor q1, q1, q6
|
||||
+ ble .Lle128
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q10
|
||||
veor q1, q1, q14
|
||||
+ ble .Lle160
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q4
|
||||
veor q1, q1, q5
|
||||
+ ble .Lle192
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q9
|
||||
veor q1, q1, q13
|
||||
+ ble .Lle224
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
+ subs r4, r4, #32
|
||||
veor q0, q0, q3
|
||||
veor q1, q1, q7
|
||||
+ blt .Llt256
|
||||
+.Lout:
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]
|
||||
- mov sp, r4 // restore original stack pointer
|
||||
veor q0, q0, q11
|
||||
veor q1, q1, q15
|
||||
vst1.8 {q0-q1}, [r1]
|
||||
|
||||
- pop {r4-r5}
|
||||
- bx lr
|
||||
+ pop {r4, pc}
|
||||
+
|
||||
+.Lle192:
|
||||
+ vmov q4, q9
|
||||
+ vmov q5, q13
|
||||
+
|
||||
+.Lle160:
|
||||
+ // nothing to do
|
||||
+
|
||||
+.Lfinalblock:
|
||||
+ // Process the final block if processing less than 4 full blocks.
|
||||
+ // Entered with 32 bytes of ChaCha cipher stream in q4-q5, and the
|
||||
+ // previous 32 byte output block that still needs to be written at
|
||||
+ // [r1] in q0-q1.
|
||||
+ beq .Lfullblock
|
||||
+
|
||||
+.Lpartialblock:
|
||||
+ adr lr, .Lpermute + 32
|
||||
+ add r2, r2, r4
|
||||
+ add lr, lr, r4
|
||||
+ add r4, r4, r1
|
||||
+
|
||||
+ vld1.8 {q2-q3}, [lr]
|
||||
+ vld1.8 {q6-q7}, [r2]
|
||||
+
|
||||
+ add r4, r4, #32
|
||||
+
|
||||
+ vtbl.8 d4, {q4-q5}, d4
|
||||
+ vtbl.8 d5, {q4-q5}, d5
|
||||
+ vtbl.8 d6, {q4-q5}, d6
|
||||
+ vtbl.8 d7, {q4-q5}, d7
|
||||
+
|
||||
+ veor q6, q6, q2
|
||||
+ veor q7, q7, q3
|
||||
+
|
||||
+ vst1.8 {q6-q7}, [r4] // overlapping stores
|
||||
+ vst1.8 {q0-q1}, [r1]
|
||||
+ pop {r4, pc}
|
||||
+
|
||||
+.Lfullblock:
|
||||
+ vmov q11, q4
|
||||
+ vmov q15, q5
|
||||
+ b .Lout
|
||||
+.Lle96:
|
||||
+ vmov q4, q2
|
||||
+ vmov q5, q6
|
||||
+ b .Lfinalblock
|
||||
+.Lle128:
|
||||
+ vmov q4, q10
|
||||
+ vmov q5, q14
|
||||
+ b .Lfinalblock
|
||||
+.Lle224:
|
||||
+ vmov q4, q3
|
||||
+ vmov q5, q7
|
||||
+ b .Lfinalblock
|
||||
+.Llt256:
|
||||
+ vmov q4, q11
|
||||
+ vmov q5, q15
|
||||
+ b .Lpartialblock
|
||||
ENDPROC(chacha_4block_xor_neon)
|
||||
+
|
||||
+ .align L1_CACHE_SHIFT
|
||||
+.Lpermute:
|
||||
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
+ .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
|
||||
+ .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
|
||||
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
+ .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
|
||||
+ .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
|
@ -0,0 +1,324 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Fri, 6 Nov 2020 17:39:38 +0100
|
||||
Subject: [PATCH] crypto: arm64/chacha - simplify tail block handling
|
||||
|
||||
commit c4fc6328d6c67690a7e6e03f43a5a976a13120ef upstream.
|
||||
|
||||
Based on lessons learnt from optimizing the 32-bit version of this driver,
|
||||
we can simplify the arm64 version considerably, by reordering the final
|
||||
two stores when the last block is not a multiple of 64 bytes. This removes
|
||||
the need to use permutation instructions to calculate the elements that are
|
||||
clobbered by the final overlapping store, given that the store of the
|
||||
penultimate block now follows it, and that one carries the correct values
|
||||
for those elements already.
|
||||
|
||||
While at it, simplify the overlapping loads as well, by calculating the
|
||||
address of the final overlapping load upfront, and switching to this
|
||||
address for every load that would otherwise extend past the end of the
|
||||
source buffer.
|
||||
|
||||
There is no impact on performance, but the resulting code is substantially
|
||||
smaller and easier to follow.
|
||||
|
||||
Cc: Eric Biggers <ebiggers@google.com>
|
||||
Cc: "Jason A . Donenfeld" <Jason@zx2c4.com>
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm64/crypto/chacha-neon-core.S | 193 ++++++++++-----------------
|
||||
1 file changed, 69 insertions(+), 124 deletions(-)
|
||||
|
||||
--- a/arch/arm64/crypto/chacha-neon-core.S
|
||||
+++ b/arch/arm64/crypto/chacha-neon-core.S
|
||||
@@ -195,7 +195,6 @@ ENTRY(chacha_4block_xor_neon)
|
||||
adr_l x10, .Lpermute
|
||||
and x5, x4, #63
|
||||
add x10, x10, x5
|
||||
- add x11, x10, #64
|
||||
|
||||
//
|
||||
// This function encrypts four consecutive ChaCha blocks by loading
|
||||
@@ -645,11 +644,11 @@ CPU_BE( rev a15, a15 )
|
||||
zip2 v31.4s, v14.4s, v15.4s
|
||||
eor a15, a15, w9
|
||||
|
||||
- mov x3, #64
|
||||
+ add x3, x2, x4
|
||||
+ sub x3, x3, #128 // start of last block
|
||||
+
|
||||
subs x5, x4, #128
|
||||
- add x6, x5, x2
|
||||
- csel x3, x3, xzr, ge
|
||||
- csel x2, x2, x6, ge
|
||||
+ csel x2, x2, x3, ge
|
||||
|
||||
// interleave 64-bit words in state n, n+2
|
||||
zip1 v0.2d, v16.2d, v18.2d
|
||||
@@ -658,13 +657,10 @@ CPU_BE( rev a15, a15 )
|
||||
zip1 v8.2d, v17.2d, v19.2d
|
||||
zip2 v12.2d, v17.2d, v19.2d
|
||||
stp a2, a3, [x1, #-56]
|
||||
- ld1 {v16.16b-v19.16b}, [x2], x3
|
||||
|
||||
subs x6, x4, #192
|
||||
- ccmp x3, xzr, #4, lt
|
||||
- add x7, x6, x2
|
||||
- csel x3, x3, xzr, eq
|
||||
- csel x2, x2, x7, eq
|
||||
+ ld1 {v16.16b-v19.16b}, [x2], #64
|
||||
+ csel x2, x2, x3, ge
|
||||
|
||||
zip1 v1.2d, v20.2d, v22.2d
|
||||
zip2 v5.2d, v20.2d, v22.2d
|
||||
@@ -672,13 +668,10 @@ CPU_BE( rev a15, a15 )
|
||||
zip1 v9.2d, v21.2d, v23.2d
|
||||
zip2 v13.2d, v21.2d, v23.2d
|
||||
stp a6, a7, [x1, #-40]
|
||||
- ld1 {v20.16b-v23.16b}, [x2], x3
|
||||
|
||||
subs x7, x4, #256
|
||||
- ccmp x3, xzr, #4, lt
|
||||
- add x8, x7, x2
|
||||
- csel x3, x3, xzr, eq
|
||||
- csel x2, x2, x8, eq
|
||||
+ ld1 {v20.16b-v23.16b}, [x2], #64
|
||||
+ csel x2, x2, x3, ge
|
||||
|
||||
zip1 v2.2d, v24.2d, v26.2d
|
||||
zip2 v6.2d, v24.2d, v26.2d
|
||||
@@ -686,12 +679,10 @@ CPU_BE( rev a15, a15 )
|
||||
zip1 v10.2d, v25.2d, v27.2d
|
||||
zip2 v14.2d, v25.2d, v27.2d
|
||||
stp a10, a11, [x1, #-24]
|
||||
- ld1 {v24.16b-v27.16b}, [x2], x3
|
||||
|
||||
subs x8, x4, #320
|
||||
- ccmp x3, xzr, #4, lt
|
||||
- add x9, x8, x2
|
||||
- csel x2, x2, x9, eq
|
||||
+ ld1 {v24.16b-v27.16b}, [x2], #64
|
||||
+ csel x2, x2, x3, ge
|
||||
|
||||
zip1 v3.2d, v28.2d, v30.2d
|
||||
zip2 v7.2d, v28.2d, v30.2d
|
||||
@@ -699,151 +690,105 @@ CPU_BE( rev a15, a15 )
|
||||
zip1 v11.2d, v29.2d, v31.2d
|
||||
zip2 v15.2d, v29.2d, v31.2d
|
||||
stp a14, a15, [x1, #-8]
|
||||
+
|
||||
+ tbnz x5, #63, .Lt128
|
||||
ld1 {v28.16b-v31.16b}, [x2]
|
||||
|
||||
// xor with corresponding input, write to output
|
||||
- tbnz x5, #63, 0f
|
||||
eor v16.16b, v16.16b, v0.16b
|
||||
eor v17.16b, v17.16b, v1.16b
|
||||
eor v18.16b, v18.16b, v2.16b
|
||||
eor v19.16b, v19.16b, v3.16b
|
||||
- st1 {v16.16b-v19.16b}, [x1], #64
|
||||
- cbz x5, .Lout
|
||||
|
||||
- tbnz x6, #63, 1f
|
||||
+ tbnz x6, #63, .Lt192
|
||||
+
|
||||
eor v20.16b, v20.16b, v4.16b
|
||||
eor v21.16b, v21.16b, v5.16b
|
||||
eor v22.16b, v22.16b, v6.16b
|
||||
eor v23.16b, v23.16b, v7.16b
|
||||
- st1 {v20.16b-v23.16b}, [x1], #64
|
||||
- cbz x6, .Lout
|
||||
|
||||
- tbnz x7, #63, 2f
|
||||
+ st1 {v16.16b-v19.16b}, [x1], #64
|
||||
+ tbnz x7, #63, .Lt256
|
||||
+
|
||||
eor v24.16b, v24.16b, v8.16b
|
||||
eor v25.16b, v25.16b, v9.16b
|
||||
eor v26.16b, v26.16b, v10.16b
|
||||
eor v27.16b, v27.16b, v11.16b
|
||||
- st1 {v24.16b-v27.16b}, [x1], #64
|
||||
- cbz x7, .Lout
|
||||
|
||||
- tbnz x8, #63, 3f
|
||||
+ st1 {v20.16b-v23.16b}, [x1], #64
|
||||
+ tbnz x8, #63, .Lt320
|
||||
+
|
||||
eor v28.16b, v28.16b, v12.16b
|
||||
eor v29.16b, v29.16b, v13.16b
|
||||
eor v30.16b, v30.16b, v14.16b
|
||||
eor v31.16b, v31.16b, v15.16b
|
||||
+
|
||||
+ st1 {v24.16b-v27.16b}, [x1], #64
|
||||
st1 {v28.16b-v31.16b}, [x1]
|
||||
|
||||
.Lout: frame_pop
|
||||
ret
|
||||
|
||||
- // fewer than 128 bytes of in/output
|
||||
-0: ld1 {v8.16b}, [x10]
|
||||
- ld1 {v9.16b}, [x11]
|
||||
- movi v10.16b, #16
|
||||
- sub x2, x1, #64
|
||||
- add x1, x1, x5
|
||||
- ld1 {v16.16b-v19.16b}, [x2]
|
||||
- tbl v4.16b, {v0.16b-v3.16b}, v8.16b
|
||||
- tbx v20.16b, {v16.16b-v19.16b}, v9.16b
|
||||
- add v8.16b, v8.16b, v10.16b
|
||||
- add v9.16b, v9.16b, v10.16b
|
||||
- tbl v5.16b, {v0.16b-v3.16b}, v8.16b
|
||||
- tbx v21.16b, {v16.16b-v19.16b}, v9.16b
|
||||
- add v8.16b, v8.16b, v10.16b
|
||||
- add v9.16b, v9.16b, v10.16b
|
||||
- tbl v6.16b, {v0.16b-v3.16b}, v8.16b
|
||||
- tbx v22.16b, {v16.16b-v19.16b}, v9.16b
|
||||
- add v8.16b, v8.16b, v10.16b
|
||||
- add v9.16b, v9.16b, v10.16b
|
||||
- tbl v7.16b, {v0.16b-v3.16b}, v8.16b
|
||||
- tbx v23.16b, {v16.16b-v19.16b}, v9.16b
|
||||
-
|
||||
- eor v20.16b, v20.16b, v4.16b
|
||||
- eor v21.16b, v21.16b, v5.16b
|
||||
- eor v22.16b, v22.16b, v6.16b
|
||||
- eor v23.16b, v23.16b, v7.16b
|
||||
- st1 {v20.16b-v23.16b}, [x1]
|
||||
- b .Lout
|
||||
-
|
||||
// fewer than 192 bytes of in/output
|
||||
-1: ld1 {v8.16b}, [x10]
|
||||
- ld1 {v9.16b}, [x11]
|
||||
- movi v10.16b, #16
|
||||
- add x1, x1, x6
|
||||
- tbl v0.16b, {v4.16b-v7.16b}, v8.16b
|
||||
- tbx v20.16b, {v16.16b-v19.16b}, v9.16b
|
||||
- add v8.16b, v8.16b, v10.16b
|
||||
- add v9.16b, v9.16b, v10.16b
|
||||
- tbl v1.16b, {v4.16b-v7.16b}, v8.16b
|
||||
- tbx v21.16b, {v16.16b-v19.16b}, v9.16b
|
||||
- add v8.16b, v8.16b, v10.16b
|
||||
- add v9.16b, v9.16b, v10.16b
|
||||
- tbl v2.16b, {v4.16b-v7.16b}, v8.16b
|
||||
- tbx v22.16b, {v16.16b-v19.16b}, v9.16b
|
||||
- add v8.16b, v8.16b, v10.16b
|
||||
- add v9.16b, v9.16b, v10.16b
|
||||
- tbl v3.16b, {v4.16b-v7.16b}, v8.16b
|
||||
- tbx v23.16b, {v16.16b-v19.16b}, v9.16b
|
||||
-
|
||||
- eor v20.16b, v20.16b, v0.16b
|
||||
- eor v21.16b, v21.16b, v1.16b
|
||||
- eor v22.16b, v22.16b, v2.16b
|
||||
- eor v23.16b, v23.16b, v3.16b
|
||||
- st1 {v20.16b-v23.16b}, [x1]
|
||||
+.Lt192: cbz x5, 1f // exactly 128 bytes?
|
||||
+ ld1 {v28.16b-v31.16b}, [x10]
|
||||
+ add x5, x5, x1
|
||||
+ tbl v28.16b, {v4.16b-v7.16b}, v28.16b
|
||||
+ tbl v29.16b, {v4.16b-v7.16b}, v29.16b
|
||||
+ tbl v30.16b, {v4.16b-v7.16b}, v30.16b
|
||||
+ tbl v31.16b, {v4.16b-v7.16b}, v31.16b
|
||||
+
|
||||
+0: eor v20.16b, v20.16b, v28.16b
|
||||
+ eor v21.16b, v21.16b, v29.16b
|
||||
+ eor v22.16b, v22.16b, v30.16b
|
||||
+ eor v23.16b, v23.16b, v31.16b
|
||||
+ st1 {v20.16b-v23.16b}, [x5] // overlapping stores
|
||||
+1: st1 {v16.16b-v19.16b}, [x1]
|
||||
b .Lout
|
||||
|
||||
+ // fewer than 128 bytes of in/output
|
||||
+.Lt128: ld1 {v28.16b-v31.16b}, [x10]
|
||||
+ add x5, x5, x1
|
||||
+ sub x1, x1, #64
|
||||
+ tbl v28.16b, {v0.16b-v3.16b}, v28.16b
|
||||
+ tbl v29.16b, {v0.16b-v3.16b}, v29.16b
|
||||
+ tbl v30.16b, {v0.16b-v3.16b}, v30.16b
|
||||
+ tbl v31.16b, {v0.16b-v3.16b}, v31.16b
|
||||
+ ld1 {v16.16b-v19.16b}, [x1] // reload first output block
|
||||
+ b 0b
|
||||
+
|
||||
// fewer than 256 bytes of in/output
|
||||
-2: ld1 {v4.16b}, [x10]
|
||||
- ld1 {v5.16b}, [x11]
|
||||
- movi v6.16b, #16
|
||||
- add x1, x1, x7
|
||||
+.Lt256: cbz x6, 2f // exactly 192 bytes?
|
||||
+ ld1 {v4.16b-v7.16b}, [x10]
|
||||
+ add x6, x6, x1
|
||||
tbl v0.16b, {v8.16b-v11.16b}, v4.16b
|
||||
- tbx v24.16b, {v20.16b-v23.16b}, v5.16b
|
||||
- add v4.16b, v4.16b, v6.16b
|
||||
- add v5.16b, v5.16b, v6.16b
|
||||
- tbl v1.16b, {v8.16b-v11.16b}, v4.16b
|
||||
- tbx v25.16b, {v20.16b-v23.16b}, v5.16b
|
||||
- add v4.16b, v4.16b, v6.16b
|
||||
- add v5.16b, v5.16b, v6.16b
|
||||
- tbl v2.16b, {v8.16b-v11.16b}, v4.16b
|
||||
- tbx v26.16b, {v20.16b-v23.16b}, v5.16b
|
||||
- add v4.16b, v4.16b, v6.16b
|
||||
- add v5.16b, v5.16b, v6.16b
|
||||
- tbl v3.16b, {v8.16b-v11.16b}, v4.16b
|
||||
- tbx v27.16b, {v20.16b-v23.16b}, v5.16b
|
||||
-
|
||||
- eor v24.16b, v24.16b, v0.16b
|
||||
- eor v25.16b, v25.16b, v1.16b
|
||||
- eor v26.16b, v26.16b, v2.16b
|
||||
- eor v27.16b, v27.16b, v3.16b
|
||||
- st1 {v24.16b-v27.16b}, [x1]
|
||||
+ tbl v1.16b, {v8.16b-v11.16b}, v5.16b
|
||||
+ tbl v2.16b, {v8.16b-v11.16b}, v6.16b
|
||||
+ tbl v3.16b, {v8.16b-v11.16b}, v7.16b
|
||||
+
|
||||
+ eor v28.16b, v28.16b, v0.16b
|
||||
+ eor v29.16b, v29.16b, v1.16b
|
||||
+ eor v30.16b, v30.16b, v2.16b
|
||||
+ eor v31.16b, v31.16b, v3.16b
|
||||
+ st1 {v28.16b-v31.16b}, [x6] // overlapping stores
|
||||
+2: st1 {v20.16b-v23.16b}, [x1]
|
||||
b .Lout
|
||||
|
||||
// fewer than 320 bytes of in/output
|
||||
-3: ld1 {v4.16b}, [x10]
|
||||
- ld1 {v5.16b}, [x11]
|
||||
- movi v6.16b, #16
|
||||
- add x1, x1, x8
|
||||
+.Lt320: cbz x7, 3f // exactly 256 bytes?
|
||||
+ ld1 {v4.16b-v7.16b}, [x10]
|
||||
+ add x7, x7, x1
|
||||
tbl v0.16b, {v12.16b-v15.16b}, v4.16b
|
||||
- tbx v28.16b, {v24.16b-v27.16b}, v5.16b
|
||||
- add v4.16b, v4.16b, v6.16b
|
||||
- add v5.16b, v5.16b, v6.16b
|
||||
- tbl v1.16b, {v12.16b-v15.16b}, v4.16b
|
||||
- tbx v29.16b, {v24.16b-v27.16b}, v5.16b
|
||||
- add v4.16b, v4.16b, v6.16b
|
||||
- add v5.16b, v5.16b, v6.16b
|
||||
- tbl v2.16b, {v12.16b-v15.16b}, v4.16b
|
||||
- tbx v30.16b, {v24.16b-v27.16b}, v5.16b
|
||||
- add v4.16b, v4.16b, v6.16b
|
||||
- add v5.16b, v5.16b, v6.16b
|
||||
- tbl v3.16b, {v12.16b-v15.16b}, v4.16b
|
||||
- tbx v31.16b, {v24.16b-v27.16b}, v5.16b
|
||||
+ tbl v1.16b, {v12.16b-v15.16b}, v5.16b
|
||||
+ tbl v2.16b, {v12.16b-v15.16b}, v6.16b
|
||||
+ tbl v3.16b, {v12.16b-v15.16b}, v7.16b
|
||||
|
||||
eor v28.16b, v28.16b, v0.16b
|
||||
eor v29.16b, v29.16b, v1.16b
|
||||
eor v30.16b, v30.16b, v2.16b
|
||||
eor v31.16b, v31.16b, v3.16b
|
||||
- st1 {v28.16b-v31.16b}, [x1]
|
||||
+ st1 {v28.16b-v31.16b}, [x7] // overlapping stores
|
||||
+3: st1 {v24.16b-v27.16b}, [x1]
|
||||
b .Lout
|
||||
ENDPROC(chacha_4block_xor_neon)
|
||||
|
||||
@@ -851,7 +796,7 @@ ENDPROC(chacha_4block_xor_neon)
|
||||
.align L1_CACHE_SHIFT
|
||||
.Lpermute:
|
||||
.set .Li, 0
|
||||
- .rept 192
|
||||
+ .rept 128
|
||||
.byte (.Li - 64)
|
||||
.set .Li, .Li + 1
|
||||
.endr
|
@ -0,0 +1,37 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Fri, 15 Jan 2021 20:30:12 +0100
|
||||
Subject: [PATCH] crypto: lib/chacha20poly1305 - define empty module exit
|
||||
function
|
||||
|
||||
commit ac88c322d0f2917d41d13553c69e9d7f043c8b6f upstream.
|
||||
|
||||
With no mod_exit function, users are unable to unload the module after
|
||||
use. I'm not aware of any reason why module unloading should be
|
||||
prohibited for this one, so this commit simply adds an empty exit
|
||||
function.
|
||||
|
||||
Reported-and-tested-by: John Donnelly <john.p.donnelly@oracle.com>
|
||||
Acked-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
lib/crypto/chacha20poly1305.c | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
--- a/lib/crypto/chacha20poly1305.c
|
||||
+++ b/lib/crypto/chacha20poly1305.c
|
||||
@@ -364,7 +364,12 @@ static int __init mod_init(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void __exit mod_exit(void)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
module_init(mod_init);
|
||||
+module_exit(mod_exit);
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction");
|
||||
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
|
@ -0,0 +1,38 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Ard Biesheuvel <ardb@kernel.org>
|
||||
Date: Sun, 13 Dec 2020 15:39:29 +0100
|
||||
Subject: [PATCH] crypto: arm/chacha-neon - add missing counter increment
|
||||
|
||||
commit fd16931a2f518a32753920ff20895e5cf04c8ff1 upstream.
|
||||
|
||||
Commit 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block
|
||||
size multiples") refactored the chacha block handling in the glue code in
|
||||
a way that may result in the counter increment being omitted when calling
|
||||
chacha_block_xor_neon() to process a full block. This violates the skcipher
|
||||
API, which requires that the output IV is suitable for handling more input
|
||||
as long as the preceding input has been presented in round multiples of the
|
||||
block size. Also, the same code is exposed via the chacha library interface
|
||||
whose callers may actually rely on this increment to occur even for final
|
||||
blocks that are smaller than the chacha block size.
|
||||
|
||||
So increment the counter after calling chacha_block_xor_neon().
|
||||
|
||||
Fixes: 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block size multiples")
|
||||
Reported-by: Eric Biggers <ebiggers@kernel.org>
|
||||
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
arch/arm/crypto/chacha-glue.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/arch/arm/crypto/chacha-glue.c
|
||||
+++ b/arch/arm/crypto/chacha-glue.c
|
||||
@@ -60,6 +60,7 @@ static void chacha_doneon(u32 *state, u8
|
||||
chacha_block_xor_neon(state, d, s, nrounds);
|
||||
if (d != dst)
|
||||
memcpy(dst, buf, bytes);
|
||||
+ state[12]++;
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,30 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Sun, 15 Dec 2019 22:08:01 +0100
|
||||
Subject: [PATCH] wireguard: Kconfig: select parent dependency for crypto
|
||||
|
||||
commit d7c68a38bb4f9b7c1a2e4a772872c752ee5c44a6 upstream.
|
||||
|
||||
This fixes the crypto selection submenu dependencies. Otherwise, we'd
|
||||
wind up issuing warnings in which certain dependencies we also select
|
||||
couldn't be satisfied. This condition was triggered by the addition of
|
||||
the test suite autobuilder in the previous commit.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/Kconfig | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
--- a/drivers/net/Kconfig
|
||||
+++ b/drivers/net/Kconfig
|
||||
@@ -85,6 +85,8 @@ config WIREGUARD
|
||||
select CRYPTO_POLY1305_X86_64 if X86 && 64BIT
|
||||
select CRYPTO_BLAKE2S_X86 if X86 && 64BIT
|
||||
select CRYPTO_CURVE25519_X86 if X86 && 64BIT
|
||||
+ select ARM_CRYPTO if ARM
|
||||
+ select ARM64_CRYPTO if ARM64
|
||||
select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON
|
||||
select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON
|
||||
select CRYPTO_POLY1305_ARM if ARM
|
@ -0,0 +1,66 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Josh Soref <jsoref@gmail.com>
|
||||
Date: Sun, 15 Dec 2019 22:08:02 +0100
|
||||
Subject: [PATCH] wireguard: global: fix spelling mistakes in comments
|
||||
|
||||
commit a2ec8b5706944d228181c8b91d815f41d6dd8e7b upstream.
|
||||
|
||||
This fixes two spelling errors in source code comments.
|
||||
|
||||
Signed-off-by: Josh Soref <jsoref@gmail.com>
|
||||
[Jason: rewrote commit message]
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/receive.c | 2 +-
|
||||
include/uapi/linux/wireguard.h | 8 ++++----
|
||||
2 files changed, 5 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/receive.c
|
||||
+++ b/drivers/net/wireguard/receive.c
|
||||
@@ -380,7 +380,7 @@ static void wg_packet_consume_data_done(
|
||||
/* We've already verified the Poly1305 auth tag, which means this packet
|
||||
* was not modified in transit. We can therefore tell the networking
|
||||
* stack that all checksums of every layer of encapsulation have already
|
||||
- * been checked "by the hardware" and therefore is unneccessary to check
|
||||
+ * been checked "by the hardware" and therefore is unnecessary to check
|
||||
* again in software.
|
||||
*/
|
||||
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||
--- a/include/uapi/linux/wireguard.h
|
||||
+++ b/include/uapi/linux/wireguard.h
|
||||
@@ -18,13 +18,13 @@
|
||||
* one but not both of:
|
||||
*
|
||||
* WGDEVICE_A_IFINDEX: NLA_U32
|
||||
- * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
|
||||
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
|
||||
*
|
||||
* The kernel will then return several messages (NLM_F_MULTI) containing the
|
||||
* following tree of nested items:
|
||||
*
|
||||
* WGDEVICE_A_IFINDEX: NLA_U32
|
||||
- * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
|
||||
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
|
||||
* WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
|
||||
* WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
|
||||
* WGDEVICE_A_LISTEN_PORT: NLA_U16
|
||||
@@ -77,7 +77,7 @@
|
||||
* WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME:
|
||||
*
|
||||
* WGDEVICE_A_IFINDEX: NLA_U32
|
||||
- * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
|
||||
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
|
||||
* WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current
|
||||
* peers should be removed prior to adding the list below.
|
||||
* WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove
|
||||
@@ -121,7 +121,7 @@
|
||||
* filling in information not contained in the prior. Note that if
|
||||
* WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably
|
||||
* should not be specified in fragments that come after, so that the list
|
||||
- * of peers is only cleared the first time but appened after. Likewise for
|
||||
+ * of peers is only cleared the first time but appended after. Likewise for
|
||||
* peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message
|
||||
* of a peer, it likely should not be specified in subsequent fragments.
|
||||
*
|
@ -0,0 +1,28 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: YueHaibing <yuehaibing@huawei.com>
|
||||
Date: Sun, 15 Dec 2019 22:08:03 +0100
|
||||
Subject: [PATCH] wireguard: main: remove unused include <linux/version.h>
|
||||
|
||||
commit 43967b6ff91e53bcce5ae08c16a0588a475b53a1 upstream.
|
||||
|
||||
Remove <linux/version.h> from the includes for main.c, which is unused.
|
||||
|
||||
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
|
||||
[Jason: reworded commit message]
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/main.c | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/wireguard/main.c
|
||||
+++ b/drivers/net/wireguard/main.c
|
||||
@@ -12,7 +12,6 @@
|
||||
|
||||
#include <uapi/linux/wireguard.h>
|
||||
|
||||
-#include <linux/version.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/genetlink.h>
|
@ -0,0 +1,41 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Wei Yongjun <weiyongjun1@huawei.com>
|
||||
Date: Sun, 15 Dec 2019 22:08:04 +0100
|
||||
Subject: [PATCH] wireguard: allowedips: use kfree_rcu() instead of call_rcu()
|
||||
|
||||
commit d89ee7d5c73af15c1c6f12b016cdf469742b5726 upstream.
|
||||
|
||||
The callback function of call_rcu() just calls a kfree(), so we
|
||||
can use kfree_rcu() instead of call_rcu() + callback function.
|
||||
|
||||
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/allowedips.c | 7 +------
|
||||
1 file changed, 1 insertion(+), 6 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/allowedips.c
|
||||
+++ b/drivers/net/wireguard/allowedips.c
|
||||
@@ -31,11 +31,6 @@ static void copy_and_assign_cidr(struct
|
||||
#define CHOOSE_NODE(parent, key) \
|
||||
parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
|
||||
|
||||
-static void node_free_rcu(struct rcu_head *rcu)
|
||||
-{
|
||||
- kfree(container_of(rcu, struct allowedips_node, rcu));
|
||||
-}
|
||||
-
|
||||
static void push_rcu(struct allowedips_node **stack,
|
||||
struct allowedips_node __rcu *p, unsigned int *len)
|
||||
{
|
||||
@@ -112,7 +107,7 @@ static void walk_remove_by_peer(struct a
|
||||
if (!node->bit[0] || !node->bit[1]) {
|
||||
rcu_assign_pointer(*nptr, DEREF(
|
||||
&node->bit[!REF(node->bit[0])]));
|
||||
- call_rcu(&node->rcu, node_free_rcu);
|
||||
+ kfree_rcu(node, rcu);
|
||||
node = DEREF(nptr);
|
||||
}
|
||||
}
|
@ -0,0 +1,373 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Thu, 2 Jan 2020 17:47:49 +0100
|
||||
Subject: [PATCH] wireguard: selftests: remove ancient kernel compatibility
|
||||
code
|
||||
|
||||
commit 9a69a4c8802adf642bc4a13d471b5a86b44ed434 upstream.
|
||||
|
||||
Quite a bit of the test suite was designed to work with ancient kernels.
|
||||
Thankfully we no longer have to deal with this. This commit updates
|
||||
things that we can finally update and removes things that we can finally
|
||||
remove, to avoid the build-up of the last several years as a result of
|
||||
having to support ancient kernels. We can finally rely on suppress_
|
||||
prefixlength being available. On the build side of things, the no-PIE
|
||||
hack is no longer required, and we can bump some of the tools, repair
|
||||
our m68k and i686-kvm support, and get better coverage of the static
|
||||
branches used in the crypto lib and in udp_tunnel.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
tools/testing/selftests/wireguard/netns.sh | 11 +--
|
||||
.../testing/selftests/wireguard/qemu/Makefile | 82 ++++++++++---------
|
||||
.../selftests/wireguard/qemu/arch/m68k.config | 2 +-
|
||||
tools/testing/selftests/wireguard/qemu/init.c | 1 +
|
||||
.../selftests/wireguard/qemu/kernel.config | 2 +
|
||||
5 files changed, 50 insertions(+), 48 deletions(-)
|
||||
|
||||
--- a/tools/testing/selftests/wireguard/netns.sh
|
||||
+++ b/tools/testing/selftests/wireguard/netns.sh
|
||||
@@ -37,7 +37,7 @@ n2() { pretty 2 "$*"; maybe_exec ip netn
|
||||
ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
|
||||
ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
|
||||
ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
|
||||
-sleep() { read -t "$1" -N 0 || true; }
|
||||
+sleep() { read -t "$1" -N 1 || true; }
|
||||
waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; }
|
||||
waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
|
||||
waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
|
||||
@@ -294,12 +294,9 @@ ip1 -6 rule add table main suppress_pref
|
||||
ip1 -4 route add default dev wg0 table 51820
|
||||
ip1 -4 rule add not fwmark 51820 table 51820
|
||||
ip1 -4 rule add table main suppress_prefixlength 0
|
||||
-# suppress_prefixlength only got added in 3.12, and we want to support 3.10+.
|
||||
-if [[ $(ip1 -4 rule show all) == *suppress_prefixlength* ]]; then
|
||||
- # Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
|
||||
- n1 ping -W 1 -c 100 -f 192.168.99.7
|
||||
- n1 ping -W 1 -c 100 -f abab::1111
|
||||
-fi
|
||||
+# Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
|
||||
+n1 ping -W 1 -c 100 -f 192.168.99.7
|
||||
+n1 ping -W 1 -c 100 -f abab::1111
|
||||
|
||||
n0 iptables -t nat -F
|
||||
ip0 link del vethrc
|
||||
--- a/tools/testing/selftests/wireguard/qemu/Makefile
|
||||
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
|
||||
@@ -5,6 +5,7 @@
|
||||
PWD := $(shell pwd)
|
||||
|
||||
CHOST := $(shell gcc -dumpmachine)
|
||||
+HOST_ARCH := $(firstword $(subst -, ,$(CHOST)))
|
||||
ifneq (,$(ARCH))
|
||||
CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))
|
||||
ifeq (,$(CBUILD))
|
||||
@@ -37,19 +38,19 @@ endef
|
||||
define file_download =
|
||||
$(DISTFILES_PATH)/$(1):
|
||||
mkdir -p $(DISTFILES_PATH)
|
||||
- flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -t inf --retry-on-http-error=404 -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
|
||||
+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
|
||||
if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi
|
||||
endef
|
||||
|
||||
-$(eval $(call tar_download,MUSL,musl,1.1.20,.tar.gz,https://www.musl-libc.org/releases/,44be8771d0e6c6b5f82dd15662eb2957c9a3173a19a8b49966ac0542bbd40d61))
|
||||
+$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
|
||||
$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81))
|
||||
-$(eval $(call tar_download,IPERF,iperf,3.1.7,.tar.gz,http://downloads.es.net/pub/iperf/,a4ef73406fe92250602b8da2ae89ec53211f805df97a1d1d629db5a14043734f))
|
||||
+$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
|
||||
$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
|
||||
-$(eval $(call tar_download,IPROUTE2,iproute2,5.1.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,9b43707d6075ecdca14803ca8ce0c8553848c49fa1586d12fd508d66577243f2))
|
||||
-$(eval $(call tar_download,IPTABLES,iptables,1.6.1,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,0fc2d7bd5d7be11311726466789d4c65fb4c8e096c9182b56ce97440864f0cf5))
|
||||
-$(eval $(call tar_download,NMAP,nmap,7.60,.tar.bz2,https://nmap.org/dist/,a8796ecc4fa6c38aad6139d9515dc8113023a82e9d787e5a5fb5fa1b05516f21))
|
||||
-$(eval $(call tar_download,IPUTILS,iputils,s20161105,.tar.gz,https://github.com/iputils/iputils/archive/s20161105.tar.gz/#,f813092f03d17294fd23544b129b95cdb87fe19f7970a51908a6b88509acad8a))
|
||||
-$(eval $(call tar_download,WIREGUARD_TOOLS,WireGuard,0.0.20191212,.tar.xz,https://git.zx2c4.com/WireGuard/snapshot/,b0d718380f7a8822b2f12d75e462fa4eafa3a77871002981f367cd4fe2a1b071))
|
||||
+$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
|
||||
+$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
|
||||
+$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
|
||||
+$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
|
||||
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f))
|
||||
|
||||
KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
|
||||
rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
|
||||
@@ -59,23 +60,21 @@ export CFLAGS ?= -O3 -pipe
|
||||
export LDFLAGS ?=
|
||||
export CPPFLAGS := -I$(BUILD_PATH)/include
|
||||
|
||||
-ifeq ($(CHOST),$(CBUILD))
|
||||
+ifeq ($(HOST_ARCH),$(ARCH))
|
||||
CROSS_COMPILE_FLAG := --host=$(CHOST)
|
||||
-NOPIE_GCC := gcc -fno-PIE
|
||||
CFLAGS += -march=native
|
||||
STRIP := strip
|
||||
else
|
||||
$(info Cross compilation: building for $(CBUILD) using $(CHOST))
|
||||
CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
|
||||
export CROSS_COMPILE=$(CBUILD)-
|
||||
-NOPIE_GCC := $(CBUILD)-gcc -fno-PIE
|
||||
STRIP := $(CBUILD)-strip
|
||||
endif
|
||||
ifeq ($(ARCH),aarch64)
|
||||
QEMU_ARCH := aarch64
|
||||
KERNEL_ARCH := arm64
|
||||
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
|
||||
-ifeq ($(CHOST),$(CBUILD))
|
||||
+ifeq ($(HOST_ARCH),$(ARCH))
|
||||
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
|
||||
else
|
||||
QEMU_MACHINE := -cpu cortex-a53 -machine virt
|
||||
@@ -85,7 +84,7 @@ else ifeq ($(ARCH),aarch64_be)
|
||||
QEMU_ARCH := aarch64
|
||||
KERNEL_ARCH := arm64
|
||||
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
|
||||
-ifeq ($(CHOST),$(CBUILD))
|
||||
+ifeq ($(HOST_ARCH),$(ARCH))
|
||||
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
|
||||
else
|
||||
QEMU_MACHINE := -cpu cortex-a53 -machine virt
|
||||
@@ -95,7 +94,7 @@ else ifeq ($(ARCH),arm)
|
||||
QEMU_ARCH := arm
|
||||
KERNEL_ARCH := arm
|
||||
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
|
||||
-ifeq ($(CHOST),$(CBUILD))
|
||||
+ifeq ($(HOST_ARCH),$(ARCH))
|
||||
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
|
||||
else
|
||||
QEMU_MACHINE := -cpu cortex-a15 -machine virt
|
||||
@@ -105,7 +104,7 @@ else ifeq ($(ARCH),armeb)
|
||||
QEMU_ARCH := arm
|
||||
KERNEL_ARCH := arm
|
||||
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
|
||||
-ifeq ($(CHOST),$(CBUILD))
|
||||
+ifeq ($(HOST_ARCH),$(ARCH))
|
||||
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
|
||||
else
|
||||
QEMU_MACHINE := -cpu cortex-a15 -machine virt
|
||||
@@ -116,7 +115,7 @@ else ifeq ($(ARCH),x86_64)
|
||||
QEMU_ARCH := x86_64
|
||||
KERNEL_ARCH := x86_64
|
||||
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
|
||||
-ifeq ($(CHOST),$(CBUILD))
|
||||
+ifeq ($(HOST_ARCH),$(ARCH))
|
||||
QEMU_MACHINE := -cpu host -machine q35,accel=kvm
|
||||
else
|
||||
QEMU_MACHINE := -cpu Skylake-Server -machine q35
|
||||
@@ -126,7 +125,7 @@ else ifeq ($(ARCH),i686)
|
||||
QEMU_ARCH := i386
|
||||
KERNEL_ARCH := x86
|
||||
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
|
||||
-ifeq ($(subst i686,x86_64,$(CBUILD)),$(CHOST))
|
||||
+ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
|
||||
QEMU_MACHINE := -cpu host -machine q35,accel=kvm
|
||||
else
|
||||
QEMU_MACHINE := -cpu coreduo -machine q35
|
||||
@@ -136,7 +135,7 @@ else ifeq ($(ARCH),mips64)
|
||||
QEMU_ARCH := mips64
|
||||
KERNEL_ARCH := mips
|
||||
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
|
||||
-ifeq ($(CHOST),$(CBUILD))
|
||||
+ifeq ($(HOST_ARCH),$(ARCH))
|
||||
QEMU_MACHINE := -cpu host -machine malta,accel=kvm
|
||||
CFLAGS += -EB
|
||||
else
|
||||
@@ -147,7 +146,7 @@ else ifeq ($(ARCH),mips64el)
|
||||
QEMU_ARCH := mips64el
|
||||
KERNEL_ARCH := mips
|
||||
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
|
||||
-ifeq ($(CHOST),$(CBUILD))
|
||||
+ifeq ($(HOST_ARCH),$(ARCH))
|
||||
QEMU_MACHINE := -cpu host -machine malta,accel=kvm
|
||||
CFLAGS += -EL
|
||||
else
|
||||
@@ -158,7 +157,7 @@ else ifeq ($(ARCH),mips)
|
||||
QEMU_ARCH := mips
|
||||
KERNEL_ARCH := mips
|
||||
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
|
||||
-ifeq ($(CHOST),$(CBUILD))
|
||||
+ifeq ($(HOST_ARCH),$(ARCH))
|
||||
QEMU_MACHINE := -cpu host -machine malta,accel=kvm
|
||||
CFLAGS += -EB
|
||||
else
|
||||
@@ -169,7 +168,7 @@ else ifeq ($(ARCH),mipsel)
|
||||
QEMU_ARCH := mipsel
|
||||
KERNEL_ARCH := mips
|
||||
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
|
||||
-ifeq ($(CHOST),$(CBUILD))
|
||||
+ifeq ($(HOST_ARCH),$(ARCH))
|
||||
QEMU_MACHINE := -cpu host -machine malta,accel=kvm
|
||||
CFLAGS += -EL
|
||||
else
|
||||
@@ -180,7 +179,7 @@ else ifeq ($(ARCH),powerpc64le)
|
||||
QEMU_ARCH := ppc64
|
||||
KERNEL_ARCH := powerpc
|
||||
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
|
||||
-ifeq ($(CHOST),$(CBUILD))
|
||||
+ifeq ($(HOST_ARCH),$(ARCH))
|
||||
QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
|
||||
else
|
||||
QEMU_MACHINE := -machine pseries
|
||||
@@ -190,7 +189,7 @@ else ifeq ($(ARCH),powerpc)
|
||||
QEMU_ARCH := ppc
|
||||
KERNEL_ARCH := powerpc
|
||||
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage
|
||||
-ifeq ($(CHOST),$(CBUILD))
|
||||
+ifeq ($(HOST_ARCH),$(ARCH))
|
||||
QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500
|
||||
else
|
||||
QEMU_MACHINE := -machine ppce500
|
||||
@@ -200,10 +199,11 @@ else ifeq ($(ARCH),m68k)
|
||||
QEMU_ARCH := m68k
|
||||
KERNEL_ARCH := m68k
|
||||
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
|
||||
-ifeq ($(CHOST),$(CBUILD))
|
||||
-QEMU_MACHINE := -cpu host,accel=kvm -machine q800
|
||||
+KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config)
|
||||
+ifeq ($(HOST_ARCH),$(ARCH))
|
||||
+QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
|
||||
else
|
||||
-QEMU_MACHINE := -machine q800
|
||||
+QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
|
||||
endif
|
||||
else
|
||||
$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k)
|
||||
@@ -238,14 +238,14 @@ $(BUILD_PATH)/init-cpio-spec.txt:
|
||||
echo "nod /dev/console 644 0 0 c 5 1" >> $@
|
||||
echo "dir /bin 755 0 0" >> $@
|
||||
echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@
|
||||
- echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/tools/wg 755 0 0" >> $@
|
||||
+ echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/wg 755 0 0" >> $@
|
||||
echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@
|
||||
echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@
|
||||
echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@
|
||||
echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@
|
||||
echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@
|
||||
- echo "file /bin/xtables-multi $(IPTABLES_PATH)/iptables/xtables-multi 755 0 0" >> $@
|
||||
- echo "slink /bin/iptables xtables-multi 777 0 0" >> $@
|
||||
+ echo "file /bin/xtables-legacy-multi $(IPTABLES_PATH)/iptables/xtables-legacy-multi 755 0 0" >> $@
|
||||
+ echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@
|
||||
echo "slink /bin/ping6 ping 777 0 0" >> $@
|
||||
echo "dir /lib 755 0 0" >> $@
|
||||
echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@
|
||||
@@ -260,8 +260,8 @@ $(KERNEL_BUILD_PATH)/.config: kernel.con
|
||||
cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
|
||||
$(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
|
||||
|
||||
-$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/tools/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
|
||||
- $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)"
|
||||
+$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
|
||||
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
|
||||
|
||||
$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config
|
||||
$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
|
||||
@@ -280,7 +280,7 @@ $(BUILD_PATH)/include/.installed: $(MUSL
|
||||
|
||||
$(MUSL_CC): $(MUSL_PATH)/lib/libc.so
|
||||
sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs
|
||||
- printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" -fno-stack-protector -no-pie "$$@"\n' > $(BUILD_PATH)/musl-gcc
|
||||
+ printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc
|
||||
chmod +x $(BUILD_PATH)/musl-gcc
|
||||
|
||||
$(IPERF_PATH)/.installed: $(IPERF_TAR)
|
||||
@@ -291,7 +291,7 @@ $(IPERF_PATH)/.installed: $(IPERF_TAR)
|
||||
touch $@
|
||||
|
||||
$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
|
||||
- cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
|
||||
+ cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no
|
||||
$(MAKE) -C $(IPERF_PATH)
|
||||
$(STRIP) -s $@
|
||||
|
||||
@@ -308,8 +308,8 @@ $(WIREGUARD_TOOLS_PATH)/.installed: $(WI
|
||||
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
|
||||
touch $@
|
||||
|
||||
-$(WIREGUARD_TOOLS_PATH)/src/tools/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
|
||||
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src/tools LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
|
||||
+$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
|
||||
+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
|
||||
$(STRIP) -s $@
|
||||
|
||||
$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
|
||||
@@ -323,7 +323,8 @@ $(IPUTILS_PATH)/.installed: $(IPUTILS_TA
|
||||
touch $@
|
||||
|
||||
$(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS)
|
||||
- $(MAKE) -C $(IPUTILS_PATH) USE_CAP=no USE_IDN=no USE_NETTLE=no USE_CRYPTO=no ping
|
||||
+ sed -i /atexit/d $(IPUTILS_PATH)/ping.c
|
||||
+ cd $(IPUTILS_PATH) && $(CC) $(CFLAGS) -std=c99 -o $@ ping.c ping_common.c ping6_common.c iputils_common.c -D_GNU_SOURCE -D'IPUTILS_VERSION(f)=f' -lresolv $(LDFLAGS)
|
||||
$(STRIP) -s $@
|
||||
|
||||
$(BASH_PATH)/.installed: $(BASH_TAR)
|
||||
@@ -357,7 +358,7 @@ $(IPTABLES_PATH)/.installed: $(IPTABLES_
|
||||
sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure
|
||||
touch $@
|
||||
|
||||
-$(IPTABLES_PATH)/iptables/xtables-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
|
||||
+$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
|
||||
cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include
|
||||
$(MAKE) -C $(IPTABLES_PATH)
|
||||
$(STRIP) -s $@
|
||||
@@ -368,8 +369,9 @@ $(NMAP_PATH)/.installed: $(NMAP_TAR)
|
||||
touch $@
|
||||
|
||||
$(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS)
|
||||
- cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux
|
||||
- $(MAKE) -C $(NMAP_PATH) build-ncat
|
||||
+ cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux --without-libssh
|
||||
+ $(MAKE) -C $(NMAP_PATH)/libpcap
|
||||
+ $(MAKE) -C $(NMAP_PATH)/ncat
|
||||
$(STRIP) -s $@
|
||||
|
||||
clean:
|
||||
@@ -379,7 +381,7 @@ distclean: clean
|
||||
rm -rf $(DISTFILES_PATH)
|
||||
|
||||
menuconfig: $(KERNEL_BUILD_PATH)/.config
|
||||
- $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" menuconfig
|
||||
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig
|
||||
|
||||
.PHONY: qemu build clean distclean menuconfig
|
||||
.DELETE_ON_ERROR:
|
||||
--- a/tools/testing/selftests/wireguard/qemu/arch/m68k.config
|
||||
+++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
|
||||
@@ -1,9 +1,9 @@
|
||||
CONFIG_MMU=y
|
||||
+CONFIG_M68KCLASSIC=y
|
||||
CONFIG_M68040=y
|
||||
CONFIG_MAC=y
|
||||
CONFIG_SERIAL_PMACZILOG=y
|
||||
CONFIG_SERIAL_PMACZILOG_TTYS=y
|
||||
CONFIG_SERIAL_PMACZILOG_CONSOLE=y
|
||||
-CONFIG_CMDLINE_BOOL=y
|
||||
CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
|
||||
CONFIG_FRAME_WARN=1024
|
||||
--- a/tools/testing/selftests/wireguard/qemu/init.c
|
||||
+++ b/tools/testing/selftests/wireguard/qemu/init.c
|
||||
@@ -21,6 +21,7 @@
|
||||
#include <sys/reboot.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <sys/sendfile.h>
|
||||
+#include <sys/sysmacros.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
|
||||
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
|
||||
@@ -39,6 +39,7 @@ CONFIG_PRINTK=y
|
||||
CONFIG_KALLSYMS=y
|
||||
CONFIG_BUG=y
|
||||
CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
|
||||
+CONFIG_JUMP_LABEL=y
|
||||
CONFIG_EMBEDDED=n
|
||||
CONFIG_BASE_FULL=y
|
||||
CONFIG_FUTEX=y
|
||||
@@ -55,6 +56,7 @@ CONFIG_NO_HZ_IDLE=y
|
||||
CONFIG_NO_HZ_FULL=n
|
||||
CONFIG_HZ_PERIODIC=n
|
||||
CONFIG_HIGH_RES_TIMERS=y
|
||||
+CONFIG_COMPAT_32BIT_TIME=y
|
||||
CONFIG_ARCH_RANDOM=y
|
||||
CONFIG_FILE_LOCKING=y
|
||||
CONFIG_POSIX_TIMERS=y
|
@ -0,0 +1,39 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Thu, 2 Jan 2020 17:47:50 +0100
|
||||
Subject: [PATCH] wireguard: queueing: do not account for pfmemalloc when
|
||||
clearing skb header
|
||||
|
||||
commit 04d2ea92a18417619182cbb79063f154892b0150 upstream.
|
||||
|
||||
Before 8b7008620b84 ("net: Don't copy pfmemalloc flag in
|
||||
__copy_skb_header()"), the pfmemalloc flag used to be between headers_start and
|
||||
headers_end, which is a region we clear when preparing the packet for
|
||||
encryption/decryption. This is a parameter we certainly want to
|
||||
preserve, which is why 8b7008620b84 moved it out of there. The code here
|
||||
was written in a world before 8b7008620b84, though, where we had to
|
||||
manually account for it. This commit brings things up to speed.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/queueing.h | 3 ---
|
||||
1 file changed, 3 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/queueing.h
|
||||
+++ b/drivers/net/wireguard/queueing.h
|
||||
@@ -83,13 +83,10 @@ static inline __be16 wg_skb_examine_untr
|
||||
|
||||
static inline void wg_reset_packet(struct sk_buff *skb)
|
||||
{
|
||||
- const int pfmemalloc = skb->pfmemalloc;
|
||||
-
|
||||
skb_scrub_packet(skb, true);
|
||||
memset(&skb->headers_start, 0,
|
||||
offsetof(struct sk_buff, headers_end) -
|
||||
offsetof(struct sk_buff, headers_start));
|
||||
- skb->pfmemalloc = pfmemalloc;
|
||||
skb->queue_mapping = 0;
|
||||
skb->nohdr = 0;
|
||||
skb->peeked = 0;
|
@ -0,0 +1,34 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Thu, 2 Jan 2020 17:47:51 +0100
|
||||
Subject: [PATCH] wireguard: socket: mark skbs as not on list when receiving
|
||||
via gro
|
||||
|
||||
commit 736775d06bac60d7a353e405398b48b2bd8b1e54 upstream.
|
||||
|
||||
Certain drivers will pass gro skbs to udp, at which point the udp driver
|
||||
simply iterates through them and passes them off to encap_rcv, which is
|
||||
where we pick up. At the moment, we're not attempting to coalesce these
|
||||
into bundles, but we also don't want to wind up having cascaded lists of
|
||||
skbs treated separately. The right behavior here, then, is to just mark
|
||||
each incoming one as not on a list. This can be seen in practice, for
|
||||
example, with Qualcomm's rmnet_perf driver.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Tested-by: Yaroslav Furman <yaro330@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/socket.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/drivers/net/wireguard/socket.c
|
||||
+++ b/drivers/net/wireguard/socket.c
|
||||
@@ -333,6 +333,7 @@ static int wg_receive(struct sock *sk, s
|
||||
wg = sk->sk_user_data;
|
||||
if (unlikely(!wg))
|
||||
goto err;
|
||||
+ skb_mark_not_on_list(skb);
|
||||
wg_packet_receive(wg, skb);
|
||||
return 0;
|
||||
|
@ -0,0 +1,164 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Dumazet <edumazet@google.com>
|
||||
Date: Tue, 4 Feb 2020 22:17:25 +0100
|
||||
Subject: [PATCH] wireguard: allowedips: fix use-after-free in
|
||||
root_remove_peer_lists
|
||||
|
||||
commit 9981159fc3b677b357f84e069a11de5a5ec8a2a8 upstream.
|
||||
|
||||
In the unlikely case a new node could not be allocated, we need to
|
||||
remove @newnode from @peer->allowedips_list before freeing it.
|
||||
|
||||
syzbot reported:
|
||||
|
||||
BUG: KASAN: use-after-free in __list_del_entry_valid+0xdc/0xf5 lib/list_debug.c:54
|
||||
Read of size 8 at addr ffff88809881a538 by task syz-executor.4/30133
|
||||
|
||||
CPU: 0 PID: 30133 Comm: syz-executor.4 Not tainted 5.5.0-syzkaller #0
|
||||
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
|
||||
Call Trace:
|
||||
__dump_stack lib/dump_stack.c:77 [inline]
|
||||
dump_stack+0x197/0x210 lib/dump_stack.c:118
|
||||
print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374
|
||||
__kasan_report.cold+0x1b/0x32 mm/kasan/report.c:506
|
||||
kasan_report+0x12/0x20 mm/kasan/common.c:639
|
||||
__asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135
|
||||
__list_del_entry_valid+0xdc/0xf5 lib/list_debug.c:54
|
||||
__list_del_entry include/linux/list.h:132 [inline]
|
||||
list_del include/linux/list.h:146 [inline]
|
||||
root_remove_peer_lists+0x24f/0x4b0 drivers/net/wireguard/allowedips.c:65
|
||||
wg_allowedips_free+0x232/0x390 drivers/net/wireguard/allowedips.c:300
|
||||
wg_peer_remove_all+0xd5/0x620 drivers/net/wireguard/peer.c:187
|
||||
wg_set_device+0xd01/0x1350 drivers/net/wireguard/netlink.c:542
|
||||
genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline]
|
||||
genl_family_rcv_msg net/netlink/genetlink.c:717 [inline]
|
||||
genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734
|
||||
netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477
|
||||
genl_rcv+0x29/0x40 net/netlink/genetlink.c:745
|
||||
netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline]
|
||||
netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328
|
||||
netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917
|
||||
sock_sendmsg_nosec net/socket.c:652 [inline]
|
||||
sock_sendmsg+0xd7/0x130 net/socket.c:672
|
||||
____sys_sendmsg+0x753/0x880 net/socket.c:2343
|
||||
___sys_sendmsg+0x100/0x170 net/socket.c:2397
|
||||
__sys_sendmsg+0x105/0x1d0 net/socket.c:2430
|
||||
__do_sys_sendmsg net/socket.c:2439 [inline]
|
||||
__se_sys_sendmsg net/socket.c:2437 [inline]
|
||||
__x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437
|
||||
do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
|
||||
entry_SYSCALL_64_after_hwframe+0x49/0xbe
|
||||
RIP: 0033:0x45b399
|
||||
Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00
|
||||
RSP: 002b:00007f99a9bcdc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
|
||||
RAX: ffffffffffffffda RBX: 00007f99a9bce6d4 RCX: 000000000045b399
|
||||
RDX: 0000000000000000 RSI: 0000000020001340 RDI: 0000000000000003
|
||||
RBP: 000000000075bf20 R08: 0000000000000000 R09: 0000000000000000
|
||||
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000004
|
||||
R13: 00000000000009ba R14: 00000000004cb2b8 R15: 0000000000000009
|
||||
|
||||
Allocated by task 30103:
|
||||
save_stack+0x23/0x90 mm/kasan/common.c:72
|
||||
set_track mm/kasan/common.c:80 [inline]
|
||||
__kasan_kmalloc mm/kasan/common.c:513 [inline]
|
||||
__kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:486
|
||||
kasan_kmalloc+0x9/0x10 mm/kasan/common.c:527
|
||||
kmem_cache_alloc_trace+0x158/0x790 mm/slab.c:3551
|
||||
kmalloc include/linux/slab.h:556 [inline]
|
||||
kzalloc include/linux/slab.h:670 [inline]
|
||||
add+0x70a/0x1970 drivers/net/wireguard/allowedips.c:236
|
||||
wg_allowedips_insert_v4+0xf6/0x160 drivers/net/wireguard/allowedips.c:320
|
||||
set_allowedip drivers/net/wireguard/netlink.c:343 [inline]
|
||||
set_peer+0xfb9/0x1150 drivers/net/wireguard/netlink.c:468
|
||||
wg_set_device+0xbd4/0x1350 drivers/net/wireguard/netlink.c:591
|
||||
genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline]
|
||||
genl_family_rcv_msg net/netlink/genetlink.c:717 [inline]
|
||||
genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734
|
||||
netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477
|
||||
genl_rcv+0x29/0x40 net/netlink/genetlink.c:745
|
||||
netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline]
|
||||
netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328
|
||||
netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917
|
||||
sock_sendmsg_nosec net/socket.c:652 [inline]
|
||||
sock_sendmsg+0xd7/0x130 net/socket.c:672
|
||||
____sys_sendmsg+0x753/0x880 net/socket.c:2343
|
||||
___sys_sendmsg+0x100/0x170 net/socket.c:2397
|
||||
__sys_sendmsg+0x105/0x1d0 net/socket.c:2430
|
||||
__do_sys_sendmsg net/socket.c:2439 [inline]
|
||||
__se_sys_sendmsg net/socket.c:2437 [inline]
|
||||
__x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437
|
||||
do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
|
||||
entry_SYSCALL_64_after_hwframe+0x49/0xbe
|
||||
|
||||
Freed by task 30103:
|
||||
save_stack+0x23/0x90 mm/kasan/common.c:72
|
||||
set_track mm/kasan/common.c:80 [inline]
|
||||
kasan_set_free_info mm/kasan/common.c:335 [inline]
|
||||
__kasan_slab_free+0x102/0x150 mm/kasan/common.c:474
|
||||
kasan_slab_free+0xe/0x10 mm/kasan/common.c:483
|
||||
__cache_free mm/slab.c:3426 [inline]
|
||||
kfree+0x10a/0x2c0 mm/slab.c:3757
|
||||
add+0x12d2/0x1970 drivers/net/wireguard/allowedips.c:266
|
||||
wg_allowedips_insert_v4+0xf6/0x160 drivers/net/wireguard/allowedips.c:320
|
||||
set_allowedip drivers/net/wireguard/netlink.c:343 [inline]
|
||||
set_peer+0xfb9/0x1150 drivers/net/wireguard/netlink.c:468
|
||||
wg_set_device+0xbd4/0x1350 drivers/net/wireguard/netlink.c:591
|
||||
genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline]
|
||||
genl_family_rcv_msg net/netlink/genetlink.c:717 [inline]
|
||||
genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734
|
||||
netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477
|
||||
genl_rcv+0x29/0x40 net/netlink/genetlink.c:745
|
||||
netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline]
|
||||
netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328
|
||||
netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917
|
||||
sock_sendmsg_nosec net/socket.c:652 [inline]
|
||||
sock_sendmsg+0xd7/0x130 net/socket.c:672
|
||||
____sys_sendmsg+0x753/0x880 net/socket.c:2343
|
||||
___sys_sendmsg+0x100/0x170 net/socket.c:2397
|
||||
__sys_sendmsg+0x105/0x1d0 net/socket.c:2430
|
||||
__do_sys_sendmsg net/socket.c:2439 [inline]
|
||||
__se_sys_sendmsg net/socket.c:2437 [inline]
|
||||
__x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437
|
||||
do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
|
||||
entry_SYSCALL_64_after_hwframe+0x49/0xbe
|
||||
|
||||
The buggy address belongs to the object at ffff88809881a500
|
||||
which belongs to the cache kmalloc-64 of size 64
|
||||
The buggy address is located 56 bytes inside of
|
||||
64-byte region [ffff88809881a500, ffff88809881a540)
|
||||
The buggy address belongs to the page:
|
||||
page:ffffea0002620680 refcount:1 mapcount:0 mapping:ffff8880aa400380 index:0x0
|
||||
raw: 00fffe0000000200 ffffea000250b748 ffffea000254bac8 ffff8880aa400380
|
||||
raw: 0000000000000000 ffff88809881a000 0000000100000020 0000000000000000
|
||||
page dumped because: kasan: bad access detected
|
||||
|
||||
Memory state around the buggy address:
|
||||
ffff88809881a400: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
|
||||
ffff88809881a480: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc
|
||||
>ffff88809881a500: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
|
||||
^
|
||||
ffff88809881a580: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
|
||||
ffff88809881a600: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc
|
||||
|
||||
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
|
||||
Signed-off-by: Eric Dumazet <edumazet@google.com>
|
||||
Reported-by: syzbot <syzkaller@googlegroups.com>
|
||||
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Cc: wireguard@lists.zx2c4.com
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/allowedips.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/drivers/net/wireguard/allowedips.c
|
||||
+++ b/drivers/net/wireguard/allowedips.c
|
||||
@@ -263,6 +263,7 @@ static int add(struct allowedips_node __
|
||||
} else {
|
||||
node = kzalloc(sizeof(*node), GFP_KERNEL);
|
||||
if (unlikely(!node)) {
|
||||
+ list_del(&newnode->peer_list);
|
||||
kfree(newnode);
|
||||
return -ENOMEM;
|
||||
}
|
@ -0,0 +1,233 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Tue, 4 Feb 2020 22:17:26 +0100
|
||||
Subject: [PATCH] wireguard: noise: reject peers with low order public keys
|
||||
|
||||
commit ec31c2676a10e064878927b243fada8c2fb0c03c upstream.
|
||||
|
||||
Our static-static calculation returns a failure if the public key is of
|
||||
low order. We check for this when peers are added, and don't allow them
|
||||
to be added if they're low order, except in the case where we haven't
|
||||
yet been given a private key. In that case, we would defer the removal
|
||||
of the peer until we're given a private key, since at that point we're
|
||||
doing new static-static calculations which incur failures we can act on.
|
||||
This meant, however, that we wound up removing peers rather late in the
|
||||
configuration flow.
|
||||
|
||||
Syzkaller points out that peer_remove calls flush_workqueue, which in
|
||||
turn might then wait for sending a handshake initiation to complete.
|
||||
Since handshake initiation needs the static identity lock, holding the
|
||||
static identity lock while calling peer_remove can result in a rare
|
||||
deadlock. We have precisely this case in this situation of late-stage
|
||||
peer removal based on an invalid public key. We can't drop the lock when
|
||||
removing, because then incoming handshakes might interact with a bogus
|
||||
static-static calculation.
|
||||
|
||||
While the band-aid patch for this would involve breaking up the peer
|
||||
removal into two steps like wg_peer_remove_all does, in order to solve
|
||||
the locking issue, there's actually a much more elegant way of fixing
|
||||
this:
|
||||
|
||||
If the static-static calculation succeeds with one private key, it
|
||||
*must* succeed with all others, because all 32-byte strings map to valid
|
||||
private keys, thanks to clamping. That means we can get rid of this
|
||||
silly dance and locking headaches of removing peers late in the
|
||||
configuration flow, and instead just reject them early on, regardless of
|
||||
whether the device has yet been assigned a private key. For the case
|
||||
where the device doesn't yet have a private key, we safely use zeros
|
||||
just for the purposes of checking for low order points by way of
|
||||
checking the output of the calculation.
|
||||
|
||||
The following PoC will trigger the deadlock:
|
||||
|
||||
ip link add wg0 type wireguard
|
||||
ip addr add 10.0.0.1/24 dev wg0
|
||||
ip link set wg0 up
|
||||
ping -f 10.0.0.2 &
|
||||
while true; do
|
||||
wg set wg0 private-key /dev/null peer AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= allowed-ips 10.0.0.0/24 endpoint 10.0.0.3:1234
|
||||
wg set wg0 private-key <(echo AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=)
|
||||
done
|
||||
|
||||
[ 0.949105] ======================================================
|
||||
[ 0.949550] WARNING: possible circular locking dependency detected
|
||||
[ 0.950143] 5.5.0-debug+ #18 Not tainted
|
||||
[ 0.950431] ------------------------------------------------------
|
||||
[ 0.950959] wg/89 is trying to acquire lock:
|
||||
[ 0.951252] ffff8880333e2128 ((wq_completion)wg-kex-wg0){+.+.}, at: flush_workqueue+0xe3/0x12f0
|
||||
[ 0.951865]
|
||||
[ 0.951865] but task is already holding lock:
|
||||
[ 0.952280] ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0
|
||||
[ 0.953011]
|
||||
[ 0.953011] which lock already depends on the new lock.
|
||||
[ 0.953011]
|
||||
[ 0.953651]
|
||||
[ 0.953651] the existing dependency chain (in reverse order) is:
|
||||
[ 0.954292]
|
||||
[ 0.954292] -> #2 (&wg->static_identity.lock){++++}:
|
||||
[ 0.954804] lock_acquire+0x127/0x350
|
||||
[ 0.955133] down_read+0x83/0x410
|
||||
[ 0.955428] wg_noise_handshake_create_initiation+0x97/0x700
|
||||
[ 0.955885] wg_packet_send_handshake_initiation+0x13a/0x280
|
||||
[ 0.956401] wg_packet_handshake_send_worker+0x10/0x20
|
||||
[ 0.956841] process_one_work+0x806/0x1500
|
||||
[ 0.957167] worker_thread+0x8c/0xcb0
|
||||
[ 0.957549] kthread+0x2ee/0x3b0
|
||||
[ 0.957792] ret_from_fork+0x24/0x30
|
||||
[ 0.958234]
|
||||
[ 0.958234] -> #1 ((work_completion)(&peer->transmit_handshake_work)){+.+.}:
|
||||
[ 0.958808] lock_acquire+0x127/0x350
|
||||
[ 0.959075] process_one_work+0x7ab/0x1500
|
||||
[ 0.959369] worker_thread+0x8c/0xcb0
|
||||
[ 0.959639] kthread+0x2ee/0x3b0
|
||||
[ 0.959896] ret_from_fork+0x24/0x30
|
||||
[ 0.960346]
|
||||
[ 0.960346] -> #0 ((wq_completion)wg-kex-wg0){+.+.}:
|
||||
[ 0.960945] check_prev_add+0x167/0x1e20
|
||||
[ 0.961351] __lock_acquire+0x2012/0x3170
|
||||
[ 0.961725] lock_acquire+0x127/0x350
|
||||
[ 0.961990] flush_workqueue+0x106/0x12f0
|
||||
[ 0.962280] peer_remove_after_dead+0x160/0x220
|
||||
[ 0.962600] wg_set_device+0xa24/0xcc0
|
||||
[ 0.962994] genl_rcv_msg+0x52f/0xe90
|
||||
[ 0.963298] netlink_rcv_skb+0x111/0x320
|
||||
[ 0.963618] genl_rcv+0x1f/0x30
|
||||
[ 0.963853] netlink_unicast+0x3f6/0x610
|
||||
[ 0.964245] netlink_sendmsg+0x700/0xb80
|
||||
[ 0.964586] __sys_sendto+0x1dd/0x2c0
|
||||
[ 0.964854] __x64_sys_sendto+0xd8/0x1b0
|
||||
[ 0.965141] do_syscall_64+0x90/0xd9a
|
||||
[ 0.965408] entry_SYSCALL_64_after_hwframe+0x49/0xbe
|
||||
[ 0.965769]
|
||||
[ 0.965769] other info that might help us debug this:
|
||||
[ 0.965769]
|
||||
[ 0.966337] Chain exists of:
|
||||
[ 0.966337] (wq_completion)wg-kex-wg0 --> (work_completion)(&peer->transmit_handshake_work) --> &wg->static_identity.lock
|
||||
[ 0.966337]
|
||||
[ 0.967417] Possible unsafe locking scenario:
|
||||
[ 0.967417]
|
||||
[ 0.967836] CPU0 CPU1
|
||||
[ 0.968155] ---- ----
|
||||
[ 0.968497] lock(&wg->static_identity.lock);
|
||||
[ 0.968779] lock((work_completion)(&peer->transmit_handshake_work));
|
||||
[ 0.969345] lock(&wg->static_identity.lock);
|
||||
[ 0.969809] lock((wq_completion)wg-kex-wg0);
|
||||
[ 0.970146]
|
||||
[ 0.970146] *** DEADLOCK ***
|
||||
[ 0.970146]
|
||||
[ 0.970531] 5 locks held by wg/89:
|
||||
[ 0.970908] #0: ffffffff827433c8 (cb_lock){++++}, at: genl_rcv+0x10/0x30
|
||||
[ 0.971400] #1: ffffffff82743480 (genl_mutex){+.+.}, at: genl_rcv_msg+0x642/0xe90
|
||||
[ 0.971924] #2: ffffffff827160c0 (rtnl_mutex){+.+.}, at: wg_set_device+0x9f/0xcc0
|
||||
[ 0.972488] #3: ffff888032819de0 (&wg->device_update_lock){+.+.}, at: wg_set_device+0xb0/0xcc0
|
||||
[ 0.973095] #4: ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0
|
||||
[ 0.973653]
|
||||
[ 0.973653] stack backtrace:
|
||||
[ 0.973932] CPU: 1 PID: 89 Comm: wg Not tainted 5.5.0-debug+ #18
|
||||
[ 0.974476] Call Trace:
|
||||
[ 0.974638] dump_stack+0x97/0xe0
|
||||
[ 0.974869] check_noncircular+0x312/0x3e0
|
||||
[ 0.975132] ? print_circular_bug+0x1f0/0x1f0
|
||||
[ 0.975410] ? __kernel_text_address+0x9/0x30
|
||||
[ 0.975727] ? unwind_get_return_address+0x51/0x90
|
||||
[ 0.976024] check_prev_add+0x167/0x1e20
|
||||
[ 0.976367] ? graph_lock+0x70/0x160
|
||||
[ 0.976682] __lock_acquire+0x2012/0x3170
|
||||
[ 0.976998] ? register_lock_class+0x1140/0x1140
|
||||
[ 0.977323] lock_acquire+0x127/0x350
|
||||
[ 0.977627] ? flush_workqueue+0xe3/0x12f0
|
||||
[ 0.977890] flush_workqueue+0x106/0x12f0
|
||||
[ 0.978147] ? flush_workqueue+0xe3/0x12f0
|
||||
[ 0.978410] ? find_held_lock+0x2c/0x110
|
||||
[ 0.978662] ? lock_downgrade+0x6e0/0x6e0
|
||||
[ 0.978919] ? queue_rcu_work+0x60/0x60
|
||||
[ 0.979166] ? netif_napi_del+0x151/0x3b0
|
||||
[ 0.979501] ? peer_remove_after_dead+0x160/0x220
|
||||
[ 0.979871] peer_remove_after_dead+0x160/0x220
|
||||
[ 0.980232] wg_set_device+0xa24/0xcc0
|
||||
[ 0.980516] ? deref_stack_reg+0x8e/0xc0
|
||||
[ 0.980801] ? set_peer+0xe10/0xe10
|
||||
[ 0.981040] ? __ww_mutex_check_waiters+0x150/0x150
|
||||
[ 0.981430] ? __nla_validate_parse+0x163/0x270
|
||||
[ 0.981719] ? genl_family_rcv_msg_attrs_parse+0x13f/0x310
|
||||
[ 0.982078] genl_rcv_msg+0x52f/0xe90
|
||||
[ 0.982348] ? genl_family_rcv_msg_attrs_parse+0x310/0x310
|
||||
[ 0.982690] ? register_lock_class+0x1140/0x1140
|
||||
[ 0.983049] netlink_rcv_skb+0x111/0x320
|
||||
[ 0.983298] ? genl_family_rcv_msg_attrs_parse+0x310/0x310
|
||||
[ 0.983645] ? netlink_ack+0x880/0x880
|
||||
[ 0.983888] genl_rcv+0x1f/0x30
|
||||
[ 0.984168] netlink_unicast+0x3f6/0x610
|
||||
[ 0.984443] ? netlink_detachskb+0x60/0x60
|
||||
[ 0.984729] ? find_held_lock+0x2c/0x110
|
||||
[ 0.984976] netlink_sendmsg+0x700/0xb80
|
||||
[ 0.985220] ? netlink_broadcast_filtered+0xa60/0xa60
|
||||
[ 0.985533] __sys_sendto+0x1dd/0x2c0
|
||||
[ 0.985763] ? __x64_sys_getpeername+0xb0/0xb0
|
||||
[ 0.986039] ? sockfd_lookup_light+0x17/0x160
|
||||
[ 0.986397] ? __sys_recvmsg+0x8c/0xf0
|
||||
[ 0.986711] ? __sys_recvmsg_sock+0xd0/0xd0
|
||||
[ 0.987018] __x64_sys_sendto+0xd8/0x1b0
|
||||
[ 0.987283] ? lockdep_hardirqs_on+0x39b/0x5a0
|
||||
[ 0.987666] do_syscall_64+0x90/0xd9a
|
||||
[ 0.987903] entry_SYSCALL_64_after_hwframe+0x49/0xbe
|
||||
[ 0.988223] RIP: 0033:0x7fe77c12003e
|
||||
[ 0.988508] Code: c3 8b 07 85 c0 75 24 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 4
|
||||
[ 0.989666] RSP: 002b:00007fffada2ed58 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
|
||||
[ 0.990137] RAX: ffffffffffffffda RBX: 00007fe77c159d48 RCX: 00007fe77c12003e
|
||||
[ 0.990583] RDX: 0000000000000040 RSI: 000055fd1d38e020 RDI: 0000000000000004
|
||||
[ 0.991091] RBP: 000055fd1d38e020 R08: 000055fd1cb63358 R09: 000000000000000c
|
||||
[ 0.991568] R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000002c
|
||||
[ 0.992014] R13: 0000000000000004 R14: 000055fd1d38e020 R15: 0000000000000001
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Reported-by: syzbot <syzkaller@googlegroups.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/netlink.c | 6 ++----
|
||||
drivers/net/wireguard/noise.c | 10 +++++++---
|
||||
2 files changed, 9 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/netlink.c
|
||||
+++ b/drivers/net/wireguard/netlink.c
|
||||
@@ -575,10 +575,8 @@ static int wg_set_device(struct sk_buff
|
||||
private_key);
|
||||
list_for_each_entry_safe(peer, temp, &wg->peer_list,
|
||||
peer_list) {
|
||||
- if (wg_noise_precompute_static_static(peer))
|
||||
- wg_noise_expire_current_peer_keypairs(peer);
|
||||
- else
|
||||
- wg_peer_remove(peer);
|
||||
+ BUG_ON(!wg_noise_precompute_static_static(peer));
|
||||
+ wg_noise_expire_current_peer_keypairs(peer);
|
||||
}
|
||||
wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
|
||||
up_write(&wg->static_identity.lock);
|
||||
--- a/drivers/net/wireguard/noise.c
|
||||
+++ b/drivers/net/wireguard/noise.c
|
||||
@@ -46,17 +46,21 @@ void __init wg_noise_init(void)
|
||||
/* Must hold peer->handshake.static_identity->lock */
|
||||
bool wg_noise_precompute_static_static(struct wg_peer *peer)
|
||||
{
|
||||
- bool ret = true;
|
||||
+ bool ret;
|
||||
|
||||
down_write(&peer->handshake.lock);
|
||||
- if (peer->handshake.static_identity->has_identity)
|
||||
+ if (peer->handshake.static_identity->has_identity) {
|
||||
ret = curve25519(
|
||||
peer->handshake.precomputed_static_static,
|
||||
peer->handshake.static_identity->static_private,
|
||||
peer->handshake.remote_static);
|
||||
- else
|
||||
+ } else {
|
||||
+ u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 };
|
||||
+
|
||||
+ ret = curve25519(empty, empty, peer->handshake.remote_static);
|
||||
memset(peer->handshake.precomputed_static_static, 0,
|
||||
NOISE_PUBLIC_KEY_LEN);
|
||||
+ }
|
||||
up_write(&peer->handshake.lock);
|
||||
return ret;
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Tue, 4 Feb 2020 22:17:27 +0100
|
||||
Subject: [PATCH] wireguard: selftests: ensure non-addition of peers with
|
||||
failed precomputation
|
||||
|
||||
commit f9398acba6a4ae9cb98bfe4d56414d376eff8d57 upstream.
|
||||
|
||||
Ensure that peers with low order points are ignored, both in the case
|
||||
where we already have a device private key and in the case where we do
|
||||
not. This adds points that naturally give a zero output.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
tools/testing/selftests/wireguard/netns.sh | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
--- a/tools/testing/selftests/wireguard/netns.sh
|
||||
+++ b/tools/testing/selftests/wireguard/netns.sh
|
||||
@@ -516,6 +516,12 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0
|
||||
n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0
|
||||
n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75
|
||||
n0 wg set wg0 peer "$pub2" allowed-ips ::/0
|
||||
+n0 wg set wg0 peer "$pub2" remove
|
||||
+low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= )
|
||||
+n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer }
|
||||
+[[ -z $(n0 wg show wg0 peers) ]]
|
||||
+n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer }
|
||||
+[[ -z $(n0 wg show wg0 peers) ]]
|
||||
ip0 link del wg0
|
||||
|
||||
declare -A objects
|
@ -0,0 +1,77 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Tue, 4 Feb 2020 22:17:29 +0100
|
||||
Subject: [PATCH] wireguard: selftests: tie socket waiting to target pid
|
||||
|
||||
commit 88f404a9b1d75388225b1c67b6dd327cb2182777 upstream.
|
||||
|
||||
Without this, we wind up proceeding too early sometimes when the
|
||||
previous process has just used the same listening port. So, we tie the
|
||||
listening socket query to the specific pid we're interested in.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
tools/testing/selftests/wireguard/netns.sh | 17 ++++++++---------
|
||||
1 file changed, 8 insertions(+), 9 deletions(-)
|
||||
|
||||
--- a/tools/testing/selftests/wireguard/netns.sh
|
||||
+++ b/tools/testing/selftests/wireguard/netns.sh
|
||||
@@ -38,9 +38,8 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0
|
||||
ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
|
||||
ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
|
||||
sleep() { read -t "$1" -N 1 || true; }
|
||||
-waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; }
|
||||
-waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
|
||||
-waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
|
||||
+waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; }
|
||||
+waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; }
|
||||
waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; }
|
||||
|
||||
cleanup() {
|
||||
@@ -119,22 +118,22 @@ tests() {
|
||||
|
||||
# TCP over IPv4
|
||||
n2 iperf3 -s -1 -B 192.168.241.2 &
|
||||
- waitiperf $netns2
|
||||
+ waitiperf $netns2 $!
|
||||
n1 iperf3 -Z -t 3 -c 192.168.241.2
|
||||
|
||||
# TCP over IPv6
|
||||
n1 iperf3 -s -1 -B fd00::1 &
|
||||
- waitiperf $netns1
|
||||
+ waitiperf $netns1 $!
|
||||
n2 iperf3 -Z -t 3 -c fd00::1
|
||||
|
||||
# UDP over IPv4
|
||||
n1 iperf3 -s -1 -B 192.168.241.1 &
|
||||
- waitiperf $netns1
|
||||
+ waitiperf $netns1 $!
|
||||
n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1
|
||||
|
||||
# UDP over IPv6
|
||||
n2 iperf3 -s -1 -B fd00::2 &
|
||||
- waitiperf $netns2
|
||||
+ waitiperf $netns2 $!
|
||||
n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
|
||||
}
|
||||
|
||||
@@ -207,7 +206,7 @@ n1 ping -W 1 -c 1 192.168.241.2
|
||||
n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24
|
||||
exec 4< <(n1 ncat -l -u -p 1111)
|
||||
ncat_pid=$!
|
||||
-waitncatudp $netns1
|
||||
+waitncatudp $netns1 $ncat_pid
|
||||
n2 ncat -u 192.168.241.1 1111 <<<"X"
|
||||
read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]]
|
||||
kill $ncat_pid
|
||||
@@ -216,7 +215,7 @@ n1 wg set wg0 peer "$more_specific_key"
|
||||
n2 wg set wg0 listen-port 9997
|
||||
exec 4< <(n1 ncat -l -u -p 1111)
|
||||
ncat_pid=$!
|
||||
-waitncatudp $netns1
|
||||
+waitncatudp $netns1 $ncat_pid
|
||||
n2 ncat -u 192.168.241.1 1111 <<<"X"
|
||||
! read -r -N 1 -t 1 out <&4 || false
|
||||
kill $ncat_pid
|
@ -0,0 +1,64 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Tue, 11 Feb 2020 20:47:08 +0100
|
||||
Subject: [PATCH] wireguard: device: use icmp_ndo_send helper
|
||||
|
||||
commit a12d7f3cbdc72c7625881c8dc2660fc2c979fdf2 upstream.
|
||||
|
||||
Because wireguard is calling icmp from network device context, it should
|
||||
use the ndo helper so that the rate limiting applies correctly. This
|
||||
commit adds a small test to the wireguard test suite to ensure that the
|
||||
new functions continue doing the right thing in the context of
|
||||
wireguard. It does this by setting up a condition that will definitely
|
||||
evoke an icmp error message from the driver, but along a nat'd path.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/device.c | 4 ++--
|
||||
tools/testing/selftests/wireguard/netns.sh | 11 +++++++++++
|
||||
2 files changed, 13 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/device.c
|
||||
+++ b/drivers/net/wireguard/device.c
|
||||
@@ -203,9 +203,9 @@ err_peer:
|
||||
err:
|
||||
++dev->stats.tx_errors;
|
||||
if (skb->protocol == htons(ETH_P_IP))
|
||||
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
|
||||
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
|
||||
else if (skb->protocol == htons(ETH_P_IPV6))
|
||||
- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
|
||||
+ icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
|
||||
kfree_skb(skb);
|
||||
return ret;
|
||||
}
|
||||
--- a/tools/testing/selftests/wireguard/netns.sh
|
||||
+++ b/tools/testing/selftests/wireguard/netns.sh
|
||||
@@ -24,6 +24,7 @@
|
||||
set -e
|
||||
|
||||
exec 3>&1
|
||||
+export LANG=C
|
||||
export WG_HIDE_KEYS=never
|
||||
netns0="wg-test-$$-0"
|
||||
netns1="wg-test-$$-1"
|
||||
@@ -297,7 +298,17 @@ ip1 -4 rule add table main suppress_pref
|
||||
n1 ping -W 1 -c 100 -f 192.168.99.7
|
||||
n1 ping -W 1 -c 100 -f abab::1111
|
||||
|
||||
+# Have ns2 NAT into wg0 packets from ns0, but return an icmp error along the right route.
|
||||
+n2 iptables -t nat -A POSTROUTING -s 10.0.0.0/24 -d 192.168.241.0/24 -j SNAT --to 192.168.241.2
|
||||
+n0 iptables -t filter -A INPUT \! -s 10.0.0.0/24 -i vethrs -j DROP # Manual rpfilter just to be explicit.
|
||||
+n2 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward'
|
||||
+ip0 -4 route add 192.168.241.1 via 10.0.0.100
|
||||
+n2 wg set wg0 peer "$pub1" remove
|
||||
+[[ $(! n0 ping -W 1 -c 1 192.168.241.1 || false) == *"From 10.0.0.100 icmp_seq=1 Destination Host Unreachable"* ]]
|
||||
+
|
||||
n0 iptables -t nat -F
|
||||
+n0 iptables -t filter -F
|
||||
+n2 iptables -t nat -F
|
||||
ip0 link del vethrc
|
||||
ip0 link del vethrs
|
||||
ip1 link del wg0
|
@ -0,0 +1,104 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Fri, 14 Feb 2020 23:57:20 +0100
|
||||
Subject: [PATCH] wireguard: selftests: reduce complexity and fix make races
|
||||
|
||||
commit 04ddf1208f03e1dbc39a4619c40eba640051b950 upstream.
|
||||
|
||||
This gives us fewer dependencies and shortens build time, fixes up some
|
||||
hash checking race conditions, and also fixes missing directory creation
|
||||
that caused issues on massively parallel builds.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
.../testing/selftests/wireguard/qemu/Makefile | 38 +++++++------------
|
||||
1 file changed, 14 insertions(+), 24 deletions(-)
|
||||
|
||||
--- a/tools/testing/selftests/wireguard/qemu/Makefile
|
||||
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
|
||||
@@ -38,19 +38,17 @@ endef
|
||||
define file_download =
|
||||
$(DISTFILES_PATH)/$(1):
|
||||
mkdir -p $(DISTFILES_PATH)
|
||||
- flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
|
||||
- if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi
|
||||
+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
|
||||
endef
|
||||
|
||||
$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
|
||||
-$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81))
|
||||
$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
|
||||
$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
|
||||
$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
|
||||
$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
|
||||
$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
|
||||
$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
|
||||
-$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f))
|
||||
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64))
|
||||
|
||||
KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
|
||||
rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
|
||||
@@ -295,21 +293,13 @@ $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH
|
||||
$(MAKE) -C $(IPERF_PATH)
|
||||
$(STRIP) -s $@
|
||||
|
||||
-$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR)
|
||||
- flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
|
||||
- touch $@
|
||||
-
|
||||
-$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS)
|
||||
- cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
|
||||
- $(MAKE) -C $(LIBMNL_PATH)
|
||||
- sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc
|
||||
-
|
||||
$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR)
|
||||
+ mkdir -p $(BUILD_PATH)
|
||||
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
|
||||
touch $@
|
||||
|
||||
-$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
|
||||
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
|
||||
+$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE_DEPS)
|
||||
+ $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src wg
|
||||
$(STRIP) -s $@
|
||||
|
||||
$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
|
||||
@@ -340,17 +330,17 @@ $(BASH_PATH)/bash: | $(BASH_PATH)/.insta
|
||||
$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
|
||||
mkdir -p $(BUILD_PATH)
|
||||
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
|
||||
- printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk
|
||||
+ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk
|
||||
printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
|
||||
touch $@
|
||||
|
||||
-$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
|
||||
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
|
||||
- $(STRIP) -s $(IPROUTE2_PATH)/ip/ip
|
||||
-
|
||||
-$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
|
||||
- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
|
||||
- $(STRIP) -s $(IPROUTE2_PATH)/misc/ss
|
||||
+$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
|
||||
+ $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
|
||||
+ $(STRIP) -s $@
|
||||
+
|
||||
+$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
|
||||
+ $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
|
||||
+ $(STRIP) -s $@
|
||||
|
||||
$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
|
||||
mkdir -p $(BUILD_PATH)
|
||||
@@ -358,8 +348,8 @@ $(IPTABLES_PATH)/.installed: $(IPTABLES_
|
||||
sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure
|
||||
touch $@
|
||||
|
||||
-$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
|
||||
- cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include
|
||||
+$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(USERSPACE_DEPS)
|
||||
+ cd $(IPTABLES_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --disable-connlabel --with-kernel=$(BUILD_PATH)/include
|
||||
$(MAKE) -C $(IPTABLES_PATH)
|
||||
$(STRIP) -s $@
|
||||
|
@ -0,0 +1,38 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Fri, 14 Feb 2020 23:57:21 +0100
|
||||
Subject: [PATCH] wireguard: receive: reset last_under_load to zero
|
||||
|
||||
commit 2a8a4df36462aa85b0db87b7c5ea145ba67e34a8 upstream.
|
||||
|
||||
This is a small optimization that prevents more expensive comparisons
|
||||
from happening when they are no longer necessary, by clearing the
|
||||
last_under_load variable whenever we wind up in a state where we were
|
||||
under load but we no longer are.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Suggested-by: Matt Dunwoodie <ncon@noconroy.net>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/receive.c | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/receive.c
|
||||
+++ b/drivers/net/wireguard/receive.c
|
||||
@@ -118,10 +118,13 @@ static void wg_receive_handshake_packet(
|
||||
|
||||
under_load = skb_queue_len(&wg->incoming_handshakes) >=
|
||||
MAX_QUEUED_INCOMING_HANDSHAKES / 8;
|
||||
- if (under_load)
|
||||
+ if (under_load) {
|
||||
last_under_load = ktime_get_coarse_boottime_ns();
|
||||
- else if (last_under_load)
|
||||
+ } else if (last_under_load) {
|
||||
under_load = !wg_birthdate_has_expired(last_under_load, 1);
|
||||
+ if (!under_load)
|
||||
+ last_under_load = 0;
|
||||
+ }
|
||||
mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb,
|
||||
under_load);
|
||||
if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) ||
|
@ -0,0 +1,95 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Fri, 14 Feb 2020 23:57:22 +0100
|
||||
Subject: [PATCH] wireguard: send: account for mtu=0 devices
|
||||
|
||||
commit 175f1ca9a9ed8689d2028da1a7c624bb4fb4ff7e upstream.
|
||||
|
||||
It turns out there's an easy way to get packets queued up while still
|
||||
having an MTU of zero, and that's via persistent keep alive. This commit
|
||||
makes sure that in whatever condition, we don't wind up dividing by
|
||||
zero. Note that an MTU of zero for a wireguard interface is something
|
||||
quasi-valid, so I don't think the correct fix is to limit it via
|
||||
min_mtu. This can be reproduced easily with:
|
||||
|
||||
ip link add wg0 type wireguard
|
||||
ip link add wg1 type wireguard
|
||||
ip link set wg0 up mtu 0
|
||||
ip link set wg1 up
|
||||
wg set wg0 private-key <(wg genkey)
|
||||
wg set wg1 listen-port 1 private-key <(wg genkey) peer $(wg show wg0 public-key)
|
||||
wg set wg0 peer $(wg show wg1 public-key) persistent-keepalive 1 endpoint 127.0.0.1:1
|
||||
|
||||
However, while min_mtu=0 seems fine, it makes sense to restrict the
|
||||
max_mtu. This commit also restricts the maximum MTU to the greatest
|
||||
number for which rounding up to the padding multiple won't overflow a
|
||||
signed integer. Packets this large were always rejected anyway
|
||||
eventually, due to checks deeper in, but it seems more sound not to even
|
||||
let the administrator configure something that won't work anyway.
|
||||
|
||||
We use this opportunity to clean up this function a bit so that it's
|
||||
clear which paths we're expecting.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Cc: Eric Dumazet <eric.dumazet@gmail.com>
|
||||
Reviewed-by: Eric Dumazet <edumazet@google.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/device.c | 7 ++++---
|
||||
drivers/net/wireguard/send.c | 16 +++++++++++-----
|
||||
2 files changed, 15 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/device.c
|
||||
+++ b/drivers/net/wireguard/device.c
|
||||
@@ -258,6 +258,8 @@ static void wg_setup(struct net_device *
|
||||
enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
|
||||
NETIF_F_SG | NETIF_F_GSO |
|
||||
NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
|
||||
+ const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) +
|
||||
+ max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
|
||||
|
||||
dev->netdev_ops = &netdev_ops;
|
||||
dev->hard_header_len = 0;
|
||||
@@ -271,9 +273,8 @@ static void wg_setup(struct net_device *
|
||||
dev->features |= WG_NETDEV_FEATURES;
|
||||
dev->hw_features |= WG_NETDEV_FEATURES;
|
||||
dev->hw_enc_features |= WG_NETDEV_FEATURES;
|
||||
- dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH -
|
||||
- sizeof(struct udphdr) -
|
||||
- max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
|
||||
+ dev->mtu = ETH_DATA_LEN - overhead;
|
||||
+ dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead;
|
||||
|
||||
SET_NETDEV_DEVTYPE(dev, &device_type);
|
||||
|
||||
--- a/drivers/net/wireguard/send.c
|
||||
+++ b/drivers/net/wireguard/send.c
|
||||
@@ -143,16 +143,22 @@ static void keep_key_fresh(struct wg_pee
|
||||
|
||||
static unsigned int calculate_skb_padding(struct sk_buff *skb)
|
||||
{
|
||||
+ unsigned int padded_size, last_unit = skb->len;
|
||||
+
|
||||
+ if (unlikely(!PACKET_CB(skb)->mtu))
|
||||
+ return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit;
|
||||
+
|
||||
/* We do this modulo business with the MTU, just in case the networking
|
||||
* layer gives us a packet that's bigger than the MTU. In that case, we
|
||||
* wouldn't want the final subtraction to overflow in the case of the
|
||||
- * padded_size being clamped.
|
||||
+ * padded_size being clamped. Fortunately, that's very rarely the case,
|
||||
+ * so we optimize for that not happening.
|
||||
*/
|
||||
- unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu;
|
||||
- unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE);
|
||||
+ if (unlikely(last_unit > PACKET_CB(skb)->mtu))
|
||||
+ last_unit %= PACKET_CB(skb)->mtu;
|
||||
|
||||
- if (padded_size > PACKET_CB(skb)->mtu)
|
||||
- padded_size = PACKET_CB(skb)->mtu;
|
||||
+ padded_size = min(PACKET_CB(skb)->mtu,
|
||||
+ ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE));
|
||||
return padded_size - last_unit;
|
||||
}
|
||||
|
@ -0,0 +1,32 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Fri, 14 Feb 2020 23:57:23 +0100
|
||||
Subject: [PATCH] wireguard: socket: remove extra call to synchronize_net
|
||||
|
||||
commit 1fbc33b0a7feb6ca72bf7dc8a05d81485ee8ee2e upstream.
|
||||
|
||||
synchronize_net() is a wrapper around synchronize_rcu(), so there's no
|
||||
point in having synchronize_net and synchronize_rcu back to back,
|
||||
despite the documentation comment suggesting maybe it's somewhat useful,
|
||||
"Wait for packets currently being received to be done." This commit
|
||||
removes the extra call.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
|
||||
Reviewed-by: Eric Dumazet <edumazet@google.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/socket.c | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/wireguard/socket.c
|
||||
+++ b/drivers/net/wireguard/socket.c
|
||||
@@ -432,7 +432,6 @@ void wg_socket_reinit(struct wg_device *
|
||||
wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
|
||||
mutex_unlock(&wg->socket_update_lock);
|
||||
synchronize_rcu();
|
||||
- synchronize_net();
|
||||
sock_free(old4);
|
||||
sock_free(old6);
|
||||
}
|
@ -0,0 +1,27 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: YueHaibing <yuehaibing@huawei.com>
|
||||
Date: Wed, 18 Mar 2020 18:30:43 -0600
|
||||
Subject: [PATCH] wireguard: selftests: remove duplicated include <sys/types.h>
|
||||
|
||||
commit 166391159c5deb84795d2ff46e95f276177fa5fb upstream.
|
||||
|
||||
This commit removes a duplicated include.
|
||||
|
||||
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
tools/testing/selftests/wireguard/qemu/init.c | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
--- a/tools/testing/selftests/wireguard/qemu/init.c
|
||||
+++ b/tools/testing/selftests/wireguard/qemu/init.c
|
||||
@@ -13,7 +13,6 @@
|
||||
#include <fcntl.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/mount.h>
|
||||
-#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/io.h>
|
@ -0,0 +1,100 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Wed, 18 Mar 2020 18:30:45 -0600
|
||||
Subject: [PATCH] wireguard: queueing: account for skb->protocol==0
|
||||
|
||||
commit a5588604af448664e796daf3c1d5a4523c60667b upstream.
|
||||
|
||||
We carry out checks to the effect of:
|
||||
|
||||
if (skb->protocol != wg_examine_packet_protocol(skb))
|
||||
goto err;
|
||||
|
||||
By having wg_skb_examine_untrusted_ip_hdr return 0 on failure, this
|
||||
means that the check above still passes in the case where skb->protocol
|
||||
is zero, which is possible to hit with AF_PACKET:
|
||||
|
||||
struct sockaddr_pkt saddr = { .spkt_device = "wg0" };
|
||||
unsigned char buffer[5] = { 0 };
|
||||
sendto(socket(AF_PACKET, SOCK_PACKET, /* skb->protocol = */ 0),
|
||||
buffer, sizeof(buffer), 0, (const struct sockaddr *)&saddr, sizeof(saddr));
|
||||
|
||||
Additional checks mean that this isn't actually a problem in the code
|
||||
base, but I could imagine it becoming a problem later if the function is
|
||||
used more liberally.
|
||||
|
||||
I would prefer to fix this by having wg_examine_packet_protocol return a
|
||||
32-bit ~0 value on failure, which will never match any value of
|
||||
skb->protocol, which would simply change the generated code from a mov
|
||||
to a movzx. However, sparse complains, and adding __force casts doesn't
|
||||
seem like a good idea, so instead we just add a simple helper function
|
||||
to check for the zero return value. Since wg_examine_packet_protocol
|
||||
itself gets inlined, this winds up not adding an additional branch to
|
||||
the generated code, since the 0 return value already happens in a
|
||||
mergeable branch.
|
||||
|
||||
Reported-by: Fabian Freyer <fabianfreyer@radicallyopensecurity.com>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/device.c | 2 +-
|
||||
drivers/net/wireguard/queueing.h | 8 +++++++-
|
||||
drivers/net/wireguard/receive.c | 4 ++--
|
||||
3 files changed, 10 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/device.c
|
||||
+++ b/drivers/net/wireguard/device.c
|
||||
@@ -122,7 +122,7 @@ static netdev_tx_t wg_xmit(struct sk_buf
|
||||
u32 mtu;
|
||||
int ret;
|
||||
|
||||
- if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) {
|
||||
+ if (unlikely(!wg_check_packet_protocol(skb))) {
|
||||
ret = -EPROTONOSUPPORT;
|
||||
net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
|
||||
goto err;
|
||||
--- a/drivers/net/wireguard/queueing.h
|
||||
+++ b/drivers/net/wireguard/queueing.h
|
||||
@@ -66,7 +66,7 @@ struct packet_cb {
|
||||
#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
|
||||
|
||||
/* Returns either the correct skb->protocol value, or 0 if invalid. */
|
||||
-static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
|
||||
+static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb)
|
||||
{
|
||||
if (skb_network_header(skb) >= skb->head &&
|
||||
(skb_network_header(skb) + sizeof(struct iphdr)) <=
|
||||
@@ -81,6 +81,12 @@ static inline __be16 wg_skb_examine_untr
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static inline bool wg_check_packet_protocol(struct sk_buff *skb)
|
||||
+{
|
||||
+ __be16 real_protocol = wg_examine_packet_protocol(skb);
|
||||
+ return real_protocol && skb->protocol == real_protocol;
|
||||
+}
|
||||
+
|
||||
static inline void wg_reset_packet(struct sk_buff *skb)
|
||||
{
|
||||
skb_scrub_packet(skb, true);
|
||||
--- a/drivers/net/wireguard/receive.c
|
||||
+++ b/drivers/net/wireguard/receive.c
|
||||
@@ -56,7 +56,7 @@ static int prepare_skb_header(struct sk_
|
||||
size_t data_offset, data_len, header_len;
|
||||
struct udphdr *udp;
|
||||
|
||||
- if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol ||
|
||||
+ if (unlikely(!wg_check_packet_protocol(skb) ||
|
||||
skb_transport_header(skb) < skb->head ||
|
||||
(skb_transport_header(skb) + sizeof(struct udphdr)) >
|
||||
skb_tail_pointer(skb)))
|
||||
@@ -388,7 +388,7 @@ static void wg_packet_consume_data_done(
|
||||
*/
|
||||
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||
skb->csum_level = ~0; /* All levels */
|
||||
- skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb);
|
||||
+ skb->protocol = wg_examine_packet_protocol(skb);
|
||||
if (skb->protocol == htons(ETH_P_IP)) {
|
||||
len = ntohs(ip_hdr(skb)->tot_len);
|
||||
if (unlikely(len < sizeof(struct iphdr)))
|
@ -0,0 +1,35 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Wed, 18 Mar 2020 18:30:46 -0600
|
||||
Subject: [PATCH] wireguard: receive: remove dead code from default packet type
|
||||
case
|
||||
|
||||
commit 2b8765c52db24c0fbcc81bac9b5e8390f2c7d3c8 upstream.
|
||||
|
||||
The situation in which we wind up hitting the default case here
|
||||
indicates a major bug in earlier parsing code. It is not a usual thing
|
||||
that should ever happen, which means a "friendly" message for it doesn't
|
||||
make sense. Rather, replace this with a WARN_ON, just like we do earlier
|
||||
in the file for a similar situation, so that somebody sends us a bug
|
||||
report and we can fix it.
|
||||
|
||||
Reported-by: Fabian Freyer <fabianfreyer@radicallyopensecurity.com>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/receive.c | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/receive.c
|
||||
+++ b/drivers/net/wireguard/receive.c
|
||||
@@ -587,8 +587,7 @@ void wg_packet_receive(struct wg_device
|
||||
wg_packet_consume_data(wg, skb);
|
||||
break;
|
||||
default:
|
||||
- net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n",
|
||||
- wg->dev->name, skb);
|
||||
+ WARN(1, "Non-exhaustive parsing of packet header lead to unknown packet type!\n");
|
||||
goto err;
|
||||
}
|
||||
return;
|
@ -0,0 +1,224 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Wed, 18 Mar 2020 18:30:47 -0600
|
||||
Subject: [PATCH] wireguard: noise: error out precomputed DH during handshake
|
||||
rather than config
|
||||
|
||||
commit 11a7686aa99c7fe4b3f80f6dcccd54129817984d upstream.
|
||||
|
||||
We precompute the static-static ECDH during configuration time, in order
|
||||
to save an expensive computation later when receiving network packets.
|
||||
However, not all ECDH computations yield a contributory result. Previously,
|
||||
we were just not letting those peers be added to the interface. However,
|
||||
this creates a strange inconsistency, since it was still possible to add
|
||||
other weird points, like a valid public key plus a low-order point, and,
|
||||
like points that result in zeros, a handshake would not complete. In
|
||||
order to make the behavior more uniform and less surprising, simply
|
||||
allow all peers to be added. Then, we'll error out later when doing the
|
||||
crypto if there's an issue. This also adds more separation between the
|
||||
crypto layer and the configuration layer.
|
||||
|
||||
Discussed-with: Mathias Hall-Andersen <mathias@hall-andersen.dk>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/netlink.c | 8 +---
|
||||
drivers/net/wireguard/noise.c | 55 ++++++++++++----------
|
||||
drivers/net/wireguard/noise.h | 12 ++---
|
||||
drivers/net/wireguard/peer.c | 7 +--
|
||||
tools/testing/selftests/wireguard/netns.sh | 15 ++++--
|
||||
5 files changed, 49 insertions(+), 48 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/netlink.c
|
||||
+++ b/drivers/net/wireguard/netlink.c
|
||||
@@ -417,11 +417,7 @@ static int set_peer(struct wg_device *wg
|
||||
|
||||
peer = wg_peer_create(wg, public_key, preshared_key);
|
||||
if (IS_ERR(peer)) {
|
||||
- /* Similar to the above, if the key is invalid, we skip
|
||||
- * it without fanfare, so that services don't need to
|
||||
- * worry about doing key validation themselves.
|
||||
- */
|
||||
- ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer);
|
||||
+ ret = PTR_ERR(peer);
|
||||
peer = NULL;
|
||||
goto out;
|
||||
}
|
||||
@@ -575,7 +571,7 @@ static int wg_set_device(struct sk_buff
|
||||
private_key);
|
||||
list_for_each_entry_safe(peer, temp, &wg->peer_list,
|
||||
peer_list) {
|
||||
- BUG_ON(!wg_noise_precompute_static_static(peer));
|
||||
+ wg_noise_precompute_static_static(peer);
|
||||
wg_noise_expire_current_peer_keypairs(peer);
|
||||
}
|
||||
wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
|
||||
--- a/drivers/net/wireguard/noise.c
|
||||
+++ b/drivers/net/wireguard/noise.c
|
||||
@@ -44,32 +44,23 @@ void __init wg_noise_init(void)
|
||||
}
|
||||
|
||||
/* Must hold peer->handshake.static_identity->lock */
|
||||
-bool wg_noise_precompute_static_static(struct wg_peer *peer)
|
||||
+void wg_noise_precompute_static_static(struct wg_peer *peer)
|
||||
{
|
||||
- bool ret;
|
||||
-
|
||||
down_write(&peer->handshake.lock);
|
||||
- if (peer->handshake.static_identity->has_identity) {
|
||||
- ret = curve25519(
|
||||
- peer->handshake.precomputed_static_static,
|
||||
+ if (!peer->handshake.static_identity->has_identity ||
|
||||
+ !curve25519(peer->handshake.precomputed_static_static,
|
||||
peer->handshake.static_identity->static_private,
|
||||
- peer->handshake.remote_static);
|
||||
- } else {
|
||||
- u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 };
|
||||
-
|
||||
- ret = curve25519(empty, empty, peer->handshake.remote_static);
|
||||
+ peer->handshake.remote_static))
|
||||
memset(peer->handshake.precomputed_static_static, 0,
|
||||
NOISE_PUBLIC_KEY_LEN);
|
||||
- }
|
||||
up_write(&peer->handshake.lock);
|
||||
- return ret;
|
||||
}
|
||||
|
||||
-bool wg_noise_handshake_init(struct noise_handshake *handshake,
|
||||
- struct noise_static_identity *static_identity,
|
||||
- const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
|
||||
- const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
|
||||
- struct wg_peer *peer)
|
||||
+void wg_noise_handshake_init(struct noise_handshake *handshake,
|
||||
+ struct noise_static_identity *static_identity,
|
||||
+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
|
||||
+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
|
||||
+ struct wg_peer *peer)
|
||||
{
|
||||
memset(handshake, 0, sizeof(*handshake));
|
||||
init_rwsem(&handshake->lock);
|
||||
@@ -81,7 +72,7 @@ bool wg_noise_handshake_init(struct nois
|
||||
NOISE_SYMMETRIC_KEY_LEN);
|
||||
handshake->static_identity = static_identity;
|
||||
handshake->state = HANDSHAKE_ZEROED;
|
||||
- return wg_noise_precompute_static_static(peer);
|
||||
+ wg_noise_precompute_static_static(peer);
|
||||
}
|
||||
|
||||
static void handshake_zero(struct noise_handshake *handshake)
|
||||
@@ -403,6 +394,19 @@ static bool __must_check mix_dh(u8 chain
|
||||
return true;
|
||||
}
|
||||
|
||||
+static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN],
|
||||
+ u8 key[NOISE_SYMMETRIC_KEY_LEN],
|
||||
+ const u8 precomputed[NOISE_PUBLIC_KEY_LEN])
|
||||
+{
|
||||
+ static u8 zero_point[NOISE_PUBLIC_KEY_LEN];
|
||||
+ if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN)))
|
||||
+ return false;
|
||||
+ kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN,
|
||||
+ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
|
||||
+ chaining_key);
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
|
||||
{
|
||||
struct blake2s_state blake;
|
||||
@@ -531,10 +535,9 @@ wg_noise_handshake_create_initiation(str
|
||||
NOISE_PUBLIC_KEY_LEN, key, handshake->hash);
|
||||
|
||||
/* ss */
|
||||
- kdf(handshake->chaining_key, key, NULL,
|
||||
- handshake->precomputed_static_static, NOISE_HASH_LEN,
|
||||
- NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
|
||||
- handshake->chaining_key);
|
||||
+ if (!mix_precomputed_dh(handshake->chaining_key, key,
|
||||
+ handshake->precomputed_static_static))
|
||||
+ goto out;
|
||||
|
||||
/* {t} */
|
||||
tai64n_now(timestamp);
|
||||
@@ -595,9 +598,9 @@ wg_noise_handshake_consume_initiation(st
|
||||
handshake = &peer->handshake;
|
||||
|
||||
/* ss */
|
||||
- kdf(chaining_key, key, NULL, handshake->precomputed_static_static,
|
||||
- NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
|
||||
- chaining_key);
|
||||
+ if (!mix_precomputed_dh(chaining_key, key,
|
||||
+ handshake->precomputed_static_static))
|
||||
+ goto out;
|
||||
|
||||
/* {t} */
|
||||
if (!message_decrypt(t, src->encrypted_timestamp,
|
||||
--- a/drivers/net/wireguard/noise.h
|
||||
+++ b/drivers/net/wireguard/noise.h
|
||||
@@ -94,11 +94,11 @@ struct noise_handshake {
|
||||
struct wg_device;
|
||||
|
||||
void wg_noise_init(void);
|
||||
-bool wg_noise_handshake_init(struct noise_handshake *handshake,
|
||||
- struct noise_static_identity *static_identity,
|
||||
- const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
|
||||
- const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
|
||||
- struct wg_peer *peer);
|
||||
+void wg_noise_handshake_init(struct noise_handshake *handshake,
|
||||
+ struct noise_static_identity *static_identity,
|
||||
+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
|
||||
+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
|
||||
+ struct wg_peer *peer);
|
||||
void wg_noise_handshake_clear(struct noise_handshake *handshake);
|
||||
static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns)
|
||||
{
|
||||
@@ -116,7 +116,7 @@ void wg_noise_expire_current_peer_keypai
|
||||
void wg_noise_set_static_identity_private_key(
|
||||
struct noise_static_identity *static_identity,
|
||||
const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
|
||||
-bool wg_noise_precompute_static_static(struct wg_peer *peer);
|
||||
+void wg_noise_precompute_static_static(struct wg_peer *peer);
|
||||
|
||||
bool
|
||||
wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
|
||||
--- a/drivers/net/wireguard/peer.c
|
||||
+++ b/drivers/net/wireguard/peer.c
|
||||
@@ -34,11 +34,8 @@ struct wg_peer *wg_peer_create(struct wg
|
||||
return ERR_PTR(ret);
|
||||
peer->device = wg;
|
||||
|
||||
- if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
|
||||
- public_key, preshared_key, peer)) {
|
||||
- ret = -EKEYREJECTED;
|
||||
- goto err_1;
|
||||
- }
|
||||
+ wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
|
||||
+ public_key, preshared_key, peer);
|
||||
if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
|
||||
goto err_1;
|
||||
if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
|
||||
--- a/tools/testing/selftests/wireguard/netns.sh
|
||||
+++ b/tools/testing/selftests/wireguard/netns.sh
|
||||
@@ -527,11 +527,16 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0
|
||||
n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75
|
||||
n0 wg set wg0 peer "$pub2" allowed-ips ::/0
|
||||
n0 wg set wg0 peer "$pub2" remove
|
||||
-low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= )
|
||||
-n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer }
|
||||
-[[ -z $(n0 wg show wg0 peers) ]]
|
||||
-n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer }
|
||||
-[[ -z $(n0 wg show wg0 peers) ]]
|
||||
+for low_order_point in AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38=; do
|
||||
+ n0 wg set wg0 peer "$low_order_point" persistent-keepalive 1 endpoint 127.0.0.1:1111
|
||||
+done
|
||||
+[[ -n $(n0 wg show wg0 peers) ]]
|
||||
+exec 4< <(n0 ncat -l -u -p 1111)
|
||||
+ncat_pid=$!
|
||||
+waitncatudp $netns0 $ncat_pid
|
||||
+ip0 link set wg0 up
|
||||
+! read -r -n 1 -t 2 <&4 || false
|
||||
+kill $ncat_pid
|
||||
ip0 link del wg0
|
||||
|
||||
declare -A objects
|
@ -0,0 +1,29 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
Date: Wed, 29 Apr 2020 14:59:20 -0600
|
||||
Subject: [PATCH] wireguard: send: remove errant newline from
|
||||
packet_encrypt_worker
|
||||
|
||||
commit d6833e42786e050e7522d6a91a9361e54085897d upstream.
|
||||
|
||||
This commit removes a useless newline at the end of a scope, which
|
||||
doesn't add anything in the way of organization or readability.
|
||||
|
||||
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/send.c | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/wireguard/send.c
|
||||
+++ b/drivers/net/wireguard/send.c
|
||||
@@ -304,7 +304,6 @@ void wg_packet_encrypt_worker(struct wor
|
||||
}
|
||||
wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
|
||||
state);
|
||||
-
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,35 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Wed, 29 Apr 2020 14:59:21 -0600
|
||||
Subject: [PATCH] wireguard: queueing: cleanup ptr_ring in error path of
|
||||
packet_queue_init
|
||||
|
||||
commit 130c58606171326c81841a49cc913cd354113dd9 upstream.
|
||||
|
||||
Prior, if the alloc_percpu of packet_percpu_multicore_worker_alloc
|
||||
failed, the previously allocated ptr_ring wouldn't be freed. This commit
|
||||
adds the missing call to ptr_ring_cleanup in the error case.
|
||||
|
||||
Reported-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/queueing.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/wireguard/queueing.c
|
||||
+++ b/drivers/net/wireguard/queueing.c
|
||||
@@ -35,8 +35,10 @@ int wg_packet_queue_init(struct crypt_qu
|
||||
if (multicore) {
|
||||
queue->worker = wg_packet_percpu_multicore_worker_alloc(
|
||||
function, queue);
|
||||
- if (!queue->worker)
|
||||
+ if (!queue->worker) {
|
||||
+ ptr_ring_cleanup(&queue->ring, NULL);
|
||||
return -ENOMEM;
|
||||
+ }
|
||||
} else {
|
||||
INIT_WORK(&queue->work, function);
|
||||
}
|
@ -0,0 +1,50 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
|
||||
Date: Wed, 29 Apr 2020 14:59:22 -0600
|
||||
Subject: [PATCH] wireguard: receive: use tunnel helpers for decapsulating ECN
|
||||
markings
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
commit eebabcb26ea1e3295704477c6cd4e772c96a9559 upstream.
|
||||
|
||||
WireGuard currently only propagates ECN markings on tunnel decap according
|
||||
to the old RFC3168 specification. However, the spec has since been updated
|
||||
in RFC6040 to recommend slightly different decapsulation semantics. This
|
||||
was implemented in the kernel as a set of common helpers for ECN
|
||||
decapsulation, so let's just switch over WireGuard to using those, so it
|
||||
can benefit from this enhancement and any future tweaks. We do not drop
|
||||
packets with invalid ECN marking combinations, because WireGuard is
|
||||
frequently used to work around broken ISPs, which could be doing that.
|
||||
|
||||
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
|
||||
Reported-by: Olivier Tilmans <olivier.tilmans@nokia-bell-labs.com>
|
||||
Cc: Dave Taht <dave.taht@gmail.com>
|
||||
Cc: Rodney W. Grimes <ietf@gndrsh.dnsmgr.net>
|
||||
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/receive.c | 6 ++----
|
||||
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/receive.c
|
||||
+++ b/drivers/net/wireguard/receive.c
|
||||
@@ -393,13 +393,11 @@ static void wg_packet_consume_data_done(
|
||||
len = ntohs(ip_hdr(skb)->tot_len);
|
||||
if (unlikely(len < sizeof(struct iphdr)))
|
||||
goto dishonest_packet_size;
|
||||
- if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
|
||||
- IP_ECN_set_ce(ip_hdr(skb));
|
||||
+ INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ip_hdr(skb)->tos);
|
||||
} else if (skb->protocol == htons(ETH_P_IPV6)) {
|
||||
len = ntohs(ipv6_hdr(skb)->payload_len) +
|
||||
sizeof(struct ipv6hdr);
|
||||
- if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
|
||||
- IP6_ECN_set_ce(skb, ipv6_hdr(skb));
|
||||
+ INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ipv6_get_dsfield(ipv6_hdr(skb)));
|
||||
} else {
|
||||
goto dishonest_packet_type;
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Wed, 6 May 2020 15:33:02 -0600
|
||||
Subject: [PATCH] wireguard: selftests: use normal kernel stack size on ppc64
|
||||
|
||||
commit a0fd7cc87a018df1a17f9d3f0bd994c1f22c6b34 upstream.
|
||||
|
||||
While at some point it might have made sense to be running these tests
|
||||
on ppc64 with 4k stacks, the kernel hasn't actually used 4k stacks on
|
||||
64-bit powerpc in a long time, and more interesting things that we test
|
||||
don't really work when we deviate from the default (16k). So, we stop
|
||||
pushing our luck in this commit, and return to the default instead of
|
||||
the minimum.
|
||||
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
|
||||
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
|
||||
@@ -10,3 +10,4 @@ CONFIG_CMDLINE_BOOL=y
|
||||
CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
|
||||
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
|
||||
CONFIG_FRAME_WARN=1280
|
||||
+CONFIG_THREAD_SHIFT=14
|
@ -0,0 +1,162 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Wed, 6 May 2020 15:33:03 -0600
|
||||
Subject: [PATCH] wireguard: socket: remove errant restriction on looping to
|
||||
self
|
||||
|
||||
commit b673e24aad36981f327a6570412ffa7754de8911 upstream.
|
||||
|
||||
It's already possible to create two different interfaces and loop
|
||||
packets between them. This has always been possible with tunnels in the
|
||||
kernel, and isn't specific to wireguard. Therefore, the networking stack
|
||||
already needs to deal with that. At the very least, the packet winds up
|
||||
exceeding the MTU and is discarded at that point. So, since this is
|
||||
already something that happens, there's no need to forbid the not very
|
||||
exceptional case of routing a packet back to the same interface; this
|
||||
loop is no different than others, and we shouldn't special case it, but
|
||||
rather rely on generic handling of loops in general. This also makes it
|
||||
easier to do interesting things with wireguard such as onion routing.
|
||||
|
||||
At the same time, we add a selftest for this, ensuring that both onion
|
||||
routing works and infinite routing loops do not crash the kernel. We
|
||||
also add a test case for wireguard interfaces nesting packets and
|
||||
sending traffic between each other, as well as the loop in this case
|
||||
too. We make sure to send some throughput-heavy traffic for this use
|
||||
case, to stress out any possible recursion issues with the locks around
|
||||
workqueues.
|
||||
|
||||
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/socket.c | 12 -----
|
||||
tools/testing/selftests/wireguard/netns.sh | 54 ++++++++++++++++++++--
|
||||
2 files changed, 51 insertions(+), 15 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/socket.c
|
||||
+++ b/drivers/net/wireguard/socket.c
|
||||
@@ -76,12 +76,6 @@ static int send4(struct wg_device *wg, s
|
||||
net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
|
||||
wg->dev->name, &endpoint->addr, ret);
|
||||
goto err;
|
||||
- } else if (unlikely(rt->dst.dev == skb->dev)) {
|
||||
- ip_rt_put(rt);
|
||||
- ret = -ELOOP;
|
||||
- net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
|
||||
- wg->dev->name, &endpoint->addr);
|
||||
- goto err;
|
||||
}
|
||||
if (cache)
|
||||
dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
|
||||
@@ -149,12 +143,6 @@ static int send6(struct wg_device *wg, s
|
||||
net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
|
||||
wg->dev->name, &endpoint->addr, ret);
|
||||
goto err;
|
||||
- } else if (unlikely(dst->dev == skb->dev)) {
|
||||
- dst_release(dst);
|
||||
- ret = -ELOOP;
|
||||
- net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
|
||||
- wg->dev->name, &endpoint->addr);
|
||||
- goto err;
|
||||
}
|
||||
if (cache)
|
||||
dst_cache_set_ip6(cache, dst, &fl.saddr);
|
||||
--- a/tools/testing/selftests/wireguard/netns.sh
|
||||
+++ b/tools/testing/selftests/wireguard/netns.sh
|
||||
@@ -48,8 +48,11 @@ cleanup() {
|
||||
exec 2>/dev/null
|
||||
printf "$orig_message_cost" > /proc/sys/net/core/message_cost
|
||||
ip0 link del dev wg0
|
||||
+ ip0 link del dev wg1
|
||||
ip1 link del dev wg0
|
||||
+ ip1 link del dev wg1
|
||||
ip2 link del dev wg0
|
||||
+ ip2 link del dev wg1
|
||||
local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)"
|
||||
[[ -n $to_kill ]] && kill $to_kill
|
||||
pp ip netns del $netns1
|
||||
@@ -77,18 +80,20 @@ ip0 link set wg0 netns $netns2
|
||||
key1="$(pp wg genkey)"
|
||||
key2="$(pp wg genkey)"
|
||||
key3="$(pp wg genkey)"
|
||||
+key4="$(pp wg genkey)"
|
||||
pub1="$(pp wg pubkey <<<"$key1")"
|
||||
pub2="$(pp wg pubkey <<<"$key2")"
|
||||
pub3="$(pp wg pubkey <<<"$key3")"
|
||||
+pub4="$(pp wg pubkey <<<"$key4")"
|
||||
psk="$(pp wg genpsk)"
|
||||
[[ -n $key1 && -n $key2 && -n $psk ]]
|
||||
|
||||
configure_peers() {
|
||||
ip1 addr add 192.168.241.1/24 dev wg0
|
||||
- ip1 addr add fd00::1/24 dev wg0
|
||||
+ ip1 addr add fd00::1/112 dev wg0
|
||||
|
||||
ip2 addr add 192.168.241.2/24 dev wg0
|
||||
- ip2 addr add fd00::2/24 dev wg0
|
||||
+ ip2 addr add fd00::2/112 dev wg0
|
||||
|
||||
n1 wg set wg0 \
|
||||
private-key <(echo "$key1") \
|
||||
@@ -230,9 +235,38 @@ n1 ping -W 1 -c 1 192.168.241.2
|
||||
n1 wg set wg0 private-key <(echo "$key3")
|
||||
n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove
|
||||
n1 ping -W 1 -c 1 192.168.241.2
|
||||
+n2 wg set wg0 peer "$pub3" remove
|
||||
|
||||
-ip1 link del wg0
|
||||
+# Test that we can route wg through wg
|
||||
+ip1 addr flush dev wg0
|
||||
+ip2 addr flush dev wg0
|
||||
+ip1 addr add fd00::5:1/112 dev wg0
|
||||
+ip2 addr add fd00::5:2/112 dev wg0
|
||||
+n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips fd00::5:2/128 endpoint 127.0.0.1:2
|
||||
+n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips fd00::5:1/128 endpoint 127.212.121.99:9998
|
||||
+ip1 link add wg1 type wireguard
|
||||
+ip2 link add wg1 type wireguard
|
||||
+ip1 addr add 192.168.241.1/24 dev wg1
|
||||
+ip1 addr add fd00::1/112 dev wg1
|
||||
+ip2 addr add 192.168.241.2/24 dev wg1
|
||||
+ip2 addr add fd00::2/112 dev wg1
|
||||
+ip1 link set mtu 1340 up dev wg1
|
||||
+ip2 link set mtu 1340 up dev wg1
|
||||
+n1 wg set wg1 listen-port 5 private-key <(echo "$key3") peer "$pub4" allowed-ips 192.168.241.2/32,fd00::2/128 endpoint [fd00::5:2]:5
|
||||
+n2 wg set wg1 listen-port 5 private-key <(echo "$key4") peer "$pub3" allowed-ips 192.168.241.1/32,fd00::1/128 endpoint [fd00::5:1]:5
|
||||
+tests
|
||||
+# Try to set up a routing loop between the two namespaces
|
||||
+ip1 link set netns $netns0 dev wg1
|
||||
+ip0 addr add 192.168.241.1/24 dev wg1
|
||||
+ip0 link set up dev wg1
|
||||
+n0 ping -W 1 -c 1 192.168.241.2
|
||||
+n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7
|
||||
ip2 link del wg0
|
||||
+ip2 link del wg1
|
||||
+! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel
|
||||
+
|
||||
+ip0 link del wg1
|
||||
+ip1 link del wg0
|
||||
|
||||
# Test using NAT. We now change the topology to this:
|
||||
# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐
|
||||
@@ -282,6 +316,20 @@ pp sleep 3
|
||||
n2 ping -W 1 -c 1 192.168.241.1
|
||||
n1 wg set wg0 peer "$pub2" persistent-keepalive 0
|
||||
|
||||
+# Test that onion routing works, even when it loops
|
||||
+n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5
|
||||
+ip1 addr add 192.168.242.1/24 dev wg0
|
||||
+ip2 link add wg1 type wireguard
|
||||
+ip2 addr add 192.168.242.2/24 dev wg1
|
||||
+n2 wg set wg1 private-key <(echo "$key3") listen-port 5 peer "$pub1" allowed-ips 192.168.242.1/32
|
||||
+ip2 link set wg1 up
|
||||
+n1 ping -W 1 -c 1 192.168.242.2
|
||||
+ip2 link del wg1
|
||||
+n1 wg set wg0 peer "$pub3" endpoint 192.168.242.2:5
|
||||
+! n1 ping -W 1 -c 1 192.168.242.2 || false # Should not crash kernel
|
||||
+n1 wg set wg0 peer "$pub3" remove
|
||||
+ip1 addr del 192.168.242.1/24 dev wg0
|
||||
+
|
||||
# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs.
|
||||
ip1 -6 addr add fc00::9/96 dev vethc
|
||||
ip1 -6 route add default via fc00::1
|
@ -0,0 +1,58 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Wed, 6 May 2020 15:33:04 -0600
|
||||
Subject: [PATCH] wireguard: send/receive: cond_resched() when processing
|
||||
worker ringbuffers
|
||||
|
||||
commit 4005f5c3c9d006157ba716594e0d70c88a235c5e upstream.
|
||||
|
||||
Users with pathological hardware reported CPU stalls on
|
||||
CONFIG_PREEMPT_VOLUNTARY=y, because the ringbuffers would stay full, meaning
|
||||
these workers would never terminate. That turned out not to be okay on
|
||||
systems without forced preemption, which Sultan observed. This commit
|
||||
adds a cond_resched() to the bottom of each loop iteration, so that
|
||||
these workers don't hog the core. Note that we don't need this on the
|
||||
napi poll worker, since that terminates after its budget is expended.
|
||||
|
||||
Suggested-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
Reported-by: Wang Jian <larkwang@gmail.com>
|
||||
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/receive.c | 2 ++
|
||||
drivers/net/wireguard/send.c | 4 ++++
|
||||
2 files changed, 6 insertions(+)
|
||||
|
||||
--- a/drivers/net/wireguard/receive.c
|
||||
+++ b/drivers/net/wireguard/receive.c
|
||||
@@ -516,6 +516,8 @@ void wg_packet_decrypt_worker(struct wor
|
||||
&PACKET_CB(skb)->keypair->receiving)) ?
|
||||
PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
|
||||
wg_queue_enqueue_per_peer_napi(skb, state);
|
||||
+ if (need_resched())
|
||||
+ cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
--- a/drivers/net/wireguard/send.c
|
||||
+++ b/drivers/net/wireguard/send.c
|
||||
@@ -281,6 +281,8 @@ void wg_packet_tx_worker(struct work_str
|
||||
|
||||
wg_noise_keypair_put(keypair, false);
|
||||
wg_peer_put(peer);
|
||||
+ if (need_resched())
|
||||
+ cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -304,6 +306,8 @@ void wg_packet_encrypt_worker(struct wor
|
||||
}
|
||||
wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
|
||||
state);
|
||||
+ if (need_resched())
|
||||
+ cond_resched();
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,51 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
|
||||
Date: Wed, 6 May 2020 15:33:05 -0600
|
||||
Subject: [PATCH] wireguard: selftests: initialize ipv6 members to NULL to
|
||||
squelch clang warning
|
||||
|
||||
commit 4fed818ef54b08d4b29200e416cce65546ad5312 upstream.
|
||||
|
||||
Without setting these to NULL, clang complains in certain
|
||||
configurations that have CONFIG_IPV6=n:
|
||||
|
||||
In file included from drivers/net/wireguard/ratelimiter.c:223:
|
||||
drivers/net/wireguard/selftest/ratelimiter.c:173:34: error: variable 'skb6' is uninitialized when used here [-Werror,-Wuninitialized]
|
||||
ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count);
|
||||
^~~~
|
||||
drivers/net/wireguard/selftest/ratelimiter.c:123:29: note: initialize the variable 'skb6' to silence this warning
|
||||
struct sk_buff *skb4, *skb6;
|
||||
^
|
||||
= NULL
|
||||
drivers/net/wireguard/selftest/ratelimiter.c:173:40: error: variable 'hdr6' is uninitialized when used here [-Werror,-Wuninitialized]
|
||||
ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count);
|
||||
^~~~
|
||||
drivers/net/wireguard/selftest/ratelimiter.c:125:22: note: initialize the variable 'hdr6' to silence this warning
|
||||
struct ipv6hdr *hdr6;
|
||||
^
|
||||
|
||||
We silence this warning by setting the variables to NULL as the warning
|
||||
suggests.
|
||||
|
||||
Reported-by: Arnd Bergmann <arnd@arndb.de>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
||||
---
|
||||
drivers/net/wireguard/selftest/ratelimiter.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/net/wireguard/selftest/ratelimiter.c
|
||||
+++ b/drivers/net/wireguard/selftest/ratelimiter.c
|
||||
@@ -120,9 +120,9 @@ bool __init wg_ratelimiter_selftest(void
|
||||
enum { TRIALS_BEFORE_GIVING_UP = 5000 };
|
||||
bool success = false;
|
||||
int test = 0, trials;
|
||||
- struct sk_buff *skb4, *skb6;
|
||||
+ struct sk_buff *skb4, *skb6 = NULL;
|
||||
struct iphdr *hdr4;
|
||||
- struct ipv6hdr *hdr6;
|
||||
+ struct ipv6hdr *hdr6 = NULL;
|
||||
|
||||
if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN))
|
||||
return true;
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user