Commit | Line | Data |
---|---|---|
c9320b6d MW |
1 | /* |
2 | * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code | |
3 | * | |
4 | * Copyright (C) 2015 Martin Willi | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | */ | |
11 | ||
12 | #include <crypto/algapi.h> | |
13 | #include <crypto/chacha20.h> | |
14 | #include <linux/crypto.h> | |
15 | #include <linux/kernel.h> | |
16 | #include <linux/module.h> | |
17 | #include <asm/fpu/api.h> | |
18 | #include <asm/simd.h> | |
19 | ||
20 | #define CHACHA20_STATE_ALIGN 16 | |
21 | ||
22 | asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src); | |
274f938e | 23 | asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src); |
3d1e93cd MW |
24 | #ifdef CONFIG_AS_AVX2 |
25 | asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src); | |
26 | static bool chacha20_use_avx2; | |
27 | #endif | |
c9320b6d MW |
28 | |
29 | static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, | |
30 | unsigned int bytes) | |
31 | { | |
32 | u8 buf[CHACHA20_BLOCK_SIZE]; | |
33 | ||
3d1e93cd MW |
34 | #ifdef CONFIG_AS_AVX2 |
35 | if (chacha20_use_avx2) { | |
36 | while (bytes >= CHACHA20_BLOCK_SIZE * 8) { | |
37 | chacha20_8block_xor_avx2(state, dst, src); | |
38 | bytes -= CHACHA20_BLOCK_SIZE * 8; | |
39 | src += CHACHA20_BLOCK_SIZE * 8; | |
40 | dst += CHACHA20_BLOCK_SIZE * 8; | |
41 | state[12] += 8; | |
42 | } | |
43 | } | |
44 | #endif | |
274f938e MW |
45 | while (bytes >= CHACHA20_BLOCK_SIZE * 4) { |
46 | chacha20_4block_xor_ssse3(state, dst, src); | |
47 | bytes -= CHACHA20_BLOCK_SIZE * 4; | |
48 | src += CHACHA20_BLOCK_SIZE * 4; | |
49 | dst += CHACHA20_BLOCK_SIZE * 4; | |
50 | state[12] += 4; | |
51 | } | |
c9320b6d MW |
52 | while (bytes >= CHACHA20_BLOCK_SIZE) { |
53 | chacha20_block_xor_ssse3(state, dst, src); | |
54 | bytes -= CHACHA20_BLOCK_SIZE; | |
55 | src += CHACHA20_BLOCK_SIZE; | |
56 | dst += CHACHA20_BLOCK_SIZE; | |
57 | state[12]++; | |
58 | } | |
59 | if (bytes) { | |
60 | memcpy(buf, src, bytes); | |
61 | chacha20_block_xor_ssse3(state, buf, buf); | |
62 | memcpy(dst, buf, bytes); | |
63 | } | |
64 | } | |
65 | ||
66 | static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, | |
67 | struct scatterlist *src, unsigned int nbytes) | |
68 | { | |
69 | u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1]; | |
70 | struct blkcipher_walk walk; | |
71 | int err; | |
72 | ||
73 | if (!may_use_simd()) | |
74 | return crypto_chacha20_crypt(desc, dst, src, nbytes); | |
75 | ||
76 | state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN); | |
77 | ||
78 | blkcipher_walk_init(&walk, dst, src, nbytes); | |
79 | err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE); | |
80 | ||
81 | crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv); | |
82 | ||
83 | kernel_fpu_begin(); | |
84 | ||
85 | while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { | |
86 | chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, | |
87 | rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); | |
88 | err = blkcipher_walk_done(desc, &walk, | |
89 | walk.nbytes % CHACHA20_BLOCK_SIZE); | |
90 | } | |
91 | ||
92 | if (walk.nbytes) { | |
93 | chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, | |
94 | walk.nbytes); | |
95 | err = blkcipher_walk_done(desc, &walk, 0); | |
96 | } | |
97 | ||
98 | kernel_fpu_end(); | |
99 | ||
100 | return err; | |
101 | } | |
102 | ||
103 | static struct crypto_alg alg = { | |
104 | .cra_name = "chacha20", | |
105 | .cra_driver_name = "chacha20-simd", | |
106 | .cra_priority = 300, | |
107 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | |
108 | .cra_blocksize = 1, | |
109 | .cra_type = &crypto_blkcipher_type, | |
110 | .cra_ctxsize = sizeof(struct chacha20_ctx), | |
111 | .cra_alignmask = sizeof(u32) - 1, | |
112 | .cra_module = THIS_MODULE, | |
113 | .cra_u = { | |
114 | .blkcipher = { | |
115 | .min_keysize = CHACHA20_KEY_SIZE, | |
116 | .max_keysize = CHACHA20_KEY_SIZE, | |
117 | .ivsize = CHACHA20_IV_SIZE, | |
118 | .geniv = "seqiv", | |
119 | .setkey = crypto_chacha20_setkey, | |
120 | .encrypt = chacha20_simd, | |
121 | .decrypt = chacha20_simd, | |
122 | }, | |
123 | }, | |
124 | }; | |
125 | ||
126 | static int __init chacha20_simd_mod_init(void) | |
127 | { | |
362f924b | 128 | if (!boot_cpu_has(X86_FEATURE_SSSE3)) |
c9320b6d MW |
129 | return -ENODEV; |
130 | ||
3d1e93cd | 131 | #ifdef CONFIG_AS_AVX2 |
abcfdfe0 | 132 | chacha20_use_avx2 = cpu_has_avx && boot_cpu_has(X86_FEATURE_AVX2) && |
d91cab78 | 133 | cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); |
3d1e93cd | 134 | #endif |
c9320b6d MW |
135 | return crypto_register_alg(&alg); |
136 | } | |
137 | ||
138 | static void __exit chacha20_simd_mod_fini(void) | |
139 | { | |
140 | crypto_unregister_alg(&alg); | |
141 | } | |
142 | ||
143 | module_init(chacha20_simd_mod_init); | |
144 | module_exit(chacha20_simd_mod_fini); | |
145 | ||
146 | MODULE_LICENSE("GPL"); | |
147 | MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); | |
148 | MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated"); | |
149 | MODULE_ALIAS_CRYPTO("chacha20"); | |
150 | MODULE_ALIAS_CRYPTO("chacha20-simd"); |