// SPDX-License-Identifier: GPL-2.0
/*
 * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
 *
 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/module.h>

void poly1305_init_arm(void *state, const u8 *key);
void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);

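/*
 * Weak stub satisfying references to poly1305_blocks_neon() when the NEON
 * implementation is not built in; it is never called in that case, since
 * have_neon remains false.
 */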
void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
{
}

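/* Enabled at module init when the CPU advertises NEON support. */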
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);

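/*
 * Library interface: poly1305_init_arm() consumes the first 16 key bytes
 * ('r'); the remaining 16 bytes ('s') are stored for the final addition in
 * poly1305_emit_arm().
 */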
void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
{
	poly1305_init_arm(&dctx->h, key);
	dctx->s[0] = get_unaligned_le32(key + 16);
	dctx->s[1] = get_unaligned_le32(key + 20);
	dctx->s[2] = get_unaligned_le32(key + 24);
	dctx->s[3] = get_unaligned_le32(key + 28);
	dctx->buflen = 0;
}
EXPORT_SYMBOL(poly1305_init_arch);

static int arm_poly1305_init(struct shash_desc *desc)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	dctx->buflen = 0;
	dctx->rset = 0;
	dctx->sset = false;

	return 0;
}

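/*
 * The shash interface has no setkey(): the 32-byte key arrives as the first
 * two blocks of data ('r' first, then 's'), after which regular block
 * processing begins.
 */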
static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				 u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		if (!dctx->rset) {
			poly1305_init_arm(&dctx->h, src);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->rset = 1;
		}
		if (len >= POLY1305_BLOCK_SIZE) {
			dctx->s[0] = get_unaligned_le32(src +  0);
			dctx->s[1] = get_unaligned_le32(src +  4);
			dctx->s[2] = get_unaligned_le32(src +  8);
			dctx->s[3] = get_unaligned_le32(src + 12);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->sset = true;
		}
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	len &= ~(POLY1305_BLOCK_SIZE - 1);

	if (static_branch_likely(&have_neon) && likely(do_neon))
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
	else
		poly1305_blocks_arm(&dctx->h, src, len, hibit);
}

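/*
 * Buffer partial blocks in dctx->buf and pass only whole blocks to the
 * block functions.
 */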
static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				    const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		len -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			arm_poly1305_blocks(dctx, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += round_down(len, POLY1305_BLOCK_SIZE);
		len %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(len)) {
		dctx->buflen = len;
		memcpy(dctx->buf, src, len);
	}
}

static int arm_poly1305_update(struct shash_desc *desc,
			       const u8 *src, unsigned int srclen)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	arm_poly1305_do_update(dctx, src, srclen, false);
	return 0;
}

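/*
 * Use NEON only if the SIMD unit is usable in the current context and the
 * input is large enough to amortise the kernel_neon_begin()/end() overhead.
 */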
static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
						   const u8 *src,
						   unsigned int srclen)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
	bool do_neon = crypto_simd_usable() && srclen > 128;

	if (static_branch_likely(&have_neon) && do_neon)
		kernel_neon_begin();
	arm_poly1305_do_update(dctx, src, srclen, do_neon);
	if (static_branch_likely(&have_neon) && do_neon)
		kernel_neon_end();
	return 0;
}

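/*
 * Library interface update. NEON processing is done in chunks of at most
 * 4 KiB so that preemption is not kept disabled for too long between
 * kernel_neon_begin() and kernel_neon_end().
 */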
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
			  unsigned int nbytes)
{
	bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
		       crypto_simd_usable();

	if (unlikely(dctx->buflen)) {
		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		nbytes -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			poly1305_blocks_arm(&dctx->h, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1);
			dctx->buflen = 0;
		}
	}

	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		if (static_branch_likely(&have_neon) && do_neon) {
			do {
				unsigned int todo = min_t(unsigned int, len, SZ_4K);

				kernel_neon_begin();
				poly1305_blocks_neon(&dctx->h, src, todo, 1);
				kernel_neon_end();

				len -= todo;
				src += todo;
			} while (len);
		} else {
			poly1305_blocks_arm(&dctx->h, src, len, 1);
			src += len;
		}
		nbytes %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(nbytes)) {
		dctx->buflen = nbytes;
		memcpy(dctx->buf, src, nbytes);
	}
}
EXPORT_SYMBOL(poly1305_update_arch);

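/*
 * Pad and process any trailing partial block, emit the tag, then wipe the
 * descriptor state.
 */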
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	if (unlikely(dctx->buflen)) {
		dctx->buf[dctx->buflen++] = 1;
		memset(dctx->buf + dctx->buflen, 0,
		       POLY1305_BLOCK_SIZE - dctx->buflen);
		poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_arm(&dctx->h, dst, dctx->s);
	*dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);

static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (unlikely(!dctx->sset))
		return -ENOKEY;

	poly1305_final_arch(dctx, dst);
	return 0;
}

static struct shash_alg arm_poly1305_algs[] = {{
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-arm",
	.base.cra_priority	= 150,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#ifdef CONFIG_KERNEL_MODE_NEON
}, {
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update_neon,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-neon",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#endif
}};

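/*
 * Register only the scalar shash when NEON is unavailable; otherwise enable
 * the static key and register both the scalar and NEON variants.
 */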
static int __init arm_poly1305_mod_init(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
	    (elf_hwcap & HWCAP_NEON))
		static_branch_enable(&have_neon);
	else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
		/* register only the first entry */
		return crypto_register_shash(&arm_poly1305_algs[0]);

	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
		crypto_register_shashes(arm_poly1305_algs,
					ARRAY_SIZE(arm_poly1305_algs)) : 0;
}

static void __exit arm_poly1305_mod_exit(void)
{
	if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
		return;
	if (!static_branch_likely(&have_neon)) {
		crypto_unregister_shash(&arm_poly1305_algs[0]);
		return;
	}
	crypto_unregister_shashes(arm_poly1305_algs,
				  ARRAY_SIZE(arm_poly1305_algs));
}

module_init(arm_poly1305_mod_init);
module_exit(arm_poly1305_mod_exit);

MODULE_DESCRIPTION("Accelerated Poly1305 transform for ARM");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-arm");
MODULE_ALIAS_CRYPTO("poly1305-neon");