arm/crypto/ghash-ce-core.S

1 /* SPDX-License-Identifier: GPL-2.0-only */
5  * Copyright (C) 2015 - 2017 Linaro Ltd.
12 	.arch		armv8-a
13 	.fpu		crypto-neon-fp-armv8
101 	 * This implementation of 64x64 -> 128 bit polynomial multiplication
102 	 * using vmull.p8 instructions (8x8 -> 16) is taken from the paper
105 	 * Ricardo Dahab (https://hal.inria.fr/hal-01506572)
107 	 * It has been slightly tweaked for in-order performance, and to allow
159 	// PMULL (64x64->128) based reduction for CPUs that can do
175 	// 64x64->128 PMULL instruction
214 	vld1.8		{XL2-XM2}, [r2]!
215 1:	vld1.8		{T2-T3}, [r2]!
222 	vld1.64		{HH3-HH4}, [ip, :128]
279 	vld1.8		{XL2-XM2}, [r2]!
334 	vld1.64		{HH3-HH4}, [r3]
398 	vld1.8		{ek0-ek1}, [\rkp, :128]!
400 	blt		.L\@			// AES-128
407 	beq		.L\@			// AES-192
503 	.macro		enc_4x, in0, in1, in2, in3  argument
508 	veor		\in2, \in2, e2
509 	veor		\in3, \in3, e3
511 	vst1.8		{\in0-\in1}, [r4]!
512 	vst1.8		{\in2-\in3}, [r4]!
515 	.macro		dec_4x, in0, in1, in2, in3  argument
520 	veor		e2, e2, \in2
521 	veor		e3, e3, \in3
523 	vst1.8		{e0-e1}, [r4]!
524 	vst1.8		{e2-e3}, [r4]!
533 	push		{r4-r8, lr}
542 	pop		{r4-r8, pc}
551 	push		{r4-r8, lr}
560 	pop		{r4-r8, pc}
569 	push		{r4-r8, lr}
615 	pop		{r4-r8, pc}
625 	push		{r4-r8, lr}
674 	vmvn		T1, T1			// 0 for eq, -1 for ne
684 	pop		{r4-r8, pc}