Lines Matching +full:sub +full:- +full:blocks
1 /* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
3 // This file is dual-licensed, meaning that you can use it under your
41 // The generated code of this file depends on the following RISC-V extensions:
42 // - RV64I
43 // - RISC-V Vector ('V') with VLEN >= 128
44 // - RISC-V Vector AES block cipher extension ('Zvkned')
51 #include "aes-macros.S"
88 // t0 is the remaining length in 32-bit words. It's a multiple of 4.
91 sub t0, t0, t1 // Subtract number of words processed
134 addi LEN, LEN, -16
146 vle32.v v20, (INP) // Load ciphertext blocks
147 vslideup.vi v16, v20, 4 // Setup prev ciphertext blocks
148 addi t1, t0, -4
150 aes_decrypt v20, \keylen // Decrypt the blocks
151 vxor.vv v20, v20, v16 // XOR with prev ciphertext blocks
152 vse32.v v20, (OUTP) // Store plaintext blocks
157 sub LEN, LEN, t0
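The loop above works because CBC decryption has no serial dependency: each plaintext block is Decrypt(C[i]) XOR C[i-1], with the IV standing in for C[-1], so a whole group of blocks can be decrypted at once and XORed with the ciphertext shifted along by one block (which is what the vslideup.vi by 4 words sets up). A scalar C sketch of that relation follows; decrypt_block and cbc_decrypt_ref are illustrative stand-ins, not anything defined in this file.

#include <stdint.h>
#include <string.h>

/* Stand-in for single-block AES decryption (not from this file). */
typedef void (*block_fn)(const uint8_t in[16], uint8_t out[16]);

/*
 * Reference CBC decryption of nblocks full blocks:
 *	P[i] = Decrypt(C[i]) ^ C[i-1], with C[-1] = IV
 * The "prev ciphertext block" stream is just the ciphertext shifted along
 * by one block, starting at the IV; the chaining value carried into the
 * next group is the last ciphertext block of the current group.
 */
static void cbc_decrypt_ref(block_fn decrypt_block, const uint8_t iv[16],
			    const uint8_t *in, uint8_t *out, size_t nblocks)
{
	uint8_t prev[16], cur[16], tmp[16];

	memcpy(prev, iv, 16);
	for (size_t i = 0; i < nblocks; i++) {
		memcpy(cur, &in[16 * i], 16);	/* save C[i]: allows in == out */
		decrypt_block(cur, tmp);
		for (int j = 0; j < 16; j++)
			out[16 * i + j] = tmp[j] ^ prev[j];
		memcpy(prev, cur, 16);		/* next "IV" */
	}
}

The copy of C[i] taken before the output store keeps the sketch correct for in-place operation, mirroring how the vector code saves the last ciphertext block of each group before overwriting the buffer.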
190 // CBC-encrypt all blocks except the last. But don't store the
191 // second-to-last block to the output buffer yet, since it will be
193 // message is single-block, still encrypt the last (and only) block.
204 addi LEN, LEN, -16
210 // Encrypt the last two blocks using ciphertext stealing as follows:
211 // C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n])
212 // C[n] = Encrypt(P[n-1] ^ C[n-2])[0..LEN]
216 // is 1 <= LEN <= 16. If there are only 2 blocks, C[n-2] means the IV.
218 // v16 already contains Encrypt(P[n-1] ^ C[n-2]).
219 // INP points to P[n]. OUTP points to where C[n-1] should go.
220 // To support in-place encryption, load P[n] before storing C[n].
225 vxor.vv v16, v16, v17 // v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n]
229 vse32.v v16, (OUTP) // Store C[n-1] (or C[n] in single-block case)
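The two formulas above fully determine the ciphertext-stealing tail. Below is a C sketch of just this step, starting from the same intermediate value the code keeps in v16 (Encrypt(P[n-1] ^ C[n-2])) and producing C[n-1] and C[n] already in the swapped CS3 order. encrypt_block, cts_encrypt_tail_ref, and tail_len (playing the role of LEN) are illustrative stand-ins, and the single-block special case is ignored.

#include <stdint.h>
#include <string.h>

typedef void (*block_fn)(const uint8_t in[16], uint8_t out[16]);

/*
 * CS3 ciphertext-stealing tail for encryption.  e_prev is
 * Encrypt(P[n-1] ^ C[n-2]), i.e. what plain CBC encryption of block n-1
 * would produce.  pn is the final plaintext block of tail_len bytes
 * (1 <= tail_len <= 16).  Writes C[n-1] (16 bytes) followed by C[n]
 * (tail_len bytes) to out.
 */
static void cts_encrypt_tail_ref(block_fn encrypt_block,
				 const uint8_t e_prev[16],
				 const uint8_t *pn, size_t tail_len,
				 uint8_t *out)
{
	uint8_t buf[16];

	/* C[n] = Encrypt(P[n-1] ^ C[n-2])[0..tail_len] */
	memcpy(&out[16], e_prev, tail_len);

	/*
	 * C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n]); the partial
	 * P[n] is XORed over the head of the block only, so the tail of
	 * the block being encrypted keeps the e_prev bytes.
	 */
	memcpy(buf, e_prev, 16);
	for (size_t i = 0; i < tail_len; i++)
		buf[i] ^= pn[i];
	encrypt_block(buf, out);
}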
233 #define LEN32 t4 // Length of remaining full blocks in 32-bit words
241 // Save C[n-2] in v28 so that it's available later during the ciphertext
242 // stealing step. If there are fewer than three blocks, C[n-2] means
243 // the IV, otherwise it means the third-to-last ciphertext block.
245 addi t0, LEN, -33
251 // CBC-decrypt all full blocks. For the last full block, or the last 2
252 // full blocks if the message is block-aligned, this doesn't write the
253 // correct output blocks (unless the message is only a single block),
259 addi t1, t0, -4
260 vle32.v v20, (INP) // Load next set of ciphertext blocks
262 vslideup.vi v24, v20, 4 // Setup prev ciphertext blocks
264 aes_decrypt v20, \keylen // Decrypt this set of blocks
265 vxor.vv v24, v24, v20 // XOR prev ciphertext blocks with decrypted blocks
266 vse32.v v24, (OUTP) // Store this set of plaintext blocks
267 sub LEN32, LEN32, t0
275 addi t0, OUTP, -16 // Get pointer to last full plaintext block
282 // Block-aligned message. Just fix up the last 2 blocks. We need:
284 // P[n-1] = Decrypt(C[n]) ^ C[n-2]
285 // P[n] = Decrypt(C[n-1]) ^ C[n]
287 // We have C[n] in v16, Decrypt(C[n]) in v20, and C[n-2] in v28.
288 // Together with Decrypt(C[n-1]) ^ C[n-2] from the output buffer, this
289 // is everything needed to fix the output without re-decrypting blocks.
290 addi t1, OUTP, -32 // Get pointer to where P[n-1] should go
291 vxor.vv v20, v20, v28 // Decrypt(C[n]) ^ C[n-2] == P[n-1]
292 vle32.v v24, (t1) // Decrypt(C[n-1]) ^ C[n-2]
293 vse32.v v20, (t1) // Store P[n-1]
294 vxor.vv v20, v24, v16 // Decrypt(C[n-1]) ^ C[n-2] ^ C[n] == P[n] ^ C[n-2]
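In the block-aligned case the fix-up amounts to exactly the two equations above; a minimal C restatement follows, with decrypt_block and the function name again illustrative stand-ins. The assembly avoids the second block decryption by reusing Decrypt(C[n-1]) ^ C[n-2], which the main loop already wrote to the output buffer, and cancelling the C[n-2] term with one more XOR at the shared finish step.

#include <stdint.h>

typedef void (*block_fn)(const uint8_t in[16], uint8_t out[16]);

/*
 * Block-aligned CS3 decryption of the last two (swapped) ciphertext blocks:
 *	P[n-1] = Decrypt(C[n])   ^ C[n-2]
 *	P[n]   = Decrypt(C[n-1]) ^ C[n]
 * C[n-2] means the IV when the message is only two blocks long.
 */
static void cts_decrypt_tail_aligned_ref(block_fn decrypt_block,
					 const uint8_t c_nm2[16],
					 const uint8_t c_nm1[16],
					 const uint8_t c_n[16],
					 uint8_t p_nm1[16], uint8_t p_n[16])
{
	uint8_t tmp[16];
	int i;

	decrypt_block(c_n, tmp);
	for (i = 0; i < 16; i++)
		p_nm1[i] = tmp[i] ^ c_nm2[i];

	decrypt_block(c_nm1, tmp);
	for (i = 0; i < 16; i++)
		p_n[i] = tmp[i] ^ c_n[i];
}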
298 // Decrypt the last two blocks using ciphertext stealing as follows:
300 // P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2]
301 // P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16]
303 // We already have Decrypt(C[n-1]) in v20 and C[n-2] in v28.
304 vmv.v.v v16, v20 // v16 = Decrypt(C[n-1])
306 vle8.v v20, (INP) // v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16]
307 vxor.vv v16, v16, v20 // v16 = Decrypt(C[n-1]) ^ C[n]
310 aes_decrypt v20, \keylen // v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16])
312 vxor.vv v20, v20, v28 // XOR with C[n-2]
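The same relations for the general case, where the final ciphertext block C[n] is only LEN_MOD16 bytes long, in a C sketch; tail_len stands in for LEN_MOD16, and decrypt_block and cts_decrypt_tail_ref are again illustrative stand-ins rather than anything from this file.

#include <stdint.h>
#include <string.h>

typedef void (*block_fn)(const uint8_t in[16], uint8_t out[16]);

/*
 * CS3 ciphertext-stealing tail for decryption when the last ciphertext
 * block C[n] is partial (tail_len bytes, 1 <= tail_len <= 15):
 *	P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[tail_len..16]) ^ C[n-2]
 *	P[n]   = (Decrypt(C[n-1]) ^ C[n])[0..tail_len]
 */
static void cts_decrypt_tail_ref(block_fn decrypt_block,
				 const uint8_t c_nm2[16],
				 const uint8_t c_nm1[16],
				 const uint8_t *c_n, size_t tail_len,
				 uint8_t p_nm1[16], uint8_t *p_n)
{
	uint8_t d_nm1[16], buf[16];
	size_t i;

	decrypt_block(c_nm1, d_nm1);

	/* P[n]: XOR the partial C[n] over the head of Decrypt(C[n-1]) */
	for (i = 0; i < tail_len; i++)
		p_n[i] = d_nm1[i] ^ c_n[i];

	/* Rebuild the stolen block: C[n] followed by the tail of Decrypt(C[n-1]) */
	memcpy(buf, d_nm1, 16);
	memcpy(buf, c_n, tail_len);
	decrypt_block(buf, d_nm1);
	for (i = 0; i < 16; i++)
		p_nm1[i] = d_nm1[i] ^ c_nm2[i];
}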
330 // Encrypts or decrypts a message with the CS3 variant of AES-CBC-CTS.
331 // This is the variant that unconditionally swaps the last two blocks.
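As a sanity check on the two stealing formulas, the sketch below round-trips the last two blocks through the encrypt and decrypt tail functions from the earlier sketches. It assumes those helpers are in scope and substitutes a deliberately trivial, invertible toy block cipher for AES, so it exercises only the CS3 block ordering and the stealing arithmetic, not the cipher itself.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Trivial, invertible 16-byte toy "cipher" for the demo -- not AES. */
static const uint8_t toy_key[16] = {
	0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe,
	0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
};

static void toy_encrypt(const uint8_t in[16], uint8_t out[16])
{
	uint8_t t[16];

	memcpy(t, in, 16);
	for (int i = 0; i < 16; i++)
		out[i] = (uint8_t)(t[(i + 1) % 16] + toy_key[i]);
}

static void toy_decrypt(const uint8_t in[16], uint8_t out[16])
{
	uint8_t t[16];

	memcpy(t, in, 16);
	for (int i = 0; i < 16; i++)
		out[(i + 1) % 16] = (uint8_t)(t[i] - toy_key[i]);
}

int main(void)
{
	uint8_t c_nm2[16], p_nm1[16], p_n[5], tmp[16], e_prev[16];
	uint8_t ct[16 + sizeof(p_n)], out_nm1[16], out_n[sizeof(p_n)];
	const size_t tail_len = sizeof(p_n);

	memset(c_nm2, 0x11, 16);	/* C[n-2], or the IV */
	memset(p_nm1, 0x22, 16);	/* P[n-1], last full plaintext block */
	memset(p_n, 0x33, tail_len);	/* P[n], partial plaintext block */

	/* e_prev = Encrypt(P[n-1] ^ C[n-2]), the value the code keeps in v16 */
	for (int i = 0; i < 16; i++)
		tmp[i] = p_nm1[i] ^ c_nm2[i];
	toy_encrypt(tmp, e_prev);

	/* ct = C[n-1] (16 bytes) || C[n] (tail_len bytes), in CS3 order */
	cts_encrypt_tail_ref(toy_encrypt, e_prev, p_n, tail_len, ct);

	cts_decrypt_tail_ref(toy_decrypt, c_nm2, ct, &ct[16], tail_len,
			     out_nm1, out_n);

	printf("CS3 tail round trip: %s\n",
	       !memcmp(out_nm1, p_nm1, 16) && !memcmp(out_n, p_n, tail_len) ?
	       "OK" : "FAILED");
	return 0;
}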