Lines Matching +full:0 +full:- +full:8
1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
12 .arch armv8-a
13 .fpu crypto-neon-fp-armv8
17 aese.8 \state, \key
18 aesmc.8 \state, \state
22 aesd.8 \state, \key
23 aesimc.8 \state, \state
38 aese.8 q0, \key2
44 aesd.8 q0, \key2
75 aese.8 q0, \key2
76 aese.8 q1, \key2
77 aese.8 q2, \key2
78 aese.8 q3, \key2
90 aesd.8 q0, \key2
91 aesd.8 q1, \key2
92 aesd.8 q2, \key2
93 aesd.8 q3, \key2
102 vld1.32 {q10-q11}, [ip]!
104 vld1.32 {q12-q13}, [ip]!
106 vld1.32 {q10-q11}, [ip]!
108 vld1.32 {q12-q13}, [ip]!
110 blo 0f @ AES-128: 10 rounds
111 vld1.32 {q10-q11}, [ip]!
113 beq 1f @ AES-192: 12 rounds
114 vld1.32 {q12-q13}, [ip]
116 0: \fround q12, q13, q14
124 * Internal, non-AAPCS compliant functions that implement the core AES
125 * transforms. These should preserve all registers except q0 - q2 and ip
164 vld1.32 {q8-q9}, [\rk] @ load first 2 round keys
176 ldr r4, [sp, #8]
181 vld1.8 {q0-q1}, [r1]!
182 vld1.8 {q2-q3}, [r1]!
184 vst1.8 {q0-q1}, [r0]!
185 vst1.8 {q2-q3}, [r0]!
191 vld1.8 {q0}, [r1]!
193 vst1.8 {q0}, [r0]!
202 ldr r4, [sp, #8]
207 vld1.8 {q0-q1}, [r1]!
208 vld1.8 {q2-q3}, [r1]!
210 vst1.8 {q0-q1}, [r0]!
211 vst1.8 {q2-q3}, [r0]!
217 vld1.8 {q0}, [r1]!
219 vst1.8 {q0}, [r0]!
233 push {r4-r6, lr}
235 vld1.8 {q0}, [r5]
238 vld1.8 {q1}, [r1]! @ get next pt block
241 vst1.8 {q0}, [r0]!
244 vst1.8 {q0}, [r5]
245 pop {r4-r6, pc}
249 push {r4-r6, lr}
251 vld1.8 {q15}, [r5] @ keep iv in q15
256 vld1.8 {q0-q1}, [r1]!
257 vld1.8 {q2-q3}, [r1]!
268 vst1.8 {q0-q1}, [r0]!
269 vst1.8 {q2-q3}, [r0]!
276 vld1.8 {q0}, [r1]! @ get next ct block
280 vst1.8 {q0}, [r0]!
284 vst1.8 {q15}, [r5] @ keep iv in q15
285 pop {r4-r6, pc}
297 push {r4-r6, lr}
306 vld1.8 {q5}, [ip]
307 vld1.8 {q6}, [lr]
310 vld1.8 {q0}, [r1] @ overlapping loads
311 vld1.8 {q3}, [ip]
313 vld1.8 {q1}, [r5] @ get iv
319 vtbl.8 d4, {d0-d1}, d10
320 vtbl.8 d5, {d0-d1}, d11
321 vtbl.8 d2, {d6-d7}, d12
322 vtbl.8 d3, {d6-d7}, d13
328 vst1.8 {q2}, [r4] @ overlapping stores
329 vst1.8 {q0}, [r0]
331 pop {r4-r6, pc}
335 push {r4-r6, lr}
344 vld1.8 {q5}, [ip]
345 vld1.8 {q6}, [lr]
348 vld1.8 {q0}, [r1] @ overlapping loads
349 vld1.8 {q1}, [ip]
351 vld1.8 {q3}, [r5] @ get iv
356 vtbl.8 d4, {d0-d1}, d10
357 vtbl.8 d5, {d0-d1}, d11
358 vtbx.8 d0, {d2-d3}, d12
359 vtbx.8 d1, {d2-d3}, d13
366 vst1.8 {q1}, [r4] @ overlapping stores
367 vst1.8 {q0}, [r0]
369 pop {r4-r6, pc}
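The "overlapping loads" / "overlapping stores" annotations above (and again in the XTS tail handling further down) refer to a common trick for trailing partial blocks: access a full 16-byte vector that ends exactly at the last byte, overlapping the previous block, then fix the byte order up with the vtbl/vtbx permutes. A minimal scalar sketch of the load side, with an illustrative helper name that is not taken from the kernel:

    #include <stdint.h>
    #include <string.h>

    /* Read the final partial block as the 16 bytes that END at the last
     * input byte, so the access overlaps the preceding block instead of
     * running past the end of the buffer.  Assumes len >= 16, as the CTS
     * paths require at least one full block. */
    static void load_last_16(uint8_t block[16], const uint8_t *buf, size_t len)
    {
        memcpy(block, buf + len - 16, 16);
    }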
378 push {r4-r6, lr}
380 vld1.8 {q7}, [r5] @ load ctr
392 * a silicon erratum that exists in Cortex-A57 (#1742098) and
393 * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs
395 * register of which a single 32-bit lane has been updated the last
397 * q0-q3 below are not manipulated individually, and the different
415 vld1.8 {q4-q5}, [r1]!
416 vld1.8 {q6}, [r1]!
417 vld1.8 {q15}, [r1]!
424 vst1.8 {q0-q1}, [r0]!
425 vst1.8 {q2-q3}, [r0]!
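The erratum note at lines 392-397 explains why this CTR path works on four blocks at a time. As a rough illustration only (not the kernel's code), here is the same one-round, four-block pattern expressed with ACLE crypto intrinsics; the helper name is made up and the code needs a compiler with the AES crypto extension enabled:

    #include <arm_neon.h>

    /* One AES round applied to four blocks in lockstep with a shared round
     * key, mirroring the aese.8/aesmc.8 q0-q3 sequences in the listing. */
    static inline void aes_round_4x(uint8x16_t b[4], uint8x16_t rk)
    {
        for (int i = 0; i < 4; i++)
            b[i] = vaesmcq_u8(vaeseq_u8(b[i], rk));
    }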
442 bmi .Lctrtailblock @ blocks < 0 means tail block
443 vld1.8 {q3}, [r1]!
445 vst1.8 {q3}, [r0]!
449 vst1.8 {q7}, [r5] @ return next CTR value
450 pop {r4-r6, pc}
453 vst1.8 {q0}, [r0, :64] @ return the key stream
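Lines 442 and 453 show the tail-block convention for CTR: once the block count goes negative, the routine stores one block of raw key stream instead of ciphertext, and the caller XORs the short tail itself. A hedged sketch of that caller-side step (the function name is illustrative, not the glue code's):

    #include <stddef.h>
    #include <stdint.h>

    /* XOR the final len (< 16) bytes with the key stream block returned by
     * the assembly, rather than asking it to encrypt a short block. */
    static void ctr_xor_tail(uint8_t *dst, const uint8_t *src,
                             const uint8_t keystream[16], size_t len)
    {
        for (size_t i = 0; i < len; i++)
            dst[i] = src[i] ^ keystream[i];
    }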
479 vext.8 \tmp, \tmp, \tmp, #8
484 vmov.i32 d30, #0x87 @ compose tweak mask vector
490 vld1.8 {q0}, [r5] @ load iv
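The mask composed at line 484 (0x87) is the XTS reduction constant: the tweak-update macro around line 479 multiplies the 128-bit tweak by x in GF(2^128) before every block. A scalar C sketch of that update, assuming the usual little-endian XTS tweak layout (the assembly does the same thing with 64-bit lanes and the d30 mask rather than a byte loop; the function name is illustrative):

    #include <stdint.h>

    /* Multiply the 128-bit tweak by x modulo x^128 + x^7 + x^2 + x + 1,
     * i.e. shift left one bit and fold the carry back in with 0x87. */
    static void xts_next_tweak(uint8_t t[16])
    {
        uint8_t carry = 0;

        for (int i = 0; i < 16; i++) {
            uint8_t msb = t[i] >> 7;

            t[i] = (uint8_t)((t[i] << 1) | carry);
            carry = msb;
        }
        if (carry)
            t[0] ^= 0x87;
    }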
503 push {r4-r6, lr}
509 teq r6, #0 @ start of a block?
517 vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks
518 vld1.8 {q2-q3}, [r1]!
531 vst1.8 {q0-q1}, [r0]! @ write 4 ct blocks
532 vst1.8 {q2-q3}, [r0]!
534 teq r4, #0
543 vld1.8 {q0}, [r1]!
548 teq r4, #0
553 vst1.8 {q0}, [r0]!
556 vst1.8 {q0}, [r0]
558 vst1.8 {q4}, [r5]
559 pop {r4-r6, pc}
575 vld1.8 {q1}, [r1] @ load final partial block
576 vld1.8 {q2}, [ip]
577 vld1.8 {q3}, [lr]
579 vtbl.8 d4, {d0-d1}, d4
580 vtbl.8 d5, {d0-d1}, d5
581 vtbx.8 d0, {d2-d3}, d6
582 vtbx.8 d1, {d2-d3}, d7
584 vst1.8 {q2}, [r4] @ overlapping stores
585 mov r4, #0
591 push {r4-r6, lr}
598 tst r4, #0xf
599 subne r4, r4, #0x10
601 teq r6, #0 @ start of a block?
609 vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks
610 vld1.8 {q2-q3}, [r1]!
623 vst1.8 {q0-q1}, [r0]! @ write 4 pt blocks
624 vst1.8 {q2-q3}, [r0]!
626 teq r4, #0
634 vld1.8 {q0}, [r1]!
640 vst1.8 {q0}, [r0]!
641 teq r4, #0
647 vst1.8 {q4}, [r5]
648 pop {r4-r6, pc}
663 vld1.8 {q1}, [r1] @ load final partial block
664 vld1.8 {q2}, [ip]
665 vld1.8 {q3}, [lr]
671 vtbl.8 d4, {d0-d1}, d4
672 vtbl.8 d5, {d0-d1}, d5
673 vtbx.8 d0, {d2-d3}, d6
674 vtbx.8 d1, {d2-d3}, d7
676 vst1.8 {q2}, [r4] @ overlapping stores
677 mov r4, #0
682 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
689 aese.8 q0, q1
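ce_aes_sub exploits the fact that AESE computes ShiftRows(SubBytes(state ^ key)): with an all-zero state and the input word duplicated into every lane of the "key", ShiftRows becomes a no-op and lane 0 ends up holding the S-box applied to each byte of the input. The same trick as a hedged intrinsics sketch (illustrative name, requires the crypto extension):

    #include <arm_neon.h>
    #include <stdint.h>

    /* Apply the AES S-box to every byte of a 32-bit word via AESE against
     * an all-zero state, as the aese.8 q0, q1 above does. */
    static uint32_t aes_sub_sketch(uint32_t input)
    {
        uint8x16_t key   = vreinterpretq_u8_u32(vdupq_n_u32(input));
        uint8x16_t state = vaeseq_u8(vdupq_n_u8(0), key);

        return vgetq_lane_u32(vreinterpretq_u32_u8(state), 0);
    }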
695 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
700 aesimc.8 q0, q0
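ce_aes_invert is a thin wrapper around AESIMC, which applies Inverse MixColumns to a round key so the decryption key schedule can reuse the encryption round keys (the "equivalent inverse cipher" construction). A hedged intrinsics rendering (illustrative name, crypto extension required):

    #include <arm_neon.h>
    #include <stdint.h>

    /* Inverse MixColumns on one 16-byte round key, matching the single
     * aesimc.8 q0, q0 instruction above. */
    static void aes_invert_sketch(uint8_t dst[16], const uint8_t src[16])
    {
        vst1q_u8(dst, vaesimcq_u8(vld1q_u8(src)));
    }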
708 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
709 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
710 .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
711 .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
712 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
713 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff