Lines Matching +full:1 +full:- +full:16
1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
5 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
8 /* included by aes-ce.S and aes-neon.S */
62 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
64 ST5( ld1 {v4.16b}, [x1], #16 )
66 st1 {v0.16b-v3.16b}, [x0], #64
67 ST5( st1 {v4.16b}, [x0], #16 )
73 ld1 {v0.16b}, [x1], #16 /* get next pt block */
75 st1 {v0.16b}, [x0], #16
76 subs w4, w4, #1
92 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
94 ST5( ld1 {v4.16b}, [x1], #16 )
96 st1 {v0.16b-v3.16b}, [x0], #64
97 ST5( st1 {v4.16b}, [x0], #16 )
103 ld1 {v0.16b}, [x1], #16 /* get next ct block */
105 st1 {v0.16b}, [x0], #16
106 subs w4, w4, #1
128 ld1 {v4.16b}, [x5] /* get iv */
130 mov w8, #14 /* AES-256: 14 rounds */
137 ld1 {v4.16b}, [x5] /* get iv */
143 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
144 eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
146 eor v1.16b, v1.16b, v0.16b
148 eor v2.16b, v2.16b, v1.16b
150 eor v3.16b, v3.16b, v2.16b
152 st1 {v0.16b-v3.16b}, [x0], #64
153 mov v4.16b, v3.16b
159 ld1 {v0.16b}, [x1], #16 /* get next pt block */
160 eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
162 st1 {v4.16b}, [x0], #16
163 subs w4, w4, #1
166 st1 {v4.16b}, [x5] /* return iv */
172 ld1 {cbciv.16b}, [x5] /* get iv */
174 mov w8, #14 /* AES-256: 14 rounds */
180 ld1 {cbciv.16b}, [x5] /* get iv */
188 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
190 ld1 {v4.16b}, [x1], #16 /* get 1 ct block */
191 mov v5.16b, v0.16b
192 mov v6.16b, v1.16b
193 mov v7.16b, v2.16b
196 eor v0.16b, v0.16b, cbciv.16b
197 eor v1.16b, v1.16b, v5.16b
198 ld1 {v5.16b}, [x1], #16 /* reload 1 ct block */
199 ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */
200 eor v2.16b, v2.16b, v6.16b
201 eor v3.16b, v3.16b, v7.16b
202 eor v4.16b, v4.16b, v5.16b
204 mov v4.16b, v0.16b
205 mov v5.16b, v1.16b
206 mov v6.16b, v2.16b
208 sub x1, x1, #16
209 eor v0.16b, v0.16b, cbciv.16b
210 eor v1.16b, v1.16b, v4.16b
211 ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */
212 eor v2.16b, v2.16b, v5.16b
213 eor v3.16b, v3.16b, v6.16b
215 st1 {v0.16b-v3.16b}, [x0], #64
216 ST5( st1 {v4.16b}, [x0], #16 )
222 ld1 {v1.16b}, [x1], #16 /* get next ct block */
223 mov v0.16b, v1.16b /* ...and copy to v0 */
225 eor v0.16b, v0.16b, cbciv.16b /* xor with iv => pt */
226 mov cbciv.16b, v1.16b /* ct is next iv */
227 st1 {v0.16b}, [x0], #16
228 subs w4, w4, #1
231 st1 {cbciv.16b}, [x5] /* return iv */
247 sub x4, x4, #16
251 ld1 {v3.16b}, [x8]
252 ld1 {v4.16b}, [x9]
254 ld1 {v0.16b}, [x1], x4 /* overlapping loads */
255 ld1 {v1.16b}, [x1]
257 ld1 {v5.16b}, [x5] /* get iv */
260 eor v0.16b, v0.16b, v5.16b /* xor with iv */
261 tbl v1.16b, {v1.16b}, v4.16b
264 eor v1.16b, v1.16b, v0.16b
265 tbl v0.16b, {v0.16b}, v3.16b
269 st1 {v0.16b}, [x4] /* overlapping stores */
270 st1 {v1.16b}, [x0]
276 sub x4, x4, #16
280 ld1 {v3.16b}, [x8]
281 ld1 {v4.16b}, [x9]
283 ld1 {v0.16b}, [x1], x4 /* overlapping loads */
284 ld1 {v1.16b}, [x1]
286 ld1 {v5.16b}, [x5] /* get iv */
290 tbl v2.16b, {v0.16b}, v3.16b
291 eor v2.16b, v2.16b, v1.16b
293 tbx v0.16b, {v1.16b}, v4.16b
295 eor v0.16b, v0.16b, v5.16b /* xor with iv */
298 st1 {v2.16b}, [x4] /* overlapping stores */
299 st1 {v0.16b}, [x0]
336 ld1 {vctr.16b}, [IV]
341 * the 64-bit counter with the IV.
347 umov IV_PART, vctr.d[1]
360 * Set up the counter values in v0-v{MAX_STRIDE-1}.
364 * v{MAX_STRIDE-1}. For example: if encrypting two blocks with
372 mov v0.16b, vctr.16b
373 mov v1.16b, vctr.16b
374 mov v2.16b, vctr.16b
375 mov v3.16b, vctr.16b
376 ST5( mov v4.16b, vctr.16b )
378 sub x6, CTR, #MAX_STRIDE - 1
379 sub x7, CTR, #MAX_STRIDE - 2
380 sub x8, CTR, #MAX_STRIDE - 3
381 sub x9, CTR, #MAX_STRIDE - 4
382 ST5( sub x10, CTR, #MAX_STRIDE - 5 )
395 .subsection 1
408 add x8, x8, #1
420 adr x16, 1f
433 1: b 2f
437 ins vctr.d[1], x7
438 sub x7, IV_PART, #MAX_STRIDE - 1
439 sub x8, IV_PART, #MAX_STRIDE - 2
440 sub x9, IV_PART, #MAX_STRIDE - 3
443 mov v1.d[1], x7
445 ST5( sub x10, IV_PART, #MAX_STRIDE - 4 )
446 mov v2.d[1], x8
448 mov v3.d[1], x9
449 ST5( mov v4.d[1], x10 )
457 ld1 {v5.16b-v7.16b}, [IN], #48
460 eor v0.16b, v5.16b, v0.16b
461 ST4( ld1 {v5.16b}, [IN], #16 )
462 eor v1.16b, v6.16b, v1.16b
463 ST5( ld1 {v5.16b-v6.16b}, [IN], #32 )
464 eor v2.16b, v7.16b, v2.16b
465 eor v3.16b, v5.16b, v3.16b
466 ST5( eor v4.16b, v6.16b, v4.16b )
467 st1 {v0.16b-v3.16b}, [OUT], #64
468 ST5( st1 {v4.16b}, [OUT], #16 )
474 st1 {vctr.16b}, [IV] /* return next CTR value */
481 * Handle up to MAX_STRIDE * 16 - 1 bytes of plaintext
483 * This code expects the last keystream block to be in v{MAX_STRIDE-1}.
492 mov x16, #16
496 ST5( cmp BYTES_W, #64 - (MAX_STRIDE << 4))
498 cmp BYTES_W, #48 - (MAX_STRIDE << 4)
500 cmp BYTES_W, #32 - (MAX_STRIDE << 4)
502 cmp BYTES_W, #16 - (MAX_STRIDE << 4)
508 ST5( ld1 {v5.16b}, [IN], x14 )
509 ld1 {v6.16b}, [IN], x15
510 ld1 {v7.16b}, [IN], x16
515 ld1 {v8.16b}, [IN], x13
516 ld1 {v9.16b}, [IN]
517 ld1 {v10.16b}, [x9]
519 ST4( eor v6.16b, v6.16b, v0.16b )
520 ST4( eor v7.16b, v7.16b, v1.16b )
521 ST4( tbl v3.16b, {v3.16b}, v10.16b )
522 ST4( eor v8.16b, v8.16b, v2.16b )
523 ST4( eor v9.16b, v9.16b, v3.16b )
525 ST5( eor v5.16b, v5.16b, v0.16b )
526 ST5( eor v6.16b, v6.16b, v1.16b )
527 ST5( tbl v4.16b, {v4.16b}, v10.16b )
528 ST5( eor v7.16b, v7.16b, v2.16b )
529 ST5( eor v8.16b, v8.16b, v3.16b )
530 ST5( eor v9.16b, v9.16b, v4.16b )
532 ST5( st1 {v5.16b}, [OUT], x14 )
533 st1 {v6.16b}, [OUT], x15
534 st1 {v7.16b}, [OUT], x16
536 st1 {v9.16b}, [x13] // overlapping stores
537 st1 {v8.16b}, [OUT]
542 * Handle <= 16 bytes of plaintext
544 * This code always reads and writes 16 bytes. To avoid out of bounds
546 * encrypting/decrypting less than 16 bytes.
550 * This causes unusual behaviour when encrypting/decrypting less than 16
555 sub x8, x7, #16
559 ld1 {v5.16b}, [IN]
560 ld1 {v6.16b}, [OUT]
561 ST5( mov v3.16b, v4.16b )
563 ld1 {v10.16b-v11.16b}, [x9]
564 tbl v3.16b, {v3.16b}, v10.16b
565 sshr v11.16b, v11.16b, #7
566 eor v5.16b, v5.16b, v3.16b
567 bif v5.16b, v6.16b, v11.16b
568 st1 {v5.16b}, [OUT]
591 * The input and output buffers must always be at least 16 bytes even if
592 * encrypting/decrypting less than 16 bytes. Otherwise out of bounds
594 * to be at the end of this 16-byte temporary buffer rather than the
606 * The input and output buffers must always be at least 16 bytes even if
607 * encrypting/decrypting less than 16 bytes. Otherwise out of bounds
609 * to be at the end of this 16-byte temporary buffer rather than the
614 ctr_encrypt 1
627 and \tmp\().16b, \tmp\().16b, xtsmask.16b
629 ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
630 eor \out\().16b, \out\().16b, \tmp\().16b
642 ld1 {v4.16b}, [x6]
659 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
661 eor v0.16b, v0.16b, v4.16b
663 eor v1.16b, v1.16b, v5.16b
664 eor v2.16b, v2.16b, v6.16b
666 eor v3.16b, v3.16b, v7.16b
668 eor v3.16b, v3.16b, v7.16b
669 eor v0.16b, v0.16b, v4.16b
670 eor v1.16b, v1.16b, v5.16b
671 eor v2.16b, v2.16b, v6.16b
672 st1 {v0.16b-v3.16b}, [x0], #64
673 mov v4.16b, v7.16b
680 subs w4, w4, #16
683 ld1 {v0.16b}, [x1], #16
685 eor v0.16b, v0.16b, v4.16b
687 eor v0.16b, v0.16b, v4.16b
689 subs w4, w4, #16
692 st1 {v0.16b}, [x0], #16
695 st1 {v0.16b}, [x0]
697 st1 {v4.16b}, [x6]
702 mov v0.16b, v3.16b
703 sub x0, x0, #16
708 add w4, w4, #16 /* # bytes in final block */
714 ld1 {v1.16b}, [x1] /* load final block */
715 ld1 {v2.16b}, [x8]
716 ld1 {v3.16b}, [x9]
718 tbl v2.16b, {v0.16b}, v2.16b
719 tbx v0.16b, {v1.16b}, v3.16b
720 st1 {v2.16b}, [x4] /* overlapping stores */
728 /* subtract 16 bytes if we are doing CTS */
733 ld1 {v4.16b}, [x6]
751 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
753 eor v0.16b, v0.16b, v4.16b
755 eor v1.16b, v1.16b, v5.16b
756 eor v2.16b, v2.16b, v6.16b
758 eor v3.16b, v3.16b, v7.16b
760 eor v3.16b, v3.16b, v7.16b
761 eor v0.16b, v0.16b, v4.16b
762 eor v1.16b, v1.16b, v5.16b
763 eor v2.16b, v2.16b, v6.16b
764 st1 {v0.16b-v3.16b}, [x0], #64
765 mov v4.16b, v7.16b
772 subs w4, w4, #16
774 ld1 {v0.16b}, [x1], #16
777 eor v0.16b, v0.16b, v4.16b
779 eor v0.16b, v0.16b, v4.16b
780 st1 {v0.16b}, [x0], #16
782 subs w4, w4, #16
786 st1 {v4.16b}, [x6]
794 add w4, w4, #16 /* # bytes in final block */
802 ld1 {v1.16b}, [x1] /* load final block */
803 ld1 {v2.16b}, [x8]
804 ld1 {v3.16b}, [x9]
806 eor v0.16b, v0.16b, v5.16b
808 eor v0.16b, v0.16b, v5.16b
810 tbl v2.16b, {v0.16b}, v2.16b
811 tbx v0.16b, {v1.16b}, v3.16b
813 st1 {v2.16b}, [x4] /* overlapping stores */
823 ld1 {v0.16b}, [x4] /* get dg */
832 ld1 {v1.16b-v4.16b}, [x0], #64 /* get next pt block */
833 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
835 eor v0.16b, v0.16b, v2.16b
837 eor v0.16b, v0.16b, v3.16b
839 eor v0.16b, v0.16b, v4.16b
844 st1 {v0.16b}, [x4] /* return dg */
851 ld1 {v1.16b}, [x0], #16 /* get next pt block */
852 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
854 subs w3, w3, #1
863 st1 {v0.16b}, [x4] /* return dg */