Lines Matching +full:8 +full:- +full:9
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2022- IBM Inc. All rights reserved
22 # Hash keys = v3 - v14
29 # v31 - counter 1
32 # vs0 - vs14 for round keys
33 # v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
35 # This implementation uses a stitched AES-GCM approach to improve overall performance.
36 # AES is implemented with 8x blocks and GHASH uses two 4x blocks.
48 # v15 - v18 - input states
49 # vs1 - vs9 - round keys
80 xxlor 22+32, 8, 8
102 xxlor 23+32, 9, 9
109 # 8x loops
110 # v15 - v22 - input states
111 # vs1 - vs9 - round keys
158 xxlor 26+32, 8, 8
196 xxlor 23+32, 9, 9
221 xxlor 22+32, 8, 8
228 xxlor 19+32, 9, 9
239 vpmsumd 24, 9, 16
260 vsldoi 26, 24, 29, 8 # mL
261 vsldoi 29, 29, 24, 8 # mH
264 vsldoi 23, 23, 23, 8 # swap
269 vpmsumd 26, 8, 17
279 vsldoi 27, 23, 23, 8 # swap
290 # v15 - v22 - input blocks
297 vpmsumd 24, 9, 16
319 vsldoi 26, 24, 29, 8 # mL
320 vsldoi 29, 29, 24, 8 # mH
323 vsldoi 23, 23, 23, 8 # swap
328 vpmsumd 26, 8, 17
338 vsldoi 27, 23, 23, 8 # swap
344 vpmsumd 24, 9, 20
368 vsldoi 26, 24, 29, 8 # mL
369 vsldoi 29, 29, 24, 8 # mH
372 vsldoi 23, 23, 23, 8 # swap
377 vpmsumd 26, 8, 21
387 vsldoi 27, 23, 23, 8 # swap
410 vsldoi 25, 23, 19, 8 # mL
411 vsldoi 26, 19, 23, 8 # mH
415 vsldoi 22, 22, 22, 8 # swap
418 vsldoi 20, 22, 22, 8 # swap
428 stdu 1,-640(1)
439 li 9, 256
440 stvx 20, 9, 1
441 addi 9, 9, 16
442 stvx 21, 9, 1
443 addi 9, 9, 16
444 stvx 22, 9, 1
445 addi 9, 9, 16
446 stvx 23, 9, 1
447 addi 9, 9, 16
448 stvx 24, 9, 1
449 addi 9, 9, 16
450 stvx 25, 9, 1
451 addi 9, 9, 16
452 stvx 26, 9, 1
453 addi 9, 9, 16
454 stvx 27, 9, 1
455 addi 9, 9, 16
456 stvx 28, 9, 1
457 addi 9, 9, 16
458 stvx 29, 9, 1
459 addi 9, 9, 16
460 stvx 30, 9, 1
461 addi 9, 9, 16
462 stvx 31, 9, 1
485 li 9, 256
486 lvx 20, 9, 1
487 addi 9, 9, 16
488 lvx 21, 9, 1
489 addi 9, 9, 16
490 lvx 22, 9, 1
491 addi 9, 9, 16
492 lvx 23, 9, 1
493 addi 9, 9, 16
494 lvx 24, 9, 1
495 addi 9, 9, 16
496 lvx 25, 9, 1
497 addi 9, 9, 16
498 lvx 26, 9, 1
499 addi 9, 9, 16
500 lvx 27, 9, 1
501 addi 9, 9, 16
502 lvx 28, 9, 1
503 addi 9, 9, 16
504 lvx 29, 9, 1
505 addi 9, 9, 16
506 lvx 30, 9, 1
507 addi 9, 9, 16
508 lvx 31, 9, 1
526 lxvb16x 32, 0, 8 # load Xi
528 # load Hash - h^4, h^3, h^2, h
530 lxvd2x 2+32, 10, 8 # H Poli
532 lxvd2x 3+32, 10, 8 # Hl
534 lxvd2x 4+32, 10, 8 # H
536 lxvd2x 5+32, 10, 8 # Hh
539 lxvd2x 6+32, 10, 8 # H^2l
541 lxvd2x 7+32, 10, 8 # H^2
543 lxvd2x 8+32, 10, 8 # H^2h
546 lxvd2x 9+32, 10, 8 # H^3l
548 lxvd2x 10+32, 10, 8 # H^3
550 lxvd2x 11+32, 10, 8 # H^3h
553 lxvd2x 12+32, 10, 8 # H^4l
555 lxvd2x 13+32, 10, 8 # H^4
557 lxvd2x 14+32, 10, 8 # H^4h
564 # r3 - inp
565 # r4 - out
566 # r5 - len
567 # r6 - AES round keys
568 # r7 - iv and other data
569 # r8 - Xi, HPoli, hash keys
581 # initialize ICB: GHASH( IV ), IV - r7
582 lxvb16x 30+32, 0, 7 # load IV - v30
601 lxv 8, 0x80(6)
602 lxv 9, 0x90(6)
605 # load rounds - 10 (128), 12 (192), 14 (256)
606 lwz 9,240(6)
611 vxor 15, 30, 29 # IV + round key - add round key 0
613 cmpdi 9, 10
620 cmpdi 9, 12
626 cmpdi 9, 14
634 mr 9, 4
650 divdu 10, 12, 10 # n = number of 128-byte blocks
761 stxvb16x 47, 0, 9 # store output
763 stxvb16x 48, 15, 9 # store output
769 stxvb16x 49, 16, 9 # store output
771 stxvb16x 50, 17, 9 # store output
777 stxvb16x 51, 18, 9 # store output
779 stxvb16x 52, 19, 9 # store output
785 stxvb16x 53, 20, 9 # store output
787 stxvb16x 54, 21, 9 # store output
789 addi 9, 9, 128
813 addi 12, 12, -128
872 stxvb16x 47, 0, 9 # store output
874 addi 9, 9, 16
879 addi 12, 12, -16
947 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
948 vspltisb 17, 0 # second 16 bytes - 0x0000...00
968 stxvb16x 32, 0, 8 # write out Xi
969 stxvb16x 32, 16, 8 # write out Xi
976 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
977 vspltisb 17, 0 # second 16 bytes - 0x0000...00
1024 lxvb16x 32+29, 16, 8
1026 stxvb16x 32, 0, 8 # save Xi
1027 stxvb16x 32, 16, 8 # save Xi
1034 #stxvb16x 15+32, 0, 9 # last block
1037 sub 17, 16, 15 # 16 - partial
1049 addi 10, 9, -1
1060 add 9, 9, 17
1071 vxor 15, 30, 29 # IV + round key - add round key 0
1073 std 15, 56(7) # partial done - clear
1083 # r9 - output
1084 # r12 - remaining bytes
1085 # v15 - partial input data
1091 addi 10, 9, -1
1106 stxvb16x 32, 0, 8 # write out Xi
1113 # 8x Decrypt
1122 # initialize ICB: GHASH( IV ), IV - r7
1123 lxvb16x 30+32, 0, 7 # load IV - v30
1142 lxv 8, 0x80(6)
1143 lxv 9, 0x90(6)
1146 # load rounds - 10 (128), 12 (192), 14 (256)
1147 lwz 9,240(6)
1152 vxor 15, 30, 29 # IV + round key - add round key 0
1154 cmpdi 9, 10
1161 cmpdi 9, 12
1167 cmpdi 9, 14
1175 mr 9, 4
1191 divdu 10, 12, 10 # n = number of 128-byte blocks
1302 stxvb16x 47, 0, 9 # store output
1304 stxvb16x 48, 15, 9 # store output
1310 stxvb16x 49, 16, 9 # store output
1312 stxvb16x 50, 17, 9 # store output
1318 stxvb16x 51, 18, 9 # store output
1320 stxvb16x 52, 19, 9 # store output
1326 stxvb16x 53, 20, 9 # store output
1328 stxvb16x 54, 21, 9 # store output
1330 addi 9, 9, 128
1363 addi 12, 12, -128
1422 stxvb16x 47, 0, 9 # store output
1424 addi 9, 9, 16
1430 addi 12, 12, -16
1497 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
1498 vspltisb 17, 0 # second 16 bytes - 0x0000...00
1519 stxvb16x 32, 0, 8 # write out Xi
1520 stxvb16x 32, 16, 8 # write out Xi