Lines Matching +full:3 +full:- +full:9
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2022- IBM Inc. All rights reserved
14 # X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
22 # Hash keys = v3 - v14
25 # ( H^3.l, H^3, H^3.h)
29 # v31 - counter 1
32 # vs0 - vs14 for round keys
35 # This implementation uses a stitched AES-GCM approach to improve overall performance.
48 # v15 - v18 - input states
49 # vs1 - vs9 - round keys
54 xxlor 21+32, 3, 3
102 xxlor 23+32, 9, 9
110 # v15 - v22 - input states
111 # vs1 - vs9 - round keys
116 xxlor 25+32, 3, 3
196 xxlor 23+32, 9, 9
210 xxlor 21+32, 3, 3
228 xxlor 19+32, 9, 9
239 vpmsumd 24, 9, 16
241 vpmsumd 26, 3, 18
290 # v15 - v22 - input blocks
297 vpmsumd 24, 9, 16
299 vpmsumd 26, 3, 18
344 vpmsumd 24, 9, 20
346 vpmsumd 26, 3, 22
404 vpmsumd 22, 3, 28 # L
428 stdu 1,-640(1)
439 li 9, 256
440 stvx 20, 9, 1
441 addi 9, 9, 16
442 stvx 21, 9, 1
443 addi 9, 9, 16
444 stvx 22, 9, 1
445 addi 9, 9, 16
446 stvx 23, 9, 1
447 addi 9, 9, 16
448 stvx 24, 9, 1
449 addi 9, 9, 16
450 stvx 25, 9, 1
451 addi 9, 9, 16
452 stvx 26, 9, 1
453 addi 9, 9, 16
454 stvx 27, 9, 1
455 addi 9, 9, 16
456 stvx 28, 9, 1
457 addi 9, 9, 16
458 stvx 29, 9, 1
459 addi 9, 9, 16
460 stvx 30, 9, 1
461 addi 9, 9, 16
462 stvx 31, 9, 1
485 li 9, 256
486 lvx 20, 9, 1
487 addi 9, 9, 16
488 lvx 21, 9, 1
489 addi 9, 9, 16
490 lvx 22, 9, 1
491 addi 9, 9, 16
492 lvx 23, 9, 1
493 addi 9, 9, 16
494 lvx 24, 9, 1
495 addi 9, 9, 16
496 lvx 25, 9, 1
497 addi 9, 9, 16
498 lvx 26, 9, 1
499 addi 9, 9, 16
500 lvx 27, 9, 1
501 addi 9, 9, 16
502 lvx 28, 9, 1
503 addi 9, 9, 16
504 lvx 29, 9, 1
505 addi 9, 9, 16
506 lvx 30, 9, 1
507 addi 9, 9, 16
508 lvx 31, 9, 1
528 # load Hash - h^4, h^3, h^2, h
532 lxvd2x 3+32, 10, 8 # Hl
546 lxvd2x 9+32, 10, 8 # H^3l
548 lxvd2x 10+32, 10, 8 # H^3
550 lxvd2x 11+32, 10, 8 # H^3h
564 # r3 - inp
565 # r4 - out
566 # r5 - len
567 # r6 - AES round keys
568 # r7 - iv and other data
569 # r8 - Xi, HPoli, hash keys
581 # initialize ICB: GHASH( IV ), IV - r7
582 lxvb16x 30+32, 0, 7 # load IV - v30
596 lxv 3, 0x30(6)
602 lxv 9, 0x90(6)
605 # load rounds - 10 (128), 12 (192), 14 (256)
606 lwz 9,240(6)
611 vxor 15, 30, 29 # IV + round key - add round key 0
613 cmpdi 9, 10
620 cmpdi 9, 12
626 cmpdi 9, 14
633 mr 14, 3
634 mr 9, 4
650 divdu 10, 12, 10 # n 128 bytes-blocks
761 stxvb16x 47, 0, 9 # store output
763 stxvb16x 48, 15, 9 # store output
769 stxvb16x 49, 16, 9 # store output
771 stxvb16x 50, 17, 9 # store output
777 stxvb16x 51, 18, 9 # store output
779 stxvb16x 52, 19, 9 # store output
785 stxvb16x 53, 20, 9 # store output
787 stxvb16x 54, 21, 9 # store output
789 addi 9, 9, 128
813 addi 12, 12, -128
872 stxvb16x 47, 0, 9 # store output
874 addi 9, 9, 16
879 addi 12, 12, -16
947 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
948 vspltisb 17, 0 # second 16 bytes - 0x0000...00
976 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
977 vspltisb 17, 0 # second 16 bytes - 0x0000...00
1006 sldi 16, 15, 3
1031 sldi 16, 15, 3
1034 #stxvb16x 15+32, 0, 9 # last block
1037 sub 17, 16, 15 # 16 - partial
1049 addi 10, 9, -1
1060 add 9, 9, 17
1071 vxor 15, 30, 29 # IV + round key - add round key 0
1073 std 15, 56(7) # partial done - clear
1083 # r9 - output
1084 # r12 - remaining bytes
1085 # v15 - partial input data
1091 addi 10, 9, -1
1107 add 3, 11, 12 # return count
1122 # initialize ICB: GHASH( IV ), IV - r7
1123 lxvb16x 30+32, 0, 7 # load IV - v30
1137 lxv 3, 0x30(6)
1143 lxv 9, 0x90(6)
1146 # load rounds - 10 (128), 12 (192), 14 (256)
1147 lwz 9,240(6)
1152 vxor 15, 30, 29 # IV + round key - add round key 0
1154 cmpdi 9, 10
1161 cmpdi 9, 12
1167 cmpdi 9, 14
1174 mr 14, 3
1175 mr 9, 4
1191 divdu 10, 12, 10 # n 128 bytes-blocks
1302 stxvb16x 47, 0, 9 # store output
1304 stxvb16x 48, 15, 9 # store output
1310 stxvb16x 49, 16, 9 # store output
1312 stxvb16x 50, 17, 9 # store output
1318 stxvb16x 51, 18, 9 # store output
1320 stxvb16x 52, 19, 9 # store output
1326 stxvb16x 53, 20, 9 # store output
1328 stxvb16x 54, 21, 9 # store output
1330 addi 9, 9, 128
1363 addi 12, 12, -128
1422 stxvb16x 47, 0, 9 # store output
1424 addi 9, 9, 16
1430 addi 12, 12, -16
1497 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
1498 vspltisb 17, 0 # second 16 bytes - 0x0000...00