Lines Matching +full:10 +full:- +full:14
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2022- IBM Inc. All rights reserved
22 # Hash keys = v3 - v14
29 # v31 - counter 1
32 # vs0 - vs14 for round keys
35 # This implementation uses stitched AES-GCM approach to improve overall performance.
48 # v15 - v18 - input states
49 # vs1 - vs9 - round keys
110 # v15 - v22 - input states
111 # vs1 - vs9 - round keys
248 vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
267 vpmsumd 24, 14, 15 # H4.H * X.H
290 # v15 - v22 - input blocks
306 vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
326 vpmsumd 24, 14, 15 # H4.H * X.H
355 vpmsumd 25, 10, 20 # H3.L * X1.H + H3.H * X1.L
375 vpmsumd 24, 14, 19 # H4.H * X.H
428 stdu 1,-640(1)
431 std 14,112(1)
463 stxv 14, 464(1)
476 lxv 14, 464(1)
511 ld 14,112(1)
528 # load Hash - h^4, h^3, h^2, h
529 li 10, 32
530 lxvd2x 2+32, 10, 8 # H Poli
531 li 10, 48
532 lxvd2x 3+32, 10, 8 # Hl
533 li 10, 64
534 lxvd2x 4+32, 10, 8 # H
535 li 10, 80
536 lxvd2x 5+32, 10, 8 # Hh
538 li 10, 96
539 lxvd2x 6+32, 10, 8 # H^2l
540 li 10, 112
541 lxvd2x 7+32, 10, 8 # H^2
542 li 10, 128
543 lxvd2x 8+32, 10, 8 # H^2h
545 li 10, 144
546 lxvd2x 9+32, 10, 8 # H^3l
547 li 10, 160
548 lxvd2x 10+32, 10, 8 # H^3
549 li 10, 176
550 lxvd2x 11+32, 10, 8 # H^3h
552 li 10, 192
553 lxvd2x 12+32, 10, 8 # H^4l
554 li 10, 208
555 lxvd2x 13+32, 10, 8 # H^4
556 li 10, 224
557 lxvd2x 14+32, 10, 8 # H^4h
564 # r3 - inp
565 # r4 - out
566 # r5 - len
567 # r6 - AES round keys
568 # r7 - iv and other data
569 # r8 - Xi, HPoli, hash keys
581 # initialize ICB: GHASH( IV ), IV - r7
582 lxvb16x 30+32, 0, 7 # load IV - v30
603 lxv 10, 0xa0(6)
605 # load rounds - 10 (128), 12 (192), 14 (256)
611 vxor 15, 30, 29 # IV + round key - add round key 0
613 cmpdi 9, 10
625 lxv 14, 0xe0(6)
626 cmpdi 9, 14
633 mr 14, 3
649 li 10, 128
650 divdu 10, 12, 10 # n 128 bytes-blocks
651 cmpdi 10, 0
669 mtctr 10
679 lwz 10, 240(6)
683 lxvb16x 15, 0, 14 # load block
684 lxvb16x 16, 15, 14 # load block
685 lxvb16x 17, 16, 14 # load block
686 lxvb16x 18, 17, 14 # load block
687 lxvb16x 19, 18, 14 # load block
688 lxvb16x 20, 19, 14 # load block
689 lxvb16x 21, 20, 14 # load block
690 lxvb16x 22, 21, 14 # load block
691 addi 14, 14, 128
695 xxlor 23+32, 10, 10
697 cmpdi 10, 10
723 cmpdi 10, 12
747 xxlor 23+32, 14, 14
749 cmpdi 10, 14
813 addi 12, 12, -128
826 li 10, 16
827 divdu 10, 12, 10
829 mtctr 10
831 lwz 10, 240(6)
837 lxvb16x 15, 0, 14 # load block
841 xxlor 23+32, 10, 10
843 cmpdi 10, 10
854 cmpdi 10, 12
863 xxlor 23+32, 14, 14
865 cmpdi 10, 14
873 addi 14, 14, 16
879 addi 12, 12, -16
894 lwz 10, 240(6)
897 xxlor 23+32, 10, 10
899 cmpdi 10, 10
910 cmpdi 10, 12
919 xxlor 23+32, 14, 14
921 cmpdi 10, 14
940 lxvb16x 15, 0, 14 # load last block
947 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
948 vspltisb 17, 0 # second 16 bytes - 0x0000...00
949 li 10, 192
950 stvx 16, 10, 1
951 addi 10, 10, 16
952 stvx 17, 10, 1
954 addi 10, 1, 192
955 lxvb16x 16, 15, 10 # load partial block mask
976 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
977 vspltisb 17, 0 # second 16 bytes - 0x0000...00
978 li 10, 192
979 stxvb16x 17+32, 10, 1
980 add 10, 10, \_start
981 stxvb16x 16+32, 10, 1
982 add 10, 10, \_end
983 stxvb16x 17+32, 10, 1
985 addi 10, 1, 192
986 lxvb16x \_mask, 0, 10 # load partial block mask
1005 lxvb16x 17+32, 0, 14 # load last block
1037 sub 17, 16, 15 # 16 - partial
1046 li 10, 192
1047 stxvb16x 15+32, 10, 1 # save current block
1049 addi 10, 9, -1
1055 stbu 18, 1(10)
1059 add 14, 14, 17
1071 vxor 15, 30, 29 # IV + round key - add round key 0
1073 std 15, 56(7) # partial done - clear
1083 # r9 - output
1084 # r12 - remaining bytes
1085 # v15 - partial input data
1088 li 10, 192
1089 stxvb16x 15+32, 10, 1 # last block
1091 addi 10, 9, -1
1098 lbzu 14, 1(16)
1099 stbu 14, 1(10)
1122 # initialize ICB: GHASH( IV ), IV - r7
1123 lxvb16x 30+32, 0, 7 # load IV - v30
1144 lxv 10, 0xa0(6)
1146 # load rounds - 10 (128), 12 (192), 14 (256)
1152 vxor 15, 30, 29 # IV + round key - add round key 0
1154 cmpdi 9, 10
1166 lxv 14, 0xe0(6)
1167 cmpdi 9, 14
1174 mr 14, 3
1190 li 10, 128
1191 divdu 10, 12, 10 # n 128 bytes-blocks
1192 cmpdi 10, 0
1210 mtctr 10
1220 lwz 10, 240(6)
1224 lxvb16x 15, 0, 14 # load block
1225 lxvb16x 16, 15, 14 # load block
1226 lxvb16x 17, 16, 14 # load block
1227 lxvb16x 18, 17, 14 # load block
1228 lxvb16x 19, 18, 14 # load block
1229 lxvb16x 20, 19, 14 # load block
1230 lxvb16x 21, 20, 14 # load block
1231 lxvb16x 22, 21, 14 # load block
1232 addi 14, 14, 128
1236 xxlor 23+32, 10, 10
1238 cmpdi 10, 10
1264 cmpdi 10, 12
1288 xxlor 23+32, 14, 14
1290 cmpdi 10, 14
1363 addi 12, 12, -128
1376 li 10, 16
1377 divdu 10, 12, 10
1379 mtctr 10
1381 lwz 10, 240(6)
1387 lxvb16x 15, 0, 14 # load block
1391 xxlor 23+32, 10, 10
1393 cmpdi 10, 10
1404 cmpdi 10, 12
1413 xxlor 23+32, 14, 14
1415 cmpdi 10, 14
1423 addi 14, 14, 16
1430 addi 12, 12, -16
1445 lwz 10, 240(6)
1448 xxlor 23+32, 10, 10
1450 cmpdi 10, 10
1461 cmpdi 10, 12
1470 xxlor 23+32, 14, 14
1472 cmpdi 10, 14
1490 lxvb16x 15, 0, 14 # load last block
1497 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
1498 vspltisb 17, 0 # second 16 bytes - 0x0000...00
1499 li 10, 192
1500 stvx 16, 10, 1
1501 addi 10, 10, 16
1502 stvx 17, 10, 1
1504 addi 10, 1, 192
1505 lxvb16x 16, 15, 10 # load partial block mask