Lines Matching +full:1 +full:- +full:16
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2022- IBM Inc. All rights reserved
22 # Hash keys = v3 - v14
29 # v31 - counter 1
32 # vs0 - vs14 for round keys
35 # This implementation uses a stitched AES-GCM approach to improve overall performance.
48 # v15 - v18 - input states
49 # vs1 - vs9 - round keys
52 xxlor 19+32, 1, 1
58 vcipher 16, 16, 19
63 vcipher 16, 16, 20
68 vcipher 16, 16, 21
73 vcipher 16, 16, 22
83 vcipher 16, 16, 19
88 vcipher 16, 16, 20
93 vcipher 16, 16, 21
98 vcipher 16, 16, 22
104 vcipher 16, 16, 23
110 # v15 - v22 - input states
111 # vs1 - vs9 - round keys
114 xxlor 23+32, 1, 1
120 vcipher 16, 16, 23
129 vcipher 16, 16, 24
138 vcipher 16, 16, 25
147 vcipher 16, 16, 26
161 vcipher 16, 16, 23
170 vcipher 16, 16, 24
179 vcipher 16, 16, 25
188 vcipher 16, 16, 26
198 vcipher 16, 16, 23
208 xxlor 19+32, 1, 1
239 vpmsumd 24, 9, 16
248 vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
268 vpmsumd 25, 11, 16
290 # v15 - v22 - input blocks
297 vpmsumd 24, 9, 16
306 vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
327 vpmsumd 25, 11, 16
341 vxor 27, 23, 27 # 1st Xi
428 stdu 1,-640(1)
431 std 14,112(1)
432 std 15,120(1)
433 std 16,128(1)
434 std 17,136(1)
435 std 18,144(1)
436 std 19,152(1)
437 std 20,160(1)
438 std 21,168(1)
440 stvx 20, 9, 1
441 addi 9, 9, 16
442 stvx 21, 9, 1
443 addi 9, 9, 16
444 stvx 22, 9, 1
445 addi 9, 9, 16
446 stvx 23, 9, 1
447 addi 9, 9, 16
448 stvx 24, 9, 1
449 addi 9, 9, 16
450 stvx 25, 9, 1
451 addi 9, 9, 16
452 stvx 26, 9, 1
453 addi 9, 9, 16
454 stvx 27, 9, 1
455 addi 9, 9, 16
456 stvx 28, 9, 1
457 addi 9, 9, 16
458 stvx 29, 9, 1
459 addi 9, 9, 16
460 stvx 30, 9, 1
461 addi 9, 9, 16
462 stvx 31, 9, 1
463 stxv 14, 464(1)
464 stxv 15, 480(1)
465 stxv 16, 496(1)
466 stxv 17, 512(1)
467 stxv 18, 528(1)
468 stxv 19, 544(1)
469 stxv 20, 560(1)
470 stxv 21, 576(1)
471 stxv 22, 592(1)
472 std 0, 656(1)
476 lxv 14, 464(1)
477 lxv 15, 480(1)
478 lxv 16, 496(1)
479 lxv 17, 512(1)
480 lxv 18, 528(1)
481 lxv 19, 544(1)
482 lxv 20, 560(1)
483 lxv 21, 576(1)
484 lxv 22, 592(1)
486 lvx 20, 9, 1
487 addi 9, 9, 16
488 lvx 21, 9, 1
489 addi 9, 9, 16
490 lvx 22, 9, 1
491 addi 9, 9, 16
492 lvx 23, 9, 1
493 addi 9, 9, 16
494 lvx 24, 9, 1
495 addi 9, 9, 16
496 lvx 25, 9, 1
497 addi 9, 9, 16
498 lvx 26, 9, 1
499 addi 9, 9, 16
500 lvx 27, 9, 1
501 addi 9, 9, 16
502 lvx 28, 9, 1
503 addi 9, 9, 16
504 lvx 29, 9, 1
505 addi 9, 9, 16
506 lvx 30, 9, 1
507 addi 9, 9, 16
508 lvx 31, 9, 1
510 ld 0, 656(1)
511 ld 14,112(1)
512 ld 15,120(1)
513 ld 16,128(1)
514 ld 17,136(1)
515 ld 18,144(1)
516 ld 19,152(1)
517 ld 20,160(1)
518 ld 21,168(1)
521 addi 1, 1, 640
528 # load Hash - h^4, h^3, h^2, h
562 # const char *rk, unsigned char iv[16], void *Xip);
564 # r3 - inp
565 # r4 - out
566 # r5 - len
567 # r6 - AES round keys
568 # r7 - iv and other data
569 # r8 - Xi, HPoli, hash keys
581 # initialize ICB: GHASH( IV ), IV - r7
582 lxvb16x 30+32, 0, 7 # load IV - v30
587 # counter 1
589 vspltisb 22, 1
590 vsldoi 31, 31, 22,1 # counter 1
594 lxv 1, 0x10(6)
605 # load rounds - 10 (128), 12 (192), 14 (256)
611 vxor 15, 30, 29 # IV + round key - add round key 0
644 cmpdi 15, 16
650 divdu 10, 12, 10 # n 128 bytes-blocks
655 vxor 16, 30, 29
671 li 15, 16
672 li 16, 32
684 lxvb16x 16, 15, 14 # load block
685 lxvb16x 17, 16, 14 # load block
704 vcipher 16, 16, 23
713 vcipher 16, 16, 24
730 vcipher 16, 16, 23
739 vcipher 16, 16, 24
758 vcipherlast 16, 16, 23
762 xxlxor 48, 48, 16
769 stxvb16x 49, 16, 9 # store output
799 vxor 16, 30, 27
813 addi 12, 12, -128
826 li 10, 16
833 cmpdi 12, 16
873 addi 14, 14, 16
874 addi 9, 9, 16
879 addi 12, 12, -16
880 addi 11, 11, 16
944 li 15, 16
947 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
948 vspltisb 17, 0 # second 16 bytes - 0x0000...00
950 stvx 16, 10, 1
951 addi 10, 10, 16
952 stvx 17, 10, 1
954 addi 10, 1, 192
955 lxvb16x 16, 15, 10 # load partial block mask
956 xxland 47, 47, 16
966 li 16, 16
969 stxvb16x 32, 16, 8 # write out Xi
976 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
977 vspltisb 17, 0 # second 16 bytes - 0x0000...00
979 stxvb16x 17+32, 10, 1
981 stxvb16x 16+32, 10, 1
983 stxvb16x 17+32, 10, 1
985 addi 10, 1, 192
995 cmpdi 17, 16
1001 li 16, 16
1002 GEN_MASK 18, 15, 16
1006 sldi 16, 15, 3
1007 mtvsrdd 32+16, 0, 16
1008 vsro 17, 17, 16
1023 li 16, 16
1024 lxvb16x 32+29, 16, 8
1027 stxvb16x 32, 16, 8 # save Xi
1031 sldi 16, 15, 3
1032 mtvsrdd 32+16, 0, 16
1033 vslo 15, 15, 16
1036 li 16, 16
1037 sub 17, 16, 15 # 16 - partial
1039 add 16, 15, 5
1040 cmpdi 16, 16
1047 stxvb16x 15+32, 10, 1 # save current block
1049 addi 10, 9, -1
1050 addi 16, 1, 191
1054 lbzu 18, 1(16)
1055 stbu 18, 1(10)
1065 cmpdi 15, 16
1071 vxor 15, 30, 29 # IV + round key - add round key 0
1073 std 15, 56(7) # partial done - clear
1083 # r9 - output
1084 # r12 - remaining bytes
1085 # v15 - partial input data
1089 stxvb16x 15+32, 10, 1 # last block
1091 addi 10, 9, -1
1092 addi 16, 1, 191
1098 lbzu 14, 1(16)
1099 stbu 14, 1(10)
1122 # initialize ICB: GHASH( IV ), IV - r7
1123 lxvb16x 30+32, 0, 7 # load IV - v30
1128 # counter 1
1130 vspltisb 22, 1
1131 vsldoi 31, 31, 22,1 # counter 1
1135 lxv 1, 0x10(6)
1146 # load rounds - 10 (128), 12 (192), 14 (256)
1152 vxor 15, 30, 29 # IV + round key - add round key 0
1185 cmpdi 15, 16
1191 divdu 10, 12, 10 # n 128 bytes-blocks
1196 vxor 16, 30, 29
1212 li 15, 16
1213 li 16, 32
1225 lxvb16x 16, 15, 14 # load block
1226 lxvb16x 17, 16, 14 # load block
1245 vcipher 16, 16, 23
1254 vcipher 16, 16, 24
1271 vcipher 16, 16, 23
1280 vcipher 16, 16, 24
1299 vcipherlast 16, 16, 23
1303 xxlxor 48, 48, 16
1310 stxvb16x 49, 16, 9 # store output
1333 xxlor 16+32, 16, 16
1349 vxor 16, 30, 27
1363 addi 12, 12, -128
1376 li 10, 16
1383 cmpdi 12, 16
1423 addi 14, 14, 16
1424 addi 9, 9, 16
1430 addi 12, 12, -16
1431 addi 11, 11, 16
1479 li 21, 1 # decrypt
1494 li 15, 16
1497 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
1498 vspltisb 17, 0 # second 16 bytes - 0x0000...00
1500 stvx 16, 10, 1
1501 addi 10, 10, 16
1502 stvx 17, 10, 1
1504 addi 10, 1, 192
1505 lxvb16x 16, 15, 10 # load partial block mask
1506 xxland 47, 47, 16
1508 xxland 32+28, 15, 16
1517 li 16, 16
1520 stxvb16x 32, 16, 8 # write out Xi