Lines Matching +full:8 +full:- +full:12
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly using vector/VSX/Scalar
11 # - 26 bits limbs
12 # - Handle multiple 64 byte blocks.
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
95 stdu 1,-752(1)
223 vmulouw 12, 7, 1
224 vmulouw 13, 8, 0
230 vaddudm 14, 14, 12
234 vmulouw 12, 7, 2
235 vmulouw 13, 8, 1
236 vaddudm 15, 15, 12
243 vmulouw 12, 7, 3
244 vmulouw 13, 8, 2
245 vaddudm 16, 16, 12
252 vmulouw 12, 7, 26
253 vmulouw 13, 8, 3
254 vaddudm 17, 17, 12
261 vmulouw 12, 7, 27
262 vmulouw 13, 8, 26
263 vaddudm 18, 18, 12
271 vmuleuw 12, 7, 1
272 vmuleuw 13, 8, 0
276 vaddudm 14, 14, 12
282 vmuleuw 12, 7, 2
283 vmuleuw 13, 8, 1
287 vaddudm 15, 15, 12
293 vmuleuw 12, 7, 3
294 vmuleuw 13, 8, 2
298 vaddudm 16, 16, 12
304 vmuleuw 12, 7, 26
305 vmuleuw 13, 8, 3
309 vaddudm 17, 17, 12
315 vmuleuw 12, 7, 27
316 vmuleuw 13, 8, 26
320 vaddudm 18, 18, 12
362 vmr 8, 30
378 vsld 12, 30, 13
382 vaddudm 3, 12, 30
389 vmrgow 30, 30, 8
394 vsld 12, 30, 13
398 vaddudm 3, 12, 30
409 xxlor 8, 35, 35
430 vsld 12, 30, 13
434 vaddudm 3, 12, 30
448 vsrd 12, 18, 31
452 vand 8, 18, 25
454 vaddudm 4, 4, 12
455 vsld 10, 12, 9
468 vaddudm 8, 8, 11
480 ld 12, 8(10)
486 lvx 25, 0, 10 # v25 - mask
497 and. 10, 10, 12
501 extrdi 15, 9, 26, 12
502 extrdi 16, 9, 12, 0
544 ld 10, 8(3)
549 extrdi 15, 9, 26, 12
550 extrdi 16, 9, 12, 0
559 vor 8, 8, 9
568 vperm 14, 11, 12, 17
569 vperm 15, 11, 12, 18
572 vsrd 11, 10, 31 # 12 bits left
574 vspltisb 13, 12
576 vsld 12, 16, 13
577 vor 11, 11, 12
580 vsrd 12, 15, 13 # >> 14
581 vsrd 13, 12, 31 # >> 26, a4
582 vand 12, 12, 25 # a3
587 vaddudm 23, 7, 12
588 vaddudm 24, 8, 13
595 vperm 14, 11, 12, 17
596 vperm 15, 11, 12, 18
599 vsrd 11, 10, 31 # 12 bits left
601 vspltisb 13, 12
603 vsld 12, 16, 13
605 vor 11, 11, 12
607 vsrd 12, 15, 13 # >> 14
608 vsrd 13, 12, 31 # >> 26, a4
609 vand 12, 12, 25 # a3
615 vmrgow 7, 12, 23
616 vmrgow 8, 13, 24
617 vaddudm 8, 8, 19
619 addi 5, 5, -64 # len -= 64
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
650 vsrd 12, 18, 31
654 vand 8, 18, 25
656 vaddudm 4, 4, 12
657 vsld 10, 12, 9
670 vaddudm 8, 8, 11
679 vperm 14, 11, 12, 17
680 vperm 15, 11, 12, 18
685 vperm 17, 11, 12, 17
686 vperm 18, 11, 12, 18
691 vsrd 22, 21, 31 # 12 bits left
693 vsrd 11, 10, 31 # 12 bits left
698 vspltisb 13, 12
704 vsld 12, 16, 13
705 vor 11, 11, 12
711 vsrd 12, 18, 13 # >> 14
712 vsrd 13, 12, 31 # >> 26, a4
713 vand 12, 12, 25 # a3
719 vaddudm 8, 8, 24
725 vmrgow 7, 12, 7
726 vmrgow 8, 13, 8
727 vaddudm 8, 8, 19
729 addi 5, 5, -64 # len -= 64
743 xxlor 35, 8, 8
760 vaddudm 7, 17, 12
763 vaddudm 8, 18, 13
772 vaddudm 8, 8, 11
773 vsrd 12, 8, 31
777 vand 8, 8, 25
779 vaddudm 4, 4, 12
780 vsld 10, 12, 9
796 vaddudm 8, 8, 11
805 vspltisb 11, 12
806 vsrd 12, 6, 11
812 vor 21, 7, 12
814 vsld 8, 8, 11
815 vsld 8, 8, 31
816 vor 21, 21, 8
822 std 19, 8(3)
847 ld 12, 8(10)
854 and. 10, 10, 12 # cramp mask r1
857 add 19, 21, 10 # s1: r19 - (r1 >> 2) *5
882 vmsumudm 10, 8, 2, 11 # d1 += h2 * s1
885 vmsumudm 11, 8, 3, 9 # d2 = h2 * r0
923 # - no highbit if final leftover block (highbit = 0)
931 stdu 1,-400(1)
962 ld 28, 8(3)
976 ld 21, 8(11)
985 mtvsrdd 32+8, 29, 22 # h2
994 std 28, 8(3)
1035 ld 11, 8(3)
1036 ld 12, 16(3)
1039 # h + 5 + (-p)
1042 mr 8, 12
1045 addze 8, 8
1046 srdi 9, 8, 2 # overflow?
1051 mr 12, 8
1055 ld 7, 8(4)
1058 addze 12, 12
1061 std 11, 8(5)