Lines Matching +full:5 +full:- +full:8
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly uses vector/VSX/scalar code
11 # - 26-bit limbs
12 # - Handles multiple 64-byte blocks.
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Set up r^4, r^3, r^2, r and s3, s2, …
35 # vs5 = [r1*5,...]
36 # vs6 = [r2*5,...]
37 # vs7 = [r3*5,...]
38 # vs8 = [r4*5,...]
42 # r0, r4*5, r3*5, r2*5, r1*5;
43 # r1, r0, r4*5, r3*5, r2*5;
44 # r2, r1, r0, r4*5, r3*5;
45 # r3, r2, r1, r0, r4*5;
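The r*5 ("s") columns in this table come straight from the modulus: since
2^130 = 5 (mod 2^130 - 5), any partial product whose limb weight reaches
2^130 folds back five limbs down, multiplied by 5:

    a_i*2^(26*i) * r_j*2^(26*j) = a_i*r_j * 2^(26*(i+j))
                                = a_i*r_j * 5 * 2^(26*(i+j-5))  (mod p), for i+j >= 5

For example, a4*r1 has weight 2^130, so it contributes a4*(r1*5) to limb 0,
which is why the first row above ends in r1*5.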
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
95 stdu 1,-752(1)
209 # p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 + a4*r1*5;
210 # p[1] = a0*r1 + a1*r0 + a2*r4*5 + a3*r3*5 + a4*r2*5;
211 # p[2] = a0*r2 + a1*r1 + a2*r0 + a3*r4*5 + a4*r3*5;
212 # p[3] = a0*r3 + a1*r2 + a2*r1 + a3*r0 + a4*r4*5;
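A minimal C sketch of these product rows (names are mine, not from this file;
the p[4] row, which this listing's filter omits, follows the same pattern with
no *5 terms):

    #include <stdint.h>

    /* a[] and r[] hold the five 26-bit limbs; p[] receives 64-bit partial sums. */
    static void mul_mod_p(uint64_t p[5], const uint32_t a[5], const uint32_t r[5])
    {
    	uint64_t s1 = (uint64_t)r[1] * 5;	/* wraparound columns */
    	uint64_t s2 = (uint64_t)r[2] * 5;
    	uint64_t s3 = (uint64_t)r[3] * 5;
    	uint64_t s4 = (uint64_t)r[4] * 5;

    	p[0] = (uint64_t)a[0]*r[0] + a[1]*s4 + a[2]*s3 + a[3]*s2 + a[4]*s1;
    	p[1] = (uint64_t)a[0]*r[1] + (uint64_t)a[1]*r[0] + a[2]*s4 + a[3]*s3 + a[4]*s2;
    	p[2] = (uint64_t)a[0]*r[2] + (uint64_t)a[1]*r[1] + (uint64_t)a[2]*r[0] + a[3]*s4 + a[4]*s3;
    	p[3] = (uint64_t)a[0]*r[3] + (uint64_t)a[1]*r[2] + (uint64_t)a[2]*r[1] + (uint64_t)a[3]*r[0] + a[4]*s4;
    	p[4] = (uint64_t)a[0]*r[4] + (uint64_t)a[1]*r[3] + (uint64_t)a[2]*r[2] + (uint64_t)a[3]*r[1] + (uint64_t)a[4]*r[0];
    }

With 26-bit limbs each row is a sum of five products below 2^56, so the 64-bit
accumulators cannot overflow.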
221 vmulouw 10, 5, 3
224 vmulouw 13, 8, 0
228 vmulouw 10, 5, 26
235 vmulouw 13, 8, 1
239 vmulouw 10, 5, 27
244 vmulouw 13, 8, 2
248 vmulouw 10, 5, 28
253 vmulouw 13, 8, 3
257 vmulouw 10, 5, 29
262 vmulouw 13, 8, 26
269 vmuleuw 10, 5, 3
272 vmuleuw 13, 8, 0
280 vmuleuw 10, 5, 26
283 vmuleuw 13, 8, 1
291 vmuleuw 10, 5, 27
294 vmuleuw 13, 8, 2
302 vmuleuw 10, 5, 28
305 vmuleuw 13, 8, 3
313 vmuleuw 10, 5, 29
316 vmuleuw 13, 8, 26
334 # vs5 = [r4*5,...]
335 # vs6 = [r3*5,...]
336 # vs7 = [r2*5,...]
337 # vs8 = [r1*5,...]
339 # r0, r4*5, r3*5, r2*5, r1*5;
340 # r1, r0, r4*5, r3*5, r2*5;
341 # r2, r1, r0, r4*5, r3*5;
342 # r3, r2, r1, r0, r4*5;
359 vmr 5, 27
362 vmr 8, 30
386 vmrgow 27, 27, 5
389 vmrgow 30, 30, 8
406 xxlor 5, 32, 32
409 xxlor 8, 35, 35
452 vand 8, 18, 25
453 vand 5, 15, 25
467 vaddudm 5, 5, 10
468 vaddudm 8, 8, 11
480 ld 12, 8(10)
486 lvx 25, 0, 10 # v25 - 26-bit limb mask (0x3ffffff)
512 # r1 = r1 * 5, r2 = r2 * 5, r3 = r3 * 5, r4 = r4 * 5
513 li 9, 5
529 .align 5
530 cmpdi 5, 64
544 ld 10, 8(3)
559 vor 8, 8, 9
585 vaddudm 21, 5, 10
588 vaddudm 24, 8, 13
611 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
613 vmrgow 5, 10, 21
616 vmrgow 8, 13, 24
617 vaddudm 8, 8, 19
619 addi 5, 5, -64 # len -= 64
623 divdu 31, 5, 9
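Before the vmrgow interleave above, each 16-byte block has been split into
five 26-bit limbs, with the 2^128 pad bit folded into the top limb (the vector
shifts and masks that do this fall outside this filtered listing). A scalar
sketch of that conversion, assuming a little-endian host (names are mine):

    #include <stdint.h>
    #include <string.h>

    /* Split one 16-byte block into five 26-bit limbs; pad is the 2^128 bit. */
    static void block_to_limbs(uint32_t m[5], const uint8_t blk[16], uint32_t pad)
    {
    	uint64_t lo, hi;

    	memcpy(&lo, blk, 8);
    	memcpy(&hi, blk + 8, 8);

    	m[0] = lo & 0x3ffffff;
    	m[1] = (lo >> 26) & 0x3ffffff;
    	m[2] = ((lo >> 52) | (hi << 12)) & 0x3ffffff;
    	m[3] = (hi >> 14) & 0x3ffffff;
    	m[4] = (uint32_t)(hi >> 40) | (pad << 24);	/* 2^128 sits at bit 24 of limb 4 */
    }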
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
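These comment blocks describe ordinary Horner evaluation regrouped four blocks
at a time: folding h into the first block and scaling by descending powers of
r gives the same result as four sequential (h + m) * r steps. A sketch of the
identity over a toy modulus (modmul, addmod, horner4 and the modulus are mine;
the real code works mod 2^130 - 5 on limb vectors):

    #include <stdint.h>

    #define P 1000000007ULL		/* toy prime, stands in for 2^130 - 5 */

    static uint64_t addmod(uint64_t a, uint64_t b) { return (a + b) % P; }
    static uint64_t modmul(uint64_t a, uint64_t b)
    {
    	return (uint64_t)(((unsigned __int128)a * b) % P);
    }

    /* Equals ((((h + m1)*r + m2)*r + m3)*r + m4)*r, four blocks per pass. */
    static uint64_t horner4(uint64_t h, const uint64_t m[4], uint64_t r)
    {
    	uint64_t r2 = modmul(r, r);
    	uint64_t r3 = modmul(r2, r);
    	uint64_t r4 = modmul(r2, r2);

    	h = modmul(addmod(h, m[0]), r4);	/* (h + m1) * r^4 */
    	h = addmod(h, modmul(m[1], r3));	/* + m2 * r^3 */
    	h = addmod(h, modmul(m[2], r2));	/* + m3 * r^2 */
    	h = addmod(h, modmul(m[3], r));		/* + m4 * r   */
    	return h;
    }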
654 vand 8, 18, 25
655 vand 5, 15, 25
669 vaddudm 5, 5, 10
670 vaddudm 8, 8, 11
716 vaddudm 5, 5, 21
719 vaddudm 8, 8, 24
721 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
723 vmrgow 5, 10, 5
726 vmrgow 8, 13, 8
727 vaddudm 8, 8, 19
729 addi 5, 5, -64 # len -= 64
740 xxlor 32, 5, 5
743 xxlor 35, 8, 8
754 vaddudm 5, 15, 10
763 vaddudm 8, 18, 13
772 vaddudm 8, 8, 11
773 vsrd 12, 8, 31
774 vaddudm 5, 5, 10
776 vsrd 11, 5, 31
777 vand 8, 8, 25
778 vand 5, 5, 25
792 vaddudm 5, 5, 10
793 vsrd 10, 5, 31
794 vand 5, 5, 25
796 vaddudm 8, 8, 11
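The vsrd/vand/vaddudm run above is the usual radix-2^26 carry pass; the
overflow out of the top limb re-enters limb 0 multiplied by 5, again because
2^130 = 5 (mod p). A scalar sketch (helper name is mine):

    #include <stdint.h>

    /* One carry pass over five 26-bit limbs held in 64-bit lanes. */
    static void carry_pass(uint64_t h[5])
    {
    	for (int i = 0; i < 4; i++) {
    		h[i + 1] += h[i] >> 26;
    		h[i] &= 0x3ffffff;
    	}
    	h[0] += (h[4] >> 26) * 5;	/* 2^130 wraps around as 5 */
    	h[4] &= 0x3ffffff;
    }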
803 vsld 5, 5, 31
804 vor 20, 4, 5
814 vsld 8, 8, 11
815 vsld 8, 8, 31
816 vor 21, 21, 8
822 std 19, 8(3)
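The vsld/vor sequence repacks the five 26-bit limbs into the 64 + 64 + 2-bit
form that the std instructions write back to the state. In scalar terms
(helper name is mine):

    #include <stdint.h>

    /* Five 26-bit limbs -> w[0] (bits 0..63), w[1] (64..127), w[2] (128..129). */
    static void limbs_to_words(uint64_t w[3], const uint64_t h[5])
    {
    	w[0] = h[0] | (h[1] << 26) | (h[2] << 52);
    	w[1] = (h[2] >> 12) | (h[3] << 14) | (h[4] << 40);
    	w[2] = h[4] >> 24;
    }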
847 ld 12, 8(10)
857 add 19, 21, 10 # s1 (r19) = r1 + (r1 >> 2), i.e. (r1 >> 2) * 5 since r1's low 2 bits are clamped to 0
882 vmsumudm 10, 8, 2, 11 # d1 += h2 * s1
885 vmsumudm 11, 8, 3, 9 # d2 = h2 * r0
907 add 23, 23, 22 # fold (h2 >> 2) * 5 back in; h2 & 3 is kept
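The vmsumudm comments above match the 64+64+2-bit scalar formulation, where
s1 = r1 + (r1 >> 2) lets the weight-2^128 cross terms fold down cheaply. A C
sketch of one h *= r step under those assumptions (function name is mine):

    #include <stdint.h>

    typedef unsigned __int128 u128;

    /* h = {h0, h1, h2(2 bits)}; r = {r0, r1} clamped; s1 = r1 + (r1 >> 2). */
    static void mul_64s(uint64_t h[3], uint64_t r0, uint64_t r1, uint64_t s1)
    {
    	u128 d0 = (u128)h[0] * r0 + (u128)h[1] * s1;
    	u128 d1 = (u128)h[0] * r1 + (u128)h[1] * r0 + (u128)h[2] * s1;
    	uint64_t d2 = h[2] * r0;		/* h2 is tiny, no overflow */
    	uint64_t c;

    	h[0] = (uint64_t)d0;
    	d1 += d0 >> 64;
    	h[1] = (uint64_t)d1;
    	h[2] = (uint64_t)(d1 >> 64) + d2;

    	/* partial reduction: keep h2 & 3, fold (h2 >> 2) * 5 into h0 */
    	c = (h[2] >> 2) + (h[2] & ~(uint64_t)3);
    	h[2] &= 3;
    	h[0] += c;
    	h[1] += (h[0] < c);
    }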
923 # - no highbit if this is the final leftover block (highbit = 0)
926 cmpdi 5, 0
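A sketch of the absorb step this describes: each 16-byte block is added into
the 130-bit accumulator with the pad bit at 2^128, and that bit is simply 0
for the padded final leftover block (names are mine, little-endian host
assumed):

    #include <stdint.h>
    #include <string.h>

    typedef unsigned __int128 u128;

    static void absorb(uint64_t h[3], const uint8_t m[16], uint64_t highbit)
    {
    	uint64_t m0, m1;
    	u128 t;

    	memcpy(&m0, m, 8);
    	memcpy(&m1, m + 8, 8);

    	t = (u128)h[0] + m0;
    	h[0] = (uint64_t)t;
    	t = (u128)h[1] + m1 + (uint64_t)(t >> 64);
    	h[1] = (uint64_t)t;
    	h[2] += (uint64_t)(t >> 64) + highbit;	/* highbit = 1 adds 2^128 */
    }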
931 stdu 1,-400(1)
962 ld 28, 8(3)
966 divdu 31, 5, 30
976 ld 21, 8(11)
985 mtvsrdd 32+8, 29, 22 # h2
994 std 28, 8(3)
1035 ld 11, 8(3)
1039 # h + 5 + (-p)
1042 mr 8, 12
1043 addic. 6, 6, 5
1045 addze 8, 8
1046 srdi 9, 8, 2 # did h + 5 overflow 2^130?
1051 mr 12, 8
1055 ld 7, 8(4)
1060 std 10, 0(5)
1061 std 11, 8(5)
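A branching C sketch of this finalization: compute h + 5, and if bit 2^130
comes out set, keep the reduced value h + 5 - 2^130 = h - p; then add the s
half of the key mod 2^128 and store 16 little-endian bytes (names are mine; a
real implementation does the selection in constant time):

    #include <stdint.h>
    #include <string.h>

    typedef unsigned __int128 u128;

    static void finish(uint8_t mac[16], uint64_t h[3], const uint64_t s[2])
    {
    	u128 t = (u128)h[0] + 5;		/* g = h + 5 */
    	uint64_t g0 = (uint64_t)t;
    	uint64_t g1, g2;

    	t = (u128)h[1] + (uint64_t)(t >> 64);
    	g1 = (uint64_t)t;
    	g2 = h[2] + (uint64_t)(t >> 64);

    	if (g2 >> 2) {				/* h + 5 >= 2^130: use h - p */
    		h[0] = g0;
    		h[1] = g1;
    	}

    	t = (u128)h[0] + s[0];			/* mac = (h + s) mod 2^128 */
    	h[0] = (uint64_t)t;
    	h[1] = h[1] + s[1] + (uint64_t)(t >> 64);

    	memcpy(mac, &h[0], 8);			/* assumes little-endian host */
    	memcpy(mac + 8, &h[1], 8);
    }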
1066 .align 5