Lines Matching +full:5 +full:- +full:6
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly using vector/VSX/Scalar
11 # - 26-bit limbs
12 # - Handle multiple 64 byte blocks.
17 # p = 2^130 - 5
25 # 07/22/21 - this revision based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
35 # vs5 = [r1*5,...]
36 # vs6 = [r2*5,...]
37 # vs7 = [r3*5,...]
38 # vs8 = [r4*5,...]
42 # r0, r4*5, r3*5, r2*5, r1*5;
43 # r1, r0, r4*5, r3*5, r2*5;
44 # r2, r1, r0, r4*5, r3*5;
45 # r3, r2, r1, r0, r4*5;
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
95 stdu 1,-752(1)
209 # p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 + a4*r1*5;
210 # p[1] = a0*r1 + a1*r0 + a2*r4*5 + a3*r3*5 + a4*r2*5;
211 # p[2] = a0*r2 + a1*r1 + a2*r0 + a3*r4*5 + a4*r3*5;
212 # p[3] = a0*r3 + a1*r2 + a2*r1 + a3*r0 + a4*r4*5;
221 vmulouw 10, 5, 3
222 vmulouw 11, 6, 2
228 vmulouw 10, 5, 26
229 vmulouw 11, 6, 3
239 vmulouw 10, 5, 27
240 vmulouw 11, 6, 26
248 vmulouw 10, 5, 28
249 vmulouw 11, 6, 27
257 vmulouw 10, 5, 29
258 vmulouw 11, 6, 28
269 vmuleuw 10, 5, 3
270 vmuleuw 11, 6, 2
280 vmuleuw 10, 5, 26
281 vmuleuw 11, 6, 3
291 vmuleuw 10, 5, 27
292 vmuleuw 11, 6, 26
302 vmuleuw 10, 5, 28
303 vmuleuw 11, 6, 27
313 vmuleuw 10, 5, 29
314 vmuleuw 11, 6, 28
334 # vs5 = [r4*5,...]
335 # vs6 = [r3*5,...]
336 # vs7 = [r2*5,...]
337 # vs8 = [r1*5,...]
339 # r0, r4*5, r3*5, r2*5, r1*5;
340 # r1, r0, r4*5, r3*5, r2*5;
341 # r2, r1, r0, r4*5, r3*5;
342 # r3, r2, r1, r0, r4*5;
359 vmr 5, 27
360 vmr 6, 28
386 vmrgow 27, 27, 5
387 vmrgow 28, 28, 6
406 xxlor 5, 32, 32
407 xxlor 6, 33, 33
453 vand 5, 15, 25
456 vaddudm 6, 16, 11
458 vsrd 13, 6, 31
459 vand 6, 6, 25
467 vaddudm 5, 5, 10
486 lvx 25, 0, 10 # v25 - mask
512 # r1 = r1 * 5, r2 = r2 * 5, r3 = r3 * 5, r4 = r4 * 5
513 li 9, 5
529 .align 5
530 cmpdi 5, 64
585 vaddudm 21, 5, 10
586 vaddudm 22, 6, 11
611 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
613 vmrgow 5, 10, 21
614 vmrgow 6, 11, 22
619 addi 5, 5, -64 # len -= 64
623 divdu 31, 5, 9
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r*4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
655 vand 5, 15, 25
658 vaddudm 6, 16, 11
660 vsrd 13, 6, 31
661 vand 6, 6, 25
669 vaddudm 5, 5, 10
716 vaddudm 5, 5, 21
717 vaddudm 6, 6, 22
721 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
723 vmrgow 5, 10, 5
724 vmrgow 6, 11, 6
729 addi 5, 5, -64 # len -= 64
740 xxlor 32, 5, 5
741 xxlor 33, 6, 6
754 vaddudm 5, 15, 10
757 vaddudm 6, 16, 11
774 vaddudm 5, 5, 10
776 vsrd 11, 5, 31
778 vand 5, 5, 25
781 vaddudm 6, 6, 11
783 vsrd 13, 6, 31
784 vand 6, 6, 25
792 vaddudm 5, 5, 10
793 vsrd 10, 5, 31
794 vand 5, 5, 25
795 vaddudm 6, 6, 10
803 vsld 5, 5, 31
804 vor 20, 4, 5
806 vsrd 12, 6, 11
807 vsld 6, 6, 31
808 vsld 6, 6, 31
809 vor 20, 20, 6
857 add 19, 21, 10 # s1: r19 - (r1 >> 2) *5
878 vmsumudm 7, 6, 0, 9 # h0 * r0, h1 * s1
881 vmsumudm 11, 6, 1, 9 # h0 * r1, h1 * r0
907 add 23, 23, 22 # (h2 & 3) * 5
923 # - no highbit if final leftover block (highbit = 0)
926 cmpdi 5, 0
931 stdu 1,-400(1)
966 divdu 31, 5, 30
970 mr 24, 6 # highbit
984 mtvsrdd 32+6, 27, 28 # h0, h1
1039 # h + 5 + (-p)
1040 mr 6, 10
1043 addic. 6, 6, 5
1049 mr 10, 6
1054 ld 6, 0(4)
1056 addc 10, 10, 6
1060 std 10, 0(5)
1061 std 11, 8(5)
1066 .align 5