Lines Matching +full:3 +full:- +full:5
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly using vector/VSX/Scalar
11 # - 26 bits limbs
12 # - Handle multiple 64 byte blocks.
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
28 # setup r^4, r^3, r^2, r vectors
29 # vs [r^1, r^3, r^2, r^4]
35 # vs5 = [r1*5,...]
36 # vs6 = [r2*5,...]
37 # vs7 = [r3*5,...]
38 # vs8 = [r4*5,...]
42 # r0, r4*5, r3*5, r2*5, r1*5;
43 # r1, r0, r4*5, r3*5, r2*5;
44 # r2, r1, r0, r4*5, r3*5;
45 # r3, r2, r1, r0, r4*5;
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
95 stdu 1,-752(1)
209 # p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 + a4*r1*5;
210 # p[1] = a0*r1 + a1*r0 + a2*r4*5 + a3*r3*5 + a4*r2*5;
211 # p[2] = a0*r2 + a1*r1 + a2*r0 + a3*r4*5 + a4*r3*5;
212 # p[3] = a0*r3 + a1*r2 + a2*r1 + a3*r0 + a4*r4*5;
215 # [r^2, r^3, r^1, r^4]
221 vmulouw 10, 5, 3
228 vmulouw 10, 5, 26
229 vmulouw 11, 6, 3
239 vmulouw 10, 5, 27
243 vmulouw 12, 7, 3
248 vmulouw 10, 5, 28
253 vmulouw 13, 8, 3
257 vmulouw 10, 5, 29
269 vmuleuw 10, 5, 3
280 vmuleuw 10, 5, 26
281 vmuleuw 11, 6, 3
291 vmuleuw 10, 5, 27
293 vmuleuw 12, 7, 3
302 vmuleuw 10, 5, 28
305 vmuleuw 13, 8, 3
313 vmuleuw 10, 5, 29
327 # setup r^4, r^3, r^2, r vectors
328 # [r, r^3, r^2, r^4]
334 # vs5 = [r4*5,...]
335 # vs6 = [r3*5,...]
336 # vs7 = [r2*5,...]
337 # vs8 = [r1*5,...]
339 # r0, r4*5, r3*5, r2*5, r1*5;
340 # r1, r0, r4*5, r3*5, r2*5;
341 # r2, r1, r0, r4*5, r3*5;
342 # r3, r2, r1, r0, r4*5;
356 # [r, r^3, r^2, r^4]
359 vmr 5, 27
382 vaddudm 3, 12, 30
384 bl do_mul # r^4 r^3
386 vmrgow 27, 27, 5
398 vaddudm 3, 12, 30
404 xxlor 3, 61, 61
406 xxlor 5, 32, 32
411 vspltw 9, 26, 3
414 vspltw 9, 27, 3
417 vspltw 9, 28, 3
420 vspltw 9, 29, 3
423 vspltw 9, 30, 3
434 vaddudm 3, 12, 30
453 vand 5, 15, 25
467 vaddudm 5, 5, 10
486 lvx 25, 0, 10 # v25 - mask
494 ld 9, 24(3)
495 ld 10, 32(3)
512 # r1 = r1 * 5, r2 = r2 * 5, r3 = r3 * 5, r4 = r4 * 5
513 li 9, 5
518 vmulouw 3, 30, 4 # v3 = rr3
529 .align 5
530 cmpdi 5, 64
543 ld 9, 0(3)
544 ld 10, 8(3)
545 ld 19, 16(3)
585 vaddudm 21, 5, 10
611 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
613 vmrgow 5, 10, 21
619 addi 5, 5, -64 # len -= 64
623 divdu 31, 5, 9
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
655 vand 5, 15, 25
669 vaddudm 5, 5, 10
716 vaddudm 5, 5, 21
721 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
723 vmrgow 5, 10, 5
729 addi 5, 5, -64 # len -= 64
738 xxlor 61, 3, 3
740 xxlor 32, 5, 5
753 xxpermdi 36, 31, 36, 3
754 vaddudm 5, 15, 10
755 xxpermdi 37, 31, 37, 3
758 xxpermdi 38, 31, 38, 3
761 xxpermdi 39, 31, 39, 3
764 xxpermdi 40, 31, 40, 3
774 vaddudm 5, 5, 10
776 vsrd 11, 5, 31
778 vand 5, 5, 25
792 vaddudm 5, 5, 10
793 vsrd 10, 5, 31
794 vand 5, 5, 25
803 vsld 5, 5, 31
804 vor 20, 4, 5
821 std 17, 0(3)
822 std 19, 8(3)
823 stw 16, 16(3)
826 li 3, 0
833 li 3, 0
851 ld 9, 24(3)
852 ld 10, 32(3)
857 add 19, 21, 10 # s1: r19 - (r1 >> 2) *5
864 mtvsrdd 32+3, 9, 25 # r0
885 vmsumudm 11, 8, 3, 9 # d2 = h2 * r0
907 add 23, 23, 22 # (h2 & 3) * 5
923 # - no highbit if final leftover block (highbit = 0)
926 cmpdi 5, 0
931 stdu 1,-400(1)
961 ld 27, 0(3)
962 ld 28, 8(3)
963 lwz 29, 16(3)
966 divdu 31, 5, 30
993 std 27, 0(3)
994 std 28, 8(3)
995 stw 29, 16(3)
997 li 3, 0
1025 li 3, 0
1034 ld 10, 0(3)
1035 ld 11, 8(3)
1036 ld 12, 16(3)
1039 # h + 5 + (-p)
1043 addic. 6, 6, 5
1060 std 10, 0(5)
1061 std 11, 8(5)
1066 .align 5