Lines Matching +full:1 +full:- +full:5

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly using vector/VSX/Scalar
11 # - 26 bits limbs
12 # - Handle multiple 64 byte block.
17 # p = 2^130 - 5
25 # 07/22/21 - this revision based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
29 # vs [r^1, r^3, r^2, r^4]
35 # vs5 = [r1*5,...]
36 # vs6 = [r2*5,...]
37 # vs7 = [r3*5,...]
38 # vs8 = [r4*5,...]
42 # r0, r4*5, r3*5, r2*5, r1*5;
43 # r1, r0, r4*5, r3*5, r2*5;
44 # r2, r1, r0, r4*5, r3*5;
45 # r3, r2, r1, r0, r4*5;
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
94 std 0, 16(1)
95 stdu 1,-752(1)
97 SAVE_GPR 14, 112, 1
98 SAVE_GPR 15, 120, 1
99 SAVE_GPR 16, 128, 1
100 SAVE_GPR 17, 136, 1
101 SAVE_GPR 18, 144, 1
102 SAVE_GPR 19, 152, 1
103 SAVE_GPR 20, 160, 1
104 SAVE_GPR 21, 168, 1
105 SAVE_GPR 22, 176, 1
106 SAVE_GPR 23, 184, 1
107 SAVE_GPR 24, 192, 1
108 SAVE_GPR 25, 200, 1
109 SAVE_GPR 26, 208, 1
110 SAVE_GPR 27, 216, 1
111 SAVE_GPR 28, 224, 1
112 SAVE_GPR 29, 232, 1
113 SAVE_GPR 30, 240, 1
114 SAVE_GPR 31, 248, 1
116 addi 9, 1, 256
151 addi 9, 1, 256
184 RESTORE_GPR 14, 112, 1
185 RESTORE_GPR 15, 120, 1
186 RESTORE_GPR 16, 128, 1
187 RESTORE_GPR 17, 136, 1
188 RESTORE_GPR 18, 144, 1
189 RESTORE_GPR 19, 152, 1
190 RESTORE_GPR 20, 160, 1
191 RESTORE_GPR 21, 168, 1
192 RESTORE_GPR 22, 176, 1
193 RESTORE_GPR 23, 184, 1
194 RESTORE_GPR 24, 192, 1
195 RESTORE_GPR 25, 200, 1
196 RESTORE_GPR 26, 208, 1
197 RESTORE_GPR 27, 216, 1
198 RESTORE_GPR 28, 224, 1
199 RESTORE_GPR 29, 232, 1
200 RESTORE_GPR 30, 240, 1
201 RESTORE_GPR 31, 248, 1
203 addi 1, 1, 752
204 ld 0, 16(1)
209 # p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 + a4*r1*5;
210 # p[1] = a0*r1 + a1*r0 + a2*r4*5 + a3*r3*5 + a4*r2*5;
211 # p[2] = a0*r2 + a1*r1 + a2*r0 + a3*r4*5 + a4*r3*5;
212 # p[3] = a0*r3 + a1*r2 + a2*r1 + a3*r0 + a4*r4*5;
215 # [r^2, r^3, r^1, r^4]
221 vmulouw 10, 5, 3
223 vmulouw 12, 7, 1
228 vmulouw 10, 5, 26
235 vmulouw 13, 8, 1
239 vmulouw 10, 5, 27
248 vmulouw 10, 5, 28
257 vmulouw 10, 5, 29
269 vmuleuw 10, 5, 3
271 vmuleuw 12, 7, 1
280 vmuleuw 10, 5, 26
283 vmuleuw 13, 8, 1
291 vmuleuw 10, 5, 27
302 vmuleuw 10, 5, 28
313 vmuleuw 10, 5, 29
334 # vs5 = [r4*5,...]
335 # vs6 = [r3*5,...]
336 # vs7 = [r2*5,...]
337 # vs8 = [r1*5,...]
339 # r0, r4*5, r3*5, r2*5, r1*5;
340 # r1, r0, r4*5, r3*5, r2*5;
341 # r2, r1, r0, r4*5, r3*5;
342 # r3, r2, r1, r0, r4*5;
359 vmr 5, 27
363 bl do_mul # r^2 r^1
380 vaddudm 1, 10, 28
386 vmrgow 27, 27, 5
396 vaddudm 1, 10, 28
402 xxlor 1, 59, 59
406 xxlor 5, 32, 32
432 vaddudm 1, 10, 28
453 vand 5, 15, 25
467 vaddudm 5, 5, 10
486 lvx 25, 0, 10 # v25 - mask
487 lvx 31, 14, 10 # v31 = 1a
488 lvx 19, 15, 10 # v19 = 1 << 24
512 # r1 = r1 * 5, r2 = r2 * 5, r3 = r3 * 5, r4 = r4 * 5
513 li 9, 5
516 vmulouw 1, 28, 4 # v1 = rr1
529 .align 5
530 cmpdi 5, 64
585 vaddudm 21, 5, 10
611 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
613 vmrgow 5, 10, 21
619 addi 5, 5, -64 # len -= 64
623 divdu 31, 5, 9
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r*4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
655 vand 5, 15, 25
669 vaddudm 5, 5, 10
716 vaddudm 5, 5, 21
721 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
723 vmrgow 5, 10, 5
729 addi 5, 5, -64 # len -= 64
736 xxlor 59, 1, 1
740 xxlor 32, 5, 5
754 vaddudm 5, 15, 10
774 vaddudm 5, 5, 10
776 vsrd 11, 5, 31
778 vand 5, 5, 25
792 vaddudm 5, 5, 10
793 vsrd 10, 5, 31
794 vand 5, 5, 25
803 vsld 5, 5, 31
804 vor 20, 4, 5
857 add 19, 21, 10 # s1: r19 - (r1 >> 2) *5
862 mtvsrdd 32+1, 10, 9 # r1, r0
881 vmsumudm 11, 6, 1, 9 # h0 * r1, h1 * r0
907 add 23, 23, 22 # (h2 & 3) * 5
923 # - no highbit if final leftover block (highbit = 0)
926 cmpdi 5, 0
930 std 0, 16(1)
931 stdu 1,-400(1)
933 SAVE_GPR 14, 112, 1
934 SAVE_GPR 15, 120, 1
935 SAVE_GPR 16, 128, 1
936 SAVE_GPR 17, 136, 1
937 SAVE_GPR 18, 144, 1
938 SAVE_GPR 19, 152, 1
939 SAVE_GPR 20, 160, 1
940 SAVE_GPR 21, 168, 1
941 SAVE_GPR 22, 176, 1
942 SAVE_GPR 23, 184, 1
943 SAVE_GPR 24, 192, 1
944 SAVE_GPR 25, 200, 1
945 SAVE_GPR 26, 208, 1
946 SAVE_GPR 27, 216, 1
947 SAVE_GPR 28, 224, 1
948 SAVE_GPR 29, 232, 1
949 SAVE_GPR 30, 240, 1
950 SAVE_GPR 31, 248, 1
966 divdu 31, 5, 30
999 RESTORE_GPR 14, 112, 1
1000 RESTORE_GPR 15, 120, 1
1001 RESTORE_GPR 16, 128, 1
1002 RESTORE_GPR 17, 136, 1
1003 RESTORE_GPR 18, 144, 1
1004 RESTORE_GPR 19, 152, 1
1005 RESTORE_GPR 20, 160, 1
1006 RESTORE_GPR 21, 168, 1
1007 RESTORE_GPR 22, 176, 1
1008 RESTORE_GPR 23, 184, 1
1009 RESTORE_GPR 24, 192, 1
1010 RESTORE_GPR 25, 200, 1
1011 RESTORE_GPR 26, 208, 1
1012 RESTORE_GPR 27, 216, 1
1013 RESTORE_GPR 28, 224, 1
1014 RESTORE_GPR 29, 232, 1
1015 RESTORE_GPR 30, 240, 1
1016 RESTORE_GPR 31, 248, 1
1018 addi 1, 1, 400
1019 ld 0, 16(1)
1039 # h + 5 + (-p)
1043 addic. 6, 6, 5
1060 std 10, 0(5)
1061 std 11, 8(5)
1066 .align 5