Lines Matching +full:1 +full:- +full:9
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly uses vector/VSX/scalar instructions
11 # - 26-bit limbs
12 # - Handles multiple 64-byte blocks.
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Set up r^4, r^3, r^2, r and s3, s2, …
26 # to 9 vectors for multiplications.
29 # vs [r^1, r^3, r^2, r^4]
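The header comments describe the standard Poly1305 construction: accumulate 16-byte blocks into h and multiply by the clamped key r modulo p = 2^130 - 5. As a reference point for the vector code in the matched lines below, here is a minimal Python sketch of that recurrence (not the kernel code; the function name and layout are illustrative):

    P = (1 << 130) - 5

    def poly1305_mac(key32: bytes, msg: bytes) -> bytes:
        # r is clamped, as in the scalar setup further down ("clamp mask r0")
        r = int.from_bytes(key32[:16], "little") & 0x0ffffffc0ffffffc0ffffffc0fffffff
        s = int.from_bytes(key32[16:], "little")
        h = 0
        for i in range(0, len(msg), 16):
            block = msg[i:i + 16]
            # the appended 0x01 byte is the "highbit": 2^128 for a full block,
            # 2^(8*len) for a final partial block
            m = int.from_bytes(block + b"\x01", "little")
            h = ((h + m) * r) % P
        return ((h + s) & ((1 << 128) - 1)).to_bytes(16, "little")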
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
94 std 0, 16(1)
95 stdu 1,-752(1)
97 SAVE_GPR 14, 112, 1
98 SAVE_GPR 15, 120, 1
99 SAVE_GPR 16, 128, 1
100 SAVE_GPR 17, 136, 1
101 SAVE_GPR 18, 144, 1
102 SAVE_GPR 19, 152, 1
103 SAVE_GPR 20, 160, 1
104 SAVE_GPR 21, 168, 1
105 SAVE_GPR 22, 176, 1
106 SAVE_GPR 23, 184, 1
107 SAVE_GPR 24, 192, 1
108 SAVE_GPR 25, 200, 1
109 SAVE_GPR 26, 208, 1
110 SAVE_GPR 27, 216, 1
111 SAVE_GPR 28, 224, 1
112 SAVE_GPR 29, 232, 1
113 SAVE_GPR 30, 240, 1
114 SAVE_GPR 31, 248, 1
116 addi 9, 1, 256
117 SAVE_VRS 20, 0, 9
118 SAVE_VRS 21, 16, 9
119 SAVE_VRS 22, 32, 9
120 SAVE_VRS 23, 48, 9
121 SAVE_VRS 24, 64, 9
122 SAVE_VRS 25, 80, 9
123 SAVE_VRS 26, 96, 9
124 SAVE_VRS 27, 112, 9
125 SAVE_VRS 28, 128, 9
126 SAVE_VRS 29, 144, 9
127 SAVE_VRS 30, 160, 9
128 SAVE_VRS 31, 176, 9
130 SAVE_VSX 14, 192, 9
131 SAVE_VSX 15, 208, 9
132 SAVE_VSX 16, 224, 9
133 SAVE_VSX 17, 240, 9
134 SAVE_VSX 18, 256, 9
135 SAVE_VSX 19, 272, 9
136 SAVE_VSX 20, 288, 9
137 SAVE_VSX 21, 304, 9
138 SAVE_VSX 22, 320, 9
139 SAVE_VSX 23, 336, 9
140 SAVE_VSX 24, 352, 9
141 SAVE_VSX 25, 368, 9
142 SAVE_VSX 26, 384, 9
143 SAVE_VSX 27, 400, 9
144 SAVE_VSX 28, 416, 9
145 SAVE_VSX 29, 432, 9
146 SAVE_VSX 30, 448, 9
147 SAVE_VSX 31, 464, 9
151 addi 9, 1, 256
152 RESTORE_VRS 20, 0, 9
153 RESTORE_VRS 21, 16, 9
154 RESTORE_VRS 22, 32, 9
155 RESTORE_VRS 23, 48, 9
156 RESTORE_VRS 24, 64, 9
157 RESTORE_VRS 25, 80, 9
158 RESTORE_VRS 26, 96, 9
159 RESTORE_VRS 27, 112, 9
160 RESTORE_VRS 28, 128, 9
161 RESTORE_VRS 29, 144, 9
162 RESTORE_VRS 30, 160, 9
163 RESTORE_VRS 31, 176, 9
165 RESTORE_VSX 14, 192, 9
166 RESTORE_VSX 15, 208, 9
167 RESTORE_VSX 16, 224, 9
168 RESTORE_VSX 17, 240, 9
169 RESTORE_VSX 18, 256, 9
170 RESTORE_VSX 19, 272, 9
171 RESTORE_VSX 20, 288, 9
172 RESTORE_VSX 21, 304, 9
173 RESTORE_VSX 22, 320, 9
174 RESTORE_VSX 23, 336, 9
175 RESTORE_VSX 24, 352, 9
176 RESTORE_VSX 25, 368, 9
177 RESTORE_VSX 26, 384, 9
178 RESTORE_VSX 27, 400, 9
179 RESTORE_VSX 28, 416, 9
180 RESTORE_VSX 29, 432, 9
181 RESTORE_VSX 30, 448, 9
182 RESTORE_VSX 31, 464, 9
184 RESTORE_GPR 14, 112, 1
185 RESTORE_GPR 15, 120, 1
186 RESTORE_GPR 16, 128, 1
187 RESTORE_GPR 17, 136, 1
188 RESTORE_GPR 18, 144, 1
189 RESTORE_GPR 19, 152, 1
190 RESTORE_GPR 20, 160, 1
191 RESTORE_GPR 21, 168, 1
192 RESTORE_GPR 22, 176, 1
193 RESTORE_GPR 23, 184, 1
194 RESTORE_GPR 24, 192, 1
195 RESTORE_GPR 25, 200, 1
196 RESTORE_GPR 26, 208, 1
197 RESTORE_GPR 27, 216, 1
198 RESTORE_GPR 28, 224, 1
199 RESTORE_GPR 29, 232, 1
200 RESTORE_GPR 30, 240, 1
201 RESTORE_GPR 31, 248, 1
203 addi 1, 1, 752
204 ld 0, 16(1)
210 # p[1] = a0*r1 + a1*r0 + a2*r4*5 + a3*r3*5 + a4*r2*5;
215 # [r^2, r^3, r^1, r^4]
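The p[1] comment above is the middle limb of the 5x26-bit schoolbook product; limbs that land at weight 2^130 or higher are folded back with a factor of 5, since 2^130 = 5 (mod p). A hedged Python model of all five output limbs (mul_limbs is an illustrative name; a and r are the limb arrays from the comment):

    def mul_limbs(a, r):
        # a and r are lists of five 26-bit limbs, value = sum(x[i] << (26*i))
        p = [0] * 5
        for i in range(5):
            for j in range(5):
                if i + j < 5:
                    p[i + j] += a[i] * r[j]
                else:
                    # 2^(26*(i+j)) = 2^130 * 2^(26*(i+j-5)) == 5 * 2^(26*(i+j-5))
                    p[i + j - 5] += 5 * a[i] * r[j]
        return p  # unreduced accumulators; carries are propagated afterwards

Expanding p[1] from this loop gives exactly the line above: a0*r1 + a1*r0 + 5*(a2*r4 + a3*r3 + a4*r2).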
223 vmulouw 12, 7, 1
235 vmulouw 13, 8, 1
268 vmuleuw 9, 4, 26
271 vmuleuw 12, 7, 1
273 vaddudm 14, 14, 9
279 vmuleuw 9, 4, 27
283 vmuleuw 13, 8, 1
284 vaddudm 15, 15, 9
290 vmuleuw 9, 4, 28
295 vaddudm 16, 16, 9
301 vmuleuw 9, 4, 29
306 vaddudm 17, 17, 9
312 vmuleuw 9, 4, 30
317 vaddudm 18, 18, 9
363 bl do_mul # r^2 r^1
375 vsld 9, 27, 13
379 vaddudm 0, 9, 27
380 vaddudm 1, 10, 28
391 vsld 9, 27, 13
395 vaddudm 0, 9, 27
396 vaddudm 1, 10, 28
402 xxlor 1, 59, 59
411 vspltw 9, 26, 3
413 vmrgow 26, 10, 9
414 vspltw 9, 27, 3
416 vmrgow 27, 10, 9
417 vspltw 9, 28, 3
419 vmrgow 28, 10, 9
420 vspltw 9, 29, 3
422 vmrgow 29, 10, 9
423 vspltw 9, 30, 3
425 vmrgow 30, 10, 9
427 vsld 9, 27, 13
431 vaddudm 0, 9, 27
432 vaddudm 1, 10, 28
442 vspltisb 9, 2
455 vsld 10, 12, 9
486 lvx 25, 0, 10 # v25 - mask
487 lvx 31, 14, 10 # v31 = 1a
488 lvx 19, 15, 10 # v19 = 1 << 24
494 ld 9, 24(3)
496 and. 9, 9, 11
500 extrdi 14, 9, 26, 38
501 extrdi 15, 9, 26, 12
502 extrdi 16, 9, 12, 0
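These extrdi instructions split a clamped 64-bit half of the key into 26-bit limbs (extrdi rd, rs, n, b extracts the n-bit field starting at MSB0 bit b). What they compute, in Python terms (x is a stand-in value, not kernel data):

    x = 0x0ffffffc0fffffff            # e.g. a clamped 64-bit half of r
    l0 = x & ((1 << 26) - 1)          # extrdi ., ., 26, 38: bits 38..63, the low 26
    l1 = (x >> 26) & ((1 << 26) - 1)  # extrdi ., ., 26, 12: the next 26 bits
    l2 = x >> 52                      # extrdi ., ., 12, 0: the top 12 bits
    assert (l2 << 52) | (l1 << 26) | l0 == x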
513 li 9, 5
514 mtvsrdd 36, 0, 9
516 vmulouw 1, 28, 4 # v1 = rr1
543 ld 9, 0(3)
548 extrdi 14, 9, 26, 38
549 extrdi 15, 9, 26, 12
550 extrdi 16, 9, 12, 0
559 vor 8, 8, 9
570 vand 9, 14, 25 # a0
584 vaddudm 20, 4, 9
597 vand 9, 14, 25 # a0
612 vmrgow 4, 9, 20
619 addi 5, 5, -64 # len -= 64
622 li 9, 64
623 divdu 31, 5, 9
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h4 + m4) r^2
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
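The three comments above state the interleaving identity that lets the loop advance two accumulator lanes by r^2 per iteration instead of one lane by r. A hedged Python check of the 4-block case (serial and interleaved are illustrative names):

    import random

    P = (1 << 130) - 5

    def serial(h, m, r):
        for mi in m:
            h = ((h + mi) * r) % P
        return h

    def interleaved(h, m, r):
        r2 = (r * r) % P
        lane0 = (((h + m[0]) * r2) + m[2]) * r2 % P  # m1, m3 lane: two r^2 steps
        lane1 = ((m[1] * r2) + m[3]) * r % P         # m2, m4 lane: last step uses r^1
        return (lane0 + lane1) % P

    h, r = random.randrange(P), random.randrange(1 << 124)
    m = [random.randrange(1 << 129) for _ in range(4)]
    assert serial(h, m, r) == interleaved(h, m, r)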
644 vspltisb 9, 2
657 vsld 10, 12, 9
689 vand 9, 17, 25 # a0
722 vmrgow 4, 9, 4
729 addi 5, 5, -64 # len -= 64
736 xxlor 59, 1, 1
752 vaddudm 4, 14, 9
767 vspltisb 9, 2
780 vsld 10, 12, 9
851 ld 9, 24(3)
853 and. 9, 9, 11 # clamp mask r0
857 add 19, 21, 10 # s1: r19 = r1 + (r1 >> 2) = (r1 >> 2) * 5
861 mtvsrdd 32+0, 9, 19 # r0, s1
862 mtvsrdd 32+1, 10, 9 # r1, r0
864 mtvsrdd 32+3, 9, 25 # r0
878 vmsumudm 7, 6, 0, 9 # h0 * r0, h1 * s1
881 vmsumudm 11, 6, 1, 9 # h0 * r1, h1 * r0
885 vmsumudm 11, 8, 3, 9 # d2 = h2 * r0
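This scalar path works in two 64-bit limbs: h = h0 + h1*2^64 + h2*2^128 and r = r0 + r1*2^64, with s1 = (r1 >> 2) * 5 absorbing the 2^128 wraparound (clamping zeroes r1's low two bits, so r1 * 2^128 = (r1 >> 2) * 5 mod p). A hedged Python model of the products the vmsumudm comments describe (mul_2x64 is an illustrative name):

    P = (1 << 130) - 5

    def mul_2x64(h0, h1, h2, r0, r1):
        s1 = (r1 >> 2) * 5
        d0 = h0 * r0 + h1 * s1            # "h0 * r0, h1 * s1"
        d1 = h0 * r1 + h1 * r0 + h2 * s1  # "h0 * r1, h1 * r0"; the h2*s1 term is
                                          # accumulated by a line not in this listing
        d2 = h2 * r0                      # "d2 = h2 * r0"
        return d0, d1, d2                 # result = d0 + d1*2^64 + d2*2^128 (mod p)

    # quick self-check with a clamped r1 (low two bits zero)
    h0, h1, h2 = 2**63 - 1, 2**64 - 5, 3
    r0, r1 = 0x0ffffffc0fffffff, 0x0ffffffc0ffffffc
    d0, d1, d2 = mul_2x64(h0, h1, h2, r0, r1)
    lhs = ((h0 + (h1 << 64) + (h2 << 128)) * (r0 + (r1 << 64))) % P
    assert lhs == (d0 + (d1 << 64) + (d2 << 128)) % P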
923 # - no highbit if final leftover block (highbit = 0)
930 std 0, 16(1)
931 stdu 1,-400(1)
933 SAVE_GPR 14, 112, 1
934 SAVE_GPR 15, 120, 1
935 SAVE_GPR 16, 128, 1
936 SAVE_GPR 17, 136, 1
937 SAVE_GPR 18, 144, 1
938 SAVE_GPR 19, 152, 1
939 SAVE_GPR 20, 160, 1
940 SAVE_GPR 21, 168, 1
941 SAVE_GPR 22, 176, 1
942 SAVE_GPR 23, 184, 1
943 SAVE_GPR 24, 192, 1
944 SAVE_GPR 25, 200, 1
945 SAVE_GPR 26, 208, 1
946 SAVE_GPR 27, 216, 1
947 SAVE_GPR 28, 224, 1
948 SAVE_GPR 29, 232, 1
949 SAVE_GPR 30, 240, 1
950 SAVE_GPR 31, 248, 1
973 vxor 9, 9, 9
999 RESTORE_GPR 14, 112, 1
1000 RESTORE_GPR 15, 120, 1
1001 RESTORE_GPR 16, 128, 1
1002 RESTORE_GPR 17, 136, 1
1003 RESTORE_GPR 18, 144, 1
1004 RESTORE_GPR 19, 152, 1
1005 RESTORE_GPR 20, 160, 1
1006 RESTORE_GPR 21, 168, 1
1007 RESTORE_GPR 22, 176, 1
1008 RESTORE_GPR 23, 184, 1
1009 RESTORE_GPR 24, 192, 1
1010 RESTORE_GPR 25, 200, 1
1011 RESTORE_GPR 26, 208, 1
1012 RESTORE_GPR 27, 216, 1
1013 RESTORE_GPR 28, 224, 1
1014 RESTORE_GPR 29, 232, 1
1015 RESTORE_GPR 30, 240, 1
1016 RESTORE_GPR 31, 248, 1
1018 addi 1, 1, 400
1019 ld 0, 16(1)
1039 # h + 5 + (-p)
1046 srdi 9, 8, 2 # overflow?
1047 cmpdi 9, 0
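The final freeze computes h mod p by adding 5 and testing the carry out of bit 130 (the srdi/cmpdi "overflow?" pair above): if h + 5 reaches 2^130, then h >= p and h + 5 - 2^130 = h - p is the canonical result. A hedged Python sketch of that step (freeze is an illustrative name):

    def freeze(h):
        # h is the partially reduced accumulator, 0 <= h < 2^130
        g = h + 5                     # h + 5 + (-p) differs from this only by 2^130
        if g >> 130:                  # the "overflow?" test: carry out of bit 130
            h = g & ((1 << 130) - 1)  # h >= p: keep h - p, dropping the 2^130 bit
        return h                      # canonical 0 <= h < p

    assert freeze((1 << 130) - 5) == 0  # exactly p reduces to 0
    assert freeze(7) == 7               # values below p are unchanged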