Lines Matching +full:7 +full:- +full:9
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly using vector/VSX/Scalar
11 # - 26 bits limbs
12 # - Handle multiple 64 byte blocks.
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
26 # to 9 vectors for multiplications.
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
95 stdu 1,-752(1)
116 addi 9, 1, 256
117 SAVE_VRS 20, 0, 9
118 SAVE_VRS 21, 16, 9
119 SAVE_VRS 22, 32, 9
120 SAVE_VRS 23, 48, 9
121 SAVE_VRS 24, 64, 9
122 SAVE_VRS 25, 80, 9
123 SAVE_VRS 26, 96, 9
124 SAVE_VRS 27, 112, 9
125 SAVE_VRS 28, 128, 9
126 SAVE_VRS 29, 144, 9
127 SAVE_VRS 30, 160, 9
128 SAVE_VRS 31, 176, 9
130 SAVE_VSX 14, 192, 9
131 SAVE_VSX 15, 208, 9
132 SAVE_VSX 16, 224, 9
133 SAVE_VSX 17, 240, 9
134 SAVE_VSX 18, 256, 9
135 SAVE_VSX 19, 272, 9
136 SAVE_VSX 20, 288, 9
137 SAVE_VSX 21, 304, 9
138 SAVE_VSX 22, 320, 9
139 SAVE_VSX 23, 336, 9
140 SAVE_VSX 24, 352, 9
141 SAVE_VSX 25, 368, 9
142 SAVE_VSX 26, 384, 9
143 SAVE_VSX 27, 400, 9
144 SAVE_VSX 28, 416, 9
145 SAVE_VSX 29, 432, 9
146 SAVE_VSX 30, 448, 9
147 SAVE_VSX 31, 464, 9
151 addi 9, 1, 256
152 RESTORE_VRS 20, 0, 9
153 RESTORE_VRS 21, 16, 9
154 RESTORE_VRS 22, 32, 9
155 RESTORE_VRS 23, 48, 9
156 RESTORE_VRS 24, 64, 9
157 RESTORE_VRS 25, 80, 9
158 RESTORE_VRS 26, 96, 9
159 RESTORE_VRS 27, 112, 9
160 RESTORE_VRS 28, 128, 9
161 RESTORE_VRS 29, 144, 9
162 RESTORE_VRS 30, 160, 9
163 RESTORE_VRS 31, 176, 9
165 RESTORE_VSX 14, 192, 9
166 RESTORE_VSX 15, 208, 9
167 RESTORE_VSX 16, 224, 9
168 RESTORE_VSX 17, 240, 9
169 RESTORE_VSX 18, 256, 9
170 RESTORE_VSX 19, 272, 9
171 RESTORE_VSX 20, 288, 9
172 RESTORE_VSX 21, 304, 9
173 RESTORE_VSX 22, 320, 9
174 RESTORE_VSX 23, 336, 9
175 RESTORE_VSX 24, 352, 9
176 RESTORE_VSX 25, 368, 9
177 RESTORE_VSX 26, 384, 9
178 RESTORE_VSX 27, 400, 9
179 RESTORE_VSX 28, 416, 9
180 RESTORE_VSX 29, 432, 9
181 RESTORE_VSX 30, 448, 9
182 RESTORE_VSX 31, 464, 9
223 vmulouw 12, 7, 1
234 vmulouw 12, 7, 2
243 vmulouw 12, 7, 3
252 vmulouw 12, 7, 26
261 vmulouw 12, 7, 27
268 vmuleuw 9, 4, 26
271 vmuleuw 12, 7, 1
273 vaddudm 14, 14, 9
279 vmuleuw 9, 4, 27
282 vmuleuw 12, 7, 2
284 vaddudm 15, 15, 9
290 vmuleuw 9, 4, 28
293 vmuleuw 12, 7, 3
295 vaddudm 16, 16, 9
301 vmuleuw 9, 4, 29
304 vmuleuw 12, 7, 26
306 vaddudm 17, 17, 9
312 vmuleuw 9, 4, 30
315 vmuleuw 12, 7, 27
317 vaddudm 18, 18, 9
361 vmr 7, 29
375 vsld 9, 27, 13
379 vaddudm 0, 9, 27
388 vmrgow 29, 29, 7
391 vsld 9, 27, 13
395 vaddudm 0, 9, 27
408 xxlor 7, 34, 34
411 vspltw 9, 26, 3
413 vmrgow 26, 10, 9
414 vspltw 9, 27, 3
416 vmrgow 27, 10, 9
417 vspltw 9, 28, 3
419 vmrgow 28, 10, 9
420 vspltw 9, 29, 3
422 vmrgow 29, 10, 9
423 vspltw 9, 30, 3
425 vmrgow 30, 10, 9
427 vsld 9, 27, 13
431 vaddudm 0, 9, 27
442 vspltisb 9, 2
445 vand 7, 17, 25
455 vsld 10, 12, 9
462 vaddudm 7, 7, 13
464 vsrd 11, 7, 31
465 vand 7, 7, 25
486 lvx 25, 0, 10 # v25 - mask
494 ld 9, 24(3)
496 and. 9, 9, 11
500 extrdi 14, 9, 26, 38
501 extrdi 15, 9, 26, 12
502 extrdi 16, 9, 12, 0
513 li 9, 5
514 mtvsrdd 36, 0, 9
543 ld 9, 0(3)
548 extrdi 14, 9, 26, 38
549 extrdi 15, 9, 26, 12
550 extrdi 16, 9, 12, 0
559 vor 8, 8, 9
570 vand 9, 14, 25 # a0
584 vaddudm 20, 4, 9
587 vaddudm 23, 7, 12
597 vand 9, 14, 25 # a0
612 vmrgow 4, 9, 20
615 vmrgow 7, 12, 23
619 addi 5, 5, -64 # len -= 64
622 li 9, 64
623 divdu 31, 5, 9
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
644 vspltisb 9, 2
647 vand 7, 17, 25
657 vsld 10, 12, 9
664 vaddudm 7, 7, 13
666 vsrd 11, 7, 31
667 vand 7, 7, 25
689 vand 9, 17, 25 # a0
718 vaddudm 7, 7, 23
722 vmrgow 4, 9, 4
725 vmrgow 7, 12, 7
729 addi 5, 5, -64 # len -= 64
742 xxlor 34, 7, 7
752 vaddudm 4, 14, 9
760 vaddudm 7, 17, 12
767 vspltisb 9, 2
769 vsrd 11, 7, 31
770 vand 7, 7, 25
780 vsld 10, 12, 9
787 vaddudm 7, 7, 13
789 vsrd 11, 7, 31
790 vand 7, 7, 25
811 vsld 7, 7, 11
812 vor 21, 7, 12
851 ld 9, 24(3)
853 and. 9, 9, 11 # clamp mask r0
857 add 19, 21, 10 # s1: r19 - (r1 >> 2) *5
861 mtvsrdd 32+0, 9, 19 # r0, s1
862 mtvsrdd 32+1, 10, 9 # r1, r0
864 mtvsrdd 32+3, 9, 25 # r0
878 vmsumudm 7, 6, 0, 9 # h0 * r0, h1 * s1
881 vmsumudm 11, 6, 1, 9 # h0 * r1, h1 * r0
885 vmsumudm 11, 8, 3, 9 # d2 = h2 * r0
897 mfvsrld 27, 32+7
900 mfvsrd 20, 32+7 # h0.h
923 # - no highbit if final leftover block (highbit = 0)
931 stdu 1,-400(1)
973 vxor 9, 9, 9
1039 # h + 5 + (-p)
1041 mr 7, 11
1044 addze 7, 7
1046 srdi 9, 8, 2 # overflow?
1047 cmpdi 9, 0
1050 mr 11, 7
1055 ld 7, 8(4)
1057 adde 11, 11, 7