Lines Matching +full:4 +full:- +full:16

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
35 ld1 {v16.16b-v19.16b}, [x5], #64; /* load 256 contiguous bytes from [x5] into v16-v31, 64 per ld1; these are the tbl/tbx table registers of the sbox lookups (x5 presumably = sbox address, set outside this excerpt) */ \
36 ld1 {v20.16b-v23.16b}, [x5], #64; /* bytes 64..127; x5 += 64 */ \
37 ld1 {v24.16b-v27.16b}, [x5], #64; /* bytes 128..191; x5 += 64 */ \
38 ld1 {v28.16b-v31.16b}, [x5]; /* bytes 192..255; no writeback, so x5 is left at start + 192 */
41 zip1 RTMP0.4s, s0.4s, s1.4s; /* 32-bit interleave of s0..s3 into RTMP0..3 (later steps not in this excerpt); RTMP0 = { s0[0], s1[0], s0[1], s1[1] } */ \
42 zip1 RTMP1.4s, s2.4s, s3.4s; /* RTMP1 = { s2[0], s3[0], s2[1], s3[1] } */ \
43 zip2 RTMP2.4s, s0.4s, s1.4s; /* RTMP2 = { s0[2], s1[2], s0[3], s1[3] } */ \
44 zip2 RTMP3.4s, s2.4s, s3.4s; /* RTMP3 = { s2[2], s3[2], s2[3], s3[3] } */ \
51 zip1 RTMP0.4s, s0.4s, s1.4s; /* 32-bit interleave of two register groups, s0..s3 -> RTMP0..3 and s4..s7 -> RTMP4..7 (later steps not in this excerpt); RTMP0 = { s0[0], s1[0], s0[1], s1[1] } */ \
52 zip1 RTMP1.4s, s2.4s, s3.4s; /* RTMP1 = { s2[0], s3[0], s2[1], s3[1] } */ \
53 zip2 RTMP2.4s, s0.4s, s1.4s; /* RTMP2 = { s0[2], s1[2], s0[3], s1[3] } */ \
54 zip2 RTMP3.4s, s2.4s, s3.4s; /* RTMP3 = { s2[2], s3[2], s2[3], s3[3] } */ \
55 zip1 RTMP4.4s, s4.4s, s5.4s; /* RTMP4 = { s4[0], s5[0], s4[1], s5[1] } */ \
56 zip1 RTMP5.4s, s6.4s, s7.4s; /* RTMP5 = { s6[0], s7[0], s6[1], s7[1] } */ \
57 zip2 RTMP6.4s, s4.4s, s5.4s; /* RTMP6 = { s4[2], s5[2], s4[3], s5[3] } */ \
58 zip2 RTMP7.4s, s6.4s, s7.4s; /* RTMP7 = { s6[2], s7[2], s6[3], s7[3] } */ \
69 zip1 RTMP0.4s, s1.4s, s0.4s; /* 32-bit interleave with each pair's operands swapped (s1 before s0, s3 before s2); later steps not in this excerpt; RTMP0 = { s1[0], s0[0], s1[1], s0[1] } */ \
70 zip2 RTMP1.4s, s1.4s, s0.4s; /* RTMP1 = { s1[2], s0[2], s1[3], s0[3] } */ \
71 zip1 RTMP2.4s, s3.4s, s2.4s; /* RTMP2 = { s3[0], s2[0], s3[1], s2[1] } */ \
72 zip2 RTMP3.4s, s3.4s, s2.4s; /* RTMP3 = { s3[2], s2[2], s3[3], s2[3] } */ \
79 zip1 RTMP0.4s, s1.4s, s0.4s; /* swapped-operand 32-bit interleave of two groups, s0..s3 -> RTMP0..3 and s4..s7 -> RTMP4..7 (later steps not in this excerpt); RTMP0 = { s1[0], s0[0], s1[1], s0[1] } */ \
80 zip1 RTMP2.4s, s3.4s, s2.4s; /* RTMP2 = { s3[0], s2[0], s3[1], s2[1] } */ \
81 zip2 RTMP1.4s, s1.4s, s0.4s; /* RTMP1 = { s1[2], s0[2], s1[3], s0[3] } */ \
82 zip2 RTMP3.4s, s3.4s, s2.4s; /* RTMP3 = { s3[2], s2[2], s3[3], s2[3] } */ \
83 zip1 RTMP4.4s, s5.4s, s4.4s; /* RTMP4 = { s5[0], s4[0], s5[1], s4[1] } */ \
84 zip1 RTMP6.4s, s7.4s, s6.4s; /* RTMP6 = { s7[0], s6[0], s7[1], s6[1] } */ \
85 zip2 RTMP5.4s, s5.4s, s4.4s; /* RTMP5 = { s5[2], s4[2], s5[3], s4[3] } */ \
86 zip2 RTMP7.4s, s7.4s, s6.4s; /* RTMP7 = { s7[2], s6[2], s7[3], s6[3] } */ \
97 dup RX0.4s, RKEY.s[round]; /* one cipher round on four lanes: s0 ^= L(sbox(rk ^ s1 ^ s2 ^ s3)); start by broadcasting round-key word `round` to all lanes */ \
99 eor RTMP1.16b, s2.16b, s3.16b; \
100 eor RX0.16b, RX0.16b, s1.16b; \
101 eor RX0.16b, RX0.16b, RTMP1.16b; /* RX0 = rk ^ s1 ^ s2 ^ s3 */ \
103 /* sbox, non-linear part: 256-entry byte lookup split over four 64-byte tables in v16-v31 */ \
104 movi RTMP3.16b, #64; /* sizeof(sbox) / 4 */ \
105 tbl RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; /* indices 0..63; tbl writes 0 for out-of-range indices */ \
106 sub RX0.16b, RX0.16b, RTMP3.16b; /* rebase indices by -64 (bytes wrap) for the next table */ \
107 tbx RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; /* indices 64..127; tbx leaves out-of-range lanes untouched */ \
108 sub RX0.16b, RX0.16b, RTMP3.16b; \
109 tbx RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; /* indices 128..191 */ \
110 sub RX0.16b, RX0.16b, RTMP3.16b; \
111 tbx RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; /* indices 192..255; RTMP0 = x = sbox output */ \
114 shl RTMP1.4s, RTMP0.4s, #8; /* each shl/sri pair below forms a 32-bit rotate-left */ \
115 shl RTMP2.4s, RTMP0.4s, #16; \
116 shl RTMP3.4s, RTMP0.4s, #24; \
117 sri RTMP1.4s, RTMP0.4s, #(32-8); /* RTMP1 = rol32(x, 8) */ \
118 sri RTMP2.4s, RTMP0.4s, #(32-16); /* RTMP2 = rol32(x, 16) */ \
119 sri RTMP3.4s, RTMP0.4s, #(32-24); /* RTMP3 = rol32(x, 24) */ \
120 /* RTMP1 = x ^ rol32(x, 8) ^ rol32(x, 16) */ \
121 eor RTMP1.16b, RTMP1.16b, RTMP0.16b; \
122 eor RTMP1.16b, RTMP1.16b, RTMP2.16b; \
124 eor RTMP3.16b, RTMP3.16b, RTMP0.16b; /* RTMP3 = x ^ rol32(x, 24) */ \
125 shl RTMP2.4s, RTMP1.4s, #2; /* immediate written with '#', matching every other shift in this file */ \
126 sri RTMP2.4s, RTMP1.4s, #(32-2); /* RTMP2 = rol32(RTMP1, 2) = rol(x,2) ^ rol(x,10) ^ rol(x,18) */ \
127 eor RTMP3.16b, RTMP3.16b, RTMP2.16b; /* RTMP3 = x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
129 eor s0.16b, s0.16b, RTMP3.16b; /* s0 ^= RTMP3 */
133 4: /* numeric local label: loop head, target of the "bne 4b" below */ \
134 ld1 {RKEY.4s}, [x0], #16; /* RKEY = next four 32-bit words from [x0]; x0 += 16 */ \
142 bne 4b; /* loop back while Z is clear; the flag-setting instruction and the loop body are not in this excerpt */ \
144 rev32 b0.16b, b0.16b; /* reverse the byte order inside every 32-bit word of b0..b3 */ \
145 rev32 b1.16b, b1.16b; \
146 rev32 b2.16b, b2.16b; \
147 rev32 b3.16b, b3.16b; \
155 rev32 b0.16b, b0.16b; /* second, separate rev32 pass over b0..b3; lines between the two passes are not shown (possibly a different macro - confirm) */ \
156 rev32 b1.16b, b1.16b; \
157 rev32 b2.16b, b2.16b; \
158 rev32 b3.16b, b3.16b; \
163 dup RX0.4s, RKEY.s[round]; /* one cipher round on two register groups (s0..s3 and t0..t3) with instructions interleaved; broadcast round-key word `round` to all lanes */ \
164 eor RTMP0.16b, s2.16b, s3.16b; \
165 mov RX1.16b, RX0.16b; /* the t group uses the same round key */ \
166 eor RTMP1.16b, t2.16b, t3.16b; \
167 eor RX0.16b, RX0.16b, s1.16b; \
168 eor RX1.16b, RX1.16b, t1.16b; \
169 eor RX0.16b, RX0.16b, RTMP0.16b; /* RX0 = rk ^ s1 ^ s2 ^ s3 */ \
170 eor RX1.16b, RX1.16b, RTMP1.16b; /* RX1 = rk ^ t1 ^ t2 ^ t3 */ \
172 /* sbox, non-linear part: 256-entry byte lookup split over four 64-byte tables in v16-v31 */ \
173 movi RTMP3.16b, #64; /* sizeof(sbox) / 4 */ \
174 tbl RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; /* indices 0..63; tbl writes 0 for out-of-range indices */ \
175 tbl RTMP1.16b, {v16.16b-v19.16b}, RX1.16b; \
176 sub RX0.16b, RX0.16b, RTMP3.16b; /* rebase indices by -64 (bytes wrap) for the next table */ \
177 sub RX1.16b, RX1.16b, RTMP3.16b; \
178 tbx RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; /* indices 64..127; tbx leaves out-of-range lanes untouched */ \
179 tbx RTMP1.16b, {v20.16b-v23.16b}, RX1.16b; \
180 sub RX0.16b, RX0.16b, RTMP3.16b; \
181 sub RX1.16b, RX1.16b, RTMP3.16b; \
182 tbx RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; /* indices 128..191 */ \
183 tbx RTMP1.16b, {v24.16b-v27.16b}, RX1.16b; \
184 sub RX0.16b, RX0.16b, RTMP3.16b; \
185 sub RX1.16b, RX1.16b, RTMP3.16b; \
186 tbx RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; /* indices 192..255; RTMP0 = x (s group sbox output) */ \
187 tbx RTMP1.16b, {v28.16b-v31.16b}, RX1.16b; /* RTMP1 = x (t group sbox output) */ \
190 shl RX0.4s, RTMP0.4s, #8; /* each shl/sri pair below forms a 32-bit rotate-left */ \
191 shl RX1.4s, RTMP1.4s, #8; \
192 shl RTMP2.4s, RTMP0.4s, #16; \
193 shl RTMP3.4s, RTMP1.4s, #16; \
194 sri RX0.4s, RTMP0.4s, #(32 - 8); /* RX0 = rol32(x, 8) */ \
195 sri RX1.4s, RTMP1.4s, #(32 - 8); \
196 sri RTMP2.4s, RTMP0.4s, #(32 - 16); /* RTMP2 = rol32(x, 16) */ \
197 sri RTMP3.4s, RTMP1.4s, #(32 - 16); \
198 /* RX = x ^ rol32(x, 8) ^ rol32(x, 16) */ \
199 eor RX0.16b, RX0.16b, RTMP0.16b; \
200 eor RX1.16b, RX1.16b, RTMP1.16b; \
201 eor RX0.16b, RX0.16b, RTMP2.16b; \
202 eor RX1.16b, RX1.16b, RTMP3.16b; \
204 shl RTMP2.4s, RTMP0.4s, #24; \
205 shl RTMP3.4s, RTMP1.4s, #24; \
206 sri RTMP2.4s, RTMP0.4s, #(32 - 24); /* RTMP2 = rol32(x, 24) */ \
207 sri RTMP3.4s, RTMP1.4s, #(32 - 24); \
208 eor RTMP0.16b, RTMP0.16b, RTMP2.16b; /* RTMP0 = x ^ rol32(x, 24) */ \
209 eor RTMP1.16b, RTMP1.16b, RTMP3.16b; \
210 shl RTMP2.4s, RX0.4s, #2; \
211 shl RTMP3.4s, RX1.4s, #2; \
212 sri RTMP2.4s, RX0.4s, #(32 - 2); /* RTMP2 = rol32(RX0, 2) = rol(x,2) ^ rol(x,10) ^ rol(x,18) */ \
213 sri RTMP3.4s, RX1.4s, #(32 - 2); \
214 eor RTMP0.16b, RTMP0.16b, RTMP2.16b; /* RTMP0 = x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
215 eor RTMP1.16b, RTMP1.16b, RTMP3.16b; \
217 eor s0.16b, s0.16b, RTMP0.16b; /* s0 ^= RTMP0 */ \
218 eor t0.16b, t0.16b, RTMP1.16b; /* t0 ^= RTMP1 */
221 rev32 b0.16b, b0.16b; /* reverse the byte order inside every 32-bit word of b0..b7 */ \
222 rev32 b1.16b, b1.16b; \
223 rev32 b2.16b, b2.16b; \
224 rev32 b3.16b, b3.16b; \
225 rev32 b4.16b, b4.16b; \
226 rev32 b5.16b, b5.16b; \
227 rev32 b6.16b, b6.16b; \
228 rev32 b7.16b, b7.16b; \
232 ld1 {RKEY.4s}, [x0], #16; /* RKEY = next four 32-bit words from [x0]; x0 += 16 (surrounding loop is not in this excerpt) */ \
242 rev32 b0.16b, b0.16b; /* second rev32 pass over b0..b7, after the lines not shown */ \
243 rev32 b1.16b, b1.16b; \
244 rev32 b2.16b, b2.16b; \
245 rev32 b3.16b, b3.16b; \
246 rev32 b4.16b, b4.16b; \
247 rev32 b5.16b, b5.16b; \
248 rev32 b6.16b, b6.16b; \
249 rev32 b7.16b, b7.16b; \
/*
 * Load/store skeleton of a routine that reads from [x2] and writes to [x1],
 * both post-incremented. The transform between each load and store is not
 * in this excerpt.
 */
/* 128-byte path: ld4 de-interleaves, so vN receives 32-bit word N of each of four 16-byte groups */
273 ld4 {v0.4s-v3.4s}, [x2], #64
274 ld4 {v4.4s-v7.4s}, [x2], #64
/* st1 stores v0..v7 back to back with no interleaving */
278 st1 {v0.16b-v3.16b}, [x1], #64
279 st1 {v4.16b-v7.16b}, [x1], #64
/* w3 is tested against 4 and reduced by 4: presumably a remaining-block count (branches not shown - confirm) */
286 cmp w3, #4
289 sub w3, w3, #4
/* 64-byte path */
291 ld4 {v0.4s-v3.4s}, [x2], #64
295 st1 {v0.16b-v3.16b}, [x1], #64
/* 16-bytes-at-a-time path: up to three plain loads, then up to three stores (conditions between them not shown) */
301 ld1 {v0.16b}, [x2], #16
303 ld1 {v1.16b}, [x2], #16
305 ld1 {v2.16b}, [x2], #16
313 st1 {v0.16b}, [x1], #16
315 st1 {v1.16b}, [x1], #16
317 st1 {v2.16b}, [x1], #16
/*
 * Skeleton of a routine that XORs each output block with the previous input
 * block, seeded from and finally written back to the 16 bytes at [x3].
 * NOTE(review): this matches CBC decryption chaining - confirm against the
 * function header, which is not in this excerpt.
 */
334 ld1 {RIV.16b}, [x3]
/* 128-byte path: de-interleaving loads; the second ld4 has no writeback */
340 ld4 {v0.4s-v3.4s}, [x2], #64
341 ld4 {v4.4s-v7.4s}, [x2]
351 eor v0.16b, v0.16b, RIV.16b
/* plain loads of input into RTMP0..7; x2 is presumably rewound by lines not shown so these re-read the same 128 bytes - confirm */
353 ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
354 ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64
/* v(i+1) ^= RTMP(i): each block is XORed with the input block loaded just before it */
356 eor v1.16b, v1.16b, RTMP0.16b
357 eor v2.16b, v2.16b, RTMP1.16b
358 eor v3.16b, v3.16b, RTMP2.16b
359 eor v4.16b, v4.16b, RTMP3.16b
360 eor v5.16b, v5.16b, RTMP4.16b
361 eor v6.16b, v6.16b, RTMP5.16b
362 eor v7.16b, v7.16b, RTMP6.16b
/* the last loaded input block becomes the next chaining value */
364 mov RIV.16b, RTMP7.16b
366 st1 {v0.16b-v3.16b}, [x1], #64
367 st1 {v4.16b-v7.16b}, [x1], #64
/* w4 is tested against 4 and reduced by 4: presumably a remaining-block count (branches not shown - confirm) */
374 cmp w4, #4
377 sub w4, w4, #4
/* 64-byte path: v0..v3 keep the raw input; v4..v7 get byte-swapped copies to work on */
379 ld1 {v0.16b-v3.16b}, [x2], #64
381 rev32 v4.16b, v0.16b
382 rev32 v5.16b, v1.16b
383 rev32 v6.16b, v2.16b
384 rev32 v7.16b, v3.16b
390 eor v4.16b, v4.16b, RIV.16b
391 eor v5.16b, v5.16b, v0.16b
392 eor v6.16b, v6.16b, v1.16b
393 eor v7.16b, v7.16b, v2.16b
395 mov RIV.16b, v3.16b
397 st1 {v4.16b-v7.16b}, [x1], #64
/* 16-bytes-at-a-time path, up to three blocks (conditions between the steps not shown) */
403 ld1 {v0.16b}, [x2], #16
405 ld1 {v1.16b}, [x2], #16
407 ld1 {v2.16b}, [x2], #16
410 rev32 v4.16b, v0.16b
411 rev32 v5.16b, v1.16b
412 rev32 v6.16b, v2.16b
/* per block: XOR with the previous input (or RIV), make this input the new RIV, store */
419 eor v4.16b, v4.16b, RIV.16b
420 mov RIV.16b, v0.16b
421 st1 {v4.16b}, [x1], #16
424 eor v5.16b, v5.16b, v0.16b
425 mov RIV.16b, v1.16b
426 st1 {v5.16b}, [x1], #16
429 eor v6.16b, v6.16b, v1.16b
430 mov RIV.16b, v2.16b
431 st1 {v6.16b}, [x1], #16
/* write the final chaining value back to [x3] */
435 st1 {RIV.16b}, [x3]
463 rev64 vctr.16b, vctr.16b; /* reverse the byte order inside each 64-bit half of vctr (rest of this macro is not in this excerpt) */ \
/*
 * Skeleton of a routine that XORs v0..v7 with data read from [x2] and stores
 * the result to [x1]. How v0..v7 are produced is not in this excerpt.
 * NOTE(review): presumably v0..v7 hold encrypted counter blocks (CTR mode) - confirm.
 */
471 inc_le128(v4) /* +4 */
/* 128-byte path: load input into RTMP0..7, XOR lane-for-lane into v0..v7, store */
480 ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
481 ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64
483 eor v0.16b, v0.16b, RTMP0.16b
484 eor v1.16b, v1.16b, RTMP1.16b
485 eor v2.16b, v2.16b, RTMP2.16b
486 eor v3.16b, v3.16b, RTMP3.16b
487 eor v4.16b, v4.16b, RTMP4.16b
488 eor v5.16b, v5.16b, RTMP5.16b
489 eor v6.16b, v6.16b, RTMP6.16b
490 eor v7.16b, v7.16b, RTMP7.16b
492 st1 {v0.16b-v3.16b}, [x1], #64
493 st1 {v4.16b-v7.16b}, [x1], #64
/* w4 is tested against 4 and reduced by 4: presumably a remaining-block count (branches not shown - confirm) */
500 cmp w4, #4
503 sub w4, w4, #4
/* 64-byte path: input goes into v4..v7 and is XORed into v0..v3 */
511 ld1 {v4.16b-v7.16b}, [x2], #64
517 eor v0.16b, v0.16b, v4.16b
518 eor v1.16b, v1.16b, v5.16b
519 eor v2.16b, v2.16b, v6.16b
520 eor v3.16b, v3.16b, v7.16b
522 st1 {v0.16b-v3.16b}, [x1], #64
/* 16-bytes-at-a-time path, up to three blocks (conditions between the steps not shown) */
528 ld1 {v4.16b}, [x2], #16
533 ld1 {v5.16b}, [x2], #16
538 ld1 {v6.16b}, [x2], #16
548 eor v0.16b, v0.16b, v4.16b
549 st1 {v0.16b}, [x1], #16
552 eor v1.16b, v1.16b, v5.16b
553 st1 {v1.16b}, [x1], #16
556 eor v2.16b, v2.16b, v6.16b
557 st1 {v2.16b}, [x1], #16