Lines Matching +full:ext +full:- +full:32 +full:k

1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * sm3-neon-core.S - SM3 secure hash using NEON instructions
28 #define STACK_W_SIZE (32 * 2 * 3)
92 ror o, a, #(32 - n);
121 #define R(i, a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \ argument
128 add k, k, e; \
133 add k, k, t0; \
137 rolw(k, k, 7); /* rol (t0 + e + t), 7) => k */ \
139 add h, h, k; /* h + w1 + k => h */ \
142 eor t0, t0, k; /* k ^ t0 => t0 */ \
148 eor h, t3, t3, ror #(32-9); \
153 eor h, h, t3, ror #(32-17); /* P0(t3) => h */
155 #define R1(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \ argument
156 R(1, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param)
158 #define R2(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \ argument
159 R(2, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param)
166 /* Byte-swapped input address. */
172 (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))
174 /* Rounds 1-12, byte-swapped input block addresses. */
175 #define IW_W1_ADDR(round, widx) IW_W_ADDR(round, widx, 32)
178 /* Rounds 1-12, expanded input block addresses. */
183 * Interleaving within round function needed for in-order CPUs. */
216 ext W0.16b, XTMP0.16b, XTMP0.16b, #8; /* W0: xx, w0, xx, xx */
223 ext W1.16b, XTMP0.16b, XTMP0.16b, #4; /* W1: xx, w3, w2, w1 */
225 ext W3.16b, XTMP1.16b, XTMP2.16b, #12; /* W3: xx, w9, w8, w7 */
227 ext W4.16b, XTMP2.16b, XTMP3.16b, #8; /* W4: xx, w12, w11, w10 */
233 ext W5.16b, XTMP3.16b, XTMP3.16b, #4; /* W5: xx, w15, w14, w13 */
243 * Interleaving within round function needed for in-order CPUs. */
245 /* Load (w[i - 16]) => XTMP0 */ \
246 /* Load (w[i - 13]) => XTMP5 */ \
247 ext XTMP0.16b, w0.16b, w0.16b, #12; /* XTMP0: w0, xx, xx, xx */
249 ext XTMP5.16b, w1.16b, w1.16b, #12;
251 ext XTMP0.16b, XTMP0.16b, w1.16b, #12; /* XTMP0: xx, w2, w1, w0 */
253 ext XTMP5.16b, XTMP5.16b, w2.16b, #12;
255 /* w[i - 9] == w3 */ \
259 /* w[i - 3] == w5 */ \
267 sri XTMP2.4s, w5.4s, #(32-15);
269 sri XTMP1.4s, XTMP5.4s, #(32-7);
273 /* w[i - 6] == W4 */ \
284 sri XTMP3.4s, XTMP0.4s, #(32-15);
286 sri XTMP4.4s, XTMP0.4s, #(32-23);
290 /* Load (w[i - 3]) => XTMP2 */ \
291 ext XTMP2.16b, w4.16b, w4.16b, #12;
295 ext XTMP2.16b, XTMP2.16b, w5.16b, #12;
301 st1 {XTMP2.16b-XTMP3.16b}, [addr0];
348 * Transform blocks*64 bytes (blocks*16 32-bit words) at 'src'.
361 stp x28, x29, [sp, #-16]!
362 stp x19, x20, [sp, #-16]!
363 stp x21, x22, [sp, #-16]!
364 stp x23, x24, [sp, #-16]!
365 stp x25, x26, [sp, #-16]!
400 /* Transform 0-3 */
406 /* Transform 4-7 + Precalc 12-14 */
412 /* Transform 8-11 + Precalc 12-17 */
418 /* Transform 12-14 + Precalc 18-20 */
423 /* Transform 15-17 + Precalc 21-23 */
428 /* Transform 18-20 + Precalc 24-26 */
433 /* Transform 21-23 + Precalc 27-29 */
438 /* Transform 24-26 + Precalc 30-32 */
443 /* Transform 27-29 + Precalc 33-35 */
448 /* Transform 30-32 + Precalc 36-38 */
451 R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 32, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 36)
453 /* Transform 33-35 + Precalc 39-41 */
458 /* Transform 36-38 + Precalc 42-44 */
463 /* Transform 39-41 + Precalc 45-47 */
468 /* Transform 42-44 + Precalc 48-50 */
473 /* Transform 45-47 + Precalc 51-53 */
478 /* Transform 48-50 + Precalc 54-56 */
483 /* Transform 51-53 + Precalc 57-59 */
488 /* Transform 54-56 + Precalc 60-62 */
493 /* Transform 57-59 + Precalc 63 */
503 /* Transform 61-63 + Preload next block */
528 /* Transform 61-63 */
566 st1 {W0.16b-W3.16b}, [addr0], #64
567 st1 {W0.16b-W3.16b}, [addr0], #64
568 st1 {W0.16b-W3.16b}, [addr0]