Lines Matching +full:1 +full:- +full:d

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions
14 #include "sm4-ce-asm.h"
16 .arch armv8-a+crypto
18 .irp b, 0, 1, 2, 3, 24, 25, 26, 27, 28, 29, 30, 31
37 * output: r0:r1 (low 128-bits in r0, high in r1)
41 pmull r0.1q, m0.1d, m1.1d; \
42 pmull T1.1q, m0.1d, T0.1d; \
43 pmull2 T0.1q, m0.2d, T0.2d; \
44 pmull2 r1.1q, m0.2d, m1.2d; \
59 pmull r0.1q, m0.1d, m1.1d; \
60 pmull r2.1q, m2.1d, m3.1d; \
61 pmull r4.1q, m4.1d, m5.1d; \
62 pmull r6.1q, m6.1d, m7.1d; \
63 pmull T1.1q, m0.1d, T0.1d; \
64 pmull T3.1q, m2.1d, T2.1d; \
65 pmull T5.1q, m4.1d, T4.1d; \
66 pmull T7.1q, m6.1d, T6.1d; \
67 pmull2 T0.1q, m0.2d, T0.2d; \
68 pmull2 T2.1q, m2.2d, T2.2d; \
69 pmull2 T4.1q, m4.2d, T4.2d; \
70 pmull2 T6.1q, m6.2d, T6.2d; \
71 pmull2 r1.1q, m0.2d, m1.2d; \
72 pmull2 r3.1q, m2.2d, m3.2d; \
73 pmull2 r5.1q, m4.2d, m5.2d; \
74 pmull2 r7.1q, m6.2d, m7.2d; \
97 * input: r0:r1 (low 128-bits in r0, high in r1)
101 pmull2 T0.1q, r1.2d, rconst.2d; \
106 pmull T0.1q, r1.1d, rconst.1d; \
113 pmull r0.1q, m0.1d, m1.1d; \
115 pmull T1.1q, m0.1d, T0.1d; \
117 pmull2 T0.1q, m0.2d, T0.2d; \
119 pmull2 r1.1q, m0.2d, m1.2d; \
146 pmull r0.1q, m0.1d, m1.1d; \
147 pmull r2.1q, m2.1d, m3.1d; \
148 pmull r4.1q, m4.1d, m5.1d; \
152 pmull T1.1q, m0.1d, T0.1d; \
153 pmull T3.1q, m2.1d, T2.1d; \
154 pmull T5.1q, m4.1d, T4.1d; \
158 pmull2 T0.1q, m0.2d, T0.2d; \
159 pmull2 T2.1q, m2.2d, T2.2d; \
160 pmull2 T4.1q, m4.2d, T4.2d; \
164 pmull2 r1.1q, m0.2d, m1.2d; \
165 pmull2 r3.1q, m2.2d, m3.2d; \
166 pmull2 r5.1q, m4.2d, m5.2d; \
209 mov vctr.d[1], x9; \
210 add w6, w9, #1; \
211 mov vctr.d[0], x8; \
218 /* the lower 32 bits of the initial IV are always be32(1) */ \
221 mov vctr0.d[0], x8; \
222 mov vctr0.d[1], x9; \
236 /* can be the same as input v0-v3 */
270 ld1r {RRCONST.2d}, [x2]
278 /* H ^ 1 */
293 st1 {RH1.16b-RH4.16b}, [x1]
306 ld1 {RH1.16b-RH4.16b}, [x0]
312 ld1r {RRCONST.2d}, [x4]
322 ld1 {v0.16b-v3.16b}, [x2], #64
333 * (in3) * H^1 => rr6:rr7
355 sub w3, w3, #1
368 st1 {RHASH.2d}, [x1]
391 ld1 {RH1.16b-RH4.16b}, [x6]
397 ld1r {RRCONST.2d}, [x6]
411 inc32_le128(v1) /* +1 */
415 ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
423 st1 {v0.16b-v3.16b}, [x1], #64
436 * (in3) * H^1 => rr6:rr7
495 ldrb w0, [x2], #1 /* get 1 byte from input */
498 strb w6, [x1], #1 /* store out byte */
501 ext v0.16b, v0.16b, v0.16b, #1
505 subs w4, w4, #1
534 st1 {RHASH.2d}, [x5]
563 /* v0-v2 for building CTRs, v3-v5 for saving inputs */
602 ld1 {RH1.16b-RH3.16b}, [x6]
608 ld1r {RRCONST.2d}, [x6]
620 ld1 {v3.16b-v5.16b}, [x2], #(3 * 16)
625 inc32_le128(v1) /* +1 */
644 st1 {v0.16b-v2.16b}, [x1], #(3 * 16)
687 ldrb w0, [x2], #1 /* get 1 byte from input */
690 strb w6, [x1], #1 /* store out byte */
693 ext v0.16b, v0.16b, v0.16b, #1
697 subs w4, w4, #1
726 st1 {RHASH.2d}, [x5]