Lines Matching +full:1 +full:- +full:d

1 /* SPDX-License-Identifier: GPL-2.0-only */
5 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
61 .arch armv8-a+crypto
64 pmull \rd\().1q, \rn\().1d, \rm\().1d
68 pmull2 \rd\().1q, \rn\().2d, \rm\().2d
72 ext t3.8b, \ad\().8b, \ad\().8b, #1 // A1
107 pmull\t \rq\().8h, \ad, \bd // D = A*B
113 uzp1 t4.2d, t3.2d, t5.2d
114 uzp2 t3.2d, t3.2d, t5.2d
115 uzp1 t6.2d, t7.2d, t9.2d
116 uzp2 t7.2d, t7.2d, t9.2d
131 zip2 t5.2d, t4.2d, t3.2d
132 zip1 t3.2d, t4.2d, t3.2d
133 zip2 t9.2d, t6.2d, t7.2d
134 zip1 t7.2d, t6.2d, t7.2d
149 ld1 {HH.2d-HH4.2d}, [x8]
151 trn1 SHASH2.2d, SHASH.2d, HH.2d
152 trn2 T1.2d, SHASH.2d, HH.2d
155 trn1 HH34.2d, HH3.2d, HH4.2d
156 trn2 T1.2d, HH3.2d, HH4.2d
160 shl MASK.2d, MASK.2d, #57
169 movi k32_48.2d, #0xffffffff
171 ushr k00_16.2d, k32_48.2d, #32
176 dup perm1.2d, x5
178 ushr perm2.2d, perm1.2d, #8
179 ushr perm3.2d, perm1.2d, #16
180 ushr T1.2d, perm1.2d, #24
181 sli perm2.2d, perm1.2d, #56
182 sli perm3.2d, perm1.2d, #48
183 sli T1.2d, perm1.2d, #40
190 ext ss1.8b, SHASH2.8b, SHASH2.8b, #1
197 // PMULL (64x64->128) based reduction for CPUs that can do
201 pmull T2.1q, XL.1d, MASK.1d
204 mov XH.d[0], XM.d[1]
205 mov XM.d[1], XL.d[0]
209 pmull XL.1q, XL.1d, MASK.1d
214 // 64x64->128 PMULL instruction
219 mov XL.d[1], XM.d[0]
220 mov XH.d[0], XM.d[1]
222 shl T1.2d, XL.2d, #57
223 shl T2.2d, XL.2d, #62
225 shl T1.2d, XL.2d, #63
230 mov XL.d[1], T2.d[0]
231 mov XH.d[0], T2.d[1]
233 ushr T2.2d, XL.2d, #1
236 ushr T2.2d, T2.2d, #6
237 ushr XL.2d, XL.2d, #1
241 ld1 {SHASH.2d}, [x3]
242 ld1 {XL.2d}, [x1]
248 ld1 {T1.2d}, [x4]
254 tbnz w0, #1, 2f // round multiple of 4
256 1: ld1 {XM3.16b-TT4.16b}, [x2], #64
269 pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1
270 pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0
271 pmull XM2.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0)
274 pmull2 XH3.1q, HH.2d, XL3.2d // a1 * b1
275 pmull XL3.1q, HH.1d, XL3.1d // a0 * b0
276 pmull2 XM3.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0)
284 pmull2 XH3.1q, HH3.2d, IN1.2d // a1 * b1
285 pmull XL3.1q, HH3.1d, IN1.1d // a0 * b0
286 pmull XM3.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0)
297 pmull2 XH.1q, HH4.2d, XL.2d // a1 * b1
299 pmull XL.1q, HH4.1d, XL.1d // a0 * b0
300 pmull2 XM.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0)
316 b 1b
319 2: ld1 {T1.2d}, [x2], #16
320 sub w0, w0, #1
346 5: st1 {XL.2d}, [x1]
388 ld1 {K0.4s-K3.4s}, [\rk]
389 ld1 {K4.4s-K5.4s}, [\tmp]
392 ld1 {KK.4s-KM.4s}, [\tmp]
409 ld1 {K6.4s-K7.4s}, [\tmp], #32
424 .subsection 1
426 ld1 {K8.4s-K9.4s}, [\tmp], #32
429 ld1 {K6.4s-K7.4s}, [\tmp]
432 tbz \rounds, #1, .Lout192_\@
439 frame_push 1
443 ld1 {SHASH.2d}, [x3], #16
444 ld1 {HH.2d-HH4.2d}, [x3]
446 trn1 SHASH2.2d, SHASH.2d, HH.2d
447 trn2 T1.2d, SHASH.2d, HH.2d
450 trn1 HH34.2d, HH3.2d, HH4.2d
451 trn2 T1.2d, HH3.2d, HH4.2d
454 ld1 {XL.2d}, [x4]
469 bmi 1f
470 ld1 {INP0.16b-INP3.16b}, [x2], #64
471 .subsection 1
477 * 1 byte | | | |x |
488 1: mov x15, #16
500 cmp x0, #-16
502 cmp x0, #-32
504 cmp x0, #-48
524 st1 {INP0.16b-INP3.16b}, [x1], #64
525 .if \enc == 1
534 mov w9, #1
537 mov w11, #(0x1 << 24) // BE '1U'
547 .if \enc == 1
557 mvn XL.16b, XL.16b // -1 for fail, 0 for pass
569 st1 {XL.2d}, [x4]
572 6: ld1 {T1.16b-T2.16b}, [x17], #32 // permute vectors
573 sub x17, x17, x19, lsl #1
575 cmp w9, #1
577 .subsection 1
591 .if \enc == 1
593 tbl INP3.16b, {INP3.16b}, T1.16b // clear non-data bits
605 pmull_gcm_do_crypt 1
619 shl MASK.2d, MASK.2d, #57
629 .subsection 1
634 tbz w9, #0, 1f // 2 blocks?
635 tbz w9, #1, 2f // 1 block?
641 1: eor TT3.16b, TT3.16b, XL.16b
653 pmull2 XH2.1q, HH4.2d, IN1.2d // a1 * b1
655 pmull XL2.1q, HH4.1d, IN1.1d // a0 * b0
656 pmull2 XM2.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0)
660 pmull2 XH.1q, HH3.2d, T1.2d // a1 * b1
661 pmull XL.1q, HH3.1d, T1.1d // a0 * b0
662 pmull XM.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0)
670 pmull2 XH.1q, HH.2d, T2.2d // a1 * b1
671 pmull XL.1q, HH.1d, T2.1d // a0 * b0
672 pmull2 XM.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0)
680 pmull XL.1q, SHASH.1d, IN1.1d // a0 * b0
681 pmull2 XH.1q, SHASH.2d, IN1.2d // a1 * b1
682 pmull XM.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0)
705 sub w13, w8, #1
719 ld1 {K6.4s-K7.4s}, [x10], #32
725 .subsection 1
727 ld1 {K8.4s-K9.4s}, [x10], #32
731 ld1 {K6.4s-K7.4s}, [x10]
735 tbz x7, #1, .Lout192