Lines Matching +full:1 +full:b
64 pmull \rd\().1q, \rn\().1d, \rm\().1d
68 pmull2 \rd\().1q, \rn\().2d, \rm\().2d
72 ext t3.8b, \ad\().8b, \ad\().8b, #1 // A1
73 ext t5.8b, \ad\().8b, \ad\().8b, #2 // A2
74 ext t7.8b, \ad\().8b, \ad\().8b, #3 // A3
80 tbl t3.16b, {\ad\().16b}, perm1.16b // A1
81 tbl t5.16b, {\ad\().16b}, perm2.16b // A2
82 tbl t7.16b, {\ad\().16b}, perm3.16b // A3
88 __pmull_p8_tail \rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
92 __pmull_p8_tail \rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
96 __pmull_p8_tail \rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
100 pmull\t t3.8h, t3.\nb, \bd // F = A1*B
102 pmull\t t5.8h, t5.\nb, \bd // H = A2*B
104 pmull\t t7.8h, t7.\nb, \bd // J = A3*B
107 pmull\t \rq\().8h, \ad, \bd // D = A*B
109 eor t3.16b, t3.16b, t4.16b // L = E + F
110 eor t5.16b, t5.16b, t6.16b // M = G + H
111 eor t7.16b, t7.16b, t8.16b // N = I + J
120 eor t4.16b, t4.16b, t3.16b
121 and t3.16b, t3.16b, k32_48.16b
125 eor t6.16b, t6.16b, t7.16b
126 and t7.16b, t7.16b, k00_16.16b
128 eor t4.16b, t4.16b, t3.16b
129 eor t6.16b, t6.16b, t7.16b
136 ext t3.16b, t3.16b, t3.16b, #15
137 ext t5.16b, t5.16b, t5.16b, #14
138 ext t7.16b, t7.16b, t7.16b, #13
139 ext t9.16b, t9.16b, t9.16b, #12
141 eor t3.16b, t3.16b, t5.16b
142 eor t7.16b, t7.16b, t9.16b
143 eor \rq\().16b, \rq\().16b, t3.16b
144 eor \rq\().16b, \rq\().16b, t7.16b
153 eor SHASH2.16b, SHASH2.16b, T1.16b
157 eor HH34.16b, HH34.16b, T1.16b
159 movi MASK.16b, #0xe1
164 ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
165 eor SHASH2.16b, SHASH2.16b, SHASH.16b
175 movi T1.8b, #8
177 eor perm1.16b, perm1.16b, T1.16b
186 tbl sh1.16b, {SHASH.16b}, perm1.16b
187 tbl sh2.16b, {SHASH.16b}, perm2.16b
188 tbl sh3.16b, {SHASH.16b}, perm3.16b
189 tbl sh4.16b, {SHASH.16b}, T1.16b
190 ext ss1.8b, SHASH2.8b, SHASH2.8b, #1
191 ext ss2.8b, SHASH2.8b, SHASH2.8b, #2
192 ext ss3.8b, SHASH2.8b, SHASH2.8b, #3
193 ext ss4.8b, SHASH2.8b, SHASH2.8b, #4
201 pmull T2.1q, XL.1d, MASK.1d
202 eor XM.16b, XM.16b, T1.16b
204 mov XH.d[0], XM.d[1]
205 mov XM.d[1], XL.d[0]
207 eor XL.16b, XM.16b, T2.16b
208 ext T2.16b, XL.16b, XL.16b, #8
209 pmull XL.1q, XL.1d, MASK.1d
217 eor XM.16b, XM.16b, T1.16b
219 mov XL.d[1], XM.d[0]
220 mov XH.d[0], XM.d[1]
224 eor T2.16b, T2.16b, T1.16b
226 eor T2.16b, T2.16b, T1.16b
227 ext T1.16b, XL.16b, XH.16b, #8
228 eor T2.16b, T2.16b, T1.16b
230 mov XL.d[1], T2.d[0]
231 mov XH.d[0], T2.d[1]
233 ushr T2.2d, XL.2d, #1
234 eor XH.16b, XH.16b, XL.16b
235 eor XL.16b, XL.16b, T2.16b
237 ushr XL.2d, XL.2d, #1
250 b 3f
254 tbnz w0, #1, 2f // round multiple of 4
256 1: ld1 {XM3.16b-TT4.16b}, [x2], #64
260 rev64 T1.16b, XM3.16b
261 rev64 T2.16b, XH3.16b
262 rev64 TT4.16b, TT4.16b
263 rev64 TT3.16b, TT3.16b
265 ext IN1.16b, TT4.16b, TT4.16b, #8
266 ext XL3.16b, TT3.16b, TT3.16b, #8
268 eor TT4.16b, TT4.16b, IN1.16b
269 pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1
270 pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0
271 pmull XM2.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0)
273 eor TT3.16b, TT3.16b, XL3.16b
274 pmull2 XH3.1q, HH.2d, XL3.2d // a1 * b1
275 pmull XL3.1q, HH.1d, XL3.1d // a0 * b0
276 pmull2 XM3.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0)
278 ext IN1.16b, T2.16b, T2.16b, #8
279 eor XL2.16b, XL2.16b, XL3.16b
280 eor XH2.16b, XH2.16b, XH3.16b
281 eor XM2.16b, XM2.16b, XM3.16b
283 eor T2.16b, T2.16b, IN1.16b
284 pmull2 XH3.1q, HH3.2d, IN1.2d // a1 * b1
285 pmull XL3.1q, HH3.1d, IN1.1d // a0 * b0
286 pmull XM3.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0)
288 eor XL2.16b, XL2.16b, XL3.16b
289 eor XH2.16b, XH2.16b, XH3.16b
290 eor XM2.16b, XM2.16b, XM3.16b
292 ext IN1.16b, T1.16b, T1.16b, #8
293 ext TT3.16b, XL.16b, XL.16b, #8
294 eor XL.16b, XL.16b, IN1.16b
295 eor T1.16b, T1.16b, TT3.16b
297 pmull2 XH.1q, HH4.2d, XL.2d // a1 * b1
298 eor T1.16b, T1.16b, XL.16b
299 pmull XL.1q, HH4.1d, XL.1d // a0 * b0
300 pmull2 XM.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0)
302 eor XL.16b, XL.16b, XL2.16b
303 eor XH.16b, XH.16b, XH2.16b
304 eor XM.16b, XM.16b, XM2.16b
306 eor T2.16b, XL.16b, XH.16b
307 ext T1.16b, XL.16b, XH.16b, #8
308 eor XM.16b, XM.16b, T2.16b
312 eor T2.16b, T2.16b, XH.16b
313 eor XL.16b, XL.16b, T2.16b
316 b 1b
320 sub w0, w0, #1
323 CPU_LE( rev64 T1.16b, T1.16b )
325 ext T2.16b, XL.16b, XL.16b, #8
326 ext IN1.16b, T1.16b, T1.16b, #8
327 eor T1.16b, T1.16b, T2.16b
328 eor XL.16b, XL.16b, IN1.16b
331 eor T1.16b, T1.16b, XL.16b
335 4: eor T2.16b, XL.16b, XH.16b
336 ext T1.16b, XL.16b, XH.16b, #8
337 eor XM.16b, XM.16b, T2.16b
341 eor T2.16b, T2.16b, XH.16b
342 eor XL.16b, XL.16b, T2.16b
344 cbnz w0, 0b
396 aese \state\().16b, \key\().16b
397 aesmc \state\().16b, \state\().16b
421 aese \state\().16b, KL.16b
422 eor \state\().16b, \state\().16b, KM.16b
424 .subsection 1
432 tbz \rounds, #1, .Lout192_\@
433 b .Lout256_\@
439 frame_push 1
448 eor SHASH2.16b, SHASH2.16b, T1.16b
452 eor HH34.16b, HH34.16b, T1.16b
469 bmi 1f
470 ld1 {INP0.16b-INP3.16b}, [x2], #64
471 .subsection 1
477 * 1 byte | | | |x |
488 1: mov x15, #16
496 ld1 {T1.16b}, [x12]
509 ld1 {INP0.16b}, [x2], x14
510 ld1 {INP1.16b}, [x2], x15
511 ld1 {INP2.16b}, [x2], x16
512 ld1 {INP3.16b}, [x2]
513 tbl INP3.16b, {INP3.16b}, T1.16b
514 b 2f
524 st1 {INP0.16b-INP3.16b}, [x1], #64
525 .if \enc == 1
528 bne 0b
533 ld1 {INP3.16b}, [x10] // load lengths[]
534 mov w9, #1
537 mov w11, #(0x1 << 24) // BE '1U'
538 ld1 {KS0.16b}, [x5]
543 ext XL.16b, XL.16b, XL.16b, #8
544 rev64 XL.16b, XL.16b
545 eor XL.16b, XL.16b, KS0.16b
547 .if \enc == 1
548 st1 {XL.16b}, [x10] // store tag
552 ld1 {KS0.16b}, [x11] // load supplied tag
554 ld1 {KS1.16b}, [x17] // load permute vector
556 cmeq XL.16b, XL.16b, KS0.16b // compare tags
557 mvn XL.16b, XL.16b // -1 for fail, 0 for pass
558 tbl XL.16b, {XL.16b}, KS1.16b // keep authsize bytes only
559 sminv b0, XL.16b // signed minimum across XL
560 smov w0, v0.b[0] // return b0
570 b 4b
572 6: ld1 {T1.16b-T2.16b}, [x17], #32 // permute vectors
573 sub x17, x17, x19, lsl #1
575 cmp w9, #1
577 .subsection 1
578 7: ld1 {INP2.16b}, [x1]
579 tbx INP2.16b, {INP3.16b}, T1.16b
580 mov INP3.16b, INP2.16b
581 b 8f
584 st1 {INP0.16b}, [x1], x14
585 st1 {INP1.16b}, [x1], x15
586 st1 {INP2.16b}, [x1], x16
587 tbl INP3.16b, {INP3.16b}, T1.16b
588 tbx INP3.16b, {INP2.16b}, T2.16b
589 8: st1 {INP3.16b}, [x1]
591 .if \enc == 1
592 ld1 {T1.16b}, [x17]
593 tbl INP3.16b, {INP3.16b}, T1.16b // clear non-data bits
596 b 3b
605 pmull_gcm_do_crypt 1
618 movi MASK.16b, #0xe1
621 rev64 T1.16b, INP0.16b
622 rev64 T2.16b, INP1.16b
623 rev64 TT3.16b, INP2.16b
624 rev64 TT4.16b, INP3.16b
626 ext XL.16b, XL.16b, XL.16b, #8
629 .subsection 1
630 0: movi XH2.16b, #0
631 movi XM2.16b, #0
632 movi XL2.16b, #0
634 tbz w9, #0, 1f // 2 blocks?
635 tbz w9, #1, 2f // 1 block?
637 eor T2.16b, T2.16b, XL.16b
638 ext T1.16b, T2.16b, T2.16b, #8
639 b .Lgh3
641 1: eor TT3.16b, TT3.16b, XL.16b
642 ext T2.16b, TT3.16b, TT3.16b, #8
643 b .Lgh2
645 2: eor TT4.16b, TT4.16b, XL.16b
646 ext IN1.16b, TT4.16b, TT4.16b, #8
647 b .Lgh1
650 eor T1.16b, T1.16b, XL.16b
651 ext IN1.16b, T1.16b, T1.16b, #8
653 pmull2 XH2.1q, HH4.2d, IN1.2d // a1 * b1
654 eor T1.16b, T1.16b, IN1.16b
655 pmull XL2.1q, HH4.1d, IN1.1d // a0 * b0
656 pmull2 XM2.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0)
658 ext T1.16b, T2.16b, T2.16b, #8
659 .Lgh3: eor T2.16b, T2.16b, T1.16b
660 pmull2 XH.1q, HH3.2d, T1.2d // a1 * b1
661 pmull XL.1q, HH3.1d, T1.1d // a0 * b0
662 pmull XM.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0)
664 eor XH2.16b, XH2.16b, XH.16b
665 eor XL2.16b, XL2.16b, XL.16b
666 eor XM2.16b, XM2.16b, XM.16b
668 ext T2.16b, TT3.16b, TT3.16b, #8
669 .Lgh2: eor TT3.16b, TT3.16b, T2.16b
670 pmull2 XH.1q, HH.2d, T2.2d // a1 * b1
671 pmull XL.1q, HH.1d, T2.1d // a0 * b0
672 pmull2 XM.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0)
674 eor XH2.16b, XH2.16b, XH.16b
675 eor XL2.16b, XL2.16b, XL.16b
676 eor XM2.16b, XM2.16b, XM.16b
678 ext IN1.16b, TT4.16b, TT4.16b, #8
679 .Lgh1: eor TT4.16b, TT4.16b, IN1.16b
680 pmull XL.1q, SHASH.1d, IN1.1d // a0 * b0
681 pmull2 XH.1q, SHASH.2d, IN1.2d // a1 * b1
682 pmull XM.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0)
684 eor XH.16b, XH.16b, XH2.16b
685 eor XL.16b, XL.16b, XL2.16b
686 eor XM.16b, XM.16b, XM2.16b
688 eor T2.16b, XL.16b, XH.16b
689 ext T1.16b, XL.16b, XH.16b, #8
690 eor XM.16b, XM.16b, T2.16b
694 eor T2.16b, T2.16b, XH.16b
695 eor XL.16b, XL.16b, T2.16b
701 ld1 {KS0.16b}, [x5] // load upper counter
705 sub w13, w8, #1
710 mov KS1.16b, KS0.16b
711 mov KS2.16b, KS0.16b
712 mov KS3.16b, KS0.16b
725 .subsection 1
735 tbz x7, #1, .Lout192
736 b .Lout256
747 aese KS0.16b, KL.16b
748 aese KS1.16b, KL.16b
749 aese KS2.16b, KL.16b
750 aese KS3.16b, KL.16b
752 eor KS0.16b, KS0.16b, KM.16b
753 eor KS1.16b, KS1.16b, KM.16b
754 eor KS2.16b, KS2.16b, KM.16b
755 eor KS3.16b, KS3.16b, KM.16b
757 eor INP0.16b, INP0.16b, KS0.16b
758 eor INP1.16b, INP1.16b, KS1.16b
759 eor INP2.16b, INP2.16b, KS2.16b
760 eor INP3.16b, INP3.16b, KS3.16b