Lines Matching +full:3 +full:b

47 	eor		v3.16b, v3.16b, v0.16b
52 eor v4.16b, v1.16b, v2.16b
58 eor v3.16b, v3.16b, v0.16b
59 tbl v3.16b, {v3.16b}, v12.16b
63 eor v4.16b, v1.16b, v2.16b
67 // x1 = shuffle32(x1, MASK(0, 3, 2, 1))
68 ext v1.16b, v1.16b, v1.16b, #4
69 // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
70 ext v2.16b, v2.16b, v2.16b, #8
71 // x3 = shuffle32(x3, MASK(2, 1, 0, 3))
72 ext v3.16b, v3.16b, v3.16b, #12
76 eor v3.16b, v3.16b, v0.16b
81 eor v4.16b, v1.16b, v2.16b
87 eor v3.16b, v3.16b, v0.16b
88 tbl v3.16b, {v3.16b}, v12.16b
92 eor v4.16b, v1.16b, v2.16b
96 // x1 = shuffle32(x1, MASK(2, 1, 0, 3))
97 ext v1.16b, v1.16b, v1.16b, #12
98 // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
99 ext v2.16b, v2.16b, v2.16b, #8
100 // x3 = shuffle32(x3, MASK(0, 3, 2, 1))
101 ext v3.16b, v3.16b, v3.16b, #4
104 b.ne .Ldoubleround
118 // x0..3 = s0..3
124 ld1 {v4.16b-v7.16b}, [x2]
128 eor v0.16b, v0.16b, v4.16b
132 eor v1.16b, v1.16b, v5.16b
136 eor v2.16b, v2.16b, v6.16b
140 eor v3.16b, v3.16b, v7.16b
142 st1 {v0.16b-v3.16b}, [x1]
213 // x0..15[0-3] = s0..3[0..3]
254 eor v12.16b, v12.16b, v0.16b
256 eor v13.16b, v13.16b, v1.16b
258 eor v14.16b, v14.16b, v2.16b
260 eor v15.16b, v15.16b, v3.16b
285 eor v16.16b, v4.16b, v8.16b
287 eor v17.16b, v5.16b, v9.16b
289 eor v18.16b, v6.16b, v10.16b
291 eor v19.16b, v7.16b, v11.16b
321 eor v12.16b, v12.16b, v0.16b
323 eor v13.16b, v13.16b, v1.16b
325 eor v14.16b, v14.16b, v2.16b
327 eor v15.16b, v15.16b, v3.16b
330 tbl v12.16b, {v12.16b}, v31.16b
332 tbl v13.16b, {v13.16b}, v31.16b
334 tbl v14.16b, {v14.16b}, v31.16b
336 tbl v15.16b, {v15.16b}, v31.16b
352 eor v16.16b, v4.16b, v8.16b
354 eor v17.16b, v5.16b, v9.16b
356 eor v18.16b, v6.16b, v10.16b
358 eor v19.16b, v7.16b, v11.16b
388 eor v15.16b, v15.16b, v0.16b
390 eor v12.16b, v12.16b, v1.16b
392 eor v13.16b, v13.16b, v2.16b
394 eor v14.16b, v14.16b, v3.16b
419 eor v16.16b, v5.16b, v10.16b
421 eor v17.16b, v6.16b, v11.16b
423 eor v18.16b, v7.16b, v8.16b
425 eor v19.16b, v4.16b, v9.16b
455 eor v15.16b, v15.16b, v0.16b
457 eor v12.16b, v12.16b, v1.16b
459 eor v13.16b, v13.16b, v2.16b
461 eor v14.16b, v14.16b, v3.16b
464 tbl v15.16b, {v15.16b}, v31.16b
466 tbl v12.16b, {v12.16b}, v31.16b
468 tbl v13.16b, {v13.16b}, v31.16b
470 tbl v14.16b, {v14.16b}, v31.16b
486 eor v16.16b, v5.16b, v10.16b
488 eor v17.16b, v6.16b, v11.16b
490 eor v18.16b, v7.16b, v8.16b
492 eor v19.16b, v4.16b, v9.16b
510 b.ne .Ldoubleround4
515 // x12 += counter values 0-3
518 // x0[0-3] += s0[0]
519 // x1[0-3] += s0[1]
520 // x2[0-3] += s0[2]
521 // x3[0-3] += s0[3]
542 // x4[0-3] += s1[0]
543 // x5[0-3] += s1[1]
544 // x6[0-3] += s1[2]
545 // x7[0-3] += s1[3]
563 // x8[0-3] += s2[0]
564 // x9[0-3] += s2[1]
565 // x10[0-3] += s2[2]
566 // x11[0-3] += s2[3]
584 // x12[0-3] += s3[0]
585 // x13[0-3] += s3[1]
586 // x14[0-3] += s3[2]
587 // x15[0-3] += s3[3]
662 ld1 {v16.16b-v19.16b}, [x2], #64
673 ld1 {v20.16b-v23.16b}, [x2], #64
684 ld1 {v24.16b-v27.16b}, [x2], #64
695 ld1 {v28.16b-v31.16b}, [x2]
698 eor v16.16b, v16.16b, v0.16b
699 eor v17.16b, v17.16b, v1.16b
700 eor v18.16b, v18.16b, v2.16b
701 eor v19.16b, v19.16b, v3.16b
705 eor v20.16b, v20.16b, v4.16b
706 eor v21.16b, v21.16b, v5.16b
707 eor v22.16b, v22.16b, v6.16b
708 eor v23.16b, v23.16b, v7.16b
710 st1 {v16.16b-v19.16b}, [x1], #64
713 eor v24.16b, v24.16b, v8.16b
714 eor v25.16b, v25.16b, v9.16b
715 eor v26.16b, v26.16b, v10.16b
716 eor v27.16b, v27.16b, v11.16b
718 st1 {v20.16b-v23.16b}, [x1], #64
721 eor v28.16b, v28.16b, v12.16b
722 eor v29.16b, v29.16b, v13.16b
723 eor v30.16b, v30.16b, v14.16b
724 eor v31.16b, v31.16b, v15.16b
726 st1 {v24.16b-v27.16b}, [x1], #64
727 st1 {v28.16b-v31.16b}, [x1]
734 ld1 {v28.16b-v31.16b}, [x10]
736 tbl v28.16b, {v4.16b-v7.16b}, v28.16b
737 tbl v29.16b, {v4.16b-v7.16b}, v29.16b
738 tbl v30.16b, {v4.16b-v7.16b}, v30.16b
739 tbl v31.16b, {v4.16b-v7.16b}, v31.16b
741 0: eor v20.16b, v20.16b, v28.16b
742 eor v21.16b, v21.16b, v29.16b
743 eor v22.16b, v22.16b, v30.16b
744 eor v23.16b, v23.16b, v31.16b
745 st1 {v20.16b-v23.16b}, [x5] // overlapping stores
746 1: st1 {v16.16b-v19.16b}, [x1]
747 b .Lout
750 .Lt128: ld1 {v28.16b-v31.16b}, [x10]
753 tbl v28.16b, {v0.16b-v3.16b}, v28.16b
754 tbl v29.16b, {v0.16b-v3.16b}, v29.16b
755 tbl v30.16b, {v0.16b-v3.16b}, v30.16b
756 tbl v31.16b, {v0.16b-v3.16b}, v31.16b
757 ld1 {v16.16b-v19.16b}, [x1] // reload first output block
758 b 0b
762 ld1 {v4.16b-v7.16b}, [x10]
764 tbl v0.16b, {v8.16b-v11.16b}, v4.16b
765 tbl v1.16b, {v8.16b-v11.16b}, v5.16b
766 tbl v2.16b, {v8.16b-v11.16b}, v6.16b
767 tbl v3.16b, {v8.16b-v11.16b}, v7.16b
769 eor v28.16b, v28.16b, v0.16b
770 eor v29.16b, v29.16b, v1.16b
771 eor v30.16b, v30.16b, v2.16b
772 eor v31.16b, v31.16b, v3.16b
773 st1 {v28.16b-v31.16b}, [x6] // overlapping stores
774 2: st1 {v20.16b-v23.16b}, [x1]
775 b .Lout
778 .Lt320: cbz x7, 3f // exactly 256 bytes?
779 ld1 {v4.16b-v7.16b}, [x10]
781 tbl v0.16b, {v12.16b-v15.16b}, v4.16b
782 tbl v1.16b, {v12.16b-v15.16b}, v5.16b
783 tbl v2.16b, {v12.16b-v15.16b}, v6.16b
784 tbl v3.16b, {v12.16b-v15.16b}, v7.16b
786 eor v28.16b, v28.16b, v0.16b
787 eor v29.16b, v29.16b, v1.16b
788 eor v30.16b, v30.16b, v2.16b
789 eor v31.16b, v31.16b, v3.16b
790 st1 {v28.16b-v31.16b}, [x7] // overlapping stores
791 3: st1 {v24.16b-v27.16b}, [x1]
792 b .Lout
804 CTRINC: .word 1, 2, 3, 4