Lines Matching +full:- +full:b
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
13 #include "sm4-ce-asm.h"
15 .arch armv8-a+crypto
17 .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
19 .set .Lv\b\().4s, \b
45 * x0: 128-bit key
51 ld1 {v0.16b}, [x0];
52 rev32 v0.16b, v0.16b;
53 ld1 {v1.16b}, [x3];
55 ld1 {v24.16b-v27.16b}, [x4], #64;
56 ld1 {v28.16b-v31.16b}, [x4];
59 eor v0.16b, v0.16b, v1.16b;
71 ld1 {v24.16b}, [x5]
73 st1 {v0.16b-v3.16b}, [x1], #64;
74 st1 {v4.16b-v7.16b}, [x1];
76 tbl v16.16b, {v7.16b}, v24.16b
77 tbl v17.16b, {v6.16b}, v24.16b
78 tbl v18.16b, {v5.16b}, v24.16b
79 tbl v19.16b, {v4.16b}, v24.16b
80 tbl v20.16b, {v3.16b}, v24.16b
81 tbl v21.16b, {v2.16b}, v24.16b
82 tbl v22.16b, {v1.16b}, v24.16b
83 tbl v23.16b, {v0.16b}, v24.16b
85 st1 {v16.16b-v19.16b}, [x2], #64
86 st1 {v20.16b-v23.16b}, [x2]
100 ld1 {v0.16b}, [x2];
102 st1 {v0.16b}, [x1];
121 ld1 {v0.16b-v3.16b}, [x2], #64;
122 ld1 {v4.16b-v7.16b}, [x2], #64;
126 st1 {v0.16b-v3.16b}, [x1], #64;
127 st1 {v4.16b-v7.16b}, [x1], #64;
130 b .Lcrypt_loop_blk;
139 ld1 {v0.16b-v3.16b}, [x2], #64;
141 st1 {v0.16b-v3.16b}, [x1], #64;
148 ld1 {v0.16b}, [x2], #16;
150 st1 {v0.16b}, [x1], #16;
169 ld1 {RIV.16b}, [x3]
177 ld1 {v0.16b-v3.16b}, [x2], #64
179 eor v0.16b, v0.16b, RIV.16b
181 eor v1.16b, v1.16b, v0.16b
183 eor v2.16b, v2.16b, v1.16b
185 eor v3.16b, v3.16b, v2.16b
188 st1 {v0.16b-v3.16b}, [x1], #64
189 mov RIV.16b, v3.16b
192 b .Lcbc_enc_loop_4x
197 ld1 {v0.16b}, [x2], #16
199 eor RIV.16b, RIV.16b, v0.16b
202 st1 {RIV.16b}, [x1], #16
208 st1 {RIV.16b}, [x3]
224 ld1 {RIV.16b}, [x3]
230 ld1 {v0.16b-v3.16b}, [x2], #64
231 ld1 {v4.16b-v7.16b}, [x2], #64
233 rev32 v8.16b, v0.16b
234 rev32 v9.16b, v1.16b
235 rev32 v10.16b, v2.16b
236 rev32 v11.16b, v3.16b
237 rev32 v12.16b, v4.16b
238 rev32 v13.16b, v5.16b
239 rev32 v14.16b, v6.16b
240 rev32 v15.16b, v7.16b
244 eor v8.16b, v8.16b, RIV.16b
245 eor v9.16b, v9.16b, v0.16b
246 eor v10.16b, v10.16b, v1.16b
247 eor v11.16b, v11.16b, v2.16b
248 eor v12.16b, v12.16b, v3.16b
249 eor v13.16b, v13.16b, v4.16b
250 eor v14.16b, v14.16b, v5.16b
251 eor v15.16b, v15.16b, v6.16b
253 st1 {v8.16b-v11.16b}, [x1], #64
254 st1 {v12.16b-v15.16b}, [x1], #64
256 mov RIV.16b, v7.16b
259 b .Lcbc_dec_loop_8x
268 ld1 {v0.16b-v3.16b}, [x2], #64
270 rev32 v8.16b, v0.16b
271 rev32 v9.16b, v1.16b
272 rev32 v10.16b, v2.16b
273 rev32 v11.16b, v3.16b
277 eor v8.16b, v8.16b, RIV.16b
278 eor v9.16b, v9.16b, v0.16b
279 eor v10.16b, v10.16b, v1.16b
280 eor v11.16b, v11.16b, v2.16b
282 st1 {v8.16b-v11.16b}, [x1], #64
284 mov RIV.16b, v3.16b
291 ld1 {v0.16b}, [x2], #16
293 rev32 v8.16b, v0.16b
297 eor v8.16b, v8.16b, RIV.16b
298 st1 {v8.16b}, [x1], #16
300 mov RIV.16b, v0.16b
306 st1 {RIV.16b}, [x3]
325 ld1 {RIV.16b}, [x3]
327 ld1 {v0.16b}, [x2]
328 eor RIV.16b, RIV.16b, v0.16b
336 ld1 {v3.16b}, [x6]
337 ld1 {v4.16b}, [x7]
341 ld1 {v1.16b}, [x2]
343 /* create Cn from En-1 */
344 tbl v0.16b, {RIV.16b}, v3.16b
346 tbl v1.16b, {v1.16b}, v4.16b
348 eor v1.16b, v1.16b, RIV.16b
353 st1 {v0.16b}, [x5]
354 st1 {v1.16b}, [x1]
373 ld1 {RIV.16b}, [x3]
380 ld1 {v3.16b}, [x6]
381 ld1 {v4.16b}, [x7]
384 ld1 {v0.16b}, [x2], x5
385 ld1 {v1.16b}, [x2]
389 tbl v2.16b, {v0.16b}, v3.16b
390 eor v2.16b, v2.16b, v1.16b
392 /* overwrite the first Ln bytes with Cn to create En-1 */
393 tbx v0.16b, {v1.16b}, v4.16b
395 eor v0.16b, v0.16b, RIV.16b
399 st1 {v2.16b}, [x5]
400 st1 {v0.16b}, [x1]
428 rev64 vctr.16b, vctr.16b; \
441 ld1 {v8.16b-v11.16b}, [x2], #64
442 ld1 {v12.16b-v15.16b}, [x2], #64
446 eor v0.16b, v0.16b, v8.16b
447 eor v1.16b, v1.16b, v9.16b
448 eor v2.16b, v2.16b, v10.16b
449 eor v3.16b, v3.16b, v11.16b
450 eor v4.16b, v4.16b, v12.16b
451 eor v5.16b, v5.16b, v13.16b
452 eor v6.16b, v6.16b, v14.16b
453 eor v7.16b, v7.16b, v15.16b
455 st1 {v0.16b-v3.16b}, [x1], #64
456 st1 {v4.16b-v7.16b}, [x1], #64
459 b .Lctr_loop_8x
474 ld1 {v8.16b-v11.16b}, [x2], #64
478 eor v0.16b, v0.16b, v8.16b
479 eor v1.16b, v1.16b, v9.16b
480 eor v2.16b, v2.16b, v10.16b
481 eor v3.16b, v3.16b, v11.16b
483 st1 {v0.16b-v3.16b}, [x1], #64
493 ld1 {v8.16b}, [x2], #16
497 eor v0.16b, v0.16b, v8.16b
498 st1 {v0.16b}, [x1], #16
514 and RTMP.16b, RTMP.16b, RMASK.16b; \
516 ext RTMP.16b, RTMP.16b, RTMP.16b, #8; \
517 eor vt.16b, vt.16b, RTMP.16b;
529 ld1 {v8.16b}, [x3]
565 ld1 {v0.16b-v3.16b}, [x2], #64
566 ld1 {v4.16b-v7.16b}, [x2], #64
567 eor v0.16b, v0.16b, v8.16b
568 eor v1.16b, v1.16b, v9.16b
569 eor v2.16b, v2.16b, v10.16b
570 eor v3.16b, v3.16b, v11.16b
571 eor v4.16b, v4.16b, v12.16b
572 eor v5.16b, v5.16b, v13.16b
573 eor v6.16b, v6.16b, v14.16b
574 eor v7.16b, v7.16b, v15.16b
578 eor v0.16b, v0.16b, v8.16b
579 eor v1.16b, v1.16b, v9.16b
580 eor v2.16b, v2.16b, v10.16b
581 eor v3.16b, v3.16b, v11.16b
582 eor v4.16b, v4.16b, v12.16b
583 eor v5.16b, v5.16b, v13.16b
584 eor v6.16b, v6.16b, v14.16b
585 eor v7.16b, v7.16b, v15.16b
586 st1 {v0.16b-v3.16b}, [x1], #64
587 st1 {v4.16b-v7.16b}, [x1], #64
592 b .Lxts_enc_loop_8x
605 ld1 {v0.16b-v3.16b}, [x2], #64
606 eor v0.16b, v0.16b, v8.16b
607 eor v1.16b, v1.16b, v9.16b
608 eor v2.16b, v2.16b, v10.16b
609 eor v3.16b, v3.16b, v11.16b
613 eor v0.16b, v0.16b, v8.16b
614 eor v1.16b, v1.16b, v9.16b
615 eor v2.16b, v2.16b, v10.16b
616 eor v3.16b, v3.16b, v11.16b
617 st1 {v0.16b-v3.16b}, [x1], #64
626 ld1 {v0.16b}, [x2], #16
627 eor v0.16b, v0.16b, v8.16b
631 eor v0.16b, v0.16b, v8.16b
632 st1 {v0.16b}, [x1], #16
644 ld1 {v0.16b}, [x2]
645 eor v0.16b, v0.16b, v8.16b
647 eor v0.16b, v0.16b, v8.16b
654 ld1 {v3.16b}, [x6]
655 ld1 {v4.16b}, [x7]
659 ld1 {v1.16b}, [x2]
661 /* create Cn from En-1 */
662 tbl v2.16b, {v0.16b}, v3.16b
663 /* padding Pn with En-1 at the end */
664 tbx v0.16b, {v1.16b}, v4.16b
666 eor v0.16b, v0.16b, v9.16b
668 eor v0.16b, v0.16b, v9.16b
673 st1 {v2.16b}, [x5]
674 st1 {v0.16b}, [x1]
676 b .Lxts_enc_ret
680 st1 {v8.16b}, [x3]
696 ld1 {v8.16b}, [x3]
732 ld1 {v0.16b-v3.16b}, [x2], #64
733 ld1 {v4.16b-v7.16b}, [x2], #64
734 eor v0.16b, v0.16b, v8.16b
735 eor v1.16b, v1.16b, v9.16b
736 eor v2.16b, v2.16b, v10.16b
737 eor v3.16b, v3.16b, v11.16b
738 eor v4.16b, v4.16b, v12.16b
739 eor v5.16b, v5.16b, v13.16b
740 eor v6.16b, v6.16b, v14.16b
741 eor v7.16b, v7.16b, v15.16b
745 eor v0.16b, v0.16b, v8.16b
746 eor v1.16b, v1.16b, v9.16b
747 eor v2.16b, v2.16b, v10.16b
748 eor v3.16b, v3.16b, v11.16b
749 eor v4.16b, v4.16b, v12.16b
750 eor v5.16b, v5.16b, v13.16b
751 eor v6.16b, v6.16b, v14.16b
752 eor v7.16b, v7.16b, v15.16b
753 st1 {v0.16b-v3.16b}, [x1], #64
754 st1 {v4.16b-v7.16b}, [x1], #64
759 b .Lxts_dec_loop_8x
772 ld1 {v0.16b-v3.16b}, [x2], #64
773 eor v0.16b, v0.16b, v8.16b
774 eor v1.16b, v1.16b, v9.16b
775 eor v2.16b, v2.16b, v10.16b
776 eor v3.16b, v3.16b, v11.16b
780 eor v0.16b, v0.16b, v8.16b
781 eor v1.16b, v1.16b, v9.16b
782 eor v2.16b, v2.16b, v10.16b
783 eor v3.16b, v3.16b, v11.16b
784 st1 {v0.16b-v3.16b}, [x1], #64
793 ld1 {v0.16b}, [x2], #16
794 eor v0.16b, v0.16b, v8.16b
798 eor v0.16b, v0.16b, v8.16b
799 st1 {v0.16b}, [x1], #16
811 ld1 {v0.16b}, [x2]
812 eor v0.16b, v0.16b, v9.16b
814 eor v0.16b, v0.16b, v9.16b
821 ld1 {v3.16b}, [x6]
822 ld1 {v4.16b}, [x7]
826 ld1 {v1.16b}, [x2]
828 /* create Cn from En-1 */
829 tbl v2.16b, {v0.16b}, v3.16b
830 /* padding Pn with En-1 at the end */
831 tbx v0.16b, {v1.16b}, v4.16b
833 eor v0.16b, v0.16b, v8.16b
835 eor v0.16b, v0.16b, v8.16b
840 st1 {v2.16b}, [x5]
841 st1 {v0.16b}, [x1]
843 b .Lxts_dec_ret
847 st1 {v8.16b}, [x3]
865 ld1 {RMAC.16b}, [x1]
886 ld1 {v0.16b-v3.16b}, [x2], #64
888 eor RMAC.16b, RMAC.16b, v0.16b
890 eor RMAC.16b, RMAC.16b, v1.16b
892 eor RMAC.16b, RMAC.16b, v2.16b
894 eor RMAC.16b, RMAC.16b, v3.16b
898 b .Lmac_loop_4x
903 ld1 {v0.16b}, [x2], #16
905 eor RMAC.16b, RMAC.16b, v0.16b
914 ld1 {v0.16b}, [x2], #16
915 eor RMAC.16b, RMAC.16b, v0.16b
918 st1 {RMAC.16b}, [x1]