Lines Matching +full:4 +full:- +full:16
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly using vector/VSX/Scalar
11 # - 26 bits limbs
12 # - Handle multiple 64 byte blocks.
14 # Block size 16 bytes
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
28 # setup r^4, r^3, r^2, r vectors
29 # vs [r^1, r^3, r^2, r^4]
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
69 li 16, \OFFSET
70 stvx \VRS, 16, \FRAME
74 li 16, \OFFSET
75 stxvx \VSX, 16, \FRAME
83 li 16, \OFFSET
84 lvx \VRS, 16, \FRAME
88 li 16, \OFFSET
89 lxvx \VSX, 16, \FRAME
94 std 0, 16(1)
95 stdu 1,-752(1)
99 SAVE_GPR 16, 128, 1
118 SAVE_VRS 21, 16, 9
132 SAVE_VSX 16, 224, 9
153 RESTORE_VRS 21, 16, 9
167 RESTORE_VSX 16, 224, 9
186 RESTORE_GPR 16, 128, 1
204 ld 0, 16(1)
213 # p[4] = a0*r4 + a1*r3 + a2*r2 + a3*r1 + a4*r0 ;
215 # [r^2, r^3, r^1, r^4]
220 vmulouw 14, 4, 26
225 vmulouw 15, 4, 27
238 vmulouw 16, 4, 28
241 vaddudm 16, 16, 10
242 vaddudm 16, 16, 11
245 vaddudm 16, 16, 12
246 vaddudm 16, 16, 13 # x2
247 vmulouw 17, 4, 29
256 vmulouw 18, 4, 30
268 vmuleuw 9, 4, 26
279 vmuleuw 9, 4, 27
290 vmuleuw 9, 4, 28
295 vaddudm 16, 16, 9
296 vaddudm 16, 16, 10
297 vaddudm 16, 16, 11
298 vaddudm 16, 16, 12
299 vaddudm 16, 16, 13 # x2
301 vmuleuw 9, 4, 29
312 vmuleuw 9, 4, 30
327 # setup r^4, r^3, r^2, r vectors
328 # [r, r^3, r^2, r^4]
356 # [r, r^3, r^2, r^4]
358 vmr 4, 26
384 bl do_mul # r^4 r^3
385 vmrgow 26, 26, 4
400 # r^2 r^4
405 xxlor 4, 62, 62
446 vand 4, 14, 25
454 vaddudm 4, 4, 12
456 vaddudm 6, 16, 11
460 vaddudm 4, 4, 10
461 vsrd 10, 4, 31
466 vand 4, 4, 25
482 li 14, 16
486 lvx 25, 0, 10 # v25 - mask
502 extrdi 16, 9, 12, 0
504 insrdi 16, 10, 14, 38
507 mtvsrdd 60, 0, 16
515 vmulouw 0, 27, 4 # v0 = rr0
516 vmulouw 1, 28, 4 # v1 = rr1
517 vmulouw 2, 29, 4 # v2 = rr2
518 vmulouw 3, 30, 4 # v3 = rr3
545 ld 19, 16(3)
550 extrdi 16, 9, 12, 0
552 insrdi 16, 10, 14, 38
555 mtvsrdd 38, 0, 16
562 add 20, 4, 21
566 addi 17, 20, 16
575 vand 16, 15, 25
576 vsld 12, 16, 13
584 vaddudm 20, 4, 9
591 addi 17, 17, 16
593 addi 17, 17, 16
602 vand 16, 15, 25
603 vsld 12, 16, 13
611 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
612 vmrgow 4, 9, 20
619 addi 5, 5, -64 # len -= 64
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
648 vand 4, 14, 25
656 vaddudm 4, 4, 12
658 vaddudm 6, 16, 11
662 vaddudm 4, 4, 10
663 vsrd 10, 4, 31
668 vand 4, 4, 25
673 add 20, 4, 21
677 addi 17, 20, 16
681 addi 17, 17, 16
683 addi 17, 17, 16
699 vand 16, 15, 25
700 vsld 23, 16, 13
703 vand 16, 18, 25
704 vsld 12, 16, 13
715 vaddudm 4, 4, 20
721 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
722 vmrgow 4, 9, 4
729 addi 5, 5, -64 # len -= 64
739 xxlor 62, 4, 4
752 vaddudm 4, 14, 9
757 vaddudm 6, 16, 11
768 vsrd 10, 4, 31
771 vand 4, 4, 25
779 vaddudm 4, 4, 12
785 vaddudm 4, 4, 10
786 vsrd 10, 4, 31
791 vand 4, 4, 25
804 vor 20, 4, 5
813 mfvsrld 16, 40 # save last 2 bytes
819 srdi 16, 16, 24
823 stw 16, 16(3)
857 add 19, 21, 10 # s1: r19 - (r1 >> 2) *5
923 # - no highbit if final leftover block (highbit = 0)
930 std 0, 16(1)
931 stdu 1,-400(1)
935 SAVE_GPR 16, 128, 1
957 add 11, 25, 4
963 lwz 29, 16(3)
965 li 30, 16
977 addi 11, 11, 16
995 stw 29, 16(3)
1001 RESTORE_GPR 16, 128, 1
1019 ld 0, 16(1)
1036 ld 12, 16(3)
1039 # h + 5 + (-p)
1054 ld 6, 0(4)
1055 ld 7, 8(4)