Lines Matching +full:4 +full:-
1 // SPDX-License-Identifier: GPL-2.0
27 * number of blocks of output with nonce 0, taking an input key and 8-bytes
30 * This implementation avoids d8-d15 because they are callee-save in user
39 * x1: 32-byte key input
40 * x2: 8-byte counter input/output
41 * x3: number of 64-byte blocks to write to output
45 /* copy0 = "expand 32-byte k" */
52 ld1 { copy1.4s, copy2.4s }, [x1]
57 uzp1 one_v.4s, one_v.4s, one_v.4s
69 * Permute one 64-byte block where the state matrix is stored in the four NEON
70 * registers state0-state3. It performs matrix operations on four words in parallel,
76 add state0.4s, state0.4s, state1.4s
81 add state2.4s, state2.4s, state3.4s
83 shl state1.4s, tmp.4s, #12
84 sri state1.4s, tmp.4s, #20
87 add state0.4s, state0.4s, state1.4s
89 shl state3.4s, tmp.4s, #8
90 sri state3.4s, tmp.4s, #24
93 add state2.4s, state2.4s, state3.4s
95 shl state1.4s, tmp.4s, #7
96 sri state1.4s, tmp.4s, #25
99 ext state1.16b, state1.16b, state1.16b, #4
106 add state0.4s, state0.4s, state1.4s
111 add state2.4s, state2.4s, state3.4s
113 shl state1.4s, tmp.4s, #12
114 sri state1.4s, tmp.4s, #20
117 add state0.4s, state0.4s, state1.4s
119 shl state3.4s, tmp.4s, #8
120 sri state3.4s, tmp.4s, #24
123 add state2.4s, state2.4s, state3.4s
125 shl state1.4s, tmp.4s, #7
126 sri state1.4s, tmp.4s, #25
133 ext state3.16b, state3.16b, state3.16b, #4
139 add state0.4s, state0.4s, copy0.4s
141 add state1.4s, state1.4s, copy1.4s
143 add state2.4s, state2.4s, copy2.4s
145 add state3.4s, state3.4s, copy3.4s
146 st1 { state0.16b - state3.16b }, [x0]
154 /* output += 64, --nblocks */