Lines Matching +full:1 +full:- +full:16

1 // SPDX-License-Identifier: GPL-2.0
27 * number of blocks of output with nonce 0, taking an input key and 8-byte
30 * This implementation avoids d8-d15 because they are callee-save in user
39 * x1: 32-byte key input
40 * x2: 8-byte counter input/output
41 * x3: number of 64-byte blocks to write to output
45 /* copy0 = "expand 32-byte k" */
49 mov copy0.d[1], x9
56 movi one_v.2s, #1
61 mov state0.16b, copy0.16b
62 mov state1.16b, copy1.16b
63 mov state2.16b, copy2.16b
64 mov state3.16b, copy3.16b
69 * Permute one 64-byte block where the state matrix is stored in the four NEON
70 * registers state0-state3. It performs matrix operations on four words in parallel,
75 /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
77 eor state3.16b, state3.16b, state0.16b
82 eor tmp.16b, state1.16b, state2.16b
88 eor tmp.16b, state3.16b, state0.16b
94 eor tmp.16b, state1.16b, state2.16b
98 /* state1[0,1,2,3] = state1[1,2,3,0] */
99 ext state1.16b, state1.16b, state1.16b, #4
100 /* state2[0,1,2,3] = state2[2,3,0,1] */
101 ext state2.16b, state2.16b, state2.16b, #8
102 /* state3[0,1,2,3] = state3[3,0,1,2] */
103 ext state3.16b, state3.16b, state3.16b, #12
105 /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
107 eor state3.16b, state3.16b, state0.16b
112 eor tmp.16b, state1.16b, state2.16b
118 eor tmp.16b, state3.16b, state0.16b
124 eor tmp.16b, state1.16b, state2.16b
128 /* state1[0,1,2,3] = state1[3,0,1,2] */
129 ext state1.16b, state1.16b, state1.16b, #12
130 /* state2[0,1,2,3] = state2[2,3,0,1] */
131 ext state2.16b, state2.16b, state2.16b, #8
132 /* state3[0,1,2,3] = state3[1,2,3,0] */
133 ext state3.16b, state3.16b, state3.16b, #4
146 st1 { state0.16b - state3.16b }, [x0]
154 /* output += 64, --nblocks */
156 subs x3, x3, #1
163 movi state0.16b, #0
164 movi state1.16b, #0
165 movi state2.16b, #0
166 movi state3.16b, #0
167 movi copy1.16b, #0
168 movi copy2.16b, #0