Lines Matching +full:xlen +full:- +full:1
1 /* SPDX-License-Identifier: GPL-2.0-only */
12 * a0 - dest
15 * a0 - Inclusive first byte of dest
16 * a1 - Inclusive first byte of src
17 * a2 - Length of copy n
22 * Note: This currently works only on little-endian.
23 * To port to big-endian, reverse the direction of the shifts in the two misaligned fixup copy loops.
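The matched fixup-loop lines further down merge adjacent words with srl/sll, which only works as written on little-endian. A minimal C demonstration of the byte-order assumption (not taken from the kernel source):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * On a little-endian machine the lowest-addressed byte of a loaded word
 * lands in the least significant bits, so a right shift discards bytes
 * from the low-address end and a left shift from the high-address end.
 * Big-endian would need the opposite shift directions in the fixup loops.
 */
int main(void)
{
        const unsigned char mem[8] = { 0x00, 0x11, 0x22, 0x33,
                                       0x44, 0x55, 0x66, 0x77 };
        uint64_t w;

        memcpy(&w, mem, sizeof(w));
        printf("%016llx\n", (unsigned long long)w); /* 7766554433221100 on LE */
        return 0;
}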
33 * Forward Copy: a1 - Index counter of src
34 * Reverse Copy: a4 - Index counter of src
35 * Forward Copy: t3 - Index counter of dest
36 * Reverse Copy: t4 - Index counter of dest
37 * Both Copy Modes: t5 - Inclusive first multibyte/aligned address of dest
38 * Both Copy Modes: t6 - Non-inclusive last multibyte/aligned address of dest
39 * Both Copy Modes: t0 - Link / Temporary for load-store
40 * Both Copy Modes: t1 - Temporary for load-store
41 * Both Copy Modes: t2 - Temporary for load-store
42 * Both Copy Modes: a5 - dest to src alignment offset
43 * Both Copy Modes: a6 - Shift amount
44 * Both Copy Modes: a7 - Inverse shift amount
45 * Both Copy Modes: a2 - Alternate breakpoint for unrolled loops
61 andi t0, a2, -(2 * SZREG)
67 andi t5, t3, -SZREG
68 andi t6, t4, -SZREG
72 * to find the low-bound of SZREG alignment in the dest memory
78 beq t5, t3, 1f
80 1:
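A hedged C model of the setup matched around here, with SZREG standing for the register size in bytes and all variable names illustrative: the andi against -(2 * SZREG) is zero exactly when n is less than 2 * SZREG (the small-copy bail-out), and t5/t6 become the first and last SZREG-aligned addresses of the dest region. The instruction skipped by the beq is not among the matched lines, so the round-up by SZREG is inferred from the surrounding comment.

#include <stddef.h>
#include <stdint.h>

#define SZREG sizeof(unsigned long)  /* register size in bytes: 8 on rv64, 4 on rv32 */

/* andi t0, a2, -(2 * SZREG): the result is zero iff n < 2 * SZREG,
 * i.e. the copy is too small to be worth the bulk path. */
static int too_small_for_bulk(size_t n)
{
        return (n & ~(size_t)(2 * SZREG - 1)) == 0;
}

/*
 * t5/t6 model: round the start of dest down to SZREG (andi t5, t3, -SZREG),
 * bump it up by one SZREG if dest was not already aligned (the branch at
 * "beq t5, t3, 1f"), and round the end of dest down to SZREG
 * (andi t6, t4, -SZREG).
 */
static void dest_aligned_bounds(uintptr_t dest, size_t n,
                                uintptr_t *first /* t5 */,
                                uintptr_t *last  /* t6 */)
{
        *first = dest & ~(uintptr_t)(SZREG - 1);
        *last  = (dest + n) & ~(uintptr_t)(SZREG - 1);
        if (*first != dest)
                *first += SZREG;
}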
83 * If the dest and src are co-aligned to SZREG, then there is no need for a misaligned fixup copy.
85 * Instead, do a simpler co-aligned copy.
88 andi t1, t0, (SZREG - 1)
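The andi above masks the low bits of what is presumably dest XOR src (the xor itself is not among the matched lines); a zero result means the two pointers share the same offset within an SZREG word, so the simpler co-aligned path can be taken. A one-function C sketch of that test, names illustrative:

#include <stdint.h>

#define SZREG sizeof(unsigned long)

/*
 * dest and src are co-aligned to SZREG when their addresses differ only
 * above the SZREG boundary: (dest ^ src) & (SZREG - 1) == 0.  The matched
 * andi performs the masking; a zero result selects the co-aligned path.
 */
static int coaligned(uintptr_t dest, uintptr_t src)
{
        return ((dest ^ src) & (SZREG - 1)) == 0;
}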
98 andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */
101 andi a1, a1, -SZREG /* Align the src pointer */
106 * a7 = XLEN - a6 = XLEN + -a6
107 * 2's complement negation to find the negative: -a6 = ~a6 + 1
108 * Add that to XLEN (XLEN = SZREG * 8), so a7 = ~a6 + (SZREG * 8 + 1).
111 addi a7, a7, (SZREG * 8 + 1)
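A small self-checking C sketch of the shift arithmetic, assuming the src misalignment is nonzero on this path so both shift amounts stay strictly between 0 and XLEN; variable names mirror the registers but are otherwise illustrative:

#include <assert.h>

#define SZREG sizeof(unsigned long)
#define XLEN  (SZREG * 8)

int main(void)
{
        unsigned long off, a6, a7;

        for (off = 1; off < SZREG; off++) { /* src misalignment in bytes */
                a6 = off * 8;               /* shift amount in bits      */
                /*
                 * a7 = XLEN - a6 without a subtract instruction:
                 * -a6 == ~a6 + 1, so XLEN - a6 == ~a6 + (XLEN + 1),
                 * i.e. "not a7, a6" then "addi a7, a7, (SZREG * 8 + 1)".
                 */
                a7 = ~a6 + (XLEN + 1);
                assert(a7 == XLEN - a6);
        }
        return 0;
}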
114 * Fix Misalignment Copy Loop - Forward
117 * load_val1 = load_ptr[1];
119 * store_ptr[0 - 2] = (load_val0 >> {a6}) | (load_val1 << {a7});
126 * store_ptr[1 - 2] = (load_val1 >> {a6}) | (load_val0 << {a7});
133 1:
134 REG_L t1, (1 * SZREG)(a1)
139 REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3)
148 REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3)
150 bne t3, t6, 1b
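A rolled-up C model of the forward fixup loop above; the assembly unrolls by two and alternates t0/t1, which this sketch does not, and the initial load_val0 load is not among the matched lines. Little-endian is assumed, src has already been rounded down to SZREG, and dst/dst_end stand for the aligned dest bounds held in t3/t6; all names are illustrative.

#include <stddef.h>

/*
 * Each stored word is stitched from two adjacent aligned source words:
 * the upper bytes of the lower-addressed word and the lower bytes of the
 * higher-addressed word, exactly the
 * "(load_val0 >> a6) | (load_val1 << a7)" expression in the comment.
 * shift and inv_shift are both strictly between 0 and XLEN here.
 */
static void misaligned_copy_forward(unsigned long *dst, unsigned long *dst_end,
                                    const unsigned long *load_ptr,
                                    unsigned int shift, unsigned int inv_shift)
{
        unsigned long lo = load_ptr[0];         /* load_val0 = load_ptr[0] */

        while (dst != dst_end) {
                unsigned long hi = load_ptr[1]; /* load_val1 = load_ptr[1] */

                *dst++ = (lo >> shift) | (hi << inv_shift);
                lo = hi;                        /* reuse the word just loaded */
                load_ptr++;
        }
}

Two loads feed every store because each misaligned source word straddles an SZREG boundary; carrying the previously loaded word in a register halves the memory traffic, which is what the t0/t1 alternation buys in the assembly.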
160 andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */
163 andi a4, a4, -SZREG /* Align the src pointer */
164 addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop */
168 * a7 = XLEN - a6 = XLEN + -a6
169 * 2's complement negation to find the negative: -a6 = ~a6 + 1
170 * Add that to XLEN (XLEN = SZREG * 8), so a7 = ~a6 + (SZREG * 8 + 1).
173 addi a7, a7, (SZREG * 8 + 1)
176 * Fix Misalignment Copy Loop - Reverse
179 * load_val0 = load_ptr[-1];
180 * store_ptr -= 2;
181 * store_ptr[1] = (load_val0 >> {a6}) | (load_val1 << {a7});
186 * load_val1 = load_ptr[-2];
187 * load_ptr -= 2;
195 1:
196 REG_L t0, (-1 * SZREG)(a4)
197 addi t4, t4, (-2 * SZREG)
201 REG_S t2, ( 1 * SZREG)(t4)
205 REG_L t1, (-2 * SZREG)(a4)
206 addi a4, a4, (-2 * SZREG)
212 bne t4, t5, 1b
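The reverse path computes the same shift and inverse shift as the forward path and then walks the aligned dest region downward. A rolled-up C model mirroring the forward sketch, names illustrative:

#include <stddef.h>

/*
 * Reverse counterpart: store downward from the top of the aligned dest
 * region, combining load_ptr[-1] with the word loaded on the previous
 * iteration, matching the store_ptr[1]/store_ptr[0] pair in the comment.
 */
static void misaligned_copy_reverse(unsigned long *dst_first, unsigned long *dst,
                                    const unsigned long *load_ptr,
                                    unsigned int shift, unsigned int inv_shift)
{
        unsigned long hi = load_ptr[0];          /* load_val1 = load_ptr[0]  */

        while (dst != dst_first) {
                unsigned long lo = load_ptr[-1]; /* load_val0 = load_ptr[-1] */

                *--dst = (lo >> shift) | (hi << inv_shift);
                hi = lo;
                load_ptr--;
        }
}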
220 * Simple copy loops for SZREG co-aligned memory locations.
230 1:
234 REG_S t1, (-1 * SZREG)(t3)
235 bne t3, t6, 1b
242 1:
243 REG_L t1, (-1 * SZREG)(a4)
244 addi a4, a4, -SZREG
245 addi t4, t4, -SZREG
247 bne t4, t5, 1b
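When dest and src are co-aligned there is nothing to shift, so whole registers move directly between the aligned bounds. A C model of the two loops above, names illustrative:

#include <stddef.h>

/* Forward: move whole words until the dest cursor reaches dst_end (t6). */
static void coaligned_copy_forward(unsigned long *dst, unsigned long *dst_end,
                                   const unsigned long *src)
{
        while (dst != dst_end)
                *dst++ = *src++;
}

/* Reverse: move whole words downward until the cursor reaches dst_first (t5). */
static void coaligned_copy_reverse(unsigned long *dst_first, unsigned long *dst,
                                   const unsigned long *src)
{
        while (dst != dst_first)
                *--dst = *--src;
}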
252 * These are basically sub-functions within the function. They byte-copy until the dest pointer reaches SZREG alignment and return through the t0 link register.
262 1:
264 addi a1, a1, 1
265 addi t3, t3, 1
266 sb t1, -1(t3)
267 bne t3, t5, 1b
273 1:
274 lb t1, -1(a4)
275 addi a4, a4, -1
276 addi t4, t4, -1
278 bne t4, t6, 1b
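These helpers copy single bytes only until the dest cursor reaches the aligned bound (t5 going forward, t6 going in reverse) and then return through the t0 link register so the bulk loops can continue with the same register state. A C model with illustrative names; the real helpers share registers with their callers rather than taking arguments.

#include <stddef.h>

/* Forward: byte copy until the dest cursor reaches the first aligned
 * address (t5), leaving both cursors positioned for the bulk loop. */
static void byte_copy_until_aligned_forward(unsigned char **d,
                                            const unsigned char **s,
                                            const unsigned char *aligned_first)
{
        while (*d != aligned_first)
                *(*d)++ = *(*s)++;
}

/* Reverse: byte copy downward until the dest cursor reaches the last
 * aligned address (t6). */
static void byte_copy_until_aligned_reverse(unsigned char **d,
                                            const unsigned char **s,
                                            const unsigned char *aligned_last)
{
        while (*d != aligned_last)
                *--(*d) = *--(*s);
}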
292 1:
294 addi a1, a1, 1
295 addi t3, t3, 1
296 sb t1, -1(t3)
297 bne t3, t4, 1b
303 1:
304 lb t1, -1(a4)
305 addi a4, a4, -1
306 addi t4, t4, -1
308 bne t4, t3, 1b
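The last two loops are the plain byte-by-byte path, run until the dest cursors meet, and are used when the copy is too small for the bulk machinery. A self-contained C sketch; the direction decision (copy in reverse when dest sits above an overlapping src) is not among the matched lines and is assumed here.

#include <stddef.h>
#include <stdint.h>

/*
 * Plain byte copy in either direction.  Forward runs the dest cursor up
 * to the end of the region; reverse runs it down to the start, mirroring
 * the two loops above.  The reverse direction is what keeps overlapping
 * moves with dest above src correct.
 */
static void *byte_copy(void *dest, const void *src, size_t n)
{
        unsigned char *d = dest;
        const unsigned char *s = src;

        if ((uintptr_t)dest <= (uintptr_t)src) {
                while (n--)
                        *d++ = *s++;    /* forward loop */
        } else {
                d += n;
                s += n;
                while (n--)
                        *--d = *--s;    /* reverse loop */
        }
        return dest;
}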