Lines Matching +full:4 +full:f
29 * This function assumes 2- or 4-byte alignment. Other alignments will fail!
35 bgeu sum, val, 99f ; \
44 * is aligned on either a 2-byte or 4-byte boundary.
48 bnez a5, 8f /* branch if 2-byte aligned */
49 /* Fall-through on common case, 4-byte alignment */
53 loopgtz a5, 2f
55 beqz a5, 2f
61 l32i a7, a2, 4
76 addi a2, a2, 4*8
81 extui a5, a3, 2, 3 /* remaining 4-byte chunks */
83 loopgtz a5, 3f
85 beqz a5, 3f
87 add a5, a5, a2 /* a5 = end of last 4-byte chunk */
92 addi a2, a2, 4
97 _bbci.l a3, 1, 5f /* remaining 2-byte chunk */
102 _bbci.l a3, 0, 7f /* remaining 1-byte chunk */
118 bnez a5, 8f /* branch if 1-byte aligned */
124 j 1b /* now buf is 4-byte aligned */
131 srli a5, a3, 2 /* 4-byte chunks */
133 loopgtz a5, 2f
135 beqz a5, 2f
137 add a5, a5, a2 /* a5 = end of last 4-byte chunk */
152 addi a2, a2, 4
157 _bbci.l a3, 1, 3f /* remaining 2-byte chunk, still odd addr */
188 This function is optimized for 4-byte aligned addresses. Other
198 /* We optimize the following alignment tests for the 4-byte
206 beqz a9, 1f /* branch if both are 4-byte aligned */
207 bbsi.l a10, 0, 5f /* branch if one address is odd */
208 j 3f /* one address is 2-byte aligned */
210 /* _bbsi.l a10, 0, 5f */ /* branch if odd address */
211 /* _bbsi.l a10, 1, 3f */ /* branch if 2-byte-aligned address */
214 /* src and dst are both 4-byte aligned */
217 loopgtz a10, 2f
219 beqz a10, 2f
224 EX(10f) l32i a9, a2, 0
225 EX(10f) l32i a8, a2, 4
226 EX(10f) s32i a9, a3, 0
227 EX(10f) s32i a8, a3, 4
230 EX(10f) l32i a9, a2, 8
231 EX(10f) l32i a8, a2, 12
232 EX(10f) s32i a9, a3, 8
233 EX(10f) s32i a8, a3, 12
236 EX(10f) l32i a9, a2, 16
237 EX(10f) l32i a8, a2, 20
238 EX(10f) s32i a9, a3, 16
239 EX(10f) s32i a8, a3, 20
242 EX(10f) l32i a9, a2, 24
243 EX(10f) l32i a8, a2, 28
244 EX(10f) s32i a9, a3, 24
245 EX(10f) s32i a8, a3, 28
254 extui a10, a4, 2, 3 /* remaining 4-byte chunks */
257 loopgtz a10, 3f
259 beqz a10, 3f
261 add a10, a10, a2 /* a10 = end of last 4-byte src chunk */
264 EX(10f) l32i a9, a2, 0
265 EX(10f) s32i a9, a3, 0
267 addi a2, a2, 4
268 addi a3, a3, 4
275 to here from the 4-byte alignment case to process, at most,
279 inefficient, so align your addresses to 4-byte boundaries.
288 loopgtz a10, 4f
290 beqz a10, 4f
295 EX(10f) l16ui a9, a2, 0
296 EX(10f) s16i a9, a3, 0
303 4:
305 _bbci.l a4, 0, 8f /* 1-byte chunk */
306 EX(10f) l8ui a9, a2, 0
307 EX(10f) s8i a9, a3, 0
323 loopgtz a10, 6f
325 beqz a10, 6f
330 EX(10f) l8ui a9, a2, 0
331 EX(10f) l8ui a8, a2, 1
332 EX(10f) s8i a9, a3, 0
333 EX(10f) s8i a8, a3, 1
347 j 4b /* process the possible trailing odd byte */