Lines Matching +full:4 +full:- +full:byte

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
29 * This function assumes 2- or 4-byte alignment. Other alignments will fail!
32 /* ONES_ADD converts twos-complement math to ones-complement. */
44 * is aligned on either a 2-byte or 4-byte boundary.
48 bnez a5, 8f /* branch if 2-byte aligned */
49 /* Fall-through on common case, 4-byte alignment */
51 srli a5, a3, 5 /* 32-byte chunks */
57 add a5, a5, a2 /* a5 = end of last 32-byte chunk */
61 l32i a7, a2, 4
76 addi a2, a2, 4*8
81 extui a5, a3, 2, 3 /* remaining 4-byte chunks */
87 add a5, a5, a2 /* a5 = end of last 4-byte chunk */
92 addi a2, a2, 4
97 _bbci.l a3, 1, 5f /* remaining 2-byte chunk */
102 _bbci.l a3, 0, 7f /* remaining 1-byte chunk */
105 slli a6, a6, 8 /* load byte into bits 8..15 */
112 /* uncommon case, buf is 2-byte aligned */
118 bnez a5, 8f /* branch if 1-byte aligned */
123 addi a3, a3, -2 /* adjust len */
124 j 1b /* now buf is 4-byte aligned */
126 /* case: odd-byte aligned, len > 1
131 srli a5, a3, 2 /* 4-byte chunks */
137 add a5, a5, a2 /* a5 = end of last 4-byte chunk */
152 addi a2, a2, 4
157 _bbci.l a3, 1, 3f /* remaining 2-byte chunk, still odd addr */
169 j 5b /* branch to handle the remaining byte */
188 This function is optimized for 4-byte aligned addresses. Other
195 movi a5, -1
198 /* We optimize the following alignment tests for the 4-byte
206 beqz a9, 1f /* branch if both are 4-byte aligned */
208 j 3f /* one address is 2-byte aligned */
211 /* _bbsi.l a10, 1, 3f */ /* branch if 2-byte-aligned address */
214 /* src and dst are both 4-byte aligned */
215 srli a10, a4, 5 /* 32-byte chunks */
221 add a10, a10, a2 /* a10 = end of last 32-byte src chunk */
225 EX(10f) l32i a8, a2, 4
227 EX(10f) s32i a8, a3, 4
254 extui a10, a4, 2, 3 /* remaining 4-byte chunks */
255 extui a4, a4, 0, 2 /* reset len for general-case, 2-byte chunks */
261 add a10, a10, a2 /* a10 = end of last 4-byte src chunk */
267 addi a2, a2, 4
268 addi a3, a3, 4
275 to here from the 4-byte alignment case to process, at most,
276 one 2-byte chunk. (2) It branches to here from above if
277 either src or dst is 2-byte aligned, and we process all bytes
278 here, except for perhaps a trailing odd byte. It's
279 inefficient, so align your addresses to 4-byte boundaries.
286 srli a10, a4, 1 /* 2-byte chunks */
288 loopgtz a10, 4f
290 beqz a10, 4f
292 add a10, a10, a2 /* a10 = end of last 2-byte src chunk */
303 4:
304 /* This section processes a possible trailing odd byte. */
305 _bbci.l a4, 0, 8f /* 1-byte chunk */
309 slli a9, a9, 8 /* shift byte to bits 8..15 */
318 process all bytes using 8-bit accesses. Grossly inefficient,
321 srli a10, a4, 1 /* handle in pairs for 16-bit csum */
327 add a10, a10, a2 /* a10 = end of last odd-aligned, 2-byte src chunk */
335 slli a9, a9, 8 /* combine into a single 16-bit value */
347 j 4b /* process the possible trailing odd byte */