Lines Matching +full:address +full:- +full:aligned

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
29 * This function assumes 2- or 4-byte alignment. Other alignments will fail!
32 /* ONES_ADD converts twos-complement math to ones-complement. */
44 * is aligned on either a 2-byte or 4-byte boundary.
48 bnez a5, 8f /* branch if 2-byte aligned */
49 /* Fall-through on common case, 4-byte alignment */
51 srli a5, a3, 5 /* 32-byte chunks */
57 add a5, a5, a2 /* a5 = end of last 32-byte chunk */
81 extui a5, a3, 2, 3 /* remaining 4-byte chunks */
87 add a5, a5, a2 /* a5 = end of last 4-byte chunk */
97 _bbci.l a3, 1, 5f /* remaining 2-byte chunk */
102 _bbci.l a3, 0, 7f /* remaining 1-byte chunk */
112 /* uncommon case, buf is 2-byte aligned */
118 bnez a5, 8f /* branch if 1-byte aligned */
123 addi a3, a3, -2 /* adjust len */
124 j 1b /* now buf is 4-byte aligned */
126 /* case: odd-byte aligned, len > 1
127 * This case is dog slow, so don't give us an odd address.
131 srli a5, a3, 2 /* 4-byte chunks */
137 add a5, a5, a2 /* a5 = end of last 4-byte chunk */
157 _bbci.l a3, 1, 3f /* remaining 2-byte chunk, still odd addr */
188 This function is optimized for 4-byte aligned addresses. Other
195 movi a5, -1
198 /* We optimize the following alignment tests for the 4-byte
199 aligned case. Two bbsi.l instructions might seem more optimal
206 beqz a9, 1f /* branch if both are 4-byte aligned */
207 bbsi.l a10, 0, 5f /* branch if one address is odd */
208 j 3f /* one address is 2-byte aligned */
210 /* _bbsi.l a10, 0, 5f */ /* branch if odd address */
211 /* _bbsi.l a10, 1, 3f */ /* branch if 2-byte-aligned address */
214 /* src and dst are both 4-byte aligned */
215 srli a10, a4, 5 /* 32-byte chunks */
221 add a10, a10, a2 /* a10 = end of last 32-byte src chunk */
254 extui a10, a4, 2, 3 /* remaining 4-byte chunks */
255 extui a4, a4, 0, 2 /* reset len for general-case, 2-byte chunks */
261 add a10, a10, a2 /* a10 = end of last 4-byte src chunk */
275 to here from the 4-byte alignment case to process, at most,
276 one 2-byte chunk. (2) It branches to here from above if
277 either src or dst is 2-byte aligned, and we process all bytes
279 inefficient, so align your addresses to 4-byte boundaries.
286 srli a10, a4, 1 /* 2-byte chunks */
292 add a10, a10, a2 /* a10 = end of last 2-byte src chunk */
305 _bbci.l a4, 0, 8f /* 1-byte chunk */
318 process all bytes using 8-bit accesses. Grossly inefficient,
319 so don't feed us an odd address. */
321 srli a10, a4, 1 /* handle in pairs for 16-bit csum */
327 add a10, a10, a2 /* a10 = end of last odd-aligned, 2-byte src chunk */
335 slli a9, a9, 8 /* combine into a single 16-bit value */