Lines Matching +full:4 +full:- +full:byte
2 * arch/xtensa/lib/hal/memcopy.S -- Core HAL library functions
9 * Copyright (C) 2002 - 2012 Tensilica Inc.
24 * 32-bit load and store instructions (as required for these
36 * 8, 4, 2, and 1 byte copies conditional on the length;
39 * This code tries to use fall-through branches for the common
41 * of 4 (or 8) length.
61 * Byte by byte copy
63 .align 4
64 .byte 0 # 1 mod 4 alignment for LOOPNEZ
65 # (0 mod 4 alignment for LBEG)
88 .align 4
89 .Ldst1mod2: # dst is only byte aligned
90 _bltui a4, 7, .Lbytecopy # do short copies byte by byte
92 # copy 1 byte
95 addi a4, a4, -1
100 .Ldst2mod4: # dst 16-bit aligned
102 _bltui a4, 6, .Lbytecopy # do short copies byte by byte
106 addi a4, a4, -2
120 _bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4
122 srli a7, a4, 4 # number of loop iterations with 16B
127 * Destination and source are word-aligned, use word copy.
129 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
134 slli a8, a7, 4
139 l32i a7, a3, 4
142 s32i a7, a5, 4
155 l32i a7, a3, 4
158 s32i a7, a5, 4
166 # copy 4 bytes
168 addi a3, a3, 4
170 addi a5, a5, 4
183 # copy 1 byte
192 .align 4
194 _beqz a4, .Ldone # avoid loading anything for zero-length copies
195 # copy 16 bytes per iteration for word-aligned dst and unaligned src
196 __ssa8 a3 # set shift amount from byte offset
210 slli a10, a7, 4
214 l32i a7, a3, 4
220 s32i a7, a5, 4
234 l32i a7, a3, 4
240 s32i a7, a5, 4
245 # copy 4 bytes
246 l32i a7, a3, 4
247 addi a3, a3, 4
250 addi a5, a5, 4
270 # copy 1 byte
287 * 32-bit load and store instructions (as required for these
314 * Byte by byte copy
316 .align 4
317 .byte 0 # 1 mod 4 alignment for LOOPNEZ
318 # (0 mod 4 alignment for LBEG)
327 addi a3, a3, -1
329 addi a5, a5, -1
342 .align 4
343 .Lbackdst1mod2: # dst is only byte aligned
344 _bltui a4, 7, .Lbackbytecopy # do short copies byte by byte
346 # copy 1 byte
347 addi a3, a3, -1
349 addi a5, a5, -1
351 addi a4, a4, -1
354 .Lbackdst2mod4: # dst 16-bit aligned
356 _bltui a4, 6, .Lbackbytecopy # do short copies byte by byte
357 addi a3, a3, -2
360 addi a5, a5, -2
363 addi a4, a4, -2
381 _bbsi.l a5, 1, .Lbackdst2mod4 # if dst is 2 mod 4
383 srli a7, a4, 4 # number of loop iterations with 16B
388 * Destination and source are word-aligned, use word copy.
390 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
395 slli a8, a7, 4
399 addi a3, a3, -16
402 addi a5, a5, -16
404 l32i a7, a3, 4
407 s32i a7, a5, 4
415 addi a3, a3, -8
417 l32i a7, a3, 4
418 addi a5, a5, -8
420 s32i a7, a5, 4
427 # copy 4 bytes
428 addi a3, a3, -4
430 addi a5, a5, -4
437 addi a3, a3, -2
439 addi a5, a5, -2
444 # copy 1 byte
445 addi a3, a3, -1
447 addi a5, a5, -1
455 .align 4
457 _beqz a4, .Lbackdone # avoid loading anything for zero-length copies
458 # copy 16 bytes per iteration for word-aligned dst and unaligned src
459 __ssa8 a3 # set shift amount from byte offset
472 slli a10, a7, 4
476 addi a3, a3, -16
479 addi a5, a5, -16
482 l32i a9, a3, 4
487 s32i a8, a5, 4
496 addi a3, a3, -8
497 l32i a7, a3, 4
499 addi a5, a5, -8
501 s32i a6, a5, 4
507 # copy 4 bytes
508 addi a3, a3, -4
510 addi a5, a5, -4
524 addi a3, a3, -2
527 addi a5, a5, -2
533 # copy 1 byte
534 addi a3, a3, -1
535 addi a5, a5, -1