Lines Matching +full:mod +full:- +full:12 +full:b

2  * arch/xtensa/lib/hal/memcopy.S -- Core HAL library functions
9 * Copyright (C) 2002 - 2012 Tensilica Inc.
24 * 32-bit load and store instructions (as required for these
39 * This code tries to use fall-through branches for the common
64 .byte 0 # 1 mod 4 alignment for LOOPNEZ
65 # (0 mod 4 alignment for LBEG)
95 addi a4, a4, -1
100 .Ldst2mod4: # dst 16-bit aligned
106 addi a4, a4, -2
119 _bbsi.l a2, 0, .Ldst1mod2 # if dst is 1 mod 2
120 _bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4
121 .Ldstaligned: # return here from .Ldst?mod? once dst is aligned
122 srli a7, a4, 4 # number of loop iterations with 16B
127 * Destination and source are word-aligned, use word copy.
129 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
135 add a8, a8, a3 # a8 = end of last 16B source chunk
143 l32i a7, a3, 12
146 s32i a7, a5, 12
194 _beqz a4, .Ldone # avoid loading anything for zero-length copies
195 # copy 16 bytes per iteration for word-aligned dst and unaligned src
211 add a10, a10, a3 # a10 = end of last 16B source chunk
218 l32i a9, a3, 12
226 s32i a9, a5, 12
287 * 32-bit load and store instructions (as required for these
317 .byte 0 # 1 mod 4 alignment for LOOPNEZ
318 # (0 mod 4 alignment for LBEG)
327 addi a3, a3, -1
329 addi a5, a5, -1
347 addi a3, a3, -1
349 addi a5, a5, -1
351 addi a4, a4, -1
354 .Lbackdst2mod4: # dst 16-bit aligned
357 addi a3, a3, -2
360 addi a5, a5, -2
363 addi a4, a4, -2
380 _bbsi.l a5, 0, .Lbackdst1mod2 # if dst is 1 mod 2
381 _bbsi.l a5, 1, .Lbackdst2mod4 # if dst is 2 mod 4
382 .Lbackdstaligned: # return here from .Lbackdst?mod? once dst is aligned
383 srli a7, a4, 4 # number of loop iterations with 16B
388 * Destination and source are word-aligned, use word copy.
390 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
396 sub a8, a3, a8 # a8 = start of first 16B source chunk
399 addi a3, a3, -16
400 l32i a7, a3, 12
402 addi a5, a5, -16
403 s32i a7, a5, 12
415 addi a3, a3, -8
418 addi a5, a5, -8
428 addi a3, a3, -4
430 addi a5, a5, -4
437 addi a3, a3, -2
439 addi a5, a5, -2
445 addi a3, a3, -1
447 addi a5, a5, -1
457 _beqz a4, .Lbackdone # avoid loading anything for zero-length copies
458 # copy 16 bytes per iteration for word-aligned dst and unaligned src
473 sub a10, a3, a10 # a10 = start of first 16B source chunk
476 addi a3, a3, -16
477 l32i a7, a3, 12
479 addi a5, a5, -16
481 s32i a6, a5, 12
496 addi a3, a3, -8
499 addi a5, a5, -8
508 addi a3, a3, -4
510 addi a5, a5, -4
524 addi a3, a3, -2
527 addi a5, a5, -2
534 addi a3, a3, -1
535 addi a5, a5, -1