Jetpack/u-boot/arch/arc/lib/strchr-700.S

142 lines
2.9 KiB
ArmAsm

/*
* Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0+
*/
/*
* ARC700 has a relatively long pipeline and branch prediction, so we want
* to avoid branches that are hard to predict. On the other hand, the
* presence of the norm instruction makes it easier to operate on whole
* words branch-free.
*/
.global strchr
.align 4
strchr:
extb_s %r1, %r1
asl %r5, %r1, 8
bmsk %r2, %r0, 1
or %r5, %r5, %r1
mov_s %r3, 0x01010101
breq.d %r2, %r0, .Laligned
asl %r4, %r5, 16
sub_s %r0, %r0, %r2
asl %r7, %r2, 3
ld_s %r2, [%r0]
#ifdef __LITTLE_ENDIAN__
asl %r7, %r3, %r7
#else /* __BIG_ENDIAN__ */
lsr %r7, %r3, %r7
#endif /* _ENDIAN__ */
or %r5, %r5, %r4
ror %r4, %r3
sub %r12, %r2, %r7
bic_s %r12, %r12, %r2
and %r12, %r12, %r4
brne.d %r12, 0, .Lfound0_ua
xor %r6, %r2, %r5
ld.a %r2, [%r0, 4]
sub %r12, %r6, %r7
bic %r12, %r12, %r6
#ifdef __LITTLE_ENDIAN__
and %r7, %r12, %r4
/* For speed, we want this branch to be unaligned. */
breq %r7, 0, .Loop
/* Likewise this one */
b .Lfound_char
#else /* __BIG_ENDIAN__ */
and %r12, %r12, %r4
/* For speed, we want this branch to be unaligned. */
breq %r12, 0, .Loop
lsr_s %r12, %r12, 7
bic %r2, %r7, %r6
b.d .Lfound_char_b
and_s %r2, %r2, %r12
#endif /* _ENDIAN__ */
/* We require this code address to be unaligned for speed... */
.Laligned:
ld_s %r2, [%r0]
or %r5, %r5, %r4
ror %r4, %r3
/* ... so that this code address is aligned, for itself and ... */
.Loop:
sub %r12, %r2, %r3
bic_s %r12, %r12, %r2
and %r12, %r12, %r4
brne.d %r12, 0, .Lfound0
xor %r6, %r2, %r5
ld.a %r2, [%r0, 4]
sub %r12, %r6, %r3
bic %r12, %r12, %r6
and %r7, %r12, %r4
breq %r7, 0, .Loop
/*
*... so that this branch is unaligned.
* Found searched-for character.
* r0 has already advanced to next word.
*/
#ifdef __LITTLE_ENDIAN__
/*
* We only need the information about the first matching byte
* (i.e. the least significant matching byte) to be exact,
* hence there is no problem with carry effects.
*/
.Lfound_char:
sub %r3, %r7, 1
bic %r3, %r3, %r7
norm %r2, %r3
sub_s %r0, %r0, 1
asr_s %r2, %r2, 3
j.d [%blink]
sub_s %r0, %r0, %r2
.balign 4
.Lfound0_ua:
mov %r3, %r7
.Lfound0:
sub %r3, %r6, %r3
bic %r3, %r3, %r6
and %r2, %r3, %r4
or_s %r12, %r12, %r2
sub_s %r3, %r12, 1
bic_s %r3, %r3, %r12
norm %r3, %r3
add_s %r0, %r0, 3
asr_s %r12, %r3, 3
asl.f 0, %r2, %r3
sub_s %r0, %r0, %r12
j_s.d [%blink]
mov.pl %r0, 0
#else /* __BIG_ENDIAN__ */
.Lfound_char:
lsr %r7, %r7, 7
bic %r2, %r7, %r6
.Lfound_char_b:
norm %r2, %r2
sub_s %r0, %r0, 4
asr_s %r2, %r2, 3
j.d [%blink]
add_s %r0, %r0, %r2
.Lfound0_ua:
mov_s %r3, %r7
.Lfound0:
asl_s %r2, %r2, 7
or %r7, %r6, %r4
bic_s %r12, %r12, %r2
sub %r2, %r7, %r3
or %r2, %r2, %r6
bic %r12, %r2, %r12
bic.f %r3, %r4, %r12
norm %r3, %r3
add.pl %r3, %r3, 1
asr_s %r12, %r3, 3
asl.f 0, %r2, %r3
add_s %r0, %r0, %r12
j_s.d [%blink]
mov.mi %r0, 0
#endif /* _ENDIAN__ */