mirror of git://sourceware.org/git/glibc.git
PowerPC: strcpy/stpcpy optimization for PPC64/POWER7
This patch unifies the strcpy and stpcpy implementations for PPC64 and PPC64/POWER7. The idea of the default powerpc64 implementation is to provide both doubleword and word aligned memory access. The PPC64/POWER7 implementation also provides doubleword and word memory access, removes the branch hints, uses the cmpb instruction to compare doublewords/words, and adds an optimization for inputs of the same alignment.
This commit is contained in:
parent
151659f637
commit
69f13dbf06
11
ChangeLog
11
ChangeLog
|
|
@ -1,3 +1,14 @@
|
||||||
|
2013-10-04 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
|
||||||
|
|
||||||
|
* sysdeps/powerpc/powerpc64/strcpy.S (strcpy): Add word load/store
|
||||||
|
to provide a boost for large inputs with word alignment.
|
||||||
|
* sysdeps/powerpc/powerpc64/stpcpy.S (__stpcpy): Rewrite
|
||||||
|
implementation based on optimized PPC64 strcpy.
|
||||||
|
* sysdeps/powerpc/powerpc64/power7/strcpy.S: New file: optimized
|
||||||
|
strcpy for PPC64/POWER7 based on both doubleword and word load/store.
|
||||||
|
* sysdeps/powerpc/powerpc64/power7/stpcpy.S: New file: optimized
|
||||||
|
stpcpy for PPC64/POWER7 based on PPC64/POWER7 strcpy.
|
||||||
|
|
||||||
2013-10-25 Ondřej Bílka <neleai@seznam.cz>
|
2013-10-25 Ondřej Bílka <neleai@seznam.cz>
|
||||||
|
|
||||||
[BZ 2801]
|
[BZ 2801]
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,24 @@
|
||||||
|
/* Optimized stpcpy implementation for PowerPC64/POWER7.
|
||||||
|
Copyright (C) 2013 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#define USE_AS_STPCPY
|
||||||
|
#include <sysdeps/powerpc/powerpc64/power7/strcpy.S>
|
||||||
|
|
||||||
|
weak_alias (__stpcpy, stpcpy)
|
||||||
|
libc_hidden_def (__stpcpy)
|
||||||
|
libc_hidden_builtin_def (stpcpy)
|
||||||
|
|
@ -0,0 +1,274 @@
|
||||||
|
/* Optimized strcpy/stpcpy implementation for PowerPC64/POWER7.
|
||||||
|
Copyright (C) 2013 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
|
||||||
|
/* Implements the function
|
||||||
|
|
||||||
|
char * [r3] strcpy (char *dest [r3], const char *src [r4])
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
char * [r3] stpcpy (char *dest [r3], const char *src [r4])
|
||||||
|
|
||||||
|
if USE_AS_STPCPY is defined. It tries to use aligned memory accesses
|
||||||
|
when possible using the following algorithm:
|
||||||
|
|
||||||
|
if (((((uintptr_t)dst & 0x7UL) == 0) && ((uintptr_t)src & 0x7UL) == 0))
|
||||||
|
goto aligned_doubleword_copy;
|
||||||
|
if (((((uintptr_t)dst & 0x3UL) == 0) && ((uintptr_t)src & 0x3UL) == 0))
|
||||||
|
goto aligned_word_copy;
|
||||||
|
if (((uintptr_t)dst & 0x7UL) == ((uintptr_t)src & 0x7UL))
|
||||||
|
goto same_alignment;
|
||||||
|
goto unaligned;
|
||||||
|
|
||||||
|
The aligned comparisons are made using cmpb instructions. */
|
||||||
|
|
||||||
|
#ifdef USE_AS_STPCPY
|
||||||
|
# define FUNC_NAME __stpcpy
|
||||||
|
#else
|
||||||
|
# define FUNC_NAME strcpy
|
||||||
|
#endif
|
||||||
|
|
||||||
|
.machine power7
|
||||||
|
EALIGN (FUNC_NAME, 4, 0)
|
||||||
|
CALL_MCOUNT 2
|
||||||
|
|
||||||
|
#define rTMP r0
|
||||||
|
#ifdef USE_AS_STPCPY
|
||||||
|
#define rRTN r3 /* pointer to previous word/doubleword in dest */
|
||||||
|
#else
|
||||||
|
#define rRTN r12 /* pointer to previous word/doubleword in dest */
|
||||||
|
#endif
|
||||||
|
#define rSRC r4 /* pointer to previous word/doubleword in src */
|
||||||
|
#define rMASK r5 /* all-zero comparand for cmpb (null-byte detection) */
|
||||||
|
#define rWORD r6 /* current word from src */
|
||||||
|
#define rALT r7 /* alternate word from src */
|
||||||
|
#define rRTNAL r8 /* alignment of return pointer */
|
||||||
|
#define rSRCAL r9 /* alignment of source pointer */
|
||||||
|
#define rALCNT r10 /* bytes to read to reach 8 bytes alignment */
|
||||||
|
#define rSUBAL r11 /* doubleword minus unaligned displacement */
|
||||||
|
|
||||||
|
#ifndef USE_AS_STPCPY
|
||||||
|
/* Save the dst pointer to use as return value. */
|
||||||
|
mr rRTN, r3
|
||||||
|
#endif
|
||||||
|
or rTMP, rSRC, rRTN
|
||||||
|
clrldi. rTMP, rTMP, 61
|
||||||
|
bne L(check_word_alignment)
|
||||||
|
b L(aligned_doubleword_copy)
|
||||||
|
|
||||||
|
L(same_alignment):
|
||||||
|
/* Src and dst with same alignment: align both to doubleword. */
|
||||||
|
mr rALCNT, rRTN
|
||||||
|
lbz rWORD, 0(rSRC)
|
||||||
|
subfic rSUBAL, rRTNAL, 8
|
||||||
|
addi rRTN, rRTN, 1
|
||||||
|
addi rSRC, rSRC, 1
|
||||||
|
cmpdi cr7, rWORD, 0
|
||||||
|
stb rWORD, 0(rALCNT)
|
||||||
|
beq cr7, L(s2)
|
||||||
|
|
||||||
|
add rALCNT, rALCNT, rSUBAL
|
||||||
|
subf rALCNT, rRTN, rALCNT
|
||||||
|
addi rALCNT, rALCNT, 1
|
||||||
|
mtctr rALCNT
|
||||||
|
b L(s1)
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
L(s0):
|
||||||
|
addi rSRC, rSRC, 1
|
||||||
|
lbz rWORD, -1(rSRC)
|
||||||
|
cmpdi cr7, rWORD, 0
|
||||||
|
stb rWORD, -1(rALCNT)
|
||||||
|
beqlr cr7
|
||||||
|
mr rRTN, rALCNT
|
||||||
|
L(s1):
|
||||||
|
addi rALCNT, rRTN,1
|
||||||
|
bdnz L(s0)
|
||||||
|
b L(aligned_doubleword_copy)
|
||||||
|
.align 4
|
||||||
|
L(s2):
|
||||||
|
mr rRTN, rALCNT
|
||||||
|
blr
|
||||||
|
|
||||||
|
/* For doubleword aligned memory, operate using doubleword load and stores. */
|
||||||
|
.align 4
|
||||||
|
L(aligned_doubleword_copy):
|
||||||
|
li rMASK, 0
|
||||||
|
addi rRTN, rRTN, -8
|
||||||
|
ld rWORD, 0(rSRC)
|
||||||
|
b L(g2)
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
L(g0): ldu rALT, 8(rSRC)
|
||||||
|
stdu rWORD, 8(rRTN)
|
||||||
|
cmpb rTMP, rALT, rMASK
|
||||||
|
cmpdi rTMP, 0
|
||||||
|
bne L(g1)
|
||||||
|
ldu rWORD, 8(rSRC)
|
||||||
|
stdu rALT, 8(rRTN)
|
||||||
|
L(g2): cmpb rTMP, rWORD, rMASK
|
||||||
|
cmpdi rTMP, 0 /* If rTMP is 0, no null's have been found. */
|
||||||
|
beq L(g0)
|
||||||
|
|
||||||
|
mr rALT, rWORD
|
||||||
|
/* We've hit the end of the string. Do the rest byte-by-byte. */
|
||||||
|
L(g1):
|
||||||
|
#ifdef __LITTLE_ENDIAN__
|
||||||
|
extrdi. rTMP, rALT, 8, 56
|
||||||
|
stbu rALT, 8(rRTN)
|
||||||
|
beqlr-
|
||||||
|
extrdi. rTMP, rALT, 8, 48
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr-
|
||||||
|
extrdi. rTMP, rALT, 8, 40
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr-
|
||||||
|
extrdi. rTMP, rALT, 8, 32
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr-
|
||||||
|
extrdi. rTMP, rALT, 8, 24
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr-
|
||||||
|
extrdi. rTMP, rALT, 8, 16
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr-
|
||||||
|
extrdi. rTMP, rALT, 8, 8
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr-
|
||||||
|
extrdi rTMP, rALT, 8, 0
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
#else
|
||||||
|
extrdi. rTMP, rALT, 8, 0
|
||||||
|
stbu rTMP, 8(rRTN)
|
||||||
|
beqlr
|
||||||
|
extrdi. rTMP, rALT, 8, 8
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr
|
||||||
|
extrdi. rTMP, rALT, 8, 16
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr
|
||||||
|
extrdi. rTMP, rALT, 8, 24
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr
|
||||||
|
extrdi. rTMP, rALT, 8, 32
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr
|
||||||
|
extrdi. rTMP, rALT, 8, 40
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr
|
||||||
|
extrdi. rTMP, rALT, 8, 48
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr
|
||||||
|
stbu rALT, 1(rRTN)
|
||||||
|
#endif
|
||||||
|
blr
|
||||||
|
|
||||||
|
L(check_word_alignment):
|
||||||
|
clrldi. rTMP, rTMP, 62
|
||||||
|
beq L(aligned_word_copy)
|
||||||
|
rldicl rRTNAL, rRTN, 0, 61
|
||||||
|
rldicl rSRCAL, rSRC, 0, 61
|
||||||
|
cmpld cr7, rSRCAL, rRTNAL
|
||||||
|
beq cr7, L(same_alignment)
|
||||||
|
b L(unaligned)
|
||||||
|
|
||||||
|
/* For word aligned memory, operate using word load and stores. */
|
||||||
|
.align 4
|
||||||
|
L(aligned_word_copy):
|
||||||
|
li rMASK, 0
|
||||||
|
addi rRTN, rRTN, -4
|
||||||
|
lwz rWORD, 0(rSRC)
|
||||||
|
b L(g5)
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
L(g3): lwzu rALT, 4(rSRC)
|
||||||
|
stwu rWORD, 4(rRTN)
|
||||||
|
cmpb rTMP, rALT, rMASK
|
||||||
|
cmpwi rTMP, 0
|
||||||
|
bne L(g4)
|
||||||
|
lwzu rWORD, 4(rSRC)
|
||||||
|
stwu rALT, 4(rRTN)
|
||||||
|
L(g5): cmpb rTMP, rWORD, rMASK
|
||||||
|
cmpwi rTMP, 0 /* If rTMP is 0, no null in word. */
|
||||||
|
beq L(g3)
|
||||||
|
|
||||||
|
mr rALT, rWORD
|
||||||
|
/* We've hit the end of the string. Do the rest byte-by-byte. */
|
||||||
|
L(g4):
|
||||||
|
#ifdef __LITTLE_ENDIAN__
|
||||||
|
rlwinm. rTMP, rALT, 0, 24, 31
|
||||||
|
stbu rALT, 4(rRTN)
|
||||||
|
beqlr-
|
||||||
|
rlwinm. rTMP, rALT, 24, 24, 31
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr-
|
||||||
|
rlwinm. rTMP, rALT, 16, 24, 31
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr-
|
||||||
|
rlwinm rTMP, rALT, 8, 24, 31
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
#else
|
||||||
|
rlwinm. rTMP, rALT, 8, 24, 31
|
||||||
|
stbu rTMP, 4(rRTN)
|
||||||
|
beqlr
|
||||||
|
rlwinm. rTMP, rALT, 16, 24, 31
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr
|
||||||
|
rlwinm. rTMP, rALT, 24, 24, 31
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr
|
||||||
|
stbu rALT, 1(rRTN)
|
||||||
|
#endif
|
||||||
|
blr
|
||||||
|
|
||||||
|
/* Oh well. In this case, we just do a byte-by-byte copy. */
|
||||||
|
.align 4
|
||||||
|
L(unaligned):
|
||||||
|
lbz rWORD, 0(rSRC)
|
||||||
|
addi rRTN, rRTN, -1
|
||||||
|
cmpdi rWORD, 0
|
||||||
|
beq L(u2)
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
L(u0): lbzu rALT, 1(rSRC)
|
||||||
|
stbu rWORD, 1(rRTN)
|
||||||
|
cmpdi rALT, 0
|
||||||
|
beq L(u1)
|
||||||
|
lbzu rWORD, 1(rSRC)
|
||||||
|
stbu rALT, 1(rRTN)
|
||||||
|
cmpdi rWORD, 0
|
||||||
|
beq L(u2)
|
||||||
|
lbzu rALT, 1(rSRC)
|
||||||
|
stbu rWORD, 1(rRTN)
|
||||||
|
cmpdi rALT, 0
|
||||||
|
beq L(u1)
|
||||||
|
lbzu rWORD, 1(rSRC)
|
||||||
|
stbu rALT, 1(rRTN)
|
||||||
|
cmpdi rWORD, 0
|
||||||
|
bne L(u0)
|
||||||
|
L(u2): stbu rWORD, 1(rRTN)
|
||||||
|
blr
|
||||||
|
L(u1): stbu rALT, 1(rRTN)
|
||||||
|
blr
|
||||||
|
END (FUNC_NAME)
|
||||||
|
|
||||||
|
#ifndef USE_AS_STPCPY
|
||||||
|
libc_hidden_builtin_def (strcpy)
|
||||||
|
#endif
|
||||||
|
|
@ -16,103 +16,8 @@
|
||||||
License along with the GNU C Library; if not, see
|
License along with the GNU C Library; if not, see
|
||||||
<http://www.gnu.org/licenses/>. */
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
#include <sysdep.h>
|
#define USE_AS_STPCPY
|
||||||
|
#include <sysdeps/powerpc/powerpc64/strcpy.S>
|
||||||
/* See strlen.s for comments on how the end-of-string testing works. */
|
|
||||||
|
|
||||||
/* char * [r3] stpcpy (char *dest [r3], const char *src [r4]) */
|
|
||||||
|
|
||||||
EALIGN (__stpcpy, 4, 0)
|
|
||||||
CALL_MCOUNT 2
|
|
||||||
|
|
||||||
#define rTMP r0
|
|
||||||
#define rRTN r3
|
|
||||||
#define rDEST r3 /* pointer to previous word in dest */
|
|
||||||
#define rSRC r4 /* pointer to previous word in src */
|
|
||||||
#define rWORD r6 /* current word from src */
|
|
||||||
#define rFEFE r7 /* 0xfefefeff */
|
|
||||||
#define r7F7F r8 /* 0x7f7f7f7f */
|
|
||||||
#define rNEG r9 /* ~(word in src | 0x7f7f7f7f) */
|
|
||||||
#define rALT r10 /* alternate word from src */
|
|
||||||
|
|
||||||
or rTMP, rSRC, rDEST
|
|
||||||
clrldi. rTMP, rTMP, 62
|
|
||||||
addi rDEST, rDEST, -4
|
|
||||||
bne L(unaligned)
|
|
||||||
|
|
||||||
lis rFEFE, -0x101
|
|
||||||
lis r7F7F, 0x7f7f
|
|
||||||
lwz rWORD, 0(rSRC)
|
|
||||||
addi rFEFE, rFEFE, -0x101
|
|
||||||
addi r7F7F, r7F7F, 0x7f7f
|
|
||||||
b L(g2)
|
|
||||||
|
|
||||||
L(g0): lwzu rALT, 4(rSRC)
|
|
||||||
stwu rWORD, 4(rDEST)
|
|
||||||
add rTMP, rFEFE, rALT
|
|
||||||
nor rNEG, r7F7F, rALT
|
|
||||||
and. rTMP, rTMP, rNEG
|
|
||||||
bne- L(g1)
|
|
||||||
lwzu rWORD, 4(rSRC)
|
|
||||||
stwu rALT, 4(rDEST)
|
|
||||||
L(g2): add rTMP, rFEFE, rWORD
|
|
||||||
nor rNEG, r7F7F, rWORD
|
|
||||||
and. rTMP, rTMP, rNEG
|
|
||||||
beq+ L(g0)
|
|
||||||
|
|
||||||
mr rALT, rWORD
|
|
||||||
/* We've hit the end of the string. Do the rest byte-by-byte. */
|
|
||||||
L(g1):
|
|
||||||
#ifdef __LITTLE_ENDIAN__
|
|
||||||
rlwinm. rTMP, rALT, 0, 24, 31
|
|
||||||
stbu rALT, 4(rDEST)
|
|
||||||
beqlr-
|
|
||||||
rlwinm. rTMP, rALT, 24, 24, 31
|
|
||||||
stbu rTMP, 1(rDEST)
|
|
||||||
beqlr-
|
|
||||||
rlwinm. rTMP, rALT, 16, 24, 31
|
|
||||||
stbu rTMP, 1(rDEST)
|
|
||||||
beqlr-
|
|
||||||
rlwinm rTMP, rALT, 8, 24, 31
|
|
||||||
stbu rTMP, 1(rDEST)
|
|
||||||
blr
|
|
||||||
#else
|
|
||||||
rlwinm. rTMP, rALT, 8, 24, 31
|
|
||||||
stbu rTMP, 4(rDEST)
|
|
||||||
beqlr-
|
|
||||||
rlwinm. rTMP, rALT, 16, 24, 31
|
|
||||||
stbu rTMP, 1(rDEST)
|
|
||||||
beqlr-
|
|
||||||
rlwinm. rTMP, rALT, 24, 24, 31
|
|
||||||
stbu rTMP, 1(rDEST)
|
|
||||||
beqlr-
|
|
||||||
stbu rALT, 1(rDEST)
|
|
||||||
blr
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Oh well. In this case, we just do a byte-by-byte copy. */
|
|
||||||
.align 4
|
|
||||||
nop
|
|
||||||
L(unaligned):
|
|
||||||
lbz rWORD, 0(rSRC)
|
|
||||||
addi rDEST, rDEST, 3
|
|
||||||
cmpwi rWORD, 0
|
|
||||||
beq- L(u2)
|
|
||||||
|
|
||||||
L(u0): lbzu rALT, 1(rSRC)
|
|
||||||
stbu rWORD, 1(rDEST)
|
|
||||||
cmpwi rALT, 0
|
|
||||||
beq- L(u1)
|
|
||||||
nop /* Let 601 load start of loop. */
|
|
||||||
lbzu rWORD, 1(rSRC)
|
|
||||||
stbu rALT, 1(rDEST)
|
|
||||||
cmpwi rWORD, 0
|
|
||||||
bne+ L(u0)
|
|
||||||
L(u2): stbu rWORD, 1(rDEST)
|
|
||||||
blr
|
|
||||||
L(u1): stbu rALT, 1(rDEST)
|
|
||||||
blr
|
|
||||||
END (__stpcpy)
|
|
||||||
|
|
||||||
weak_alias (__stpcpy, stpcpy)
|
weak_alias (__stpcpy, stpcpy)
|
||||||
libc_hidden_def (__stpcpy)
|
libc_hidden_def (__stpcpy)
|
||||||
|
|
|
||||||
|
|
@ -22,25 +22,38 @@
|
||||||
|
|
||||||
/* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */
|
/* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */
|
||||||
|
|
||||||
EALIGN (strcpy, 4, 0)
|
#ifdef USE_AS_STPCPY
|
||||||
|
# define FUNC_NAME __stpcpy
|
||||||
|
#else
|
||||||
|
# define FUNC_NAME strcpy
|
||||||
|
#endif
|
||||||
|
|
||||||
|
EALIGN (FUNC_NAME, 4, 0)
|
||||||
CALL_MCOUNT 2
|
CALL_MCOUNT 2
|
||||||
|
|
||||||
#define rTMP r0
|
#define rTMP r0
|
||||||
#define rRTN r3 /* incoming DEST arg preserved as result */
|
#ifdef USE_AS_STPCPY
|
||||||
#define rSRC r4 /* pointer to previous word in src */
|
#define rRTN r3 /* pointer to previous word/doubleword in dest */
|
||||||
#define rDEST r5 /* pointer to previous word in dest */
|
#else
|
||||||
|
#define rRTN r12 /* pointer to previous word/doubleword in dest */
|
||||||
|
#endif
|
||||||
|
#define rSRC r4 /* pointer to previous word/doubleword in src */
|
||||||
#define rWORD r6 /* current word from src */
|
#define rWORD r6 /* current word from src */
|
||||||
#define rFEFE r7 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
|
#define rFEFE r7 /* constant 0xfefefeff | 0xfefefefefefefeff */
|
||||||
#define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */
|
#define r7F7F r8 /* constant 0x7f7f7f7f | 0x7f7f7f7f7f7f7f7f */
|
||||||
#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
|
#define rNEG r9 /* ~(word in s1 | r7F7F) */
|
||||||
#define rALT r10 /* alternate word from src */
|
#define rALT r10 /* alternate word from src */
|
||||||
|
|
||||||
dcbt 0,rSRC
|
#ifndef USE_AS_STPCPY
|
||||||
|
/* Save the dst pointer to use as return value. */
|
||||||
|
mr rRTN, r3
|
||||||
|
#endif
|
||||||
or rTMP, rSRC, rRTN
|
or rTMP, rSRC, rRTN
|
||||||
clrldi. rTMP, rTMP, 61
|
clrldi. rTMP, rTMP, 61
|
||||||
addi rDEST, rRTN, -8
|
bne L(check_word_alignment)
|
||||||
dcbtst 0,rRTN
|
|
||||||
bne L(unaligned)
|
/* For doubleword aligned memory, operate using doubleword load and stores. */
|
||||||
|
addi rRTN, rRTN, -8
|
||||||
|
|
||||||
lis rFEFE, -0x101
|
lis rFEFE, -0x101
|
||||||
lis r7F7F, 0x7f7f
|
lis r7F7F, 0x7f7f
|
||||||
|
|
@ -53,13 +66,13 @@ EALIGN (strcpy, 4, 0)
|
||||||
b L(g2)
|
b L(g2)
|
||||||
|
|
||||||
L(g0): ldu rALT, 8(rSRC)
|
L(g0): ldu rALT, 8(rSRC)
|
||||||
stdu rWORD, 8(rDEST)
|
stdu rWORD, 8(rRTN)
|
||||||
add rTMP, rFEFE, rALT
|
add rTMP, rFEFE, rALT
|
||||||
nor rNEG, r7F7F, rALT
|
nor rNEG, r7F7F, rALT
|
||||||
and. rTMP, rTMP, rNEG
|
and. rTMP, rTMP, rNEG
|
||||||
bne- L(g1)
|
bne- L(g1)
|
||||||
ldu rWORD, 8(rSRC)
|
ldu rWORD, 8(rSRC)
|
||||||
stdu rALT, 8(rDEST)
|
stdu rALT, 8(rRTN)
|
||||||
L(g2): add rTMP, rFEFE, rWORD
|
L(g2): add rTMP, rFEFE, rWORD
|
||||||
nor rNEG, r7F7F, rWORD
|
nor rNEG, r7F7F, rWORD
|
||||||
and. rTMP, rTMP, rNEG
|
and. rTMP, rTMP, rNEG
|
||||||
|
|
@ -70,77 +83,134 @@ L(g2): add rTMP, rFEFE, rWORD
|
||||||
L(g1):
|
L(g1):
|
||||||
#ifdef __LITTLE_ENDIAN__
|
#ifdef __LITTLE_ENDIAN__
|
||||||
extrdi. rTMP, rALT, 8, 56
|
extrdi. rTMP, rALT, 8, 56
|
||||||
stb rALT, 8(rDEST)
|
stbu rALT, 8(rRTN)
|
||||||
beqlr-
|
beqlr-
|
||||||
extrdi. rTMP, rALT, 8, 48
|
extrdi. rTMP, rALT, 8, 48
|
||||||
stb rTMP, 9(rDEST)
|
stbu rTMP, 1(rRTN)
|
||||||
beqlr-
|
beqlr-
|
||||||
extrdi. rTMP, rALT, 8, 40
|
extrdi. rTMP, rALT, 8, 40
|
||||||
stb rTMP, 10(rDEST)
|
stbu rTMP, 1(rRTN)
|
||||||
beqlr-
|
beqlr-
|
||||||
extrdi. rTMP, rALT, 8, 32
|
extrdi. rTMP, rALT, 8, 32
|
||||||
stb rTMP, 11(rDEST)
|
stbu rTMP, 1(rRTN)
|
||||||
beqlr-
|
beqlr-
|
||||||
extrdi. rTMP, rALT, 8, 24
|
extrdi. rTMP, rALT, 8, 24
|
||||||
stb rTMP, 12(rDEST)
|
stbu rTMP, 1(rRTN)
|
||||||
beqlr-
|
beqlr-
|
||||||
extrdi. rTMP, rALT, 8, 16
|
extrdi. rTMP, rALT, 8, 16
|
||||||
stb rTMP, 13(rDEST)
|
stbu rTMP, 1(rRTN)
|
||||||
beqlr-
|
beqlr-
|
||||||
extrdi. rTMP, rALT, 8, 8
|
extrdi. rTMP, rALT, 8, 8
|
||||||
stb rTMP, 14(rDEST)
|
stbu rTMP, 1(rRTN)
|
||||||
beqlr-
|
beqlr-
|
||||||
extrdi rTMP, rALT, 8, 0
|
extrdi rTMP, rALT, 8, 0
|
||||||
stb rTMP, 15(rDEST)
|
stbu rTMP, 1(rRTN)
|
||||||
blr
|
|
||||||
#else
|
#else
|
||||||
extrdi. rTMP, rALT, 8, 0
|
extrdi. rTMP, rALT, 8, 0
|
||||||
stb rTMP, 8(rDEST)
|
stbu rTMP, 8(rRTN)
|
||||||
beqlr-
|
beqlr-
|
||||||
extrdi. rTMP, rALT, 8, 8
|
extrdi. rTMP, rALT, 8, 8
|
||||||
stb rTMP, 9(rDEST)
|
stbu rTMP, 1(rRTN)
|
||||||
beqlr-
|
beqlr-
|
||||||
extrdi. rTMP, rALT, 8, 16
|
extrdi. rTMP, rALT, 8, 16
|
||||||
stb rTMP, 10(rDEST)
|
stbu rTMP, 1(rRTN)
|
||||||
beqlr-
|
beqlr-
|
||||||
extrdi. rTMP, rALT, 8, 24
|
extrdi. rTMP, rALT, 8, 24
|
||||||
stb rTMP, 11(rDEST)
|
stbu rTMP, 1(rRTN)
|
||||||
beqlr-
|
beqlr-
|
||||||
extrdi. rTMP, rALT, 8, 32
|
extrdi. rTMP, rALT, 8, 32
|
||||||
stb rTMP, 12(rDEST)
|
stbu rTMP, 1(rRTN)
|
||||||
beqlr-
|
beqlr
|
||||||
extrdi. rTMP, rALT, 8, 40
|
extrdi. rTMP, rALT, 8, 40
|
||||||
stb rTMP, 13(rDEST)
|
stbu rTMP, 1(rRTN)
|
||||||
beqlr-
|
beqlr-
|
||||||
extrdi. rTMP, rALT, 8, 48
|
extrdi. rTMP, rALT, 8, 48
|
||||||
stb rTMP, 14(rDEST)
|
stbu rTMP, 1(rRTN)
|
||||||
beqlr-
|
beqlr-
|
||||||
stb rALT, 15(rDEST)
|
stbu rALT, 1(rRTN)
|
||||||
blr
|
|
||||||
#endif
|
#endif
|
||||||
|
blr
|
||||||
|
|
||||||
|
L(check_word_alignment):
|
||||||
|
clrldi. rTMP, rTMP, 62
|
||||||
|
bne L(unaligned)
|
||||||
|
|
||||||
|
/* For word aligned memory, operate using word load and stores. */
|
||||||
|
addi rRTN, rRTN, -4
|
||||||
|
|
||||||
|
lis rFEFE, -0x101
|
||||||
|
lis r7F7F, 0x7f7f
|
||||||
|
lwz rWORD, 0(rSRC)
|
||||||
|
addi rFEFE, rFEFE, -0x101
|
||||||
|
addi r7F7F, r7F7F, 0x7f7f
|
||||||
|
b L(g5)
|
||||||
|
|
||||||
|
L(g3): lwzu rALT, 4(rSRC)
|
||||||
|
stwu rWORD, 4(rRTN)
|
||||||
|
add rTMP, rFEFE, rALT
|
||||||
|
nor rNEG, r7F7F, rALT
|
||||||
|
and. rTMP, rTMP, rNEG
|
||||||
|
bne- L(g4)
|
||||||
|
lwzu rWORD, 4(rSRC)
|
||||||
|
stwu rALT, 4(rRTN)
|
||||||
|
L(g5): add rTMP, rFEFE, rWORD
|
||||||
|
nor rNEG, r7F7F, rWORD
|
||||||
|
and. rTMP, rTMP, rNEG
|
||||||
|
beq+ L(g3)
|
||||||
|
|
||||||
|
mr rALT, rWORD
|
||||||
|
/* We've hit the end of the string. Do the rest byte-by-byte. */
|
||||||
|
L(g4):
|
||||||
|
#ifdef __LITTLE_ENDIAN__
|
||||||
|
rlwinm. rTMP, rALT, 0, 24, 31
|
||||||
|
stbu rALT, 4(rRTN)
|
||||||
|
beqlr-
|
||||||
|
rlwinm. rTMP, rALT, 24, 24, 31
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr-
|
||||||
|
rlwinm. rTMP, rALT, 16, 24, 31
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr-
|
||||||
|
rlwinm rTMP, rALT, 8, 24, 31
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
#else
|
||||||
|
rlwinm. rTMP, rALT, 8, 24, 31
|
||||||
|
stbu rTMP, 4(rRTN)
|
||||||
|
beqlr-
|
||||||
|
rlwinm. rTMP, rALT, 16, 24, 31
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr-
|
||||||
|
rlwinm. rTMP, rALT, 24, 24, 31
|
||||||
|
stbu rTMP, 1(rRTN)
|
||||||
|
beqlr-
|
||||||
|
stbu rALT, 1(rRTN)
|
||||||
|
#endif
|
||||||
|
blr
|
||||||
|
|
||||||
/* Oh well. In this case, we just do a byte-by-byte copy. */
|
/* Oh well. In this case, we just do a byte-by-byte copy. */
|
||||||
.align 4
|
.align 4
|
||||||
nop
|
nop
|
||||||
L(unaligned):
|
L(unaligned):
|
||||||
lbz rWORD, 0(rSRC)
|
lbz rWORD, 0(rSRC)
|
||||||
addi rDEST, rRTN, -1
|
addi rRTN, rRTN, -1
|
||||||
cmpwi rWORD, 0
|
cmpwi rWORD, 0
|
||||||
beq- L(u2)
|
beq- L(u2)
|
||||||
|
|
||||||
L(u0): lbzu rALT, 1(rSRC)
|
L(u0): lbzu rALT, 1(rSRC)
|
||||||
stbu rWORD, 1(rDEST)
|
stbu rWORD, 1(rRTN)
|
||||||
cmpwi rALT, 0
|
cmpwi rALT, 0
|
||||||
beq- L(u1)
|
beq- L(u1)
|
||||||
nop /* Let 601 load start of loop. */
|
nop /* Let 601 load start of loop. */
|
||||||
lbzu rWORD, 1(rSRC)
|
lbzu rWORD, 1(rSRC)
|
||||||
stbu rALT, 1(rDEST)
|
stbu rALT, 1(rRTN)
|
||||||
cmpwi rWORD, 0
|
cmpwi rWORD, 0
|
||||||
bne+ L(u0)
|
bne+ L(u0)
|
||||||
L(u2): stb rWORD, 1(rDEST)
|
L(u2): stbu rWORD, 1(rRTN)
|
||||||
blr
|
blr
|
||||||
L(u1): stb rALT, 1(rDEST)
|
L(u1): stbu rALT, 1(rRTN)
|
||||||
blr
|
blr
|
||||||
|
END (FUNC_NAME)
|
||||||
|
|
||||||
END (strcpy)
|
#ifndef USE_AS_STPCPY
|
||||||
libc_hidden_builtin_def (strcpy)
|
libc_hidden_builtin_def (strcpy)
|
||||||
|
#endif
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue