From 098e6c37e3835dee6506593d88313502f35b0987 Mon Sep 17 00:00:00 2001 From: joseph Date: Tue, 6 Sep 2011 15:08:18 +0000 Subject: Merge changes between r14661 and r15223 from /fsf/trunk. git-svn-id: svn://svn.eglibc.org/trunk@15224 7b3dc134-2b1b-0410-93df-9e9f96275f8d --- libc/sysdeps/generic/ldsodefs.h | 10 +- libc/sysdeps/i386/dl-trampoline.S | 4 +- libc/sysdeps/i386/i486/bits/atomic.h | 18 +- libc/sysdeps/i386/i486/bits/string.h | 16 +- libc/sysdeps/i386/i686/multiarch/Makefile | 5 +- libc/sysdeps/i386/i686/multiarch/strcat-sse2.S | 1244 ++++++++++++++++++++ libc/sysdeps/i386/i686/multiarch/strcat-ssse3.S | 573 +++++++++ libc/sysdeps/i386/i686/multiarch/strcat.S | 130 ++ libc/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S | 159 +++ libc/sysdeps/i386/i686/multiarch/strchr-sse2.S | 349 ++++++ libc/sysdeps/i386/i686/multiarch/strchr.S | 76 ++ libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S | 547 +++++---- libc/sysdeps/i386/i686/multiarch/strlen-sse2.S | 113 +- libc/sysdeps/i386/i686/multiarch/strncat-c.c | 8 + libc/sysdeps/i386/i686/multiarch/strncat-sse2.S | 4 + libc/sysdeps/i386/i686/multiarch/strncat-ssse3.S | 4 + libc/sysdeps/i386/i686/multiarch/strncat.S | 3 + .../sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S | 283 +++++ libc/sysdeps/i386/i686/multiarch/strrchr-sse2.S | 709 +++++++++++ libc/sysdeps/i386/i686/multiarch/strrchr.S | 76 ++ libc/sysdeps/i386/i686/multiarch/strspn.S | 4 +- libc/sysdeps/i386/i686/multiarch/wcscmp-c.c | 10 + libc/sysdeps/i386/i686/multiarch/wcscmp-sse2.S | 1003 ++++++++++++++++ libc/sysdeps/i386/i686/multiarch/wcscmp.S | 57 + libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c | 18 +- libc/sysdeps/mach/hurd/spawni.c | 10 +- libc/sysdeps/posix/getaddrinfo.c | 5 +- libc/sysdeps/posix/spawni.c | 14 +- libc/sysdeps/powerpc/dl-tls.h | 4 + libc/sysdeps/powerpc/fpu/libm-test-ulps | 2 + libc/sysdeps/powerpc/powerpc64/dl-irel.h | 11 +- libc/sysdeps/pthread/aio_suspend.c | 14 +- libc/sysdeps/sparc/dl-procinfo.c | 9 +- libc/sysdeps/sparc/dl-procinfo.h | 5 +- libc/sysdeps/sparc/sparc32/bits/atomic.h | 8 +- libc/sysdeps/sparc/sparc32/dl-machine.h | 3 +- libc/sysdeps/sparc/sparc32/fpu/s_fabs.S | 29 + libc/sysdeps/sparc/sparc32/fpu/s_fabs.c | 11 - libc/sysdeps/sparc/sparc32/fpu/s_fabsf.S | 4 +- libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_fabs.S | 29 + libc/sysdeps/sparc/sparc64/dl-machine.h | 4 +- libc/sysdeps/sparc/sparc64/jmpbuf-unwind.h | 4 +- libc/sysdeps/sparc/sparc64/multiarch/memcpy.S | 8 +- libc/sysdeps/sparc/sparc64/multiarch/memset.S | 6 +- libc/sysdeps/sparc/sparc64/strcmp.S | 416 +++---- libc/sysdeps/sparc/sysdep.h | 45 + libc/sysdeps/unix/opendir.c | 21 +- libc/sysdeps/unix/sparc/sysdep.h | 24 +- libc/sysdeps/unix/sysv/linux/Makefile | 2 +- libc/sysdeps/unix/sysv/linux/bits/socket.h | 4 +- libc/sysdeps/unix/sysv/linux/i386/scandir64.c | 106 +- libc/sysdeps/unix/sysv/linux/ia64/sys/ptrace.h | 31 +- libc/sysdeps/unix/sysv/linux/kernel-features.h | 5 + .../unix/sysv/linux/powerpc/powerpc32/scandir64.c | 1 + libc/sysdeps/unix/sysv/linux/powerpc/scandir64.c | 1 - libc/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h | 31 +- libc/sysdeps/unix/sysv/linux/s390/sys/ptrace.h | 31 +- libc/sysdeps/unix/sysv/linux/sparc/bits/resource.h | 4 +- libc/sysdeps/unix/sysv/linux/sparc/bits/socket.h | 4 +- .../sysv/linux/sparc/sparc32/____longjmp_chk.S | 11 +- .../sysv/linux/sparc/sparc32/sparcv9/fpu/Implies | 3 + .../sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h | 110 +- .../sysv/linux/sparc/sparc64/____longjmp_chk.S | 26 +- .../sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h | 108 +- 
libc/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h | 31 +- libc/sysdeps/unix/sysv/linux/sparc/sysdep.h | 89 +- libc/sysdeps/unix/sysv/linux/sys/ptrace.h | 36 +- .../unix/sysv/linux/x86_64/bits/libc-vdso.h | 5 +- libc/sysdeps/unix/sysv/linux/x86_64/bits/sem.h | 6 +- libc/sysdeps/unix/sysv/linux/x86_64/gettimeofday.S | 49 - libc/sysdeps/unix/sysv/linux/x86_64/gettimeofday.c | 51 + libc/sysdeps/unix/sysv/linux/x86_64/init-first.c | 24 +- libc/sysdeps/unix/sysv/linux/x86_64/sched_getcpu.S | 19 +- libc/sysdeps/unix/sysv/linux/x86_64/time.S | 47 - libc/sysdeps/unix/sysv/linux/x86_64/time.c | 49 + libc/sysdeps/wordsize-64/scandirat.c | 6 + libc/sysdeps/wordsize-64/scandirat64.c | 1 + libc/sysdeps/x86_64/dl-trampoline.S | 33 +- libc/sysdeps/x86_64/dl-trampoline.h | 25 +- libc/sysdeps/x86_64/l10nflist.c | 13 + libc/sysdeps/x86_64/multiarch/Makefile | 3 +- libc/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S | 281 +++++ libc/sysdeps/x86_64/multiarch/strchr.S | 6 +- libc/sysdeps/x86_64/multiarch/strlen.S | 2 +- .../sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S | 556 +++++++++ libc/sysdeps/x86_64/multiarch/strrchr.S | 6 +- libc/sysdeps/x86_64/wcscmp.S | 936 +++++++++++++++ 87 files changed, 7905 insertions(+), 956 deletions(-) create mode 100644 libc/sysdeps/i386/i686/multiarch/strcat-sse2.S create mode 100644 libc/sysdeps/i386/i686/multiarch/strcat-ssse3.S create mode 100644 libc/sysdeps/i386/i686/multiarch/strcat.S create mode 100644 libc/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S create mode 100644 libc/sysdeps/i386/i686/multiarch/strchr-sse2.S create mode 100644 libc/sysdeps/i386/i686/multiarch/strchr.S create mode 100644 libc/sysdeps/i386/i686/multiarch/strncat-c.c create mode 100644 libc/sysdeps/i386/i686/multiarch/strncat-sse2.S create mode 100644 libc/sysdeps/i386/i686/multiarch/strncat-ssse3.S create mode 100644 libc/sysdeps/i386/i686/multiarch/strncat.S create mode 100644 libc/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S create mode 100644 libc/sysdeps/i386/i686/multiarch/strrchr-sse2.S create mode 100644 libc/sysdeps/i386/i686/multiarch/strrchr.S create mode 100644 libc/sysdeps/i386/i686/multiarch/wcscmp-c.c create mode 100644 libc/sysdeps/i386/i686/multiarch/wcscmp-sse2.S create mode 100644 libc/sysdeps/i386/i686/multiarch/wcscmp.S create mode 100644 libc/sysdeps/sparc/sparc32/fpu/s_fabs.S delete mode 100644 libc/sysdeps/sparc/sparc32/fpu/s_fabs.c create mode 100644 libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_fabs.S create mode 100644 libc/sysdeps/sparc/sysdep.h create mode 100644 libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/scandir64.c delete mode 100644 libc/sysdeps/unix/sysv/linux/powerpc/scandir64.c create mode 100644 libc/sysdeps/unix/sysv/linux/sparc/sparc32/sparcv9/fpu/Implies delete mode 100644 libc/sysdeps/unix/sysv/linux/x86_64/gettimeofday.S create mode 100644 libc/sysdeps/unix/sysv/linux/x86_64/gettimeofday.c delete mode 100644 libc/sysdeps/unix/sysv/linux/x86_64/time.S create mode 100644 libc/sysdeps/unix/sysv/linux/x86_64/time.c create mode 100644 libc/sysdeps/wordsize-64/scandirat.c create mode 100644 libc/sysdeps/wordsize-64/scandirat64.c create mode 100644 libc/sysdeps/x86_64/l10nflist.c create mode 100644 libc/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S create mode 100644 libc/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S create mode 100644 libc/sysdeps/x86_64/wcscmp.S (limited to 'libc/sysdeps') diff --git a/libc/sysdeps/generic/ldsodefs.h b/libc/sysdeps/generic/ldsodefs.h index a55db07f0..e186848e4 100644 --- a/libc/sysdeps/generic/ldsodefs.h +++ 
b/libc/sysdeps/generic/ldsodefs.h @@ -396,7 +396,7 @@ struct rtld_global uint32_t hashval; const char *name; const ElfW(Sym) *sym; - struct link_map *map; + const struct link_map *map; } *entries; size_t size; size_t n_elements; @@ -564,9 +564,10 @@ struct rtld_global_ro #define DL_DEBUG_FILES (1 << 6) #define DL_DEBUG_STATISTICS (1 << 7) #define DL_DEBUG_UNUSED (1 << 8) +#define DL_DEBUG_SCOPES (1 << 9) /* These two are used only internally. */ -#define DL_DEBUG_HELP (1 << 9) -#define DL_DEBUG_PRELINK (1 << 10) +#define DL_DEBUG_HELP (1 << 10) +#define DL_DEBUG_PRELINK (1 << 11) /* OS version. */ EXTERN unsigned int _dl_osversion; @@ -1112,6 +1113,9 @@ extern void *_dl_tls_get_addr_soft (struct link_map *l) attribute_hidden; extern int _dl_addr_inside_object (struct link_map *l, const ElfW(Addr) addr) internal_function attribute_hidden; +/* Show show of an object. */ +extern void _dl_show_scope (struct link_map *new, int from); + __END_DECLS #endif /* ldsodefs.h */ diff --git a/libc/sysdeps/i386/dl-trampoline.S b/libc/sysdeps/i386/dl-trampoline.S index 73b08ba67..19e313e6b 100644 --- a/libc/sysdeps/i386/dl-trampoline.S +++ b/libc/sysdeps/i386/dl-trampoline.S @@ -1,5 +1,5 @@ /* PLT trampolines. i386 version. - Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc. + Copyright (C) 2004, 2005, 2007, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -99,7 +99,7 @@ _dl_runtime_profile: +4 %edx %esp free */ - cfi_adjust_cfa_offset (12) + cfi_adjust_cfa_offset (8) 1: movl %ebx, (%esp) cfi_rel_offset (ebx, 0) movl %edx, %ebx # This is the frame buffer size diff --git a/libc/sysdeps/i386/i486/bits/atomic.h b/libc/sysdeps/i386/i486/bits/atomic.h index 4ee6fef69..72986cac9 100644 --- a/libc/sysdeps/i386/i486/bits/atomic.h +++ b/libc/sysdeps/i386/i486/bits/atomic.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2002-2004, 2006, 2007, 2009 Free Software Foundation, Inc. +/* Copyright (C) 2002-2004,2006,2007,2009,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 2002. @@ -125,10 +125,18 @@ typedef uintmax_t uatomic_max_t; really going to be used the code below can be used on Intel Pentium and later, but NOT on i486. */ #if 1 -# define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \ - ({ __typeof (*mem) ret = *(mem); abort (); ret = (newval); ret = (oldval); }) -# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \ - ({ __typeof (*mem) ret = *(mem); abort (); ret = (newval); ret = (oldval); }) +# define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \ + ({ __typeof (*mem) ret = *(mem); \ + abort (); \ + ret = (newval); \ + ret = (oldval); \ + ret; }) +# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \ + ({ __typeof (*mem) ret = *(mem); \ + abort (); \ + ret = (newval); \ + ret = (oldval); \ + ret; }) #else # ifdef __PIC__ # define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \ diff --git a/libc/sysdeps/i386/i486/bits/string.h b/libc/sysdeps/i386/i486/bits/string.h index 9f05c7e65..4f8c104fe 100644 --- a/libc/sysdeps/i386/i486/bits/string.h +++ b/libc/sysdeps/i386/i486/bits/string.h @@ -1,6 +1,6 @@ /* Optimized, inlined string functions. i486 version. - Copyright (C) 1997,1998,1999,2000,2001,2002,2003,2004,2007 - Free Software Foundation, Inc. + Copyright (C) 1997,1998,1999,2000,2001,2002,2003,2004,2007,2011 + Free Software Foundation, Inc. 
This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -1058,8 +1058,8 @@ __strncat_g (char *__dest, __const char __src[], size_t __n) "movl %4, %3\n\t" "decl %1\n\t" "1:\n\t" - "decl %3\n\t" - "js 2f\n\t" + "subl $1,%3\n\t" + "jc 2f\n\t" "movb (%2),%b0\n\t" "movsb\n\t" "testb %b0,%b0\n\t" @@ -1078,8 +1078,8 @@ __strncat_g (char *__dest, __const char __src[], size_t __n) "leal 1(%1),%1\n\t" "jne 1b\n" "2:\n\t" - "decl %3\n\t" - "js 3f\n\t" + "subl $1,%3\n\t" + "jc 3f\n\t" "movb (%2),%b0\n\t" "leal 1(%2),%2\n\t" "movb %b0,(%1)\n\t" @@ -1219,8 +1219,8 @@ __strncmp_g (__const char *__s1, __const char *__s2, size_t __n) register int __res; __asm__ __volatile__ ("1:\n\t" - "decl %3\n\t" - "js 2f\n\t" + "subl $1,%3\n\t" + "jc 2f\n\t" "movb (%1),%b0\n\t" "incl %1\n\t" "cmpb %b0,(%2)\n\t" diff --git a/libc/sysdeps/i386/i686/multiarch/Makefile b/libc/sysdeps/i386/i686/multiarch/Makefile index 4bae699ca..c89ae9247 100644 --- a/libc/sysdeps/i386/i686/multiarch/Makefile +++ b/libc/sysdeps/i386/i686/multiarch/Makefile @@ -12,7 +12,10 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \ strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \ strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \ - strncpy-sse2 stpcpy-sse2 stpncpy-sse2 + strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \ + strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \ + strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \ + wcscmp-sse2 wcscmp-c ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c CFLAGS-varshift.c += -msse4 diff --git a/libc/sysdeps/i386/i686/multiarch/strcat-sse2.S b/libc/sysdeps/i386/i686/multiarch/strcat-sse2.S new file mode 100644 index 000000000..b692036ce --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strcat-sse2.S @@ -0,0 +1,1244 @@ +/* strcat with SSE2 + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + + +#ifndef NOT_IN_libc + +# include + + +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) + +# ifdef SHARED +# define JMPTBL(I, B) I - B + +/* Load an entry in a jump table into ECX and branch to it. TABLE is a + jump table with relative offsets. INDEX is a register contains the + index into the jump table. SCALE is the scale of INDEX. */ + +# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ + /* We first load PC into ECX. */ \ + call __i686.get_pc_thunk.cx; \ + /* Get the address of the jump table. 
*/ \ + addl $(TABLE - .), %ecx; \ + /* Get the entry and convert the relative offset to the \ + absolute address. */ \ + addl (%ecx,INDEX,SCALE), %ecx; \ + /* We loaded the jump table and adjuested ECX. Go. */ \ + jmp *%ecx +# else +# define JMPTBL(I, B) I + +/* Branch to an entry in a jump table. TABLE is a jump table with + absolute offsets. INDEX is a register contains the index into the + jump table. SCALE is the scale of INDEX. */ + +# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ + jmp *TABLE(,INDEX,SCALE) +# endif + +# ifndef STRCAT +# define STRCAT __strcat_sse2 +# endif + +# define PARMS 4 +# define STR1 PARMS+4 +# define STR2 STR1+4 + +# ifdef USE_AS_STRNCAT +# define LEN STR2+8 +# define STR3 STR1+4 +# else +# define STR3 STR1 +# endif + +# define USE_AS_STRCAT +# ifdef USE_AS_STRNCAT +# define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi); +# else +# define RETURN POP(%esi); ret; CFI_PUSH(%esi); +# endif + +.text +ENTRY (STRCAT) + PUSH (%esi) + mov STR1(%esp), %eax + mov STR2(%esp), %esi +# ifdef USE_AS_STRNCAT + PUSH (%ebx) + movl LEN(%esp), %ebx + test %ebx, %ebx + jz L(ExitZero) +# endif + cmpb $0, (%esi) + mov %esi, %ecx + mov %eax, %edx + jz L(ExitZero) + + and $63, %ecx + and $63, %edx + cmp $32, %ecx + ja L(StrlenCore7_1) + cmp $48, %edx + ja L(alignment_prolog) + + pxor %xmm0, %xmm0 + pxor %xmm4, %xmm4 + pxor %xmm7, %xmm7 + movdqu (%eax), %xmm1 + movdqu (%esi), %xmm5 + pcmpeqb %xmm1, %xmm0 + movdqu 16(%esi), %xmm6 + pmovmskb %xmm0, %ecx + pcmpeqb %xmm5, %xmm4 + pcmpeqb %xmm6, %xmm7 + test %ecx, %ecx + jnz L(exit_less16_) + mov %eax, %ecx + and $-16, %eax + jmp L(loop_prolog) + +L(alignment_prolog): + pxor %xmm0, %xmm0 + pxor %xmm4, %xmm4 + mov %edx, %ecx + pxor %xmm7, %xmm7 + and $15, %ecx + and $-16, %eax + pcmpeqb (%eax), %xmm0 + movdqu (%esi), %xmm5 + movdqu 16(%esi), %xmm6 + pmovmskb %xmm0, %edx + pcmpeqb %xmm5, %xmm4 + shr %cl, %edx + pcmpeqb %xmm6, %xmm7 + test %edx, %edx + jnz L(exit_less16) + add %eax, %ecx + + pxor %xmm0, %xmm0 +L(loop_prolog): + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 + .p2align 4 +L(align16_loop): + pcmpeqb 16(%eax), %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(exit16) + + pcmpeqb 32(%eax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + jnz L(exit32) + + pcmpeqb 48(%eax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + jnz L(exit48) + + pcmpeqb 64(%eax), %xmm3 + pmovmskb %xmm3, %edx + lea 64(%eax), %eax + test %edx, %edx + jz L(align16_loop) + bsf %edx, %edx + add %edx, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit16): + bsf %edx, %edx + lea 16(%eax, %edx), %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit32): + bsf %edx, %edx + lea 32(%eax, %edx), %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit48): + bsf %edx, %edx + lea 48(%eax, %edx), %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_less16): + bsf %edx, %edx + add %ecx, %eax + add %edx, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_less16_): + bsf %ecx, %ecx + add %ecx, %eax + + .p2align 4 +L(StartStrcpyPart): + pmovmskb %xmm4, %edx +# ifdef USE_AS_STRNCAT + cmp $16, %ebx + jbe L(CopyFrom1To16BytesTail1Case2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16BytesTail1) + + movdqu %xmm5, (%eax) + pmovmskb %xmm7, %edx +# ifdef USE_AS_STRNCAT + cmp $32, %ebx + jbe L(CopyFrom1To32Bytes1Case2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To32Bytes1) + + mov %esi, %ecx + and $-16, %esi + and $15, %ecx + pxor %xmm0, %xmm0 +# ifdef USE_AS_STRNCAT + add %ecx, %ebx +# endif + sub %ecx, %eax + jmp 
L(Unalign16Both) + +L(StrlenCore7_1): + mov %eax, %ecx + pxor %xmm0, %xmm0 + and $15, %ecx + and $-16, %eax + pcmpeqb (%eax), %xmm0 + pmovmskb %xmm0, %edx + shr %cl, %edx + test %edx, %edx + jnz L(exit_less16_1) + add %eax, %ecx + + pxor %xmm0, %xmm0 + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 + + .p2align 4 +L(align16_loop_1): + pcmpeqb 16(%eax), %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(exit16_1) + + pcmpeqb 32(%eax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + jnz L(exit32_1) + + pcmpeqb 48(%eax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + jnz L(exit48_1) + + pcmpeqb 64(%eax), %xmm3 + pmovmskb %xmm3, %edx + lea 64(%eax), %eax + test %edx, %edx + jz L(align16_loop_1) + bsf %edx, %edx + add %edx, %eax + jmp L(StartStrcpyPart_1) + + .p2align 4 +L(exit16_1): + bsf %edx, %edx + lea 16(%eax, %edx), %eax + jmp L(StartStrcpyPart_1) + + .p2align 4 +L(exit32_1): + bsf %edx, %edx + lea 32(%eax, %edx), %eax + jmp L(StartStrcpyPart_1) + + .p2align 4 +L(exit48_1): + bsf %edx, %edx + lea 48(%eax, %edx), %eax + jmp L(StartStrcpyPart_1) + + .p2align 4 +L(exit_less16_1): + bsf %edx, %edx + add %ecx, %eax + add %edx, %eax + + .p2align 4 +L(StartStrcpyPart_1): + mov %esi, %ecx + and $15, %ecx + and $-16, %esi + pxor %xmm0, %xmm0 + pxor %xmm1, %xmm1 + +# ifdef USE_AS_STRNCAT + cmp $48, %ebx + ja L(BigN) +# endif + pcmpeqb (%esi), %xmm1 +# ifdef USE_AS_STRNCAT + add %ecx, %ebx +# endif + pmovmskb %xmm1, %edx + shr %cl, %edx +# ifdef USE_AS_STRNCAT + cmp $16, %ebx + jbe L(CopyFrom1To16BytesTailCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16BytesTail) + + pcmpeqb 16(%esi), %xmm0 + pmovmskb %xmm0, %edx +# ifdef USE_AS_STRNCAT + cmp $32, %ebx + jbe L(CopyFrom1To32BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To32Bytes) + + movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ + movdqu %xmm1, (%eax) + sub %ecx, %eax + + .p2align 4 +L(Unalign16Both): + mov $16, %ecx + movdqa (%esi, %ecx), %xmm1 + movaps 16(%esi, %ecx), %xmm2 + movdqu %xmm1, (%eax, %ecx) + pcmpeqb %xmm2, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx +# ifdef USE_AS_STRNCAT + sub $48, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16Bytes) +L(Unalign16BothBigN): + movaps 16(%esi, %ecx), %xmm3 + movdqu %xmm2, (%eax, %ecx) + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx +# ifdef USE_AS_STRNCAT + sub $16, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + + movaps 16(%esi, %ecx), %xmm4 + movdqu %xmm3, (%eax, %ecx) + pcmpeqb %xmm4, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx +# ifdef USE_AS_STRNCAT + sub $16, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + + movaps 16(%esi, %ecx), %xmm1 + movdqu %xmm4, (%eax, %ecx) + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx +# ifdef USE_AS_STRNCAT + sub $16, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + + movaps 16(%esi, %ecx), %xmm2 + movdqu %xmm1, (%eax, %ecx) + pcmpeqb %xmm2, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx +# ifdef USE_AS_STRNCAT + sub $16, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + + movaps 16(%esi, %ecx), %xmm3 + movdqu %xmm2, (%eax, %ecx) + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx +# ifdef USE_AS_STRNCAT + sub $16, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + + movdqu 
%xmm3, (%eax, %ecx) + mov %esi, %edx + lea 16(%esi, %ecx), %esi + and $-0x40, %esi + sub %esi, %edx + sub %edx, %eax +# ifdef USE_AS_STRNCAT + lea 128(%ebx, %edx), %ebx +# endif + movaps (%esi), %xmm2 + movaps %xmm2, %xmm4 + movaps 16(%esi), %xmm5 + movaps 32(%esi), %xmm3 + movaps %xmm3, %xmm6 + movaps 48(%esi), %xmm7 + pminub %xmm5, %xmm2 + pminub %xmm7, %xmm3 + pminub %xmm2, %xmm3 + pcmpeqb %xmm0, %xmm3 + pmovmskb %xmm3, %edx +# ifdef USE_AS_STRNCAT + sub $64, %ebx + jbe L(UnalignedLeaveCase2OrCase3) +# endif + test %edx, %edx + jnz L(Unaligned64Leave) + + .p2align 4 +L(Unaligned64Loop_start): + add $64, %eax + add $64, %esi + movdqu %xmm4, -64(%eax) + movaps (%esi), %xmm2 + movdqa %xmm2, %xmm4 + movdqu %xmm5, -48(%eax) + movaps 16(%esi), %xmm5 + pminub %xmm5, %xmm2 + movaps 32(%esi), %xmm3 + movdqu %xmm6, -32(%eax) + movaps %xmm3, %xmm6 + movdqu %xmm7, -16(%eax) + movaps 48(%esi), %xmm7 + pminub %xmm7, %xmm3 + pminub %xmm2, %xmm3 + pcmpeqb %xmm0, %xmm3 + pmovmskb %xmm3, %edx +# ifdef USE_AS_STRNCAT + sub $64, %ebx + jbe L(UnalignedLeaveCase2OrCase3) +# endif + test %edx, %edx + jz L(Unaligned64Loop_start) + +L(Unaligned64Leave): + pxor %xmm1, %xmm1 + + pcmpeqb %xmm4, %xmm0 + pcmpeqb %xmm5, %xmm1 + pmovmskb %xmm0, %edx + pmovmskb %xmm1, %ecx + test %edx, %edx + jnz L(CopyFrom1To16BytesUnaligned_0) + test %ecx, %ecx + jnz L(CopyFrom1To16BytesUnaligned_16) + + pcmpeqb %xmm6, %xmm0 + pcmpeqb %xmm7, %xmm1 + pmovmskb %xmm0, %edx + pmovmskb %xmm1, %ecx + test %edx, %edx + jnz L(CopyFrom1To16BytesUnaligned_32) + + bsf %ecx, %edx + movdqu %xmm4, (%eax) + movdqu %xmm5, 16(%eax) + movdqu %xmm6, 32(%eax) + add $48, %esi + add $48, %eax + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + +# ifdef USE_AS_STRNCAT + .p2align 4 +L(BigN): + pcmpeqb (%esi), %xmm1 + pmovmskb %xmm1, %edx + shr %cl, %edx + test %edx, %edx + jnz L(CopyFrom1To16BytesTail) + + pcmpeqb 16(%esi), %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(CopyFrom1To32Bytes) + + movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ + movdqu %xmm1, (%eax) + sub %ecx, %eax + sub $48, %ebx + add %ecx, %ebx + + mov $16, %ecx + movdqa (%esi, %ecx), %xmm1 + movaps 16(%esi, %ecx), %xmm2 + movdqu %xmm1, (%eax, %ecx) + pcmpeqb %xmm2, %xmm0 + pmovmskb %xmm0, %edx + add $16, %ecx + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + jmp L(Unalign16BothBigN) +# endif + +/*------------end of main part-------------------------------*/ + +/* Case1 */ + .p2align 4 +L(CopyFrom1To16Bytes): + add %ecx, %eax + add %ecx, %esi + bsf %edx, %edx + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + + .p2align 4 +L(CopyFrom1To16BytesTail): + add %ecx, %esi + bsf %edx, %edx + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + + .p2align 4 +L(CopyFrom1To32Bytes1): + add $16, %esi + add $16, %eax +L(CopyFrom1To16BytesTail1): + bsf %edx, %edx + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + + .p2align 4 +L(CopyFrom1To32Bytes): + bsf %edx, %edx + add %ecx, %esi + add $16, %edx + sub %ecx, %edx + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + + .p2align 4 +L(CopyFrom1To16BytesUnaligned_0): + bsf %edx, %edx + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + + .p2align 4 +L(CopyFrom1To16BytesUnaligned_16): + bsf %ecx, %edx + movdqu %xmm4, (%eax) + add $16, %esi + add $16, %eax + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + + .p2align 4 +L(CopyFrom1To16BytesUnaligned_32): + bsf %edx, %edx + movdqu %xmm4, (%eax) + movdqu %xmm5, 16(%eax) + add $32, %esi + add $32, %eax + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + +# ifdef USE_AS_STRNCAT + + .p2align 4 
+L(CopyFrom1To16BytesExit): + BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) + +/* Case2 */ + + .p2align 4 +L(CopyFrom1To16BytesCase2): + add $16, %ebx + add %ecx, %eax + add %ecx, %esi + bsf %edx, %edx + cmp %ebx, %edx + jb L(CopyFrom1To16BytesExit) + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + + .p2align 4 +L(CopyFrom1To32BytesCase2): + sub %ecx, %ebx + add %ecx, %esi + bsf %edx, %edx + add $16, %edx + sub %ecx, %edx + cmp %ebx, %edx + jb L(CopyFrom1To16BytesExit) + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + +L(CopyFrom1To16BytesTailCase2): + sub %ecx, %ebx + add %ecx, %esi + bsf %edx, %edx + cmp %ebx, %edx + jb L(CopyFrom1To16BytesExit) + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + +L(CopyFrom1To16BytesTail1Case2): + bsf %edx, %edx + cmp %ebx, %edx + jb L(CopyFrom1To16BytesExit) + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + +/* Case2 or Case3, Case3 */ + + .p2align 4 +L(CopyFrom1To16BytesCase2OrCase3): + test %edx, %edx + jnz L(CopyFrom1To16BytesCase2) +L(CopyFrom1To16BytesCase3): + add $16, %ebx + add %ecx, %eax + add %ecx, %esi + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + + .p2align 4 +L(CopyFrom1To32BytesCase2OrCase3): + test %edx, %edx + jnz L(CopyFrom1To32BytesCase2) + sub %ecx, %ebx + add %ecx, %esi + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + + .p2align 4 +L(CopyFrom1To16BytesTailCase2OrCase3): + test %edx, %edx + jnz L(CopyFrom1To16BytesTailCase2) + sub %ecx, %ebx + add %ecx, %esi + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + + .p2align 4 +L(CopyFrom1To32Bytes1Case2OrCase3): + add $16, %eax + add $16, %esi + sub $16, %ebx +L(CopyFrom1To16BytesTail1Case2OrCase3): + test %edx, %edx + jnz L(CopyFrom1To16BytesTail1Case2) + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) + +# endif + +# ifdef USE_AS_STRNCAT + .p2align 4 +L(StrncatExit0): + movb %bh, (%eax) + mov STR3(%esp), %eax + RETURN +# endif + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit1): + movb %bh, 1(%eax) +# endif +L(Exit1): +# ifdef USE_AS_STRNCAT + movb (%esi), %dh +# endif + movb %dh, (%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit2): + movb %bh, 2(%eax) +# endif +L(Exit2): + movw (%esi), %dx + movw %dx, (%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit3): + movb %bh, 3(%eax) +# endif +L(Exit3): + movw (%esi), %cx + movw %cx, (%eax) +# ifdef USE_AS_STRNCAT + movb 2(%esi), %dh +# endif + movb %dh, 2(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit4): + movb %bh, 4(%eax) +# endif +L(Exit4): + movl (%esi), %edx + movl %edx, (%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit5): + movb %bh, 5(%eax) +# endif +L(Exit5): + movl (%esi), %ecx +# ifdef USE_AS_STRNCAT + movb 4(%esi), %dh +# endif + movb %dh, 4(%eax) + movl %ecx, (%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit6): + movb %bh, 6(%eax) +# endif +L(Exit6): + movl (%esi), %ecx + movw 4(%esi), %dx + movl %ecx, (%eax) + movw %dx, 4(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit7): + movb %bh, 7(%eax) +# endif +L(Exit7): + movl (%esi), %ecx + movl 3(%esi), %edx + movl %ecx, (%eax) + movl %edx, 3(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit8): + movb %bh, 8(%eax) +# endif +L(Exit8): + movlpd (%esi), %xmm0 + movlpd %xmm0, (%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 
+# ifdef USE_AS_STRNCAT +L(StrncatExit9): + movb %bh, 9(%eax) +# endif +L(Exit9): + movlpd (%esi), %xmm0 +# ifdef USE_AS_STRNCAT + movb 8(%esi), %dh +# endif + movb %dh, 8(%eax) + movlpd %xmm0, (%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit10): + movb %bh, 10(%eax) +# endif +L(Exit10): + movlpd (%esi), %xmm0 + movw 8(%esi), %dx + movlpd %xmm0, (%eax) + movw %dx, 8(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit11): + movb %bh, 11(%eax) +# endif +L(Exit11): + movlpd (%esi), %xmm0 + movl 7(%esi), %edx + movlpd %xmm0, (%eax) + movl %edx, 7(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit12): + movb %bh, 12(%eax) +# endif +L(Exit12): + movlpd (%esi), %xmm0 + movl 8(%esi), %edx + movlpd %xmm0, (%eax) + movl %edx, 8(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit13): + movb %bh, 13(%eax) +# endif +L(Exit13): + movlpd (%esi), %xmm0 + movlpd 5(%esi), %xmm1 + movlpd %xmm0, (%eax) + movlpd %xmm1, 5(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit14): + movb %bh, 14(%eax) +# endif +L(Exit14): + movlpd (%esi), %xmm0 + movlpd 6(%esi), %xmm1 + movlpd %xmm0, (%eax) + movlpd %xmm1, 6(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit15): + movb %bh, 15(%eax) +# endif +L(Exit15): + movlpd (%esi), %xmm0 + movlpd 7(%esi), %xmm1 + movlpd %xmm0, (%eax) + movlpd %xmm1, 7(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit16): + movb %bh, 16(%eax) +# endif +L(Exit16): + movdqu (%esi), %xmm0 + movdqu %xmm0, (%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit17): + movb %bh, 17(%eax) +# endif +L(Exit17): + movdqu (%esi), %xmm0 +# ifdef USE_AS_STRNCAT + movb 16(%esi), %dh +# endif + movdqu %xmm0, (%eax) + movb %dh, 16(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit18): + movb %bh, 18(%eax) +# endif +L(Exit18): + movdqu (%esi), %xmm0 + movw 16(%esi), %cx + movdqu %xmm0, (%eax) + movw %cx, 16(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit19): + movb %bh, 19(%eax) +# endif +L(Exit19): + movdqu (%esi), %xmm0 + movl 15(%esi), %ecx + movdqu %xmm0, (%eax) + movl %ecx, 15(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit20): + movb %bh, 20(%eax) +# endif +L(Exit20): + movdqu (%esi), %xmm0 + movl 16(%esi), %ecx + movdqu %xmm0, (%eax) + movl %ecx, 16(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit21): + movb %bh, 21(%eax) +# endif +L(Exit21): + movdqu (%esi), %xmm0 + movl 16(%esi), %ecx +# ifdef USE_AS_STRNCAT + movb 20(%esi), %dh +# endif + movdqu %xmm0, (%eax) + movl %ecx, 16(%eax) + movb %dh, 20(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit22): + movb %bh, 22(%eax) +# endif +L(Exit22): + movdqu (%esi), %xmm0 + movlpd 14(%esi), %xmm3 + movdqu %xmm0, (%eax) + movlpd %xmm3, 14(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit23): + movb %bh, 23(%eax) +# endif +L(Exit23): + movdqu (%esi), %xmm0 + movlpd 15(%esi), %xmm3 + movdqu %xmm0, (%eax) + movlpd %xmm3, 15(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit24): + movb %bh, 24(%eax) +# endif +L(Exit24): + movdqu (%esi), %xmm0 + movlpd 
16(%esi), %xmm2 + movdqu %xmm0, (%eax) + movlpd %xmm2, 16(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit25): + movb %bh, 25(%eax) +# endif +L(Exit25): + movdqu (%esi), %xmm0 + movlpd 16(%esi), %xmm2 +# ifdef USE_AS_STRNCAT + movb 24(%esi), %dh +# endif + movdqu %xmm0, (%eax) + movlpd %xmm2, 16(%eax) + movb %dh, 24(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit26): + movb %bh, 26(%eax) +# endif +L(Exit26): + movdqu (%esi), %xmm0 + movlpd 16(%esi), %xmm2 + movw 24(%esi), %cx + movdqu %xmm0, (%eax) + movlpd %xmm2, 16(%eax) + movw %cx, 24(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit27): + movb %bh, 27(%eax) +# endif +L(Exit27): + movdqu (%esi), %xmm0 + movlpd 16(%esi), %xmm2 + movl 23(%esi), %ecx + movdqu %xmm0, (%eax) + movlpd %xmm2, 16(%eax) + movl %ecx, 23(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit28): + movb %bh, 28(%eax) +# endif +L(Exit28): + movdqu (%esi), %xmm0 + movlpd 16(%esi), %xmm2 + movl 24(%esi), %ecx + movdqu %xmm0, (%eax) + movlpd %xmm2, 16(%eax) + movl %ecx, 24(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit29): + movb %bh, 29(%eax) +# endif +L(Exit29): + movdqu (%esi), %xmm0 + movdqu 13(%esi), %xmm2 + movdqu %xmm0, (%eax) + movdqu %xmm2, 13(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit30): + movb %bh, 30(%eax) +# endif +L(Exit30): + movdqu (%esi), %xmm0 + movdqu 14(%esi), %xmm2 + movdqu %xmm0, (%eax) + movdqu %xmm2, 14(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit31): + movb %bh, 31(%eax) +# endif +L(Exit31): + movdqu (%esi), %xmm0 + movdqu 15(%esi), %xmm2 + movdqu %xmm0, (%eax) + movdqu %xmm2, 15(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +# ifdef USE_AS_STRNCAT +L(StrncatExit32): + movb %bh, 32(%eax) +# endif +L(Exit32): + movdqu (%esi), %xmm0 + movdqu 16(%esi), %xmm2 + movdqu %xmm0, (%eax) + movdqu %xmm2, 16(%eax) + mov STR3(%esp), %eax + RETURN + +# ifdef USE_AS_STRNCAT + + .p2align 4 +L(UnalignedLeaveCase2OrCase3): + test %edx, %edx + jnz L(Unaligned64LeaveCase2) +L(Unaligned64LeaveCase3): + lea 64(%ebx), %ecx + and $-16, %ecx + add $48, %ebx + jl L(CopyFrom1To16BytesCase3) + movdqu %xmm4, (%eax) + sub $16, %ebx + jb L(CopyFrom1To16BytesCase3) + movdqu %xmm5, 16(%eax) + sub $16, %ebx + jb L(CopyFrom1To16BytesCase3) + movdqu %xmm6, 32(%eax) + sub $16, %ebx + jb L(CopyFrom1To16BytesCase3) + movdqu %xmm7, 48(%eax) + xor %bh, %bh + movb %bh, 64(%eax) + mov STR3(%esp), %eax + RETURN + + .p2align 4 +L(Unaligned64LeaveCase2): + xor %ecx, %ecx + pcmpeqb %xmm4, %xmm0 + pmovmskb %xmm0, %edx + add $48, %ebx + jle L(CopyFrom1To16BytesCase2OrCase3) + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + + pcmpeqb %xmm5, %xmm0 + pmovmskb %xmm0, %edx + movdqu %xmm4, (%eax) + add $16, %ecx + sub $16, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + + pcmpeqb %xmm6, %xmm0 + pmovmskb %xmm0, %edx + movdqu %xmm5, 16(%eax) + add $16, %ecx + sub $16, %ebx + jbe L(CopyFrom1To16BytesCase2OrCase3) + test %edx, %edx + jnz L(CopyFrom1To16Bytes) + + pcmpeqb %xmm7, %xmm0 + pmovmskb %xmm0, %edx + movdqu %xmm6, 32(%eax) + lea 16(%eax, %ecx), %eax + lea 16(%esi, %ecx), %esi + bsf %edx, %edx + cmp %ebx, %edx + jb L(CopyFrom1To16BytesExit) + BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) +# endif + .p2align 4 +L(ExitZero): + RETURN + 
+END (STRCAT) + + .p2align 4 + .section .rodata +L(ExitTable): + .int JMPTBL(L(Exit1), L(ExitTable)) + .int JMPTBL(L(Exit2), L(ExitTable)) + .int JMPTBL(L(Exit3), L(ExitTable)) + .int JMPTBL(L(Exit4), L(ExitTable)) + .int JMPTBL(L(Exit5), L(ExitTable)) + .int JMPTBL(L(Exit6), L(ExitTable)) + .int JMPTBL(L(Exit7), L(ExitTable)) + .int JMPTBL(L(Exit8), L(ExitTable)) + .int JMPTBL(L(Exit9), L(ExitTable)) + .int JMPTBL(L(Exit10), L(ExitTable)) + .int JMPTBL(L(Exit11), L(ExitTable)) + .int JMPTBL(L(Exit12), L(ExitTable)) + .int JMPTBL(L(Exit13), L(ExitTable)) + .int JMPTBL(L(Exit14), L(ExitTable)) + .int JMPTBL(L(Exit15), L(ExitTable)) + .int JMPTBL(L(Exit16), L(ExitTable)) + .int JMPTBL(L(Exit17), L(ExitTable)) + .int JMPTBL(L(Exit18), L(ExitTable)) + .int JMPTBL(L(Exit19), L(ExitTable)) + .int JMPTBL(L(Exit20), L(ExitTable)) + .int JMPTBL(L(Exit21), L(ExitTable)) + .int JMPTBL(L(Exit22), L(ExitTable)) + .int JMPTBL(L(Exit23), L(ExitTable)) + .int JMPTBL(L(Exit24), L(ExitTable)) + .int JMPTBL(L(Exit25), L(ExitTable)) + .int JMPTBL(L(Exit26), L(ExitTable)) + .int JMPTBL(L(Exit27), L(ExitTable)) + .int JMPTBL(L(Exit28), L(ExitTable)) + .int JMPTBL(L(Exit29), L(ExitTable)) + .int JMPTBL(L(Exit30), L(ExitTable)) + .int JMPTBL(L(Exit31), L(ExitTable)) + .int JMPTBL(L(Exit32), L(ExitTable)) +# ifdef USE_AS_STRNCAT +L(ExitStrncatTable): + .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable)) + .int JMPTBL(L(StrncatExit32), L(ExitStrncatTable)) +# endif +#endif diff --git a/libc/sysdeps/i386/i686/multiarch/strcat-ssse3.S b/libc/sysdeps/i386/i686/multiarch/strcat-ssse3.S new file mode 100644 index 000000000..d03b40a5f --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strcat-ssse3.S @@ -0,0 +1,573 @@ +/* strcat with SSSE3 + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + + +#ifndef NOT_IN_libc + +# include + +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) + +# ifndef STRCAT +# define STRCAT __strcat_ssse3 +# endif + +# define PARMS 4 +# define STR1 PARMS+4 +# define STR2 STR1+4 + +# ifdef USE_AS_STRNCAT +# define LEN STR2+8 +# endif + +# define USE_AS_STRCAT + +.text +ENTRY (STRCAT) + PUSH (%edi) + mov STR1(%esp), %edi + mov %edi, %edx + +# define RETURN jmp L(StartStrcpyPart) +# include "strlen-sse2.S" + +L(StartStrcpyPart): + mov STR2(%esp), %ecx + lea (%edi, %eax), %edx +# ifdef USE_AS_STRNCAT + PUSH (%ebx) + mov LEN(%esp), %ebx + test %ebx, %ebx + jz L(StrncatExit0) + cmp $8, %ebx + jbe L(StrncatExit8Bytes) +# endif + cmpb $0, (%ecx) + jz L(Exit1) + cmpb $0, 1(%ecx) + jz L(Exit2) + cmpb $0, 2(%ecx) + jz L(Exit3) + cmpb $0, 3(%ecx) + jz L(Exit4) + cmpb $0, 4(%ecx) + jz L(Exit5) + cmpb $0, 5(%ecx) + jz L(Exit6) + cmpb $0, 6(%ecx) + jz L(Exit7) + cmpb $0, 7(%ecx) + jz L(Exit8) + cmpb $0, 8(%ecx) + jz L(Exit9) +# ifdef USE_AS_STRNCAT + cmp $16, %ebx + jb L(StrncatExit15Bytes) +# endif + cmpb $0, 9(%ecx) + jz L(Exit10) + cmpb $0, 10(%ecx) + jz L(Exit11) + cmpb $0, 11(%ecx) + jz L(Exit12) + cmpb $0, 12(%ecx) + jz L(Exit13) + cmpb $0, 13(%ecx) + jz L(Exit14) + cmpb $0, 14(%ecx) + jz L(Exit15) + cmpb $0, 15(%ecx) + jz L(Exit16) +# ifdef USE_AS_STRNCAT + cmp $16, %ebx + je L(StrncatExit16) + +# define RETURN1 \ + POP (%ebx); \ + POP (%edi); \ + ret; \ + CFI_PUSH (%ebx); \ + CFI_PUSH (%edi) +# define USE_AS_STRNCPY +# else +# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi) +# endif +# include "strcpy-ssse3.S" + .p2align 4 +L(CopyFrom1To16Bytes): + add %esi, %edx + add %esi, %ecx + + POP (%esi) + test %al, %al + jz L(ExitHigh) + test $0x01, %al + jnz L(Exit1) + test $0x02, %al + jnz L(Exit2) + test $0x04, %al + jnz L(Exit3) + test $0x08, %al + jnz L(Exit4) + test $0x10, %al + jnz L(Exit5) + test $0x20, %al + jnz L(Exit6) + test $0x40, %al + jnz L(Exit7) + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(ExitHigh): + test $0x01, %ah + jnz L(Exit9) + test $0x02, %ah + jnz L(Exit10) + test $0x04, %ah + jnz L(Exit11) + test $0x08, %ah + jnz L(Exit12) + test $0x10, %ah + jnz L(Exit13) + test $0x20, %ah + jnz L(Exit14) + test $0x40, %ah + jnz L(Exit15) + movlpd (%ecx), %xmm0 + movlpd 8(%ecx), %xmm1 + movlpd %xmm0, (%edx) + movlpd %xmm1, 8(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit1): + movb %bh, 1(%edx) +L(Exit1): + movb (%ecx), %al + movb %al, (%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit2): + movb %bh, 2(%edx) +L(Exit2): + movw (%ecx), %ax + movw %ax, 
(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit3): + movb %bh, 3(%edx) +L(Exit3): + movw (%ecx), %ax + movw %ax, (%edx) + movb 2(%ecx), %al + movb %al, 2(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit4): + movb %bh, 4(%edx) +L(Exit4): + movl (%ecx), %eax + movl %eax, (%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit5): + movb %bh, 5(%edx) +L(Exit5): + movl (%ecx), %eax + movl %eax, (%edx) + movb 4(%ecx), %al + movb %al, 4(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit6): + movb %bh, 6(%edx) +L(Exit6): + movl (%ecx), %eax + movl %eax, (%edx) + movw 4(%ecx), %ax + movw %ax, 4(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit7): + movb %bh, 7(%edx) +L(Exit7): + movl (%ecx), %eax + movl %eax, (%edx) + movl 3(%ecx), %eax + movl %eax, 3(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit8): + movb %bh, 8(%edx) +L(Exit8): + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit9): + movb %bh, 9(%edx) +L(Exit9): + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movb 8(%ecx), %al + movb %al, 8(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit10): + movb %bh, 10(%edx) +L(Exit10): + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movw 8(%ecx), %ax + movw %ax, 8(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit11): + movb %bh, 11(%edx) +L(Exit11): + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movl 7(%ecx), %eax + movl %eax, 7(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit12): + movb %bh, 12(%edx) +L(Exit12): + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movl 8(%ecx), %eax + movl %eax, 8(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit13): + movb %bh, 13(%edx) +L(Exit13): + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movlpd 5(%ecx), %xmm0 + movlpd %xmm0, 5(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit14): + movb %bh, 14(%edx) +L(Exit14): + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movlpd 6(%ecx), %xmm0 + movlpd %xmm0, 6(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit15): + movb %bh, 15(%edx) +L(Exit15): + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movlpd 7(%ecx), %xmm0 + movlpd %xmm0, 7(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit16): + movb %bh, 16(%edx) +L(Exit16): + movlpd (%ecx), %xmm0 + movlpd 8(%ecx), %xmm1 + movlpd %xmm0, (%edx) + movlpd %xmm1, 8(%edx) + movl %edi, %eax + RETURN1 + +# ifdef USE_AS_STRNCPY + + CFI_PUSH(%esi) + + .p2align 4 +L(CopyFrom1To16BytesCase2): + add $16, %ebx + add %esi, %ecx + lea (%esi, %edx), %esi + lea -9(%ebx), %edx + and $1<<7, %dh + or %al, %dh + test %dh, %dh + lea (%esi), %edx + POP (%esi) + jz L(ExitHighCase2) + + test $0x01, %al + jnz L(Exit1) + cmp $1, %ebx + je L(StrncatExit1) + test $0x02, %al + jnz L(Exit2) + cmp $2, %ebx + je L(StrncatExit2) + test $0x04, %al + jnz L(Exit3) + cmp $3, %ebx + je L(StrncatExit3) + test $0x08, %al + jnz L(Exit4) + cmp $4, %ebx + je L(StrncatExit4) + test $0x10, %al + jnz L(Exit5) + cmp $5, %ebx + je L(StrncatExit5) + test $0x20, %al + jnz L(Exit6) + cmp $6, %ebx + je L(StrncatExit6) + test $0x40, %al + jnz L(Exit7) + cmp $7, %ebx + je L(StrncatExit7) + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + lea 7(%edx), %eax + cmpb $1, (%eax) + sbb $-1, %eax + xor %cl, %cl + movb %cl, (%eax) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(ExitHighCase2): + test $0x01, %ah + jnz L(Exit9) + cmp $9, %ebx + je L(StrncatExit9) + test $0x02, %ah + jnz L(Exit10) + cmp $10, %ebx + je 
L(StrncatExit10) + test $0x04, %ah + jnz L(Exit11) + cmp $11, %ebx + je L(StrncatExit11) + test $0x8, %ah + jnz L(Exit12) + cmp $12, %ebx + je L(StrncatExit12) + test $0x10, %ah + jnz L(Exit13) + cmp $13, %ebx + je L(StrncatExit13) + test $0x20, %ah + jnz L(Exit14) + cmp $14, %ebx + je L(StrncatExit14) + test $0x40, %ah + jnz L(Exit15) + cmp $15, %ebx + je L(StrncatExit15) + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movlpd 8(%ecx), %xmm1 + movlpd %xmm1, 8(%edx) + movl %edi, %eax + RETURN1 + + CFI_PUSH(%esi) + +L(CopyFrom1To16BytesCase2OrCase3): + test %eax, %eax + jnz L(CopyFrom1To16BytesCase2) + + .p2align 4 +L(CopyFrom1To16BytesCase3): + add $16, %ebx + add %esi, %edx + add %esi, %ecx + + POP (%esi) + + cmp $8, %ebx + ja L(ExitHighCase3) + cmp $1, %ebx + je L(StrncatExit1) + cmp $2, %ebx + je L(StrncatExit2) + cmp $3, %ebx + je L(StrncatExit3) + cmp $4, %ebx + je L(StrncatExit4) + cmp $5, %ebx + je L(StrncatExit5) + cmp $6, %ebx + je L(StrncatExit6) + cmp $7, %ebx + je L(StrncatExit7) + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movb %bh, 8(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(ExitHighCase3): + cmp $9, %ebx + je L(StrncatExit9) + cmp $10, %ebx + je L(StrncatExit10) + cmp $11, %ebx + je L(StrncatExit11) + cmp $12, %ebx + je L(StrncatExit12) + cmp $13, %ebx + je L(StrncatExit13) + cmp $14, %ebx + je L(StrncatExit14) + cmp $15, %ebx + je L(StrncatExit15) + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movlpd 8(%ecx), %xmm1 + movlpd %xmm1, 8(%edx) + movb %bh, 16(%edx) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit0): + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit15Bytes): + cmp $9, %ebx + je L(StrncatExit9) + cmpb $0, 9(%ecx) + jz L(Exit10) + cmp $10, %ebx + je L(StrncatExit10) + cmpb $0, 10(%ecx) + jz L(Exit11) + cmp $11, %ebx + je L(StrncatExit11) + cmpb $0, 11(%ecx) + jz L(Exit12) + cmp $12, %ebx + je L(StrncatExit12) + cmpb $0, 12(%ecx) + jz L(Exit13) + cmp $13, %ebx + je L(StrncatExit13) + cmpb $0, 13(%ecx) + jz L(Exit14) + cmp $14, %ebx + je L(StrncatExit14) + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movlpd 7(%ecx), %xmm0 + movlpd %xmm0, 7(%edx) + lea 14(%edx), %eax + cmpb $1, (%eax) + sbb $-1, %eax + movb %bh, (%eax) + movl %edi, %eax + RETURN1 + + .p2align 4 +L(StrncatExit8Bytes): + cmpb $0, (%ecx) + jz L(Exit1) + cmp $1, %ebx + je L(StrncatExit1) + cmpb $0, 1(%ecx) + jz L(Exit2) + cmp $2, %ebx + je L(StrncatExit2) + cmpb $0, 2(%ecx) + jz L(Exit3) + cmp $3, %ebx + je L(StrncatExit3) + cmpb $0, 3(%ecx) + jz L(Exit4) + cmp $4, %ebx + je L(StrncatExit4) + cmpb $0, 4(%ecx) + jz L(Exit5) + cmp $5, %ebx + je L(StrncatExit5) + cmpb $0, 5(%ecx) + jz L(Exit6) + cmp $6, %ebx + je L(StrncatExit6) + cmpb $0, 6(%ecx) + jz L(Exit7) + cmp $7, %ebx + je L(StrncatExit7) + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + lea 7(%edx), %eax + cmpb $1, (%eax) + sbb $-1, %eax + movb %bh, (%eax) + movl %edi, %eax + RETURN1 + +# endif +END (STRCAT) +#endif diff --git a/libc/sysdeps/i386/i686/multiarch/strcat.S b/libc/sysdeps/i386/i686/multiarch/strcat.S new file mode 100644 index 000000000..14d4f91aa --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strcat.S @@ -0,0 +1,130 @@ +/* Multiple versions of strcat + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include + +#ifndef USE_AS_STRNCAT +# ifndef STRCAT +# define STRCAT strcat +# endif +#endif + +#ifdef USE_AS_STRNCAT +# define STRCAT_SSSE3 __strncat_ssse3 +# define STRCAT_SSE2 __strncat_sse2 +# define STRCAT_IA32 __strncat_ia32 +# define __GI_STRCAT __GI_strncat +#else +# define STRCAT_SSSE3 __strcat_ssse3 +# define STRCAT_SSE2 __strcat_sse2 +# define STRCAT_IA32 __strcat_ia32 +# define __GI_STRCAT __GI_strcat +#endif + + +/* Define multiple versions only for the definition in libc. Don't + define multiple versions for strncat in static library since we + need strncat before the initialization happened. */ +#ifndef NOT_IN_libc + +# ifdef SHARED + .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits + .globl __i686.get_pc_thunk.bx + .hidden __i686.get_pc_thunk.bx + .p2align 4 + .type __i686.get_pc_thunk.bx,@function +__i686.get_pc_thunk.bx: + movl (%esp), %ebx + ret + + .text +ENTRY(STRCAT) + .type STRCAT, @gnu_indirect_function + pushl %ebx + cfi_adjust_cfa_offset (4) + cfi_rel_offset (ebx, 0) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) + jne 1f + call __init_cpu_features +1: leal STRCAT_IA32@GOTOFF(%ebx), %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal STRCAT_SSE2@GOTOFF(%ebx), %eax + testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) + jnz 2f + testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) + jz 2f + leal STRCAT_SSSE3@GOTOFF(%ebx), %eax +2: popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + ret +END(STRCAT) +# else + +ENTRY(STRCAT) + .type STRCAT, @gnu_indirect_function + cmpl $0, KIND_OFFSET+__cpu_features + jne 1f + call __init_cpu_features +1: leal STRCAT_IA32, %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features + jz 2f + leal STRCAT_SSE2, %eax + testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features + jnz 2f + testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features + jz 2f + leal STRCAT_SSSE3, %eax +2: ret +END(STRCAT) + +# endif + +# undef ENTRY +# define ENTRY(name) \ + .type STRCAT_IA32, @function; \ + .align 16; \ + STRCAT_IA32: cfi_startproc; \ + CALL_MCOUNT +# undef END +# define END(name) \ + cfi_endproc; .size STRCAT_IA32, .-STRCAT_IA32 + +# ifdef SHARED +# undef libc_hidden_builtin_def +/* It doesn't make sense to send libc-internal strcat calls through a PLT. + The speedup we get from using SSSE3 instruction is likely eaten away + by the indirect call in the PLT. 
*/ +# define libc_hidden_builtin_def(name) \ + .globl __GI_STRCAT; __GI_STRCAT = STRCAT_IA32 +# undef libc_hidden_def +# define libc_hidden_def(name) \ + .globl __GI___STRCAT; __GI___STRCAT = STRCAT_IA32 + +# endif +#endif + +#ifndef USE_AS_STRNCAT +# include "../../i486/strcat.S" +#endif diff --git a/libc/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S b/libc/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S new file mode 100644 index 000000000..5a19ba26b --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S @@ -0,0 +1,159 @@ +/* strchr with SSE2 with bsf + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef NOT_IN_libc + +# include + +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) + +# define PARMS 8 +# define ENTRANCE PUSH(%edi) +# define RETURN POP(%edi); ret; CFI_PUSH(%edi); + +# define STR1 PARMS +# define STR2 STR1+4 + + .text +ENTRY (__strchr_sse2_bsf) + + ENTRANCE + mov STR1(%esp), %ecx + movd STR2(%esp), %xmm1 + + pxor %xmm2, %xmm2 + mov %ecx, %edi + punpcklbw %xmm1, %xmm1 + punpcklbw %xmm1, %xmm1 + /* ECX has OFFSET. */ + and $15, %ecx + pshufd $0, %xmm1, %xmm1 + je L(loop) + +/* Handle unaligned string. */ + and $-16, %edi + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + pcmpeqb %xmm1, %xmm0 + /* Find where NULL is. */ + pmovmskb %xmm2, %edx + /* Check if there is a match. */ + pmovmskb %xmm0, %eax + /* Remove the leading bytes. */ + sarl %cl, %edx + sarl %cl, %eax + test %eax, %eax + je L(unaligned_no_match) + /* Check which byte is a match. */ + bsf %eax, %eax + /* Is there a NULL? */ + test %edx, %edx + je L(unaligned_match) + bsf %edx, %edx + cmpl %edx, %eax + /* Return NULL if NULL comes first. */ + ja L(return_null) +L(unaligned_match): + add %edi, %eax + add %ecx, %eax + RETURN + + .p2align 4 +L(unaligned_no_match): + test %edx, %edx + jne L(return_null) + pxor %xmm2, %xmm2 + + add $16, %edi + + .p2align 4 +/* Loop start on aligned string. 
*/ +L(loop): + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + or %eax, %edx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + or %eax, %edx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + or %eax, %edx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + or %eax, %edx + jnz L(matches) + jmp L(loop) + +L(matches): + pmovmskb %xmm2, %edx + test %eax, %eax + jz L(return_null) + bsf %eax, %eax + /* There is a match. First find where NULL is. */ + test %edx, %edx + je L(match) + bsf %edx, %ecx + /* Check if NULL comes first. */ + cmpl %ecx, %eax + ja L(return_null) +L(match): + sub $16, %edi + add %edi, %eax + RETURN + +/* Return NULL. */ + .p2align 4 +L(return_null): + xor %eax, %eax + RETURN + +END (__strchr_sse2_bsf) +#endif diff --git a/libc/sysdeps/i386/i686/multiarch/strchr-sse2.S b/libc/sysdeps/i386/i686/multiarch/strchr-sse2.S new file mode 100644 index 000000000..a73b21ecc --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strchr-sse2.S @@ -0,0 +1,349 @@ +/* strchr SSE2 without bsf + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef NOT_IN_libc + +# include + +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) + +# define PARMS 8 +# define ENTRANCE PUSH(%edi) +# define RETURN POP(%edi); ret; CFI_PUSH(%edi); + +# define STR1 PARMS +# define STR2 STR1+4 + + .text +ENTRY (__strchr_sse2) + + ENTRANCE + mov STR1(%esp), %ecx + movd STR2(%esp), %xmm1 + + pxor %xmm2, %xmm2 + mov %ecx, %edi + punpcklbw %xmm1, %xmm1 + punpcklbw %xmm1, %xmm1 + /* ECX has OFFSET. */ + and $15, %ecx + pshufd $0, %xmm1, %xmm1 + je L(loop) + +/* Handle unaligned string. */ + and $-16, %edi + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + pcmpeqb %xmm1, %xmm0 + /* Find where NULL is. */ + pmovmskb %xmm2, %edx + /* Check if there is a match. */ + pmovmskb %xmm0, %eax + /* Remove the leading bytes. */ + sarl %cl, %edx + sarl %cl, %eax + test %eax, %eax + jz L(unaligned_no_match) + /* Check which byte is a match. */ + /* Is there a NULL? 
*/ + add %ecx, %edi + test %edx, %edx + jz L(match_case1) + jmp L(match_case2) + + .p2align 4 +L(unaligned_no_match): + test %edx, %edx + jne L(return_null) + + pxor %xmm2, %xmm2 + add $16, %edi + + .p2align 4 +/* Loop start on aligned string. */ +L(loop): + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + test %eax, %eax + jnz L(matches) + test %edx, %edx + jnz L(return_null) + add $16, %edi + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + test %eax, %eax + jnz L(matches) + test %edx, %edx + jnz L(return_null) + add $16, %edi + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + test %eax, %eax + jnz L(matches) + test %edx, %edx + jnz L(return_null) + add $16, %edi + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %edx + pmovmskb %xmm0, %eax + test %eax, %eax + jnz L(matches) + test %edx, %edx + jnz L(return_null) + add $16, %edi + jmp L(loop) + +L(matches): + /* There is a match. First find where NULL is. */ + test %edx, %edx + jz L(match_case1) + + .p2align 4 +L(match_case2): + test %al, %al + jz L(match_higth_case2) + + mov %al, %cl + and $15, %cl + jnz L(match_case2_4) + + mov %dl, %ch + and $15, %ch + jnz L(return_null) + + test $0x10, %al + jnz L(Exit5) + test $0x10, %dl + jnz L(return_null) + test $0x20, %al + jnz L(Exit6) + test $0x20, %dl + jnz L(return_null) + test $0x40, %al + jnz L(Exit7) + test $0x40, %dl + jnz L(return_null) + lea 7(%edi), %eax + RETURN + + .p2align 4 +L(match_case2_4): + test $0x01, %al + jnz L(Exit1) + test $0x01, %dl + jnz L(return_null) + test $0x02, %al + jnz L(Exit2) + test $0x02, %dl + jnz L(return_null) + test $0x04, %al + jnz L(Exit3) + test $0x04, %dl + jnz L(return_null) + lea 3(%edi), %eax + RETURN + + .p2align 4 +L(match_higth_case2): + test %dl, %dl + jnz L(return_null) + + mov %ah, %cl + and $15, %cl + jnz L(match_case2_12) + + mov %dh, %ch + and $15, %ch + jnz L(return_null) + + test $0x10, %ah + jnz L(Exit13) + test $0x10, %dh + jnz L(return_null) + test $0x20, %ah + jnz L(Exit14) + test $0x20, %dh + jnz L(return_null) + test $0x40, %ah + jnz L(Exit15) + test $0x40, %dh + jnz L(return_null) + lea 15(%edi), %eax + RETURN + + .p2align 4 +L(match_case2_12): + test $0x01, %ah + jnz L(Exit9) + test $0x01, %dh + jnz L(return_null) + test $0x02, %ah + jnz L(Exit10) + test $0x02, %dh + jnz L(return_null) + test $0x04, %ah + jnz L(Exit11) + test $0x04, %dh + jnz L(return_null) + lea 11(%edi), %eax + RETURN + + .p2align 4 +L(match_case1): + test %al, %al + jz L(match_higth_case1) + + test $0x01, %al + jnz L(Exit1) + test $0x02, %al + jnz L(Exit2) + test $0x04, %al + jnz L(Exit3) + test $0x08, %al + jnz L(Exit4) + test $0x10, %al + jnz L(Exit5) + test $0x20, %al + jnz L(Exit6) + test $0x40, %al + jnz L(Exit7) + lea 7(%edi), %eax + RETURN + + .p2align 4 +L(match_higth_case1): + test $0x01, %ah + jnz L(Exit9) + test $0x02, %ah + jnz L(Exit10) + test $0x04, %ah + jnz L(Exit11) + test $0x08, %ah + jnz L(Exit12) + test $0x10, %ah + jnz L(Exit13) + test $0x20, %ah + jnz L(Exit14) + test $0x40, %ah + jnz L(Exit15) + lea 15(%edi), %eax + RETURN + + .p2align 4 +L(Exit1): + lea (%edi), %eax + RETURN + + .p2align 4 +L(Exit2): + lea 1(%edi), %eax + RETURN + + .p2align 4 +L(Exit3): + lea 2(%edi), %eax + RETURN + + .p2align 4 +L(Exit4): + lea 3(%edi), %eax + RETURN + + .p2align 4 +L(Exit5): + lea 4(%edi), %eax + RETURN + + 
.p2align 4 +L(Exit6): + lea 5(%edi), %eax + RETURN + + .p2align 4 +L(Exit7): + lea 6(%edi), %eax + RETURN + + .p2align 4 +L(Exit9): + lea 8(%edi), %eax + RETURN + + .p2align 4 +L(Exit10): + lea 9(%edi), %eax + RETURN + + .p2align 4 +L(Exit11): + lea 10(%edi), %eax + RETURN + + .p2align 4 +L(Exit12): + lea 11(%edi), %eax + RETURN + + .p2align 4 +L(Exit13): + lea 12(%edi), %eax + RETURN + + .p2align 4 +L(Exit14): + lea 13(%edi), %eax + RETURN + + .p2align 4 +L(Exit15): + lea 14(%edi), %eax + RETURN + +/* Return NULL. */ + .p2align 4 +L(return_null): + xor %eax, %eax + RETURN + +END (__strchr_sse2) +#endif diff --git a/libc/sysdeps/i386/i686/multiarch/strchr.S b/libc/sysdeps/i386/i686/multiarch/strchr.S new file mode 100644 index 000000000..aed967c67 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strchr.S @@ -0,0 +1,76 @@ +/* Multiple versions of strchr + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include + +#ifndef NOT_IN_libc + .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits + .globl __i686.get_pc_thunk.bx + .hidden __i686.get_pc_thunk.bx + .p2align 4 + .type __i686.get_pc_thunk.bx,@function +__i686.get_pc_thunk.bx: + movl (%esp), %ebx + ret + + .text +ENTRY(strchr) + .type strchr, @gnu_indirect_function + pushl %ebx + cfi_adjust_cfa_offset (4) + cfi_rel_offset (ebx, 0) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) + jne 1f + call __init_cpu_features +1: leal __strchr_ia32@GOTOFF(%ebx), %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __strchr_sse2_bsf@GOTOFF(%ebx), %eax + testl $bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __strchr_sse2@GOTOFF(%ebx), %eax +2: popl %ebx + cfi_adjust_cfa_offset (-4); + cfi_restore (ebx) + ret +END(strchr) + +# undef ENTRY +# define ENTRY(name) \ + .type __strchr_ia32, @function; \ + .globl __strchr_ia32; \ + .p2align 4; \ + __strchr_ia32: cfi_startproc; \ + CALL_MCOUNT +# undef END +# define END(name) \ + cfi_endproc; .size __strchr_ia32, .-__strchr_ia32 +# undef libc_hidden_builtin_def +/* IFUNC doesn't work with the hidden functions in shared library since + they will be called without setting up EBX needed for PLT which is + used by IFUNC. 
*/ +# define libc_hidden_builtin_def(name) \ + .globl __GI_strchr; __GI_strchr = __strchr_ia32 +#endif + +#include "../../i586/strchr.S" diff --git a/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S b/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S index 75a1952e6..073856ff8 100644 --- a/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S +++ b/libc/sysdeps/i386/i686/multiarch/strcpy-ssse3.S @@ -20,39 +20,39 @@ #ifndef NOT_IN_libc +# ifndef USE_AS_STRCAT +# include -# include - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ cfi_rel_offset (REG, 0) -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ cfi_restore (REG) -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) -# ifndef STRCPY -# define STRCPY __strcpy_ssse3 -# endif +# ifndef STRCPY +# define STRCPY __strcpy_ssse3 +# endif -# ifdef USE_AS_STRNCPY -# define PARMS 8 -# define ENTRANCE PUSH(%ebx) -# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx); -# define RETURN1 POP(%edi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi) -# else -# define PARMS 4 -# define ENTRANCE -# define RETURN ret -# define RETURN1 POP(%edi); ret; CFI_PUSH(%edi) -# endif +# ifdef USE_AS_STRNCPY +# define PARMS 8 +# define ENTRANCE PUSH(%ebx) +# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx); +# define RETURN1 POP(%edi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi) +# else +# define PARMS 4 +# define ENTRANCE +# define RETURN ret +# define RETURN1 POP(%edi); ret; CFI_PUSH(%edi) +# endif -# define STR1 PARMS -# define STR2 STR1+4 -# define LEN STR2+4 +# define STR1 PARMS +# define STR2 STR1+4 +# define LEN STR2+4 /* In this code following instructions are used for copying: movb - 1 byte @@ -60,9 +60,9 @@ movl - 4 byte movlpd - 8 byte movaps - 16 byte - requires 16 byte alignment - of sourse and destination adresses. + of sourse and destination adresses. 16 byte alignment: adress is 32bit value, - right four bit of adress shall be 0. + right four bit of adress shall be 0. 
*/ .text @@ -70,13 +70,13 @@ ENTRY (STRCPY) ENTRANCE mov STR1(%esp), %edx mov STR2(%esp), %ecx -# ifdef USE_AS_STRNCPY +# ifdef USE_AS_STRNCPY movl LEN(%esp), %ebx test %ebx, %ebx jz L(ExitTail0) cmp $8, %ebx jbe L(StrncpyExit8Bytes) -# endif +# endif cmpb $0, (%ecx) jz L(ExitTail1) cmpb $0, 1(%ecx) @@ -93,10 +93,10 @@ ENTRY (STRCPY) jz L(ExitTail7) cmpb $0, 7(%ecx) jz L(ExitTail8) -# ifdef USE_AS_STRNCPY +# ifdef USE_AS_STRNCPY cmp $16, %ebx jb L(StrncpyExit15Bytes) -# endif +# endif cmpb $0, 8(%ecx) jz L(ExitTail9) cmpb $0, 9(%ecx) @@ -111,18 +111,20 @@ ENTRY (STRCPY) jz L(ExitTail14) cmpb $0, 14(%ecx) jz L(ExitTail15) -# ifdef USE_AS_STRNCPY +# ifdef USE_AS_STRNCPY cmp $16, %ebx je L(ExitTail16) -# endif +# endif cmpb $0, 15(%ecx) jz L(ExitTail16) PUSH (%edi) mov %edx, %edi +# endif PUSH (%esi) # ifdef USE_AS_STRNCPY mov %ecx, %esi + sub $16, %ebx and $0xf, %esi /* add 16 bytes ecx_shift to ebx */ @@ -159,7 +161,7 @@ ENTRY (STRCPY) /* eax = 0: there isn't end of string from position esi to esi+15 */ # ifdef USE_AS_STRNCPY - sub $32, %ebx + sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) # endif test %eax, %eax @@ -2217,12 +2219,17 @@ L(Shl15LoopExit): mov $1, %esi palignr $15, %xmm1, %xmm6 movaps %xmm6, (%edx) +# ifdef USE_AS_STRCAT + jmp L(CopyFrom1To16Bytes) +# endif + +# ifndef USE_AS_STRCAT .p2align 4 L(CopyFrom1To16Bytes): -# ifdef USE_AS_STRNCPY +# ifdef USE_AS_STRNCPY add $16, %ebx -# endif +# endif add %esi, %edx add %esi, %ecx @@ -2248,20 +2255,20 @@ L(CopyFrom1To16Bytes): L(Exit8): movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 7(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $8, %ebx lea 8(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 @@ -2287,23 +2294,23 @@ L(Exit16): movlpd %xmm0, (%edx) movlpd 8(%ecx), %xmm0 movlpd %xmm0, 8(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 15(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $16, %ebx lea 16(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 -# ifdef USE_AS_STRNCPY +# ifdef USE_AS_STRNCPY CFI_PUSH(%esi) @@ -2425,46 +2432,46 @@ L(Less12Case3): /* but more than 8 */ jl L(Exit9) je L(Exit10) jg L(Exit11) -# endif +# endif .p2align 4 L(Exit1): movb (%ecx), %al movb %al, (%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea (%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $1, %ebx lea 1(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 L(Exit2): movw (%ecx), %ax movw %ax, (%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 1(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $2, %ebx lea 2(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 @@ -2473,40 +2480,40 @@ L(Exit3): movw %ax, (%edx) movb 2(%ecx), %al movb %al, 2(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 2(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef 
USE_AS_STRNCPY sub $3, %ebx lea 3(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 L(Exit4): movl (%ecx), %eax movl %eax, (%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 3(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $4, %ebx lea 4(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 @@ -2515,20 +2522,20 @@ L(Exit5): movl %eax, (%edx) movb 4(%ecx), %al movb %al, 4(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 4(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $5, %ebx lea 5(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 @@ -2537,20 +2544,20 @@ L(Exit6): movl %eax, (%edx) movw 4(%ecx), %ax movw %ax, 4(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 5(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $6, %ebx lea 6(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 @@ -2559,20 +2566,20 @@ L(Exit7): movl %eax, (%edx) movl 3(%ecx), %eax movl %eax, 3(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 6(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $7, %ebx lea 7(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 @@ -2581,20 +2588,20 @@ L(Exit9): movlpd %xmm0, (%edx) movb 8(%ecx), %al movb %al, 8(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 8(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $9, %ebx lea 9(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 @@ -2603,20 +2610,20 @@ L(Exit10): movlpd %xmm0, (%edx) movw 8(%ecx), %ax movw %ax, 8(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 9(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $10, %ebx lea 10(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 @@ -2625,20 +2632,20 @@ L(Exit11): movlpd %xmm0, (%edx) movl 7(%ecx), %eax movl %eax, 7(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 10(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $11, %ebx lea 11(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 @@ -2647,20 +2654,20 @@ L(Exit12): movlpd %xmm0, (%edx) movl 8(%ecx), %eax movl %eax, 8(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 11(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $12, %ebx lea 12(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef 
USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 @@ -2669,20 +2676,20 @@ L(Exit13): movlpd %xmm0, (%edx) movlpd 5(%ecx), %xmm0 movlpd %xmm0, 5(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 12(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $13, %ebx lea 13(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 @@ -2691,20 +2698,20 @@ L(Exit14): movlpd %xmm0, (%edx) movlpd 6(%ecx), %xmm0 movlpd %xmm0, 6(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 13(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $14, %ebx lea 14(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 .p2align 4 @@ -2713,25 +2720,25 @@ L(Exit15): movlpd %xmm0, (%edx) movlpd 7(%ecx), %xmm0 movlpd %xmm0, 7(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 14(%edx), %eax -# else +# else movl %edi, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $15, %ebx lea 15(%edx), %ecx jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN1 CFI_POP (%edi) -# ifdef USE_AS_STRNCPY +# ifdef USE_AS_STRNCPY .p2align 4 L(Fill0): RETURN @@ -2865,11 +2872,11 @@ L(FillLess12): /* but more than 8 */ je L(Fill10) jmp L(Fill11) - CFI_PUSH(%edi) + CFI_PUSH (%edi) .p2align 4 L(StrncpyFillTailWithZero1): - POP (%edi) + POP (%edi) L(StrncpyFillTailWithZero): pxor %xmm0, %xmm0 xor %edx, %edx @@ -2916,46 +2923,46 @@ L(StrncpyFillLess32): movdqa %xmm0, (%ecx) lea 16(%ecx), %ecx jmp L(FillFrom1To16Bytes) -# endif +# endif .p2align 4 L(ExitTail1): movb (%ecx), %al movb %al, (%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea (%edx), %eax -# else +# else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $1, %ebx lea 1(%edx), %ecx jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN .p2align 4 L(ExitTail2): movw (%ecx), %ax movw %ax, (%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 1(%edx), %eax -# else +# else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $2, %ebx lea 2(%edx), %ecx jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN .p2align 4 @@ -2964,40 +2971,40 @@ L(ExitTail3): movw %ax, (%edx) movb 2(%ecx), %al movb %al, 2(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 2(%edx), %eax -# else +# else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $3, %ebx lea 3(%edx), %ecx jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN .p2align 4 L(ExitTail4): movl (%ecx), %eax movl %eax, (%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 3(%edx), %eax -# else +# else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $4, %ebx lea 4(%edx), %ecx jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN .p2align 4 @@ -3006,20 
+3013,20 @@ L(ExitTail5): movl %eax, (%edx) movb 4(%ecx), %al movb %al, 4(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 4(%edx), %eax -# else +# else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $5, %ebx lea 5(%edx), %ecx jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN .p2align 4 @@ -3028,20 +3035,20 @@ L(ExitTail6): movl %eax, (%edx) movw 4(%ecx), %ax movw %ax, 4(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 5(%edx), %eax -# else +# else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $6, %ebx lea 6(%edx), %ecx jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN .p2align 4 @@ -3050,20 +3057,40 @@ L(ExitTail7): movl %eax, (%edx) movl 3(%ecx), %eax movl %eax, 3(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 6(%edx), %eax -# else +# else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $7, %ebx lea 7(%edx), %ecx jnz L(StrncpyFillTailWithZero) +# ifdef USE_AS_STPCPY + cmpb $1, (%eax) + sbb $-1, %eax +# endif +# endif + RETURN + + .p2align 4 +L(ExitTail8): + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) # ifdef USE_AS_STPCPY + lea 7(%edx), %eax +# else + movl %edx, %eax +# endif +# ifdef USE_AS_STRNCPY + sub $8, %ebx + lea 8(%edx), %ecx + jnz L(StrncpyFillTailWithZero) +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN .p2align 4 @@ -3072,20 +3099,20 @@ L(ExitTail9): movlpd %xmm0, (%edx) movb 8(%ecx), %al movb %al, 8(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 8(%edx), %eax -# else +# else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $9, %ebx lea 9(%edx), %ecx jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN .p2align 4 @@ -3094,20 +3121,20 @@ L(ExitTail10): movlpd %xmm0, (%edx) movw 8(%ecx), %ax movw %ax, 8(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 9(%edx), %eax -# else +# else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $10, %ebx lea 10(%edx), %ecx jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN .p2align 4 @@ -3116,20 +3143,20 @@ L(ExitTail11): movlpd %xmm0, (%edx) movl 7(%ecx), %eax movl %eax, 7(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 10(%edx), %eax -# else +# else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $11, %ebx lea 11(%edx), %ecx jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN .p2align 4 @@ -3138,20 +3165,20 @@ L(ExitTail12): movlpd %xmm0, (%edx) movl 8(%ecx), %eax movl %eax, 8(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 11(%edx), %eax -# else +# else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $12, %ebx lea 12(%edx), %ecx jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN .p2align 4 @@ -3160,20 +3187,20 @@ L(ExitTail13): movlpd %xmm0, (%edx) movlpd 5(%ecx), %xmm0 movlpd %xmm0, 5(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 12(%edx), %eax -# else +# 
else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $13, %ebx lea 13(%edx), %ecx jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN .p2align 4 @@ -3182,20 +3209,42 @@ L(ExitTail14): movlpd %xmm0, (%edx) movlpd 6(%ecx), %xmm0 movlpd %xmm0, 6(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 13(%edx), %eax -# else +# else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $14, %ebx lea 14(%edx), %ecx jnz L(StrncpyFillTailWithZero) +# ifdef USE_AS_STPCPY + cmpb $1, (%eax) + sbb $-1, %eax +# endif +# endif + RETURN + + .p2align 4 +L(ExitTail15): + movlpd (%ecx), %xmm0 + movlpd %xmm0, (%edx) + movlpd 7(%ecx), %xmm0 + movlpd %xmm0, 7(%edx) # ifdef USE_AS_STPCPY + lea 14(%edx), %eax +# else + movl %edx, %eax +# endif +# ifdef USE_AS_STRNCPY + sub $15, %ebx + lea 15(%edx), %ecx + jnz L(StrncpyFillTailWithZero) +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN .p2align 4 @@ -3204,24 +3253,28 @@ L(ExitTail16): movlpd %xmm0, (%edx) movlpd 8(%ecx), %xmm0 movlpd %xmm0, 8(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 15(%edx), %eax -# else +# else movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY +# endif +# ifdef USE_AS_STRNCPY sub $16, %ebx lea 16(%edx), %ecx jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax +# endif # endif -# endif RETURN +#endif + # ifdef USE_AS_STRNCPY - CFI_PUSH (%esi) - CFI_PUSH (%edi) +# ifndef USE_AS_STRCAT + CFI_PUSH (%esi) + CFI_PUSH (%edi) +# endif L(StrncpyLeaveCase2OrCase3): test %eax, %eax jnz L(Aligned64LeaveCase2) @@ -3979,9 +4032,13 @@ L(StrncpyExit15): movaps %xmm6, (%edx, %esi) lea 1(%esi), %esi jmp L(CopyFrom1To16BytesCase3) +# endif + +# ifndef USE_AS_STRCAT +# ifdef USE_AS_STRNCPY + CFI_POP (%esi) + CFI_POP (%edi) - CFI_POP (%esi) - CFI_POP (%edi) .p2align 4 L(ExitTail0): movl %edx, %eax @@ -4013,31 +4070,19 @@ L(StrncpyExit15Bytes): je L(ExitTail14) cmpb $0, 13(%ecx) jz L(ExitTail14) -# endif - - .p2align 4 -L(ExitTail15): movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) movlpd 7(%ecx), %xmm0 movlpd %xmm0, 7(%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 14(%edx), %eax -# else - movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY - sub $15, %ebx - lea 15(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax -# endif -# endif +# else + movl %edx, %eax +# endif RETURN -# ifdef USE_AS_STRNCPY .p2align 4 L(StrncpyExit8Bytes): cmp $1, %ebx @@ -4068,27 +4113,19 @@ L(StrncpyExit8Bytes): je L(ExitTail7) cmpb $0, 6(%ecx) jz L(ExitTail7) -# endif - .p2align 4 -L(ExitTail8): movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) -# ifdef USE_AS_STPCPY +# ifdef USE_AS_STPCPY lea 7(%edx), %eax -# else - movl %edx, %eax -# endif -# ifdef USE_AS_STRNCPY - sub $8, %ebx - lea 8(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax -# endif -# endif +# else + movl %edx, %eax +# endif RETURN +# endif -END (STRCPY) +END (STRCPY) +# endif #endif diff --git a/libc/sysdeps/i386/i686/multiarch/strlen-sse2.S b/libc/sysdeps/i386/i686/multiarch/strlen-sse2.S index 65809d985..ca549bafc 100644 --- a/libc/sysdeps/i386/i686/multiarch/strlen-sse2.S +++ b/libc/sysdeps/i386/i686/multiarch/strlen-sse2.S @@ -1,5 +1,5 @@ /* strlen with SSE2 - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. 
Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -18,30 +18,32 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ -#if defined SHARED && !defined NOT_IN_libc +#if (defined USE_AS_STRCAT || defined SHARED) && !defined NOT_IN_libc +# ifndef USE_AS_STRCAT -#include -#include "asm-syntax.h" +# include +# include "asm-syntax.h" -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) -#define PARMS 4 -#define STR PARMS -#define ENTRANCE -#define RETURN ret +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) +# define PARMS 4 +# define STR PARMS +# define ENTRANCE +# define RETURN ret .text ENTRY (__strlen_sse2) ENTRANCE mov STR(%esp), %edx +# endif xor %eax, %eax cmpb $0, (%edx) jz L(exit_tail0) @@ -77,9 +79,8 @@ ENTRY (__strlen_sse2) jz L(exit_tail15) pxor %xmm0, %xmm0 mov %edx, %eax - mov %edx, %ecx + lea 16(%edx), %ecx and $-16, %eax - add $16, %ecx add $16, %eax pcmpeqb (%eax), %xmm0 @@ -183,51 +184,41 @@ ENTRY (__strlen_sse2) jnz L(exit) and $-0x40, %eax - PUSH (%esi) - PUSH (%edi) - PUSH (%ebx) - PUSH (%ebp) - xor %ebp, %ebp L(aligned_64): - pcmpeqb (%eax), %xmm0 - pcmpeqb 16(%eax), %xmm1 - pcmpeqb 32(%eax), %xmm2 - pcmpeqb 48(%eax), %xmm3 - pmovmskb %xmm0, %edx - pmovmskb %xmm1, %esi - pmovmskb %xmm2, %edi - pmovmskb %xmm3, %ebx - or %edx, %ebp - or %esi, %ebp - or %edi, %ebp - or %ebx, %ebp + movaps (%eax), %xmm0 + movaps 16(%eax), %xmm1 + movaps 32(%eax), %xmm2 + movaps 48(%eax), %xmm6 + pminub %xmm1, %xmm0 + pminub %xmm6, %xmm2 + pminub %xmm0, %xmm2 + pcmpeqb %xmm3, %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx lea 64(%eax), %eax jz L(aligned_64) -L(48leave): + + pcmpeqb -64(%eax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea 48(%ecx), %ecx + jnz L(exit) + + pcmpeqb %xmm1, %xmm3 + pmovmskb %xmm3, %edx test %edx, %edx - jnz L(aligned_64_exit_16) - test %esi, %esi - jnz L(aligned_64_exit_32) - test %edi, %edi - jnz L(aligned_64_exit_48) - mov %ebx, %edx - lea (%eax), %eax - jmp L(aligned_64_exit) -L(aligned_64_exit_48): - lea -16(%eax), %eax - mov %edi, %edx - jmp L(aligned_64_exit) -L(aligned_64_exit_32): - lea -32(%eax), %eax - mov %esi, %edx - jmp L(aligned_64_exit) -L(aligned_64_exit_16): - lea -48(%eax), %eax -L(aligned_64_exit): - POP (%ebp) - POP (%ebx) - POP (%edi) - POP (%esi) + lea -16(%ecx), %ecx + jnz L(exit) + + pcmpeqb -32(%eax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea -16(%ecx), %ecx + jnz L(exit) + + pcmpeqb %xmm6, %xmm3 + pmovmskb %xmm3, %edx + lea -16(%ecx), %ecx L(exit): sub %ecx, %eax test %dl, %dl @@ -340,8 +331,8 @@ L(exit_tail14): L(exit_tail15): add $15, %eax +# ifndef USE_AS_STRCAT ret - END (__strlen_sse2) - +# endif #endif diff --git a/libc/sysdeps/i386/i686/multiarch/strncat-c.c b/libc/sysdeps/i386/i686/multiarch/strncat-c.c new file mode 100644 index 000000000..132a00054 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strncat-c.c @@ -0,0 +1,8 @@ +#define STRNCAT __strncat_ia32 +#ifdef SHARED +#undef libc_hidden_def +#define libc_hidden_def(name) \ + __hidden_ver1 (__strncat_ia32, __GI___strncat, __strncat_ia32); +#endif + +#include "string/strncat.c" diff --git 
a/libc/sysdeps/i386/i686/multiarch/strncat-sse2.S b/libc/sysdeps/i386/i686/multiarch/strncat-sse2.S new file mode 100644 index 000000000..f1045b72b --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strncat-sse2.S @@ -0,0 +1,4 @@ +#define STRCAT __strncat_sse2 +#define USE_AS_STRNCAT + +#include "strcat-sse2.S" diff --git a/libc/sysdeps/i386/i686/multiarch/strncat-ssse3.S b/libc/sysdeps/i386/i686/multiarch/strncat-ssse3.S new file mode 100644 index 000000000..625b90a97 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strncat-ssse3.S @@ -0,0 +1,4 @@ +#define STRCAT __strncat_ssse3 +#define USE_AS_STRNCAT + +#include "strcat-ssse3.S" diff --git a/libc/sysdeps/i386/i686/multiarch/strncat.S b/libc/sysdeps/i386/i686/multiarch/strncat.S new file mode 100644 index 000000000..fd569c223 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strncat.S @@ -0,0 +1,3 @@ +#define STRCAT strncat +#define USE_AS_STRNCAT +#include "strcat.S" diff --git a/libc/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S b/libc/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S new file mode 100644 index 000000000..f40dfdc02 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S @@ -0,0 +1,283 @@ +/* strrchr with SSE2 with bsf and bsr + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef NOT_IN_libc + +# include + +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) + +# define PARMS 4 +# define STR1 PARMS +# define STR2 STR1+4 + + .text +ENTRY (__strrchr_sse2_bsf) + + mov STR1(%esp), %ecx + movd STR2(%esp), %xmm1 + + PUSH (%edi) + pxor %xmm2, %xmm2 + mov %ecx, %edi + punpcklbw %xmm1, %xmm1 + punpcklbw %xmm1, %xmm1 + /* ECX has OFFSET. */ + and $63, %ecx + cmp $48, %ecx + pshufd $0, %xmm1, %xmm1 + ja L(crosscashe) + +/* unaligned string. */ + movdqu (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + pcmpeqb %xmm1, %xmm0 + /* Find where NULL is. */ + pmovmskb %xmm2, %edx + /* Check if there is a match. 
*/ + pmovmskb %xmm0, %eax + + test %eax, %eax + jnz L(unaligned_match1) + + test %edx, %edx + jnz L(return_null) + + and $-16, %edi + add $16, %edi + + PUSH (%esi) + PUSH (%ebx) + + xor %ebx, %ebx + jmp L(loop) + + CFI_POP (%esi) + CFI_POP (%ebx) + + .p2align 4 +L(unaligned_return_value1): + bsf %edx, %ecx + mov $2, %edx + shl %cl, %edx + sub $1, %edx + and %edx, %eax + jz L(return_null) + bsr %eax, %eax + add %edi, %eax + POP (%edi) + ret + CFI_PUSH (%edi) + + .p2align 4 +L(unaligned_match1): + test %edx, %edx + jnz L(unaligned_return_value1) + + PUSH (%esi) + PUSH (%ebx) + + mov %eax, %ebx + lea 16(%edi), %esi + and $-16, %edi + add $16, %edi + jmp L(loop) + + CFI_POP (%esi) + CFI_POP (%ebx) + + .p2align 4 + L(crosscashe): +/* Hancle unaligned string. */ + and $15, %ecx + and $-16, %edi + pxor %xmm3, %xmm3 + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm3 + pcmpeqb %xmm1, %xmm0 + /* Find where NULL is. */ + pmovmskb %xmm3, %edx + /* Check if there is a match. */ + pmovmskb %xmm0, %eax + /* Remove the leading bytes. */ + shr %cl, %edx + shr %cl, %eax + + test %eax, %eax + jnz L(unaligned_match) + + test %edx, %edx + jnz L(return_null) + + add $16, %edi + + PUSH (%esi) + PUSH (%ebx) + + xor %ebx, %ebx + jmp L(loop) + + CFI_POP (%esi) + CFI_POP (%ebx) + + .p2align 4 +L(unaligned_return_value): + add %ecx, %edi + bsf %edx, %ecx + mov $2, %edx + shl %cl, %edx + sub $1, %edx + and %edx, %eax + jz L(return_null) + bsr %eax, %eax + add %edi, %eax + POP (%edi) + ret + CFI_PUSH (%edi) + + .p2align 4 +L(unaligned_match): + test %edx, %edx + jnz L(unaligned_return_value) + + PUSH (%esi) + PUSH (%ebx) + + mov %eax, %ebx + add $16, %edi + lea (%edi, %ecx), %esi + +/* Loop start on aligned string. */ + .p2align 4 +L(loop): + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jz L(loop) + +L(matches): + test %eax, %eax + jnz L(match) +L(return_value): + test %ebx, %ebx + jz L(return_null_1) + bsr %ebx, %eax + add %esi, %eax + + POP (%ebx) + POP (%esi) + + sub $16, %eax + POP (%edi) + ret + + CFI_PUSH (%edi) + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(match): + pmovmskb %xmm2, %ecx + test %ecx, %ecx + jnz L(return_value_1) + mov %eax, %ebx + mov %edi, %esi + jmp L(loop) + + .p2align 4 +L(return_value_1): + bsf %ecx, %ecx + mov $2, %edx + shl %cl, %edx + sub $1, %edx + and %edx, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + + bsr %eax, %eax + add %edi, %eax + sub $16, %eax + POP (%edi) + ret + + CFI_PUSH (%edi) +/* Return NULL. */ + .p2align 4 +L(return_null): + xor %eax, %eax + POP (%edi) + ret + + CFI_PUSH (%edi) + CFI_PUSH (%ebx) + CFI_PUSH (%esi) +/* Return NULL. 
*/ + .p2align 4 +L(return_null_1): + POP (%ebx) + POP (%esi) + POP (%edi) + xor %eax, %eax + ret + +END (__strrchr_sse2_bsf) +#endif diff --git a/libc/sysdeps/i386/i686/multiarch/strrchr-sse2.S b/libc/sysdeps/i386/i686/multiarch/strrchr-sse2.S new file mode 100644 index 000000000..71cc69dfe --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strrchr-sse2.S @@ -0,0 +1,709 @@ +/* strrchr SSE2 without bsf and bsr + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef NOT_IN_libc + +# include + +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) + +# define PARMS 8 +# define ENTRANCE PUSH(%edi); +# define RETURN POP(%edi); ret; CFI_PUSH(%edi); + +# define STR1 PARMS +# define STR2 STR1+4 + + .text +ENTRY (__strrchr_sse2) + + ENTRANCE + mov STR1(%esp), %ecx + movd STR2(%esp), %xmm1 + + pxor %xmm2, %xmm2 + mov %ecx, %edi + punpcklbw %xmm1, %xmm1 + punpcklbw %xmm1, %xmm1 + /* ECX has OFFSET. */ + and $63, %ecx + cmp $48, %ecx + pshufd $0, %xmm1, %xmm1 + ja L(crosscache) + +/* unaligned string. */ + movdqu (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + pcmpeqb %xmm1, %xmm0 + /* Find where NULL is. */ + pmovmskb %xmm2, %ecx + /* Check if there is a match. */ + pmovmskb %xmm0, %eax + add $16, %edi + + test %eax, %eax + jnz L(unaligned_match1) + + test %ecx, %ecx + jnz L(return_null) + + and $-16, %edi + + PUSH (%esi) + PUSH (%ebx) + + xor %ebx, %ebx + jmp L(loop) + + CFI_POP (%esi) + CFI_POP (%ebx) + + .p2align 4 +L(unaligned_match1): + test %ecx, %ecx + jnz L(prolog_find_zero_1) + + PUSH (%esi) + PUSH (%ebx) + + mov %eax, %ebx + mov %edi, %esi + and $-16, %edi + jmp L(loop) + + CFI_POP (%esi) + CFI_POP (%ebx) + + .p2align 4 +L(crosscache): +/* Hancle unaligned string. */ + and $15, %ecx + and $-16, %edi + pxor %xmm3, %xmm3 + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm3 + pcmpeqb %xmm1, %xmm0 + /* Find where NULL is. */ + pmovmskb %xmm3, %edx + /* Check if there is a match. */ + pmovmskb %xmm0, %eax + /* Remove the leading bytes. */ + shr %cl, %edx + shr %cl, %eax + add $16, %edi + + test %eax, %eax + jnz L(unaligned_match) + + test %edx, %edx + jnz L(return_null) + + PUSH (%esi) + PUSH (%ebx) + + xor %ebx, %ebx + jmp L(loop) + + CFI_POP (%esi) + CFI_POP (%ebx) + + .p2align 4 +L(unaligned_match): + test %edx, %edx + jnz L(prolog_find_zero) + + PUSH (%esi) + PUSH (%ebx) + + mov %eax, %ebx + lea (%edi, %ecx), %esi + +/* Loop start on aligned string. 
*/ + .p2align 4 +L(loop): + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jnz L(matches) + + movdqa (%edi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %edi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %ecx + pmovmskb %xmm0, %eax + or %eax, %ecx + jz L(loop) + +L(matches): + test %eax, %eax + jnz L(match) +L(return_value): + test %ebx, %ebx + jz L(return_null_1) + mov %ebx, %eax + mov %esi, %edi + + POP (%ebx) + POP (%esi) + + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(return_null_1): + POP (%ebx) + POP (%esi) + + xor %eax, %eax + RETURN + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(match): + pmovmskb %xmm2, %ecx + test %ecx, %ecx + jnz L(find_zero) + mov %eax, %ebx + mov %edi, %esi + jmp L(loop) + + .p2align 4 +L(find_zero): + test %cl, %cl + jz L(find_zero_high) + mov %cl, %dl + and $15, %dl + jz L(find_zero_8) + test $0x01, %cl + jnz L(FindZeroExit1) + test $0x02, %cl + jnz L(FindZeroExit2) + test $0x04, %cl + jnz L(FindZeroExit3) + and $1 << 4 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(find_zero_8): + test $0x10, %cl + jnz L(FindZeroExit5) + test $0x20, %cl + jnz L(FindZeroExit6) + test $0x40, %cl + jnz L(FindZeroExit7) + and $1 << 8 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(find_zero_high): + mov %ch, %dh + and $15, %dh + jz L(find_zero_high_8) + test $0x01, %ch + jnz L(FindZeroExit9) + test $0x02, %ch + jnz L(FindZeroExit10) + test $0x04, %ch + jnz L(FindZeroExit11) + and $1 << 12 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(find_zero_high_8): + test $0x10, %ch + jnz L(FindZeroExit13) + test $0x20, %ch + jnz L(FindZeroExit14) + test $0x40, %ch + jnz L(FindZeroExit15) + and $1 << 16 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit1): + and $1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit2): + and $1 << 2 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit3): + and $1 << 3 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit5): + and $1 << 5 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit6): + and $1 << 6 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit7): + and $1 << 7 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit9): + and $1 << 9 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP 
(%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit10): + and $1 << 10 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit11): + and $1 << 11 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit13): + and $1 << 13 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit14): + and $1 << 14 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + jmp L(match_exit) + + CFI_PUSH (%ebx) + CFI_PUSH (%esi) + + .p2align 4 +L(FindZeroExit15): + and $1 << 15 - 1, %eax + jz L(return_value) + + POP (%ebx) + POP (%esi) + + .p2align 4 +L(match_exit): + test %ah, %ah + jnz L(match_exit_high) + mov %al, %dl + and $15 << 4, %dl + jnz L(match_exit_8) + test $0x08, %al + jnz L(Exit4) + test $0x04, %al + jnz L(Exit3) + test $0x02, %al + jnz L(Exit2) + lea -16(%edi), %eax + RETURN + + .p2align 4 +L(match_exit_8): + test $0x80, %al + jnz L(Exit8) + test $0x40, %al + jnz L(Exit7) + test $0x20, %al + jnz L(Exit6) + lea -12(%edi), %eax + RETURN + + .p2align 4 +L(match_exit_high): + mov %ah, %dh + and $15 << 4, %dh + jnz L(match_exit_high_8) + test $0x08, %ah + jnz L(Exit12) + test $0x04, %ah + jnz L(Exit11) + test $0x02, %ah + jnz L(Exit10) + lea -8(%edi), %eax + RETURN + + .p2align 4 +L(match_exit_high_8): + test $0x80, %ah + jnz L(Exit16) + test $0x40, %ah + jnz L(Exit15) + test $0x20, %ah + jnz L(Exit14) + lea -4(%edi), %eax + RETURN + + .p2align 4 +L(Exit2): + lea -15(%edi), %eax + RETURN + + .p2align 4 +L(Exit3): + lea -14(%edi), %eax + RETURN + + .p2align 4 +L(Exit4): + lea -13(%edi), %eax + RETURN + + .p2align 4 +L(Exit6): + lea -11(%edi), %eax + RETURN + + .p2align 4 +L(Exit7): + lea -10(%edi), %eax + RETURN + + .p2align 4 +L(Exit8): + lea -9(%edi), %eax + RETURN + + .p2align 4 +L(Exit10): + lea -7(%edi), %eax + RETURN + + .p2align 4 +L(Exit11): + lea -6(%edi), %eax + RETURN + + .p2align 4 +L(Exit12): + lea -5(%edi), %eax + RETURN + + .p2align 4 +L(Exit14): + lea -3(%edi), %eax + RETURN + + .p2align 4 +L(Exit15): + lea -2(%edi), %eax + RETURN + + .p2align 4 +L(Exit16): + lea -1(%edi), %eax + RETURN + +/* Return NULL. 
*/ + .p2align 4 +L(return_null): + xor %eax, %eax + RETURN + + .p2align 4 +L(prolog_find_zero): + add %ecx, %edi + mov %edx, %ecx +L(prolog_find_zero_1): + test %cl, %cl + jz L(prolog_find_zero_high) + mov %cl, %dl + and $15, %dl + jz L(prolog_find_zero_8) + test $0x01, %cl + jnz L(PrologFindZeroExit1) + test $0x02, %cl + jnz L(PrologFindZeroExit2) + test $0x04, %cl + jnz L(PrologFindZeroExit3) + and $1 << 4 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(prolog_find_zero_8): + test $0x10, %cl + jnz L(PrologFindZeroExit5) + test $0x20, %cl + jnz L(PrologFindZeroExit6) + test $0x40, %cl + jnz L(PrologFindZeroExit7) + and $1 << 8 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(prolog_find_zero_high): + mov %ch, %dh + and $15, %dh + jz L(prolog_find_zero_high_8) + test $0x01, %ch + jnz L(PrologFindZeroExit9) + test $0x02, %ch + jnz L(PrologFindZeroExit10) + test $0x04, %ch + jnz L(PrologFindZeroExit11) + and $1 << 12 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(prolog_find_zero_high_8): + test $0x10, %ch + jnz L(PrologFindZeroExit13) + test $0x20, %ch + jnz L(PrologFindZeroExit14) + test $0x40, %ch + jnz L(PrologFindZeroExit15) + and $1 << 16 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit1): + and $1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit2): + and $1 << 2 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit3): + and $1 << 3 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit5): + and $1 << 5 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit6): + and $1 << 6 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit7): + and $1 << 7 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit9): + and $1 << 9 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit10): + and $1 << 10 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit11): + and $1 << 11 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit13): + and $1 << 13 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit14): + and $1 << 14 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + + .p2align 4 +L(PrologFindZeroExit15): + and $1 << 15 - 1, %eax + jnz L(match_exit) + xor %eax, %eax + RETURN + +END (__strrchr_sse2) +#endif diff --git a/libc/sysdeps/i386/i686/multiarch/strrchr.S b/libc/sysdeps/i386/i686/multiarch/strrchr.S new file mode 100644 index 000000000..866cac094 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strrchr.S @@ -0,0 +1,76 @@ +/* Multiple versions of strrchr + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include + +#ifndef NOT_IN_libc + .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits + .globl __i686.get_pc_thunk.bx + .hidden __i686.get_pc_thunk.bx + .p2align 4 + .type __i686.get_pc_thunk.bx,@function +__i686.get_pc_thunk.bx: + movl (%esp), %ebx + ret + + .text +ENTRY(strrchr) + .type strrchr, @gnu_indirect_function + pushl %ebx + cfi_adjust_cfa_offset (4) + cfi_rel_offset (ebx, 0) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) + jne 1f + call __init_cpu_features +1: leal __strrchr_ia32@GOTOFF(%ebx), %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __strrchr_sse2_bsf@GOTOFF(%ebx), %eax + testl $bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __strrchr_sse2@GOTOFF(%ebx), %eax +2: popl %ebx + cfi_adjust_cfa_offset (-4); + cfi_restore (ebx) + ret +END(strrchr) + +# undef ENTRY +# define ENTRY(name) \ + .type __strrchr_ia32, @function; \ + .globl __strrchr_ia32; \ + .p2align 4; \ + __strrchr_ia32: cfi_startproc; \ + CALL_MCOUNT +# undef END +# define END(name) \ + cfi_endproc; .size __strrchr_ia32, .-__strrchr_ia32 +# undef libc_hidden_builtin_def +/* IFUNC doesn't work with the hidden functions in shared library since + they will be called without setting up EBX needed for PLT which is + used by IFUNC. */ +# define libc_hidden_builtin_def(name) \ + .globl __GI_strrchr; __GI_strrchr = __strrchr_ia32 +#endif + +#include "../../strrchr.S" diff --git a/libc/sysdeps/i386/i686/multiarch/strspn.S b/libc/sysdeps/i386/i686/multiarch/strspn.S index dbdf1af48..cd26c8018 100644 --- a/libc/sysdeps/i386/i686/multiarch/strspn.S +++ b/libc/sysdeps/i386/i686/multiarch/strspn.S @@ -76,8 +76,8 @@ END(strspn) # define ENTRY(name) \ .type __strspn_ia32, @function; \ .globl __strspn_ia32; \ - .p2align 4 - __strspn_ia32: cfi_startproc; \ + .p2align 4; \ +__strspn_ia32: cfi_startproc; \ CALL_MCOUNT # undef END # define END(name) \ diff --git a/libc/sysdeps/i386/i686/multiarch/wcscmp-c.c b/libc/sysdeps/i386/i686/multiarch/wcscmp-c.c new file mode 100644 index 000000000..9592455d0 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/wcscmp-c.c @@ -0,0 +1,10 @@ +#ifndef NOT_IN_libc + +# define WCSCMP __wcscmp_ia32 + +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wcscmp_ia32, __GI_wcscmp, __wcscmp_ia32); +#endif + +#include "wcsmbs/wcscmp.c" diff --git a/libc/sysdeps/i386/i686/multiarch/wcscmp-sse2.S b/libc/sysdeps/i386/i686/multiarch/wcscmp-sse2.S new file mode 100644 index 000000000..404a9a4d4 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/wcscmp-sse2.S @@ -0,0 +1,1003 @@ +/* wcscmp with SSE2 + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef NOT_IN_libc + +# include +# include "asm-syntax.h" + +# define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +# define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +# define PUSH(REG) pushl REG; CFI_PUSH (REG) +# define POP(REG) popl REG; CFI_POP (REG) + +# ifndef STRCMP +# define STRCMP __wcscmp_sse2 +# endif + +# define ENTRANCE PUSH(%esi); PUSH(%edi) +# define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi); +# define PARMS 4 +# define STR1 PARMS +# define STR2 STR1+4 + + .text +ENTRY (STRCMP) +/* + * This implementation uses SSE to compare up to 16 bytes at a time. +*/ + mov STR1(%esp), %edx + mov STR2(%esp), %eax + + mov (%eax), %ecx + cmp %ecx, (%edx) + jne L(neq) + test %ecx, %ecx + jz L(eq) + + mov 4(%eax), %ecx + cmp %ecx, 4(%edx) + jne L(neq) + test %ecx, %ecx + jz L(eq) + + mov 8(%eax), %ecx + cmp %ecx, 8(%edx) + jne L(neq) + test %ecx, %ecx + jz L(eq) + + mov 12(%eax), %ecx + cmp %ecx, 12(%edx) + jne L(neq) + test %ecx, %ecx + jz L(eq) + + ENTRANCE + add $16, %eax + add $16, %edx + + mov %eax, %esi + mov %edx, %edi + pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ + mov %al, %ch + mov %dl, %cl + and $63, %eax /* esi alignment in cache line */ + and $63, %edx /* edi alignment in cache line */ + and $15, %cl + jz L(continue_00) + cmp $16, %edx + jb L(continue_0) + cmp $32, %edx + jb L(continue_16) + cmp $48, %edx + jb L(continue_32) + +L(continue_48): + and $15, %ch + jz L(continue_48_00) + cmp $16, %eax + jb L(continue_0_48) + cmp $32, %eax + jb L(continue_16_48) + cmp $48, %eax + jb L(continue_32_48) + + .p2align 4 +L(continue_48_48): + mov (%esi), %ecx + cmp %ecx, (%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 4(%esi), %ecx + cmp %ecx, 4(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%esi), %ecx + cmp %ecx, 8(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%esi), %ecx + cmp %ecx, 12(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 16(%edi), %xmm1 + movdqu 16(%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%edi), %xmm1 + movdqu 32(%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqu 48(%edi), %xmm1 + movdqu 48(%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %esi + add $64, %edi + jmp L(continue_48_48) + +L(continue_0): + and $15, %ch + jz L(continue_0_00) + cmp $16, %eax + jb L(continue_0_0) + cmp $32, %eax + jb L(continue_0_16) + cmp $48, %eax + jb L(continue_0_32) + + .p2align 4 +L(continue_0_48): + mov (%esi), %ecx + cmp %ecx, (%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 4(%esi), %ecx + cmp %ecx, 4(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%esi), %ecx + cmp %ecx, 8(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%esi), %ecx + cmp %ecx, 12(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 16(%edi), %xmm1 + movdqu 16(%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%edi), %xmm1 + movdqu 32(%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + mov 48(%esi), %ecx + cmp %ecx, 48(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 52(%esi), %ecx + cmp %ecx, 52(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 56(%esi), %ecx + cmp %ecx, 56(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 60(%esi), %ecx + cmp %ecx, 60(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + add $64, %esi + add $64, %edi + jmp L(continue_0_48) + + .p2align 4 +L(continue_00): + and $15, %ch + jz L(continue_00_00) + cmp $16, %eax + jb L(continue_00_0) + cmp $32, %eax + jb L(continue_00_16) + cmp $48, %eax + jb L(continue_00_32) + + .p2align 4 +L(continue_00_48): + pcmpeqd (%edi), %xmm0 + mov (%edi), %eax + pmovmskb %xmm0, %ecx + test %ecx, %ecx + jnz L(less4_double_words1) + + sub (%esi), %eax + jnz L(return) + + mov 4(%edi), %eax + sub 4(%esi), %eax + jnz L(return) + + mov 8(%edi), %eax + sub 8(%esi), %eax + jnz L(return) + + mov 12(%edi), %eax + sub 12(%esi), %eax + jnz L(return) + + movdqu 16(%esi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%esi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqu 48(%esi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ + pcmpeqd 48(%edi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %esi + add $64, %edi + jmp L(continue_00_48) + + .p2align 4 +L(continue_32): + and $15, %ch + jz L(continue_32_00) + cmp $16, %eax + jb L(continue_0_32) + cmp $32, %eax + jb L(continue_16_32) + cmp $48, %eax + jb L(continue_32_32) + + .p2align 4 +L(continue_32_48): + mov (%esi), %ecx + cmp %ecx, (%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 4(%esi), %ecx + cmp %ecx, 4(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%esi), %ecx + cmp %ecx, 8(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%esi), %ecx + cmp %ecx, 12(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 16(%esi), %ecx + cmp %ecx, 16(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 20(%esi), %ecx + cmp %ecx, 20(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 24(%esi), %ecx + cmp %ecx, 24(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 28(%esi), %ecx + cmp %ecx, 28(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 32(%edi), %xmm1 + movdqu 32(%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqu 48(%edi), %xmm1 + movdqu 48(%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %esi + add $64, %edi + jmp L(continue_32_48) + + .p2align 4 +L(continue_16): + and $15, %ch + jz L(continue_16_00) + cmp $16, %eax + jb L(continue_0_16) + cmp $32, %eax + jb L(continue_16_16) + cmp $48, %eax + jb L(continue_16_32) + + .p2align 4 +L(continue_16_48): + mov (%esi), %ecx + cmp %ecx, (%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 4(%esi), %ecx + cmp %ecx, 4(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%esi), %ecx + cmp %ecx, 8(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%esi), %ecx + cmp %ecx, 12(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 16(%edi), %xmm1 + movdqu 16(%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + mov 32(%esi), %ecx + cmp %ecx, 32(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 36(%esi), %ecx + cmp %ecx, 36(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 40(%esi), %ecx + cmp %ecx, 40(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 44(%esi), %ecx + cmp %ecx, 44(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 48(%edi), %xmm1 + movdqu 48(%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %esi + add $64, %edi + jmp L(continue_16_48) + + .p2align 4 +L(continue_00_00): + movdqa (%edi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqa 16(%edi), %xmm3 + pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%esi), %xmm3 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm3 /* packed sub of comparison results*/ + pmovmskb %xmm3, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqa 32(%edi), %xmm5 + pcmpeqd %xmm5, %xmm0 /* Any null double_word? */ + pcmpeqd 32(%esi), %xmm5 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm5 /* packed sub of comparison results*/ + pmovmskb %xmm5, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqa 48(%edi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %esi + add $64, %edi + jmp L(continue_00_00) + + .p2align 4 +L(continue_00_32): + movdqu (%esi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %esi + add $16, %edi + jmp L(continue_00_48) + + .p2align 4 +L(continue_00_16): + movdqu (%esi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%esi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + add $32, %esi + add $32, %edi + jmp L(continue_00_48) + + .p2align 4 +L(continue_00_0): + movdqu (%esi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%esi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ + pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%esi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + add $48, %esi + add $48, %edi + jmp L(continue_00_48) + + .p2align 4 +L(continue_48_00): + pcmpeqd (%esi), %xmm0 + mov (%edi), %eax + pmovmskb %xmm0, %ecx + test %ecx, %ecx + jnz L(less4_double_words1) + + sub (%esi), %eax + jnz L(return) + + mov 4(%edi), %eax + sub 4(%esi), %eax + jnz L(return) + + mov 8(%edi), %eax + sub 8(%esi), %eax + jnz L(return) + + mov 12(%edi), %eax + sub 12(%esi), %eax + jnz L(return) + + movdqu 16(%edi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%edi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqu 48(%edi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %esi + add $64, %edi + jmp L(continue_48_00) + + .p2align 4 +L(continue_32_00): + movdqu (%edi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %esi + add $16, %edi + jmp L(continue_48_00) + + .p2align 4 +L(continue_16_00): + movdqu (%edi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%edi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + add $32, %esi + add $32, %edi + jmp L(continue_48_00) + + .p2align 4 +L(continue_0_00): + movdqu (%edi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%edi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%edi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + add $48, %esi + add $48, %edi + jmp L(continue_48_00) + + .p2align 4 +L(continue_32_32): + movdqu (%edi), %xmm1 + movdqu (%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %esi + add $16, %edi + jmp L(continue_48_48) + + .p2align 4 +L(continue_16_16): + movdqu (%edi), %xmm1 + movdqu (%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%edi), %xmm3 + movdqu 16(%esi), %xmm4 + pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm3 /* packed sub of comparison results*/ + pmovmskb %xmm3, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + add $32, %esi + add $32, %edi + jmp L(continue_48_48) + + .p2align 4 +L(continue_0_0): + movdqu (%edi), %xmm1 + movdqu (%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%edi), %xmm3 + movdqu 16(%esi), %xmm4 + pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm3 /* packed sub of comparison results*/ + pmovmskb %xmm3, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%edi), %xmm1 + movdqu 32(%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + add $48, %esi + add $48, %edi + jmp L(continue_48_48) + + .p2align 4 +L(continue_0_16): + movdqu (%edi), %xmm1 + movdqu (%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%edi), %xmm1 + movdqu 16(%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + add $32, %esi + add $32, %edi + jmp L(continue_32_48) + + .p2align 4 +L(continue_0_32): + movdqu (%edi), %xmm1 + movdqu (%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %esi + add $16, %edi + jmp L(continue_16_48) + + .p2align 4 +L(continue_16_32): + movdqu (%edi), %xmm1 + movdqu (%esi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %esi + add $16, %edi + jmp L(continue_32_48) + + .p2align 4 +L(less4_double_words1): + cmp (%esi), %eax + jne L(nequal) + test %eax, %eax + jz L(equal) + + mov 4(%esi), %ecx + cmp %ecx, 4(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%esi), %ecx + cmp %ecx, 8(%edi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%esi), %edx + mov 12(%edi), %eax + sub %edx, %eax + RETURN + + .p2align 4 +L(less4_double_words): + test %dl, %dl + jz L(next_two_double_words) + and $15, %dl + jz L(second_double_word) + mov (%edi), %eax + sub (%esi), %eax + RETURN + + .p2align 4 +L(second_double_word): + mov 4(%edi), %eax + sub 4(%esi), %eax + RETURN + + .p2align 4 +L(next_two_double_words): + and $15, %dh + jz L(fourth_double_word) + mov 8(%edi), %eax + sub 8(%esi), %eax + RETURN + + .p2align 4 +L(fourth_double_word): + mov 12(%edi), %eax + sub 12(%esi), %eax + RETURN + + .p2align 4 +L(less4_double_words_16): + test %dl, %dl + jz L(next_two_double_words_16) + and $15, %dl + jz L(second_double_word_16) + mov 16(%edi), %eax + sub 16(%esi), %eax + RETURN + + .p2align 4 +L(second_double_word_16): + mov 20(%edi), %eax + sub 20(%esi), %eax + RETURN + + .p2align 4 +L(next_two_double_words_16): + and $15, %dh + jz L(fourth_double_word_16) + mov 24(%edi), %eax + sub 24(%esi), %eax + RETURN + + .p2align 4 +L(fourth_double_word_16): + mov 28(%edi), %eax + sub 28(%esi), %eax + RETURN + + .p2align 4 +L(less4_double_words_32): + test %dl, %dl + jz L(next_two_double_words_32) + and $15, %dl + jz L(second_double_word_32) + mov 32(%edi), %eax + sub 32(%esi), %eax + RETURN + + .p2align 4 +L(second_double_word_32): + mov 36(%edi), %eax + sub 36(%esi), %eax + RETURN + + .p2align 4 +L(next_two_double_words_32): + and $15, %dh + jz L(fourth_double_word_32) + mov 40(%edi), %eax + sub 40(%esi), %eax + RETURN + + .p2align 4 +L(fourth_double_word_32): + mov 44(%edi), %eax + sub 44(%esi), %eax + RETURN + + .p2align 4 +L(less4_double_words_48): + test %dl, %dl + jz L(next_two_double_words_48) + and $15, %dl + jz 
L(second_double_word_48) + mov 48(%edi), %eax + sub 48(%esi), %eax + RETURN + + .p2align 4 +L(second_double_word_48): + mov 52(%edi), %eax + sub 52(%esi), %eax + RETURN + + .p2align 4 +L(next_two_double_words_48): + and $15, %dh + jz L(fourth_double_word_48) + mov 56(%edi), %eax + sub 56(%esi), %eax + RETURN + + .p2align 4 +L(fourth_double_word_48): + mov 60(%edi), %eax + sub 60(%esi), %eax + RETURN + + .p2align 4 +L(return): + RETURN + + .p2align 4 +L(nequal): + mov $1, %eax + ja L(nequal_bigger) + neg %eax + +L(nequal_bigger): + RETURN + + .p2align 4 +L(equal): + xorl %eax, %eax + RETURN + + CFI_POP (%edi) + CFI_POP (%esi) + + .p2align 4 +L(neq): + mov $1, %eax + ja L(neq_bigger) + neg %eax + +L(neq_bigger): + ret + + .p2align 4 +L(eq): + xorl %eax, %eax + ret + +END (STRCMP) +#endif diff --git a/libc/sysdeps/i386/i686/multiarch/wcscmp.S b/libc/sysdeps/i386/i686/multiarch/wcscmp.S new file mode 100644 index 000000000..c1657ad0e --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/wcscmp.S @@ -0,0 +1,57 @@ +/* Multiple versions of wcscmp + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include + +/* Define multiple versions only for the definition in libc and for the + DSO. In static binaries, we need wcscmp before the initialization + happened. 
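The wcscmp.S stub that follows is the IFUNC-style selector for the versions above: it initializes __cpu_features on first use, then hands back __wcscmp_sse2 when the SSE2 feature bit is set and __wcscmp_ia32 otherwise. In rough C terms the choice it encodes is the following (a sketch only; cpu_has_sse2 is a hypothetical stand-in for the __cpu_features test the assembly performs):

#include <wchar.h>

extern int cpu_has_sse2 (void);   /* hypothetical: the CPUID_OFFSET/index_SSE2 test */
extern int __wcscmp_ia32 (const wchar_t *, const wchar_t *);
extern int __wcscmp_sse2 (const wchar_t *, const wchar_t *);

/* What the indirect-function stub computes: the wcscmp implementation
   the dynamic linker should install for this machine.  */
static __typeof (__wcscmp_ia32) *
wcscmp_ifunc (void)
{
  return cpu_has_sse2 () ? __wcscmp_sse2 : __wcscmp_ia32;
}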
*/ +#ifndef NOT_IN_libc + .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits + .globl __i686.get_pc_thunk.bx + .hidden __i686.get_pc_thunk.bx + .p2align 4 + .type __i686.get_pc_thunk.bx,@function +__i686.get_pc_thunk.bx: + movl (%esp), %ebx + ret + + .text +ENTRY(wcscmp) + .type wcscmp, @gnu_indirect_function + pushl %ebx + cfi_adjust_cfa_offset (4) + cfi_rel_offset (ebx, 0) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) + jne 1f + call __init_cpu_features +1: leal __wcscmp_ia32@GOTOFF(%ebx), %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __wcscmp_sse2@GOTOFF(%ebx), %eax +2: popl %ebx + cfi_adjust_cfa_offset (-4); + cfi_restore (ebx) + ret +END(wcscmp) +#endif diff --git a/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c b/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c index ccf1633bd..911ff1fe3 100644 --- a/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c +++ b/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c @@ -300,14 +300,20 @@ recompute: break; case 3: /* painful */ for (i=jz;i>0;i--) { - fw = fq[i-1]+fq[i]; - fq[i] += fq[i-1]-fw; - fq[i-1] = fw; +#if __FLT_EVAL_METHOD__ != 0 + volatile +#endif + double fv = (double)(fq[i-1]+fq[i]); + fq[i] += fq[i-1]-fv; + fq[i-1] = fv; } for (i=jz;i>1;i--) { - fw = fq[i-1]+fq[i]; - fq[i] += fq[i-1]-fw; - fq[i-1] = fw; +#if __FLT_EVAL_METHOD__ != 0 + volatile +#endif + double fv = (double)(fq[i-1]+fq[i]); + fq[i] += fq[i-1]-fv; + fq[i-1] = fv; } for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; if(ih==0) { diff --git a/libc/sysdeps/mach/hurd/spawni.c b/libc/sysdeps/mach/hurd/spawni.c index 244ca2d6e..3d2b5915b 100644 --- a/libc/sysdeps/mach/hurd/spawni.c +++ b/libc/sysdeps/mach/hurd/spawni.c @@ -1,5 +1,5 @@ /* spawn a new process running an executable. Hurd version. - Copyright (C) 2001,02,04 Free Software Foundation, Inc. + Copyright (C) 2001,02,04,11 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -41,7 +41,7 @@ __spawni (pid_t *pid, const char *file, const posix_spawn_file_actions_t *file_actions, const posix_spawnattr_t *attrp, char *const argv[], char *const envp[], - int use_path) + int xflags) { pid_t new_pid; char *path, *p, *name; @@ -391,7 +391,7 @@ __spawni (pid_t *pid, const char *file, /* Make sure the dtable can hold NEWFD. */ #define EXPAND_DTABLE(newfd) \ - ({ \ + ({ \ if ((unsigned int)newfd >= dtablesize \ && newfd < _hurd_rlimits[RLIMIT_OFILE].rlim_cur) \ { \ @@ -399,7 +399,7 @@ __spawni (pid_t *pid, const char *file, NEW_TABLE (dtable, newfd); \ NEW_TABLE (ulink_dtable, newfd); \ NEW_TABLE (dtable_cells, newfd); \ - dtablesize = newfd + 1; \ + dtablesize = newfd + 1; \ } \ ((unsigned int)newfd < dtablesize ? 0 : EMFILE); \ }) @@ -543,7 +543,7 @@ __spawni (pid_t *pid, const char *file, conditions are diagnosed first and what side effects (file creation, etc) can be observed before what errors. */ - if (! use_path || strchr (file, '/') != NULL) + if ((xflags & SPAWN_XFLAGS_USE_PATH) == 0 || strchr (file, '/') != NULL) /* The FILE parameter is actually a path. 
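The k_rem_pio2.c hunk above replaces the shared temporary fw with a block-local fv that is declared volatile when __FLT_EVAL_METHOD__ is non-zero: on targets such as 32-bit x86 the FPU may keep intermediates in extended precision, and this Fast2Sum-style compensation step only recovers the exact low-order part if the sum is first rounded to double. A sketch of the step being protected (illustrative, not the patch itself; it assumes the usual Fast2Sum precondition that the first operand is the larger in magnitude):

/* Split a + b into a sum rounded to double and the rounding error.
   The volatile forces the sum to be rounded to double before it is
   reused, even when the FPU evaluates in extended precision.  */
static void
fast_two_sum (double a, double b, double *hi, double *lo)
{
#if __FLT_EVAL_METHOD__ != 0
  volatile
#endif
  double s = a + b;
  *lo = b + (a - s);   /* error term, exact once s is correctly rounded */
  *hi = s;
}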
*/ err = child_lookup (file, O_EXEC, 0, &execfile); else diff --git a/libc/sysdeps/posix/getaddrinfo.c b/libc/sysdeps/posix/getaddrinfo.c index 6d574c51e..a5aafe93a 100644 --- a/libc/sysdeps/posix/getaddrinfo.c +++ b/libc/sysdeps/posix/getaddrinfo.c @@ -432,7 +432,10 @@ gaih_inet (const char *name, const struct gaih_service *service, /* In case the output string is the same as the input string no new string has been allocated. */ if (p != name) - malloc_name = true; + { + name = p; + malloc_name = true; + } } #endif diff --git a/libc/sysdeps/posix/spawni.c b/libc/sysdeps/posix/spawni.c index f19862fff..129edcd6d 100644 --- a/libc/sysdeps/posix/spawni.c +++ b/libc/sysdeps/posix/spawni.c @@ -28,6 +28,7 @@ #include "spawn_int.h" #include #include +#include /* The Unix standard contains a long explanation of the way to signal @@ -38,6 +39,7 @@ #define SPAWN_ERROR 127 +#if SHLIB_COMPAT (libc, GLIBC_2_2, GLIBC_2_15) /* The file is accessible but it is not an executable file. Invoke the shell to interpret it as a script. */ static void @@ -64,6 +66,10 @@ script_execute (const char *file, char *const argv[], char *const envp[]) __execve (new_argv[0], new_argv, envp); } } +# define tryshell (xflags & SPAWN_XFLAGS_TRY_SHELL) +#else +# define tryshell 0 +#endif /* Spawn a new process executing PATH with the attributes describes in *ATTRP. @@ -72,7 +78,7 @@ int __spawni (pid_t *pid, const char *file, const posix_spawn_file_actions_t *file_actions, const posix_spawnattr_t *attrp, char *const argv[], - char *const envp[], int use_path) + char *const envp[], int xflags) { pid_t new_pid; char *path, *p, *name; @@ -226,12 +232,12 @@ __spawni (pid_t *pid, const char *file, } } - if (! use_path || strchr (file, '/') != NULL) + if ((xflags & SPAWN_XFLAGS_USE_PATH) == 0 || strchr (file, '/') != NULL) { /* The FILE parameter is actually a path. */ __execve (file, argv, envp); - if (errno == ENOEXEC) + if (tryshell && errno == ENOEXEC) script_execute (file, argv, envp); /* Oh, oh. `execve' returns. This is bad. */ @@ -277,7 +283,7 @@ __spawni (pid_t *pid, const char *file, /* Try to execute this name. If it works, execv will not return. */ __execve (startp, argv, envp); - if (errno == ENOEXEC) + if (tryshell && errno == ENOEXEC) script_execute (startp, argv, envp); switch (errno) diff --git a/libc/sysdeps/powerpc/dl-tls.h b/libc/sysdeps/powerpc/dl-tls.h index 9ab81f996..5fd1b232c 100644 --- a/libc/sysdeps/powerpc/dl-tls.h +++ b/libc/sysdeps/powerpc/dl-tls.h @@ -17,6 +17,8 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ +#ifndef _PPC_DL_TLS_H +# define _PPC_DL_TLS_H 1 /* Type used for the representation of TLS information in the TOC. */ typedef struct @@ -50,3 +52,5 @@ extern void *__tls_get_addr (tls_index *ti); /* Value used for dtv entries for which the allocation is delayed. 
*/ #define TLS_DTV_UNALLOCATED ((void *) -1l) + +#endif /* dl-tls.h */ diff --git a/libc/sysdeps/powerpc/fpu/libm-test-ulps b/libc/sysdeps/powerpc/fpu/libm-test-ulps index 8516e915e..7f187ac72 100644 --- a/libc/sysdeps/powerpc/fpu/libm-test-ulps +++ b/libc/sysdeps/powerpc/fpu/libm-test-ulps @@ -395,6 +395,8 @@ ldouble: 1 Test "Imaginary part of: ctan (0.75 + 1.25 i) == 0.160807785916206426725166058173438663 + 0.975363285031235646193581759755216379 i": double: 1 idouble: 1 +ildouble: 1 +ldouble: 1 # ctanh Test "Real part of: ctanh (-2 - 3 i) == -0.965385879022133124278480269394560686 + 0.988437503832249372031403430350121098e-2 i": diff --git a/libc/sysdeps/powerpc/powerpc64/dl-irel.h b/libc/sysdeps/powerpc/powerpc64/dl-irel.h index 3c2668fbb..bd2c73ab0 100644 --- a/libc/sysdeps/powerpc/powerpc64/dl-irel.h +++ b/libc/sysdeps/powerpc/powerpc64/dl-irel.h @@ -1,6 +1,6 @@ /* Machine-dependent ELF indirect relocation inline functions. PowerPC64 version. - Copyright (C) 2009 Free Software Foundation, Inc. + Copyright (C) 2009, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -23,16 +23,11 @@ #include #include +#include +#include #define ELF_MACHINE_IRELA 1 -typedef struct -{ - Elf64_Addr fd_func; - Elf64_Addr fd_toc; - Elf64_Addr fd_aux; -} Elf64_FuncDesc; - static inline Elf64_Addr __attribute ((always_inline)) elf_ifunc_invoke (Elf64_Addr addr) diff --git a/libc/sysdeps/pthread/aio_suspend.c b/libc/sysdeps/pthread/aio_suspend.c index b85b16d10..99f3a80c5 100644 --- a/libc/sysdeps/pthread/aio_suspend.c +++ b/libc/sysdeps/pthread/aio_suspend.c @@ -92,6 +92,18 @@ cleanup (void *arg) pthread_mutex_unlock (&__aio_requests_mutex); } +#ifdef DONT_NEED_AIO_MISC_COND +static int +__attribute__ ((noinline)) +do_aio_misc_wait(int *cntr, const struct timespec *timeout) +{ + int result = 0; + + AIO_MISC_WAIT(result, *cntr, timeout, 1); + + return result; +} +#endif int aio_suspend (list, nent, timeout) @@ -169,7 +181,7 @@ aio_suspend (list, nent, timeout) pthread_cleanup_push (cleanup, &clparam); #ifdef DONT_NEED_AIO_MISC_COND - AIO_MISC_WAIT (result, cntr, timeout, 1); + result = do_aio_misc_wait(&cntr, timeout); #else if (timeout == NULL) result = pthread_cond_wait (&cond, &__aio_requests_mutex); diff --git a/libc/sysdeps/sparc/dl-procinfo.c b/libc/sysdeps/sparc/dl-procinfo.c index db7e86746..4a629caa1 100644 --- a/libc/sysdeps/sparc/dl-procinfo.c +++ b/libc/sysdeps/sparc/dl-procinfo.c @@ -1,5 +1,5 @@ /* Data for Linux/sparc version of processor capability information. - Copyright (C) 2002,2003,2006 Free Software Foundation, Inc. + Copyright (C) 2002,2003,2006,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek , 2002. 
@@ -47,10 +47,13 @@ #if !defined PROCINFO_DECL && defined SHARED ._dl_sparc_cap_flags #else -PROCINFO_CLASS const char _dl_sparc_cap_flags[8][7] +PROCINFO_CLASS const char _dl_sparc_cap_flags[24][11] #endif #ifndef PROCINFO_DECL - = { "flush", "stbar", "swap", "muldiv", "v9", "ultra3", "v9v", "v9v2" } + = { "flush", "stbar", "swap", "muldiv", "v9", "ultra3", "v9v", "v9v2", + "mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2", + "ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau", + "ima", "cspare" } #endif #if !defined SHARED || defined PROCINFO_DECL ; diff --git a/libc/sysdeps/sparc/dl-procinfo.h b/libc/sysdeps/sparc/dl-procinfo.h index 04ebda751..1aea45867 100644 --- a/libc/sysdeps/sparc/dl-procinfo.h +++ b/libc/sysdeps/sparc/dl-procinfo.h @@ -1,5 +1,5 @@ /* Linux/sparc version of processor capability information handling macros. - Copyright (C) 1999,2000,2001,2002,2003,2004,2006 + Copyright (C) 1999,2000,2001,2002,2003,2004,2006,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek , 1999. @@ -23,8 +23,9 @@ #define _DL_PROCINFO_H 1 #include +#include -#define _DL_HWCAP_COUNT 8 +#define _DL_HWCAP_COUNT 24 static inline int __attribute__ ((unused)) diff --git a/libc/sysdeps/sparc/sparc32/bits/atomic.h b/libc/sysdeps/sparc/sparc32/bits/atomic.h index ef553f727..b9b51245f 100644 --- a/libc/sysdeps/sparc/sparc32/bits/atomic.h +++ b/libc/sysdeps/sparc/sparc32/bits/atomic.h @@ -1,5 +1,5 @@ /* Atomic operations. sparc32 version. - Copyright (C) 2003, 2004, 2006 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2006, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek , 2003. @@ -22,6 +22,7 @@ #define _BITS_ATOMIC_H 1 #include +#include typedef int8_t atomic8_t; typedef uint8_t uatomic8_t; @@ -238,13 +239,10 @@ volatile unsigned char __sparc32_atomic_locks[64] apps on v9 CPUs e.g. with process shared primitives, use cas insn on v9 CPUs and ldstub on pre-v9. */ -/* Avoid include here. */ extern uint64_t _dl_hwcap __attribute__((weak)); -# define __ATOMIC_HWCAP_SPARC_V9 16 # define __atomic_is_v9 \ (__builtin_expect (&_dl_hwcap != 0, 1) \ - && __builtin_expect (_dl_hwcap & __ATOMIC_HWCAP_SPARC_V9, \ - __ATOMIC_HWCAP_SPARC_V9)) + && __builtin_expect (_dl_hwcap & HWCAP_SPARC_V9, HWCAP_SPARC_V9)) # define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \ ({ \ diff --git a/libc/sysdeps/sparc/sparc32/dl-machine.h b/libc/sysdeps/sparc/sparc32/dl-machine.h index f8e8fe417..f2bc94a07 100644 --- a/libc/sysdeps/sparc/sparc32/dl-machine.h +++ b/libc/sysdeps/sparc/sparc32/dl-machine.h @@ -1,5 +1,5 @@ /* Machine-dependent ELF dynamic relocation inline functions. SPARC version. - Copyright (C) 1996-2003, 2004, 2005, 2006, 2007, 2010 + Copyright (C) 1996-2003, 2004, 2005, 2006, 2007, 2010, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/libc/sysdeps/sparc/sparc32/fpu/s_fabs.S b/libc/sysdeps/sparc/sparc32/fpu/s_fabs.S new file mode 100644 index 000000000..425fab49a --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/fpu/s_fabs.S @@ -0,0 +1,29 @@ +/* Float absolute value, sparc32 version. + Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include + +ENTRY (__fabs) + st %o0, [%sp+72] + st %o1, [%sp+76] + ldd [%sp+72], %f0 + retl + fabss %f0, %f0 +END (__fabs) +weak_alias (__fabs, fabs) diff --git a/libc/sysdeps/sparc/sparc32/fpu/s_fabs.c b/libc/sysdeps/sparc/sparc32/fpu/s_fabs.c deleted file mode 100644 index b883e6e86..000000000 --- a/libc/sysdeps/sparc/sparc32/fpu/s_fabs.c +++ /dev/null @@ -1,11 +0,0 @@ -#include -#include - -double __fabs (double x) -{ - return __builtin_fabs (x); -} -weak_alias (__fabs, fabs) -#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) -compat_symbol (libm, __fabs, fabsl, GLIBC_2_0); -#endif diff --git a/libc/sysdeps/sparc/sparc32/fpu/s_fabsf.S b/libc/sysdeps/sparc/sparc32/fpu/s_fabsf.S index e1487247d..b233c62b4 100644 --- a/libc/sysdeps/sparc/sparc32/fpu/s_fabsf.S +++ b/libc/sysdeps/sparc/sparc32/fpu/s_fabsf.S @@ -21,8 +21,8 @@ #include ENTRY (__fabsf) - st %o0, [%sp+64] - ld [%sp+64], %f0 + st %o0, [%sp+68] + ld [%sp+68], %f0 retl fabss %f0, %f0 END (__fabsf) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_fabs.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_fabs.S new file mode 100644 index 000000000..547840ee5 --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_fabs.S @@ -0,0 +1,29 @@ +/* Float absolute value, sparc32+v9 version. + Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include + +ENTRY (__fabs) + st %o0, [%sp+72] + st %o1, [%sp+76] + ldd [%sp+72], %f0 + retl + fabsd %f0, %f0 +END (__fabs) +weak_alias (__fabs, fabs) diff --git a/libc/sysdeps/sparc/sparc64/dl-machine.h b/libc/sysdeps/sparc/sparc64/dl-machine.h index aaa22d654..3f71a6677 100644 --- a/libc/sysdeps/sparc/sparc64/dl-machine.h +++ b/libc/sysdeps/sparc/sparc64/dl-machine.h @@ -264,9 +264,9 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) #define elf_machine_relplt elf_machine_rela /* Undo the sub %sp, 6*8, %sp; add %sp, STACK_BIAS + 22*8, %o0 below - to get at the value we want in __libc_stack_end. */ + (but w/o STACK_BIAS) to get at the value we want in __libc_stack_end. 
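The new sparc32 s_fabs.S above spills the double argument, which the 32-bit ABI passes in %o0/%o1, to the stack, reloads it into %f0/%f1, and clears the sign with a single fabss on %f0 (the sparcv9 variant uses fabsd); that is sufficient because the IEEE sign bit of a double lives in the most significant word. The same idea in C (sketch only; assumes the big-endian 32-bit word order of sparc32):

#include <stdint.h>

/* fabs by clearing the sign bit of the high word of the double.  */
static double
fabs_by_bit (double x)
{
  union { double d; uint32_t w[2]; } u = { .d = x };
  u.w[0] &= 0x7fffffff;   /* big-endian: w[0] holds sign and exponent */
  return u.d;
}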
*/ #define DL_STACK_END(cookie) \ - ((void *) (((long) (cookie)) - (22 - 6) * 8 - STACK_BIAS)) + ((void *) (((long) (cookie)) - (22 - 6) * 8)) /* Initial entry point code for the dynamic linker. The C function `_dl_start' is the real entry point; diff --git a/libc/sysdeps/sparc/sparc64/jmpbuf-unwind.h b/libc/sysdeps/sparc/sparc64/jmpbuf-unwind.h index f7eed15ea..f19b6498f 100644 --- a/libc/sysdeps/sparc/sparc64/jmpbuf-unwind.h +++ b/libc/sysdeps/sparc/sparc64/jmpbuf-unwind.h @@ -24,14 +24,14 @@ /* Test if longjmp to JMPBUF would unwind the frame containing a local variable at ADDRESS. */ #define _JMPBUF_UNWINDS(jmpbuf, address, demangle) \ - ((unsigned long int) (address) < (jmpbuf)->uc_mcontext.mc_fp + 2047) + ((unsigned long int) (address) < (jmpbuf)->uc_mcontext.mc_gregs[MC_O6] + 2047) #define _JMPBUF_CFA_UNWINDS_ADJ(_jmpbuf, _context, _adj) \ _JMPBUF_UNWINDS_ADJ (_jmpbuf, (void *) _Unwind_GetCFA (_context), _adj) #define _JMPBUF_UNWINDS_ADJ(_jmpbuf, _address, _adj) \ ((uintptr_t) (_address) - (_adj) \ - < (uintptr_t) (_jmpbuf)[0].uc_mcontext.mc_fp + 2047 - (_adj)) + < (uintptr_t) (_jmpbuf)[0].uc_mcontext.mc_gregs[MC_O6] + 2047 - (_adj)) /* We use the normal lobngjmp for unwinding. */ #define __libc_unwind_longjmp(buf, val) __libc_longjmp (buf, val) diff --git a/libc/sysdeps/sparc/sparc64/multiarch/memcpy.S b/libc/sysdeps/sparc/sparc64/multiarch/memcpy.S index c12dc3bbe..c022a4052 100644 --- a/libc/sysdeps/sparc/sparc64/multiarch/memcpy.S +++ b/libc/sysdeps/sparc/sparc64/multiarch/memcpy.S @@ -1,5 +1,5 @@ /* Multiple versions of memcpy - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. Contributed by David S. Miller (davem@davemloft.net) This file is part of the GNU C Library. @@ -33,9 +33,9 @@ ENTRY(memcpy) 1: add %o7, %o3, %o3 mov %o5, %o7 # endif - andcc %o0, 0x80, %g0 ! HWCAP_SPARC_N2 + andcc %o0, HWCAP_SPARC_N2, %g0 be 1f - andcc %o0, 0x40, %g0 ! HWCAP_SPARC_BLKINIT + andcc %o0, HWCAP_SPARC_BLKINIT, %g0 # ifdef SHARED sethi %gdop_hix22(__memcpy_niagara2), %o1 xor %o1, %gdop_lox10(__memcpy_niagara2), %o1 @@ -45,7 +45,7 @@ ENTRY(memcpy) ba 10f nop 1: be 1f - andcc %o0, 0x20, %g0 ! HWCAP_SPARC_ULTRA3 + andcc %o0, HWCAP_SPARC_ULTRA3, %g0 # ifdef SHARED sethi %gdop_hix22(__memcpy_niagara1), %o1 xor %o1, %gdop_lox10(__memcpy_niagara1), %o1 diff --git a/libc/sysdeps/sparc/sparc64/multiarch/memset.S b/libc/sysdeps/sparc/sparc64/multiarch/memset.S index 2e2744874..e2674d7ad 100644 --- a/libc/sysdeps/sparc/sparc64/multiarch/memset.S +++ b/libc/sysdeps/sparc/sparc64/multiarch/memset.S @@ -1,5 +1,5 @@ /* Multiple versions of memset and bzero - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. Contributed by David S. Miller (davem@davemloft.net) This file is part of the GNU C Library. @@ -33,7 +33,7 @@ ENTRY(memset) 1: add %o7, %o3, %o3 mov %o5, %o7 # endif - andcc %o0, 0x40, %g0 ! HWCAP_SPARC_BLKINIT + andcc %o0, HWCAP_SPARC_BLKINIT, %g0 be 9f nop # ifdef SHARED @@ -69,7 +69,7 @@ ENTRY(__bzero) 1: add %o7, %o3, %o3 mov %o5, %o7 # endif - andcc %o0, 0x40, %g0 ! HWCAP_SPARC_BLKINIT + andcc %o0, HWCAP_SPARC_BLKINIT, %g0 be 9f nop # ifdef SHARED diff --git a/libc/sysdeps/sparc/sparc64/strcmp.S b/libc/sysdeps/sparc/sparc64/strcmp.S index fade4c4cb..263bb40a2 100644 --- a/libc/sysdeps/sparc/sparc64/strcmp.S +++ b/libc/sysdeps/sparc/sparc64/strcmp.S @@ -1,9 +1,8 @@ /* Compare two strings for differences. For SPARC v9. - Copyright (C) 1997, 1999, 2003 Free Software Foundation, Inc. 
+ Copyright (C) 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Jan Vondrak and - Jakub Jelinek . + Contributed by David S. Miller The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -22,259 +21,182 @@ #include #include + #ifndef XCC .register %g2, #scratch .register %g3, #scratch .register %g6, #scratch #endif - /* Normally, this uses - ((xword - 0x0101010101010101) & 0x8080808080808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. +#define rSTR1 %o0 +#define rSTR2 %o1 +#define r0101 %o2 /* 0x0101010101010101 */ +#define r8080 %o3 /* 0x8080808080808080 */ +#define rSTRXOR %o4 +#define rWORD1 %o5 +#define rTMP1 %g1 +#define rTMP2 %g2 +#define rWORD2 %g3 +#define rSLL %g4 +#define rSRL %g5 +#define rBARREL %g6 + + /* There are two cases, either the two pointers are aligned + * identically or they are not. If they have the same + * alignment we can use the normal full speed loop. Otherwise + * we have to use the barrel-shifter version. 
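The comment deleted above documents the classic word-at-a-time terminator test, and the rewritten strcmp keeps its precise form: (x - 0x0101...01) & ~x & 0x8080...80 is non-zero exactly when some byte of x is zero, while dropping the ~x term is one instruction cheaper but can fire spuriously for bytes in the 0x81-0xff range. In C (illustrative only):

#include <stdint.h>

/* Non-zero iff some byte of X is 0x00; no false positives.  */
static inline uint64_t
has_zero_byte (uint64_t x)
{
  return (x - 0x0101010101010101ULL) & ~x & 0x8080808080808080ULL;
}

/* Cheaper variant: may also report bytes in the 0x81..0xff range.  */
static inline uint64_t
may_have_zero_byte (uint64_t x)
{
  return (x - 0x0101010101010101ULL) & 0x8080808080808080ULL;
}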
*/ .text - .align 32 + .align 32 ENTRY(strcmp) - sethi %hi(0x01010101), %g1 /* IEU0 Group */ - andcc %o0, 7, %g0 /* IEU1 */ - bne,pn %icc, 7f /* CTI */ - or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ - - andcc %o1, 7, %g3 /* IEU1 */ - bne,pn %icc, 9f /* CTI */ - sllx %g1, 32, %g2 /* IEU0 Group */ - ldx [%o0], %o2 /* Load */ - - or %g1, %g2, %g1 /* IEU0 Group */ -1: ldx [%o1], %o3 /* Load */ - sub %o1, %o0, %o1 /* IEU1 */ - sllx %g1, 7, %g2 /* IEU0 Group */ - -2: add %o0, 8, %o0 /* IEU1 */ - sub %o2, %g1, %g3 /* IEU0 Group */ - subcc %o2, %o3, %g0 /* IEU1 */ - bne,pn %xcc, 13f /* CTI */ - -#ifdef EIGHTBIT_NOT_RARE - andn %g3, %o2, %g4 /* IEU0 Group */ - ldxa [%o0] ASI_PNF, %o2 /* Load */ - andcc %g4, %g2, %g0 /* IEU1 Group */ -#else - ldxa [%o0] ASI_PNF, %o2 /* Load Group */ - andcc %g3, %g2, %g0 /* IEU1 */ -#endif - be,a,pt %xcc, 2b /* CTI */ - ldxa [%o1 + %o0] ASI_PNF, %o3 /* Load Group */ - - addcc %g3, %g1, %o4 /* IEU1 */ - srlx %g3, 32, %g3 /* IEU0 */ - andcc %g3, %g2, %g0 /* IEU1 Group */ - be,pt %xcc, 3f /* CTI */ - - srlx %o4, 56, %o5 /* IEU0 */ - andcc %o5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4f /* CTI */ - srlx %o4, 48, %o5 /* IEU0 */ - - andcc %o5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4f /* CTI */ - srlx %o4, 40, %o5 /* IEU0 */ - andcc %o5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 4f /* CTI */ - srlx %o4, 32, %o5 /* IEU0 */ - andcc %o5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4f /* CTI */ - -3: srlx %o4, 24, %o5 /* IEU0 */ - andcc %o5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4f /* CTI */ - srlx %o4, 16, %o5 /* IEU0 */ - - andcc %o5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4f /* CTI */ - srlx %o4, 8, %o5 /* IEU0 */ - andcc %o5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 4f /* CTI */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - bne,a,pn %icc, 2b /* CTI */ - ldxa [%o1 + %o0] ASI_PNF, %o3 /* Load */ - -4: retl /* CTI+IEU1 Group */ - clr %o0 /* IEU0 */ - - .align 32 -13: mov 0xff, %g6 /* IEU0 Group */ -#ifdef EIGHTBIT_NOT_RARE - andcc %g4, %g2, %g0 /* IEU1 */ -#else - andcc %g3, %g2, %g0 /* IEU1 */ -#endif - be,pt %xcc, 25f /* CTI */ - addcc %g3, %g1, %o4 /* IEU1 Group */ - - srlx %g3, 32, %g3 /* IEU0 */ - andcc %g3, %g2, %g0 /* IEU1 Group */ - be,pt %xcc, 23f /* CTI */ - sllx %g6, 56, %o5 /* IEU0 */ - - andcc %o4, %o5, %g0 /* IEU1 Group */ - be,pn %xcc, 24f /* CTI */ - sllx %g6, 48, %o5 /* IEU0 */ - andcc %o4, %o5, %g0 /* IEU1 Group */ - - be,pn %xcc, 24f /* CTI */ - sllx %g6, 40, %o5 /* IEU0 */ - andcc %o4, %o5, %g0 /* IEU1 Group */ - be,pn %xcc, 24f /* CTI */ - - sllx %g6, 32, %o5 /* IEU0 */ - andcc %o4, %o5, %g0 /* IEU1 Group */ - be,pn %xcc, 24f /* CTI */ -23: sllx %g6, 24, %o5 /* IEU0 */ - - andcc %o4, %o5, %g0 /* IEU1 Group */ - be,pn %icc, 24f /* CTI */ - sllx %g6, 16, %o5 /* IEU0 */ - andcc %o4, %o5, %g0 /* IEU1 Group */ - - be,pn %icc, 24f /* CTI */ - sllx %g6, 8, %o5 /* IEU0 */ - andcc %o4, %o5, %g0 /* IEU1 Group */ - be,pn %icc, 24f /* CTI */ - - mov %g6, %o5 /* IEU0 */ -25: cmp %o4, %o3 /* IEU1 Group */ -5: mov -1, %o0 /* IEU0 */ - retl /* CTI+IEU1 Group */ - - movgu %xcc, 1, %o0 /* Single Group */ - - .align 16 -24: sub %o5, 1, %g6 /* IEU0 Group */ - clr %o0 /* IEU1 */ - or %o5, %g6, %o5 /* IEU0 Group */ - andn %o4, %o5, %o4 /* IEU0 Group */ - - andn %o3, %o5, %o3 /* IEU1 */ - cmp %o4, %o3 /* IEU1 Group */ - movgu %xcc, 1, %o0 /* Single Group */ - retl /* CTI+IEU1 Group */ - - movlu %xcc, -1, %o0 /* Single Group */ -6: retl /* CTI+IEU1 Group */ - mov %o4, %o0 /* IEU0 */ - - .align 16 -7: ldub [%o0], %o2 /* Load */ - add %o0, 1, %o0 /* IEU1 */ - ldub [%o1], %o3 
/* Load Group */ - sllx %g1, 32, %g2 /* IEU0 */ - -8: add %o1, 1, %o1 /* IEU1 */ - subcc %o2, %o3, %o4 /* IEU1 Group */ - bne,pn %xcc, 6b /* CTI */ - lduba [%o0] ASI_PNF, %o2 /* Load */ - - brz,pn %o3, 4b /* CTI+IEU1 Group */ - lduba [%o1] ASI_PNF, %o3 /* Load */ - andcc %o0, 7, %g0 /* IEU1 Group */ - bne,a,pn %icc, 8b /* CTI */ - - add %o0, 1, %o0 /* IEU0 */ - or %g1, %g2, %g1 /* IEU0 Group */ - andcc %o1, 7, %g3 /* IEU1 */ - be,a,pn %icc, 1b /* CTI */ - - ldxa [%o0] ASI_PNF, %o2 /* Load Group */ -9: sllx %g3, 3, %g5 /* IEU0 */ - mov 64, %o5 /* IEU1 */ - sub %o1, %g3, %o1 /* IEU0 Group */ - - sub %o5, %g5, %o5 /* IEU1 */ - ldxa [%o1] ASI_PNF, %g6 /* Load Group */ - or %g1, %g2, %g1 /* IEU0 */ - sub %o1, %o0, %o1 /* IEU1 */ - - sllx %g1, 7, %g2 /* IEU0 Group */ - add %o1, 8, %o1 /* IEU1 */ - /* %g1 = 0101010101010101 - * %g2 = 8080808080800880 - * %g5 = number of bits to shift left - * %o5 = number of bits to shift right */ -10: sllx %g6, %g5, %o3 /* IEU0 Group */ - ldxa [%o1 + %o0] ASI_PNF, %g6 /* Load */ - -11: srlx %g6, %o5, %o4 /* IEU0 Group */ - ldxa [%o0] ASI_PNF, %o2 /* Load */ - or %o3, %o4, %o3 /* IEU1 */ - add %o0, 8, %o0 /* IEU0 Group */ - - subcc %o2, %o3, %g0 /* IEU1 */ -#ifdef EIGHTBIT_NOT_RARE - sub %o2, %g1, %g3 /* IEU0 Group */ - bne,pn %xcc, 13b /* CTI */ - andn %g3, %o2, %g4 /* IEU0 Group */ - - andcc %g4, %g2, %g0 /* IEU1 Group */ - be,pt %xcc, 10b /* CTI */ - srlx %g4, 32, %g4 /* IEU0 */ - andcc %g4, %g2, %g0 /* IEU1 Group */ -#else - bne,pn %xcc, 13b /* CTI */ - sub %o2, %g1, %g3 /* IEU0 Group */ - andcc %g3, %g2, %g0 /* IEU1 Group */ - - be,pt %xcc, 10b /* CTI */ - srlx %g3, 32, %g3 /* IEU0 */ - andcc %g3, %g2, %g0 /* IEU1 Group */ -#endif - be,pt %xcc, 12f /* CTI */ - - srlx %o2, 56, %g3 /* IEU0 */ - andcc %g3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - srlx %o2, 48, %g3 /* IEU0 */ - - andcc %g3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - srlx %o2, 40, %g3 /* IEU0 */ - andcc %g3, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 4b /* CTI */ - srlx %o2, 32, %g3 /* IEU0 */ - andcc %g3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - -12: srlx %o2, 24, %g3 /* IEU0 */ - andcc %g3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - srlx %o2, 16, %g3 /* IEU0 */ - - andcc %g3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - srlx %o2, 8, %g3 /* IEU0 */ - andcc %g3, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 4b /* CTI */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - sllx %g6, %g5, %o3 /* IEU0 */ - - ba,pt %xcc, 11b /* CTI Group */ - ldxa [%o1 + %o0] ASI_PNF, %g6 /* Load */ + or rSTR2, rSTR1, rTMP1 + sethi %hi(0x80808080), r8080 + + andcc rTMP1, 0x7, %g0 + bne,pn %icc, .Lmaybe_barrel_shift + or r8080, %lo(0x80808080), r8080 + ldx [rSTR1], rWORD1 + + sub rSTR2, rSTR1, rSTR2 + sllx r8080, 32, rTMP1 + + ldx [rSTR1 + rSTR2], rWORD2 + or r8080, rTMP1, r8080 + + ba,pt %xcc, .Laligned_loop_entry + srlx r8080, 7, r0101 + + .align 32 +.Laligned_loop_entry: +.Laligned_loop: + add rSTR1, 8, rSTR1 + + sub rWORD1, r0101, rTMP2 + xorcc rWORD1, rWORD2, rSTRXOR + bne,pn %xcc, .Lcommon_endstring + + andn r8080, rWORD1, rTMP1 + + ldxa [rSTR1] ASI_PNF, rWORD1 + andcc rTMP1, rTMP2, %g0 + be,a,pt %xcc, .Laligned_loop + + ldxa [rSTR1 + rSTR2] ASI_PNF, rWORD2 + +.Lcommon_equal: + retl + mov 0, %o0 + + /* All loops terminate here once they find an unequal word. + * If a zero byte appears in the word before the first unequal + * byte, we must report zero. 
Otherwise we report '1' or '-1' + * depending upon whether the first mis-matching byte is larger + * in the first string or the second, respectively. + * + * First we compute a 64-bit mask value that has "0x01" in + * each byte where a zero exists in rWORD1. rSTRXOR holds the + * value (rWORD1 ^ rWORD2). Therefore, if considered as an + * unsigned quantity, our "0x01" mask value is "greater than" + * rSTRXOR then a zero terminating byte comes first and + * therefore we report '0'. + * + * The formula for this mask is: + * + * mask_tmp1 = ~rWORD1 & 0x8080808080808080; + * mask_tmp2 = ((rWORD1 & 0x7f7f7f7f7f7f7f7f) + + * 0x7f7f7f7f7f7f7f7f); + * + * mask = ((mask_tmp1 & ~mask_tmp2) >> 7); + */ +.Lcommon_endstring: + andn rWORD1, r8080, rTMP2 + or r8080, 1, %o1 + + mov 1, %o0 + sub rTMP2, %o1, rTMP2 + + cmp rWORD1, rWORD2 + andn rTMP1, rTMP2, rTMP1 + + movleu %xcc, -1, %o0 + srlx rTMP1, 7, rTMP1 + + cmp rTMP1, rSTRXOR + retl + movgu %xcc, 0, %o0 + +.Lmaybe_barrel_shift: + sub rSTR2, rSTR1, rSTR2 + sllx r8080, 32, rTMP1 + + or r8080, rTMP1, r8080 + and rSTR1, 0x7, rTMP2 + + srlx r8080, 7, r0101 + andn rSTR1, 0x7, rSTR1 + + ldxa [rSTR1] ASI_PNF, rWORD1 + andcc rSTR2, 0x7, rSLL + sll rTMP2, 3, rSTRXOR + + bne,pn %icc, .Lneed_barrel_shift + mov -1, rTMP1 + ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL + + srlx rTMP1, rSTRXOR, rTMP2 + + orn rWORD1, rTMP2, rWORD1 + ba,pt %xcc, .Laligned_loop_entry + orn rBARREL, rTMP2, rWORD2 + +.Lneed_barrel_shift: + sllx rSLL, 3, rSLL + andn rSTR2, 0x7, rSTR2 + + ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL + mov 64, rTMP2 + sub rTMP2, rSLL, rSRL + + srlx rTMP1, rSTRXOR, rTMP1 + add rSTR2, 8, rSTR2 + + orn rWORD1, rTMP1, rWORD1 + sllx rBARREL, rSLL, rWORD2 + ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL + + add rSTR1, 8, rSTR1 + sub rWORD1, r0101, rTMP2 + + srlx rBARREL, rSRL, rSTRXOR + + or rWORD2, rSTRXOR, rWORD2 + + orn rWORD2, rTMP1, rWORD2 + ba,pt %xcc, .Lbarrel_shift_loop_entry + andn r8080, rWORD1, rTMP1 + +.Lbarrel_shift_loop: + sllx rBARREL, rSLL, rWORD2 + ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL + + add rSTR1, 8, rSTR1 + sub rWORD1, r0101, rTMP2 + + srlx rBARREL, rSRL, rSTRXOR + andn r8080, rWORD1, rTMP1 + + or rWORD2, rSTRXOR, rWORD2 + +.Lbarrel_shift_loop_entry: + xorcc rWORD1, rWORD2, rSTRXOR + bne,pn %xcc, .Lcommon_endstring + + andcc rTMP1, rTMP2, %g0 + be,a,pt %xcc, .Lbarrel_shift_loop + ldxa [rSTR1] ASI_PNF, rWORD1 + + retl + mov 0, %o0 END(strcmp) libc_hidden_builtin_def (strcmp) diff --git a/libc/sysdeps/sparc/sysdep.h b/libc/sysdeps/sparc/sysdep.h new file mode 100644 index 000000000..bba41416d --- /dev/null +++ b/libc/sysdeps/sparc/sysdep.h @@ -0,0 +1,45 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* Bits present in AT_HWCAP on SPARC. 
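The comment above spells out the formula the new .Lcommon_endstring code evaluates; translated to C, the decision taken once an unequal word pair (w1, w2) has been found looks like this (a sketch of the logic only, not part of the patch):

#include <stdint.h>

static int
endstring_result (uint64_t w1, uint64_t w2)
{
  uint64_t str_xor   = w1 ^ w2;                       /* non-zero here      */
  uint64_t mask_tmp1 = ~w1 & 0x8080808080808080ULL;
  uint64_t mask_tmp2 = (w1 & 0x7f7f7f7f7f7f7f7fULL) + 0x7f7f7f7f7f7f7f7fULL;
  uint64_t zero_mask = (mask_tmp1 & ~mask_tmp2) >> 7; /* 0x01 per NUL byte  */

  if (zero_mask > str_xor)      /* a terminating NUL precedes the first
                                   differing byte: report equality          */
    return 0;
  return w1 > w2 ? 1 : -1;      /* big-endian: word order is byte order     */
}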
*/ + +#define HWCAP_SPARC_FLUSH 0x00000001 +#define HWCAP_SPARC_STBAR 0x00000002 +#define HWCAP_SPARC_SWAP 0x00000004 +#define HWCAP_SPARC_MULDIV 0x00000008 +#define HWCAP_SPARC_V9 0x00000010 +#define HWCAP_SPARC_ULTRA3 0x00000020 +#define HWCAP_SPARC_BLKINIT 0x00000040 +#define HWCAP_SPARC_N2 0x00000080 +#define HWCAP_SPARC_MUL32 0x00000100 +#define HWCAP_SPARC_DIV32 0x00000200 +#define HWCAP_SPARC_FSMULD 0x00000400 +#define HWCAP_SPARC_V8PLUS 0x00000800 +#define HWCAP_SPARC_POPC 0x00001000 +#define HWCAP_SPARC_VIS 0x00002000 +#define HWCAP_SPARC_VIS2 0x00004000 +#define HWCAP_SPARC_ASI_BLK_INIT 0x00008000 +#define HWCAP_SPARC_FMAF 0x00010000 +#define HWCAP_SPARC_VIS3 0x00020000 +#define HWCAP_SPARC_HPC 0x00040000 +#define HWCAP_SPARC_RANDOM 0x00080000 +#define HWCAP_SPARC_TRANS 0x00100000 +#define HWCAP_SPARC_FJFMAU 0x00200000 +#define HWCAP_SPARC_IMA 0x00400000 +#define HWCAP_SPARC_ASI_CACHE_SPARING \ + 0x00800000 diff --git a/libc/sysdeps/unix/opendir.c b/libc/sysdeps/unix/opendir.c index c2d1ddaf8..58d31764d 100644 --- a/libc/sysdeps/unix/opendir.c +++ b/libc/sysdeps/unix/opendir.c @@ -17,6 +17,7 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ +#include #include #include #include @@ -76,9 +77,9 @@ tryopen_o_directory (void) #endif -/* Open a directory stream on NAME. */ DIR * -__opendir (const char *name) +internal_function +__opendirat (int dfd, const char *name) { struct stat64 statbuf; struct stat64 *statp = NULL; @@ -116,7 +117,13 @@ __opendir (const char *name) #ifdef O_CLOEXEC flags |= O_CLOEXEC; #endif - int fd = open_not_cancel_2 (name, flags); + int fd; +#ifdef IS_IN_rtld + assert (dfd == AT_FDCWD); + fd = open_not_cancel_2 (name, flags); +#else + fd = openat_not_cancel_3 (dfd, name, flags); +#endif if (__builtin_expect (fd, 0) < 0) return NULL; @@ -140,6 +147,14 @@ __opendir (const char *name) return __alloc_dir (fd, true, 0, statp); } + + +/* Open a directory stream on NAME. */ +DIR * +__opendir (const char *name) +{ + return __opendirat (AT_FDCWD, name); +} weak_alias (__opendir, opendir) diff --git a/libc/sysdeps/unix/sparc/sysdep.h b/libc/sysdeps/unix/sparc/sysdep.h index 24225d962..590b7abc4 100644 --- a/libc/sysdeps/unix/sparc/sysdep.h +++ b/libc/sysdeps/unix/sparc/sysdep.h @@ -1,4 +1,5 @@ -/* Copyright (C) 1993, 1994, 1995, 1997, 2003 Free Software Foundation, Inc. +/* Copyright (C) 1993, 1994, 1995, 1997, 2003, 2011 + Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,6 +18,7 @@ 02111-1307 USA. 
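With the AT_HWCAP bits now given names in the new sysdeps/sparc/sysdep.h, the multiarch memcpy/memset selectors above test capabilities symbolically (HWCAP_SPARC_N2, HWCAP_SPARC_BLKINIT, HWCAP_SPARC_ULTRA3) instead of hard-coded masks. The selection order they implement amounts to the following C sketch (the hwcap value is the AT_HWCAP word recorded at startup; __memcpy_generic is a hypothetical name for the final fallback, which is not visible in this hunk):

#include <stddef.h>

/* Mirrors the HWCAP_SPARC_* values defined above.  */
#define HWCAP_SPARC_ULTRA3  0x00000020
#define HWCAP_SPARC_BLKINIT 0x00000040
#define HWCAP_SPARC_N2      0x00000080

extern void *__memcpy_niagara2 (void *, const void *, size_t);
extern void *__memcpy_niagara1 (void *, const void *, size_t);
extern void *__memcpy_generic (void *, const void *, size_t);  /* hypothetical */

typedef void *(*memcpy_fn) (void *, const void *, size_t);

static memcpy_fn
select_memcpy (unsigned long hwcap)
{
  if (hwcap & HWCAP_SPARC_N2)
    return __memcpy_niagara2;        /* Niagara-2 block-init copy */
  if (hwcap & HWCAP_SPARC_BLKINIT)
    return __memcpy_niagara1;        /* Niagara-1 block-init copy */
  /* The real selector continues with HWCAP_SPARC_ULTRA3 and then a
     plain fallback, following the same pattern.  */
  return __memcpy_generic;
}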
*/ #include +#include #ifdef __ASSEMBLER__ @@ -27,6 +29,26 @@ #define syscall_error C_SYMBOL_NAME(__syscall_error) #endif +#ifdef PIC +#define SETUP_PIC_REG(reg, tmp) \ + .ifndef __sparc_get_pc_thunk.reg; \ + .section .text.__sparc_get_pc_thunk.reg,"axG",@progbits,__sparc_get_pc_thunk.reg,comdat; \ + .align 32; \ + .weak __sparc_get_pc_thunk.reg; \ + .hidden __sparc_get_pc_thunk.reg; \ + .type __sparc_get_pc_thunk.reg, #function; \ +__sparc_get_pc_thunk.reg: \ + jmp %o7 + 8; \ + add %o7, %reg, %##reg; \ + .previous; \ + .endif; \ + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %##reg; \ + mov %o7, %##tmp; \ + call __sparc_get_pc_thunk.reg; \ + or %##reg, %lo(_GLOBAL_OFFSET_TABLE_+4), %##reg; \ + mov %##tmp, %o7; +#endif + #ifdef HAVE_ELF #define ENTRY(name) \ .global C_SYMBOL_NAME(name); \ diff --git a/libc/sysdeps/unix/sysv/linux/Makefile b/libc/sysdeps/unix/sysv/linux/Makefile index f0a7c6f98..254d809e8 100644 --- a/libc/sysdeps/unix/sysv/linux/Makefile +++ b/libc/sysdeps/unix/sysv/linux/Makefile @@ -23,7 +23,7 @@ sysdep_routines += sysctl clone llseek umount umount2 readahead \ CFLAGS-gethostid.c = -fexceptions -CFLAGS-tst-writev.c += -DARTIFICIAL_LIMIT=0x7ffff000 +CFLAGS-tst-writev.c += "-DARTIFICIAL_LIMIT=0x80000000-__getpagesize()" sysdep_headers += sys/mount.h sys/acct.h sys/sysctl.h \ sys/klog.h sys/kdaemon.h \ diff --git a/libc/sysdeps/unix/sysv/linux/bits/socket.h b/libc/sysdeps/unix/sysv/linux/bits/socket.h index 96d9fed03..36757d64d 100644 --- a/libc/sysdeps/unix/sysv/linux/bits/socket.h +++ b/libc/sysdeps/unix/sysv/linux/bits/socket.h @@ -111,7 +111,8 @@ enum __socket_type #define PF_IEEE802154 36 /* IEEE 802.15.4 sockets. */ #define PF_CAIF 37 /* CAIF sockets. */ #define PF_ALG 38 /* Algorithm sockets. */ -#define PF_MAX 39 /* For now.. */ +#define PF_NFC 39 /* NFC sockets. */ +#define PF_MAX 40 /* For now.. */ /* Address families. */ #define AF_UNSPEC PF_UNSPEC @@ -154,6 +155,7 @@ enum __socket_type #define AF_IEEE802154 PF_IEEE802154 #define AF_CAIF PF_CAIF #define AF_ALG PF_ALG +#define AF_NFC PF_NFC #define AF_MAX PF_MAX /* Socket level values. Others are defined in the appropriate headers. diff --git a/libc/sysdeps/unix/sysv/linux/i386/scandir64.c b/libc/sysdeps/unix/sysv/linux/i386/scandir64.c index 690be813b..3953f464b 100644 --- a/libc/sysdeps/unix/sysv/linux/i386/scandir64.c +++ b/libc/sysdeps/unix/sysv/linux/i386/scandir64.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2000, 2004 Free Software Foundation, Inc. +/* Copyright (C) 2000, 2004, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -19,8 +19,10 @@ #include #define SCANDIR __scandir64 +#define SCANDIRAT scandirat64 #define READDIR __readdir64 #define DIRENT_TYPE struct dirent64 +#define SKIP_SCANDIR_CANCEL 1 #include @@ -33,15 +35,107 @@ versioned_symbol (libc, __scandir64, scandir64, GLIBC_2_2); #if SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2) +# include +# include +# include "olddirent.h" -#include +int +__old_scandir64 (dir, namelist, select, cmp) + const char *dir; + struct __old_dirent64 ***namelist; + int (*select) (const struct __old_dirent64 *); + int (*cmp) (const struct __old_dirent64 **, + const struct __old_dirent64 **); +{ + DIR *dp = __opendir (dir); + struct __old_dirent64 **v = NULL; + size_t vsize = 0; + struct scandir_cancel_struct c; + struct __old_dirent64 *d; + int save; -#define SCANDIR attribute_compat_text_section __old_scandir64 -#define READDIR __old_readdir64 -#define DIRENT_TYPE struct __old_dirent64 + if (dp == NULL) + return -1; -#include + save = errno; + __set_errno (0); + + c.dp = dp; + c.v = NULL; + c.cnt = 0; + __libc_cleanup_push (__scandir_cancel_handler, &c); + + while ((d = __old_readdir64 (dp)) != NULL) + { + int use_it = select == NULL; + + if (! use_it) + { + use_it = select (d); + /* The select function might have changed errno. It was + zero before and it need to be again to make the latter + tests work. */ + __set_errno (0); + } + + if (use_it) + { + struct __old_dirent64 *vnew; + size_t dsize; + + /* Ignore errors from select or readdir */ + __set_errno (0); + + if (__builtin_expect (c.cnt == vsize, 0)) + { + struct __old_dirent64 **new; + if (vsize == 0) + vsize = 10; + else + vsize *= 2; + new = (struct __old_dirent64 **) realloc (v, + vsize * sizeof (*v)); + if (new == NULL) + break; + v = new; + c.v = (void *) v; + } + + dsize = &d->d_name[_D_ALLOC_NAMLEN (d)] - (char *) d; + vnew = (struct __old_dirent64 *) malloc (dsize); + if (vnew == NULL) + break; + + v[c.cnt++] = (struct __old_dirent64 *) memcpy (vnew, d, dsize); + } + } + + if (__builtin_expect (errno, 0) != 0) + { + save = errno; + + while (c.cnt > 0) + free (v[--c.cnt]); + free (v); + c.cnt = -1; + } + else + { + /* Sort the list if we have a comparison function to sort with. */ + if (cmp != NULL) + qsort (v, c.cnt, sizeof (*v), + (int (*) (const void *, const void *)) cmp); + + *namelist = v; + } + + __libc_cleanup_pop (0); + + (void) __closedir (dp); + __set_errno (save); + return c.cnt; +} compat_symbol (libc, __old_scandir64, scandir64, GLIBC_2_1); #endif diff --git a/libc/sysdeps/unix/sysv/linux/ia64/sys/ptrace.h b/libc/sysdeps/unix/sysv/linux/ia64/sys/ptrace.h index c824be2da..82d8843ce 100644 --- a/libc/sysdeps/unix/sysv/linux/ia64/sys/ptrace.h +++ b/libc/sysdeps/unix/sysv/linux/ia64/sys/ptrace.h @@ -1,5 +1,5 @@ /* `ptrace' debugger support interface. Linux/ia64 version. - Copyright (C) 2001, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 2001, 2006, 2007, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -108,8 +108,35 @@ enum __ptrace_request #define PT_GETSIGINFO PTRACE_GETSIGINFO /* Set new siginfo for process. */ - PTRACE_SETSIGINFO = 0x4203 + PTRACE_SETSIGINFO = 0x4203, #define PT_SETSIGINFO PTRACE_SETSIGINFO + + /* Get register content. */ + PTRACE_GETREGSET = 0x4204, +#define PTRACE_GETREGSET PTRACE_GETREGSET + + /* Set register content. 
*/ + PTRACE_SETREGSET = 0x4205, +#define PTRACE_SETREGSET PTRACE_SETREGSET + + /* Like PTRACE_ATTACH, but do not force tracee to trap and do not affect + signal or group stop state. */ + PTRACE_SEIZE = 0x4206, +#define PTRACE_SEIZE PTRACE_SEIZE + + /* Trap seized tracee. */ + PTRACE_INTERRUPT = 0x4207, +#define PTRACE_INTERRUPT PTRACE_INTERRUPT + + /* Wait for next group event. */ + PTRACE_LISTEN = 0x4208 +}; + + +/* Flag for PTRACE_LISTEN. */ +enum __ptrace_flags +{ + PTRACE_SEIZE_DEVEL = 0x80000000 }; /* pt_all_user_regs is used for PTRACE_GETREGS/PTRACE_SETREGS. */ diff --git a/libc/sysdeps/unix/sysv/linux/kernel-features.h b/libc/sysdeps/unix/sysv/linux/kernel-features.h index d91f581a9..58f833e96 100644 --- a/libc/sysdeps/unix/sysv/linux/kernel-features.h +++ b/libc/sysdeps/unix/sysv/linux/kernel-features.h @@ -546,3 +546,8 @@ #if __LINUX_KERNEL_VERSION >= 0x020627 # define __ASSUME_SENDMMSG 1 #endif + +/* getcpu is a syscall for x86-64 since 3.1. */ +#if defined __x86_64__ && __LINUX_KERNEL_VERSION >= 0x030100 +# define __ASSUME_GETCPU_SYSCALL 1 +#endif diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/scandir64.c b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/scandir64.c new file mode 100644 index 000000000..506fd8877 --- /dev/null +++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/scandir64.c @@ -0,0 +1 @@ +#include diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/scandir64.c b/libc/sysdeps/unix/sysv/linux/powerpc/scandir64.c deleted file mode 100644 index 506fd8877..000000000 --- a/libc/sysdeps/unix/sysv/linux/powerpc/scandir64.c +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h b/libc/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h index 23e75fbcf..da5803505 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h +++ b/libc/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h @@ -1,5 +1,5 @@ /* `ptrace' debugger support interface. Linux version. - Copyright (C) 2001, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 2001, 2006, 2007, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -95,8 +95,35 @@ enum __ptrace_request #define PT_GETSIGINFO PTRACE_GETSIGINFO /* Set new siginfo for process. */ - PTRACE_SETSIGINFO = 0x4203 + PTRACE_SETSIGINFO = 0x4203, #define PT_SETSIGINFO PTRACE_SETSIGINFO + + /* Get register content. */ + PTRACE_GETREGSET = 0x4204, +#define PTRACE_GETREGSET PTRACE_GETREGSET + + /* Set register content. */ + PTRACE_SETREGSET = 0x4205, +#define PTRACE_SETREGSET PTRACE_SETREGSET + + /* Like PTRACE_ATTACH, but do not force tracee to trap and do not affect + signal or group stop state. */ + PTRACE_SEIZE = 0x4206, +#define PTRACE_SEIZE PTRACE_SEIZE + + /* Trap seized tracee. */ + PTRACE_INTERRUPT = 0x4207, +#define PTRACE_INTERRUPT PTRACE_INTERRUPT + + /* Wait for next group event. */ + PTRACE_LISTEN = 0x4208 +}; + + +/* Flag for PTRACE_LISTEN. */ +enum __ptrace_flags +{ + PTRACE_SEIZE_DEVEL = 0x80000000 }; /* Options set using PTRACE_SETOPTIONS. */ diff --git a/libc/sysdeps/unix/sysv/linux/s390/sys/ptrace.h b/libc/sysdeps/unix/sysv/linux/s390/sys/ptrace.h index ac186387f..bed48af09 100644 --- a/libc/sysdeps/unix/sysv/linux/s390/sys/ptrace.h +++ b/libc/sysdeps/unix/sysv/linux/s390/sys/ptrace.h @@ -1,5 +1,5 @@ /* `ptrace' debugger support interface. Linux version. - Copyright (C) 2000, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 2000, 2006, 2007, 2011 Free Software Foundation, Inc. 
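The ptrace.h hunks above (and the matching powerpc, s390, sparc and generic copies later in this change) expose PTRACE_GETREGSET/SETREGSET and the PTRACE_SEIZE/INTERRUPT/LISTEN group introduced with Linux 3.1. A rough sketch of how a tracer might use the seize/interrupt pair; the pid and the error handling are illustrative, and the PTRACE_SEIZE_DEVEL flag in the data argument reflects the experimental status of the interface in kernels contemporary with this change (an assumption of the sketch, later kernels dropped the flag):

#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

/* Attach to PID without stopping it, then request a trap stop. */
static int
seize_and_interrupt (pid_t pid)
{
  if (ptrace (PTRACE_SEIZE, pid, NULL,
              (void *) (long) PTRACE_SEIZE_DEVEL) != 0)
    return -1;
  if (ptrace (PTRACE_INTERRUPT, pid, NULL, NULL) != 0)
    return -1;

  int status;
  if (waitpid (pid, &status, 0) < 0)   /* tracee reports the trap stop */
    return -1;
  return 0;
}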
Contributed by Denis Joseph Barrow (djbarrow@de.ibm.com). This file is part of the GNU C Library. @@ -134,8 +134,35 @@ enum __ptrace_request #define PT_GETSIGINFO PTRACE_GETSIGINFO /* Set new siginfo for process. */ - PTRACE_SETSIGINFO = 0x4203 + PTRACE_SETSIGINFO = 0x4203, #define PT_SETSIGINFO PTRACE_SETSIGINFO + + /* Get register content. */ + PTRACE_GETREGSET = 0x4204, +#define PTRACE_GETREGSET PTRACE_GETREGSET + + /* Set register content. */ + PTRACE_SETREGSET = 0x4205, +#define PTRACE_SETREGSET PTRACE_SETREGSET + + /* Like PTRACE_ATTACH, but do not force tracee to trap and do not affect + signal or group stop state. */ + PTRACE_SEIZE = 0x4206, +#define PTRACE_SEIZE PTRACE_SEIZE + + /* Trap seized tracee. */ + PTRACE_INTERRUPT = 0x4207, +#define PTRACE_INTERRUPT PTRACE_INTERRUPT + + /* Wait for next group event. */ + PTRACE_LISTEN = 0x4208 +}; + + +/* Flag for PTRACE_LISTEN. */ +enum __ptrace_flags +{ + PTRACE_SEIZE_DEVEL = 0x80000000 }; /* Options set using PTRACE_SETOPTIONS. */ diff --git a/libc/sysdeps/unix/sysv/linux/sparc/bits/resource.h b/libc/sysdeps/unix/sysv/linux/sparc/bits/resource.h index 04d33e4aa..5c00b8fbb 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/bits/resource.h +++ b/libc/sysdeps/unix/sysv/linux/sparc/bits/resource.h @@ -130,11 +130,11 @@ enum __rlimit_resource #ifndef __USE_FILE_OFFSET64 # define RLIM_INFINITY ((long int)(~0UL >> 1)) #else -# define RLIM_INFINITY 0x7fffffffffffffffLL +# define RLIM_INFINITY 0xffffffffffffffffLL #endif #ifdef __USE_LARGEFILE64 -# define RLIM64_INFINITY 0x7fffffffffffffffLL +# define RLIM64_INFINITY 0xffffffffffffffffLL #endif #endif diff --git a/libc/sysdeps/unix/sysv/linux/sparc/bits/socket.h b/libc/sysdeps/unix/sysv/linux/sparc/bits/socket.h index f9538747b..5f85d2170 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/bits/socket.h +++ b/libc/sysdeps/unix/sysv/linux/sparc/bits/socket.h @@ -111,7 +111,8 @@ enum __socket_type #define PF_IEEE802154 36 /* IEEE 802.15.4 sockets. */ #define PF_CAIF 37 /* CAIF sockets. */ #define PF_ALG 38 /* Algorithm sockets. */ -#define PF_MAX 39 /* For now.. */ +#define PF_NFC 39 /* NFC sockets. */ +#define PF_MAX 40 /* For now.. */ /* Address families. */ #define AF_UNSPEC PF_UNSPEC @@ -154,6 +155,7 @@ enum __socket_type #define AF_IEEE802154 PF_IEEE802154 #define AF_CAIF PF_CAIF #define AF_ALG PF_ALG +#define AF_NFC PF_NFC #define AF_MAX PF_MAX /* Socket level values. Others are defined in the appropriate headers. diff --git a/libc/sysdeps/unix/sysv/linux/sparc/sparc32/____longjmp_chk.S b/libc/sysdeps/unix/sysv/linux/sparc/sparc32/____longjmp_chk.S index 0ecd7ddf8..bbb9eaa73 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/sparc32/____longjmp_chk.S +++ b/libc/sysdeps/unix/sysv/linux/sparc/sparc32/____longjmp_chk.S @@ -41,10 +41,14 @@ ENTRY (____longjmp_chk) nop save %sp, -80, %sp + cfi_remember_state + cfi_def_cfa_register(%fp) + cfi_window_save + cfi_register(%o7, %i7) clr %o0 add %sp, 64, %o1 - set __NR_sigaltstack, %g1 + LOADSYSCALL(sigaltstack) ta 0x10 bcs .Lok ld [%sp + 64 + 4], %o2 @@ -54,8 +58,8 @@ ENTRY (____longjmp_chk) ld [%sp + 64 + 8], %o1 add %o0, %o1, %o0 - sub %o0, %g3, %o0 - cmp %o1, %o0 + sub %o0, %g5, %o0 + cmp %o0, %o1 bgeu .Lok nop @@ -76,6 +80,7 @@ ENTRY (____longjmp_chk) .Lok: restore + cfi_restore_state .Lok_norestore: ld ENV(o0,JB_FP), %g3 /* Cache target FP in register %g3. 
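Both bits/socket.h copies above now define PF_NFC/AF_NFC (family 39), matching Linux 3.1. A minimal, heavily hedged sketch of opening such a socket follows; the protocol argument is a placeholder, since the real NFC protocol numbers live in the kernel's <linux/nfc.h>, which is not part of this change:

#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int
main (void)
{
  /* Protocol 0 is a placeholder; a real client would pass one of the
     NFC protocol constants from <linux/nfc.h>. */
  int fd = socket (AF_NFC, SOCK_RAW, 0);
  if (fd < 0)
    {
      /* Kernels without NFC support reject the family outright. */
      perror ("socket(AF_NFC)");
      return 1;
    }
  close (fd);
  return 0;
}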
*/ diff --git a/libc/sysdeps/unix/sysv/linux/sparc/sparc32/sparcv9/fpu/Implies b/libc/sysdeps/unix/sysv/linux/sparc/sparc32/sparcv9/fpu/Implies new file mode 100644 index 000000000..70568c5d1 --- /dev/null +++ b/libc/sysdeps/unix/sysv/linux/sparc/sparc32/sparcv9/fpu/Implies @@ -0,0 +1,3 @@ +# We must list this here to move it ahead of the ldbl-opt code. +sparc/sparc32/sparcv9/fpu +sparc/sparc32/fpu diff --git a/libc/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h b/libc/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h index 3cb0a48c9..8af045dc2 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h +++ b/libc/sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h @@ -57,20 +57,14 @@ C_LABEL(name) \ #define LOC(name) .L##name - /* If the offset to __syscall_error fits into a signed 22-bit - * immediate branch offset, the linker will relax the call into - * a normal branch. - */ #define PSEUDO(name, syscall_name, args) \ .text; \ - .globl __syscall_error; \ ENTRY(name); \ LOADSYSCALL(syscall_name); \ ta 0x10; \ bcc 1f; \ - mov %o7, %g1; \ - call __syscall_error; \ - mov %g1, %o7; \ + nop; \ + SYSCALL_ERROR_HANDLER \ 1: #define PSEUDO_NOERRNO(name, syscall_name, args)\ @@ -88,50 +82,70 @@ ENTRY(name); \ #define PSEUDO_END(name) \ END(name) -#else /* __ASSEMBLER__ */ - -#if defined SHARED && defined DO_VERSIONING && defined PIC \ - && !defined NO_HIDDEN && !defined NOT_IN_libc -# define CALL_ERRNO_LOCATION "call __GI___errno_location;" +#ifndef PIC +# define SYSCALL_ERROR_HANDLER \ + mov %o7, %g1; \ + call __syscall_error; \ + mov %g1, %o7; #else -# define CALL_ERRNO_LOCATION "call __errno_location;" -#endif +# if RTLD_PRIVATE_ERRNO +# define SYSCALL_ERROR_HANDLER \ +0: SETUP_PIC_REG(o2,g1) \ + sethi %hi(rtld_errno), %g1; \ + or %g1, %lo(rtld_errno), %g1; \ + ld [%o2 + %g1], %g1; \ + st %o0, [%g1]; \ + jmp %o7 + 8; \ + mov -1, %o0; +# elif defined _LIBC_REENTRANT + +# if USE___THREAD +# ifndef NOT_IN_libc +# define SYSCALL_ERROR_ERRNO __libc_errno +# else +# define SYSCALL_ERROR_ERRNO errno +# endif +# define SYSCALL_ERROR_HANDLER \ +0: SETUP_PIC_REG(o2,g1) \ + sethi %tie_hi22(SYSCALL_ERROR_ERRNO), %g1; \ + add %g1, %tie_lo10(SYSCALL_ERROR_ERRNO), %g1; \ + ld [%o2 + %g1], %g1, %tie_ld(SYSCALL_ERROR_ERRNO); \ + st %o0, [%g7 + %g1]; \ + jmp %o7 + 8; \ + mov -1, %o0; +# else +# define SYSCALL_ERROR_HANDLER \ +0: save %sp, -96, %sp; \ + cfi_def_cfa_register(%fp); \ + cfi_window_save; \ + cfi_register (%o7, %i7); \ + call __errno_location; \ + nop; \ + st %i0, [%o0]; \ + jmp %i7 + 8; \ + restore %g0, -1, %o0; +# endif +# else +# define SYSCALL_ERROR_HANDLER \ +0: SETUP_PIC_REG(o2,g1) \ + sethi %hi(errno), %g1; \ + or %g1, %lo(errno), %g1; \ + ld [%o2 + %g1], %g1; \ + st %o0, [%g1]; \ + jmp %o7 + 8; \ + mov -1, %o0; +# endif /* _LIBC_REENTRANT */ +#endif /* PIC */ + + +#else /* __ASSEMBLER__ */ #define __SYSCALL_STRING \ "ta 0x10;" \ - "bcs 2f;" \ - " nop;" \ - "1:" \ - ".subsection 2;" \ - "2:" \ - "save %%sp, -192, %%sp;" \ - CALL_ERRNO_LOCATION \ - " nop;" \ - "st %%i0,[%%o0];" \ - "ba 1b;" \ - " restore %%g0, -1, %%o0;" \ - ".previous;" - -#define __CLONE_SYSCALL_STRING \ - "ta 0x10;" \ - "bcs 2f;" \ - " sub %%o1, 1, %%o1;" \ - "and %%o0, %%o1, %%o0;" \ - "1:" \ - ".subsection 2;" \ - "2:" \ - "save %%sp, -192, %%sp;" \ - CALL_ERRNO_LOCATION \ - " nop;" \ - "st %%i0, [%%o0];" \ - "ba 1b;" \ - " restore %%g0, -1, %%o0;" \ - ".previous;" - -#define __INTERNAL_SYSCALL_STRING \ - "ta 0x10;" \ - "bcs,a 1f;" \ - " sub %%g0, %%o0, %%o0;" \ + "bcc 1f;" \ + " mov 0, %%g1;" \ + "sub %%g0, %%o0, %%o0;" \ + 
"mov 1, %%g1;" \ "1:" #define __SYSCALL_CLOBBERS \ diff --git a/libc/sysdeps/unix/sysv/linux/sparc/sparc64/____longjmp_chk.S b/libc/sysdeps/unix/sysv/linux/sparc/sparc64/____longjmp_chk.S index 9a4c8fe3b..836e62efe 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/sparc64/____longjmp_chk.S +++ b/libc/sysdeps/unix/sysv/linux/sparc/sparc64/____longjmp_chk.S @@ -40,8 +40,29 @@ ENTRY (____longjmp_chk) bleu,pt %xcc, .Lok nop - save %sp, -128, %sp + save %sp, -208, %sp + cfi_remember_state + cfi_def_cfa_register(%fp) + cfi_window_save + cfi_register(%o7, %i7) + add %fp, 2023, %o1 + clr %o0 + LOADSYSCALL(sigaltstack) + ta 0x6d + bcs,pn %xcc, .Lok2 + lduw [%fp + 2031], %l2 + andcc %l2, 0x1, %g0 + be,pn %xcc, .Lfail + ldx [%fp + 2023], %l0 + ldx [%fp + 2039], %l1 + sub %l0, STACK_BIAS, %l0 + add %l0, %l1, %l0 + sub %l0, %i2, %l0 + cmp %l0, %l1 + bgeu,pt %xcc, .Lok2 + nop +.Lfail: #ifdef PIC 1: call 2f sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)), %l7 @@ -56,7 +77,8 @@ ENTRY (____longjmp_chk) call HIDDEN_JUMPTARGET(__fortify_fail) nop - restore +.Lok2: restore + cfi_restore_state .Lok: /* Modify the context with the value we want to return. */ diff --git a/libc/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h b/libc/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h index 79fa13de7..bdd1d45bd 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h +++ b/libc/sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h @@ -21,7 +21,7 @@ #ifndef _LINUX_SPARC64_SYSDEP_H #define _LINUX_SPARC64_SYSDEP_H 1 -#include +#include #ifdef IS_IN_rtld # include /* Defines RTLD_PRIVATE_ERRNO. */ @@ -64,20 +64,14 @@ C_LABEL(name) \ cfi_endproc; \ .size name, . - name - /* If the offset to __syscall_error fits into a signed 22-bit - * immediate branch offset, the linker will relax the call into - * a normal branch. - */ #define PSEUDO(name, syscall_name, args) \ .text; \ - .globl __syscall_error; \ ENTRY(name); \ LOADSYSCALL(syscall_name); \ ta 0x6d; \ bcc,pt %xcc, 1f; \ - mov %o7, %g1; \ - call __syscall_error; \ - mov %g1, %o7; \ + nop; \ + SYSCALL_ERROR_HANDLER \ 1: #define PSEUDO_NOERRNO(name, syscall_name, args)\ @@ -95,51 +89,69 @@ ENTRY(name); \ #define PSEUDO_END(name) \ END(name) - -/* Careful here! This "ret" define can interfere; use jmpl if unsure. 
*/ -#define ret retl; nop -#define ret_NOERRNO retl; nop -#define ret_ERRVAL retl; nop -#define r0 %o0 -#define r1 %o1 -#define MOVE(x,y) mov x, y +#ifndef PIC +# define SYSCALL_ERROR_HANDLER \ + mov %o7, %g1; \ + call __syscall_error; \ + mov %g1, %o7; +#else +# if RTLD_PRIVATE_ERRNO +# define SYSCALL_ERROR_HANDLER \ +0: SETUP_PIC_REG(o2,g1) \ + sethi %hi(rtld_errno), %g1; \ + or %g1, %lo(rtld_errno), %g1; \ + ldx [%o2 + %g1], %g1; \ + st %o0, [%g1]; \ + jmp %o7 + 8; \ + mov -1, %o0; +# elif defined _LIBC_REENTRANT + +# if USE___THREAD +# ifndef NOT_IN_libc +# define SYSCALL_ERROR_ERRNO __libc_errno +# else +# define SYSCALL_ERROR_ERRNO errno +# endif +# define SYSCALL_ERROR_HANDLER \ +0: SETUP_PIC_REG(o2,g1) \ + sethi %tie_hi22(SYSCALL_ERROR_ERRNO), %g1; \ + add %g1, %tie_lo10(SYSCALL_ERROR_ERRNO), %g1; \ + ldx [%o2 + %g1], %g1, %tie_ldx(SYSCALL_ERROR_ERRNO);\ + st %o0, [%g7 + %g1]; \ + jmp %o7 + 8; \ + mov -1, %o0; +# else +# define SYSCALL_ERROR_HANDLER \ +0: save %sp, -176, %sp; \ + cfi_def_cfa_register(%fp); \ + cfi_window_save; \ + cfi_register (%o7, %i7); \ + call __errno_location; \ + nop; \ + st %i0, [%o0]; \ + jmp %i7 + 8; \ + restore %g0, -1, %o0; +# endif +# else +# define SYSCALL_ERROR_HANDLER \ +0: SETUP_PIC_REG(o2,g1) \ + sethi %hi(errno), %g1; \ + or %g1, %lo(errno), %g1; \ + ldx [%o2 + %g1], %g1; \ + st %o0, [%g1]; \ + jmp %o7 + 8; \ + mov -1, %o0; +# endif /* _LIBC_REENTRANT */ +#endif /* PIC */ #else /* __ASSEMBLER__ */ -#if defined SHARED && defined DO_VERSIONING && defined PIC \ - && !defined NO_HIDDEN && !defined NOT_IN_libc -# define CALL_ERRNO_LOCATION "call __GI___errno_location;" -#else -# define CALL_ERRNO_LOCATION "call __errno_location;" -#endif - #define __SYSCALL_STRING \ "ta 0x6d;" \ "bcc,pt %%xcc, 1f;" \ - " nop;" \ - "save %%sp, -192, %%sp;" \ - CALL_ERRNO_LOCATION \ - " nop;" \ - "st %%i0,[%%o0];" \ - "restore %%g0, -1, %%o0;" \ - "1:" - -#define __CLONE_SYSCALL_STRING \ - "ta 0x6d;" \ - "bcc,pt %%xcc, 1f;" \ - " sub %%o1, 1, %%o1;" \ - "save %%sp, -192, %%sp;" \ - CALL_ERRNO_LOCATION \ - " mov -1, %%i1;" \ - "st %%i0,[%%o0];" \ - "restore %%g0, -1, %%o0;" \ - "1:" \ - "and %%o0, %%o1, %%o0" - -#define __INTERNAL_SYSCALL_STRING \ - "ta 0x6d;" \ - "bcs,a,pt %%xcc, 1f;" \ - " sub %%g0, %%o0, %%o0;" \ + " mov 0, %%g1;" \ + "sub %%g0, %%o0, %%o0;" \ + "mov 1, %%g1;" \ "1:" #define __SYSCALL_CLOBBERS \ diff --git a/libc/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h b/libc/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h index a7b204b33..e25090ea8 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h +++ b/libc/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h @@ -1,5 +1,5 @@ /* `ptrace' debugger support interface. Linux/SPARC version. - Copyright (C) 1996, 1997, 1998, 1999, 2000, 2006, 2007 + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2006, 2007, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -179,8 +179,35 @@ enum __ptrace_request #define PT_GETSIGINFO PTRACE_GETSIGINFO /* Set new siginfo for process. */ - PTRACE_SETSIGINFO = 0x4203 + PTRACE_SETSIGINFO = 0x4203, #define PT_SETSIGINFO PTRACE_SETSIGINFO + + /* Get register content. */ + PTRACE_GETREGSET = 0x4204, +#define PTRACE_GETREGSET PTRACE_GETREGSET + + /* Set register content. */ + PTRACE_SETREGSET = 0x4205, +#define PTRACE_SETREGSET PTRACE_SETREGSET + + /* Like PTRACE_ATTACH, but do not force tracee to trap and do not affect + signal or group stop state. */ + PTRACE_SEIZE = 0x4206, +#define PTRACE_SEIZE PTRACE_SEIZE + + /* Trap seized tracee. 
*/ + PTRACE_INTERRUPT = 0x4207, +#define PTRACE_INTERRUPT PTRACE_INTERRUPT + + /* Wait for next group event. */ + PTRACE_LISTEN = 0x4208 +}; + + +/* Flag for PTRACE_LISTEN. */ +enum __ptrace_flags +{ + PTRACE_SEIZE_DEVEL = 0x80000000 }; /* Options set using PTRACE_SETOPTIONS. */ diff --git a/libc/sysdeps/unix/sysv/linux/sparc/sysdep.h b/libc/sysdeps/unix/sysv/linux/sparc/sysdep.h index 101638a53..f69e1a92f 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/sysdep.h +++ b/libc/sysdeps/unix/sysv/linux/sparc/sysdep.h @@ -21,101 +21,109 @@ #define _LINUX_SPARC_SYSDEP_H 1 #undef INLINE_SYSCALL -#define INLINE_SYSCALL(name, nr, args...) \ - inline_syscall##nr(__SYSCALL_STRING, __NR_##name, args) +#define INLINE_SYSCALL(name, nr, args...) \ +({ INTERNAL_SYSCALL_DECL(err); \ + unsigned int resultvar = INTERNAL_SYSCALL(name, err, nr, args); \ + if (__builtin_expect (INTERNAL_SYSCALL_ERROR_P (resultvar, err), 0)) \ + { \ + __set_errno (INTERNAL_SYSCALL_ERRNO (resultvar, err)); \ + resultvar = 0xffffffff; \ + } \ + (int) resultvar; \ +}) #undef INTERNAL_SYSCALL_DECL -#define INTERNAL_SYSCALL_DECL(err) do { } while (0) +#define INTERNAL_SYSCALL_DECL(err) \ + register long err __asm__("g1"); #undef INTERNAL_SYSCALL #define INTERNAL_SYSCALL(name, err, nr, args...) \ - inline_syscall##nr(__INTERNAL_SYSCALL_STRING, __NR_##name, args) + inline_syscall##nr(__SYSCALL_STRING, err, __NR_##name, args) #undef INTERNAL_SYSCALL_NCS #define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \ - inline_syscall##nr(__INTERNAL_SYSCALL_STRING, name, args) + inline_syscall##nr(__SYSCALL_STRING, err, name, args) #undef INTERNAL_SYSCALL_ERROR_P -#define INTERNAL_SYSCALL_ERROR_P(val, err) \ - ((unsigned long) (val) >= -515L) +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((err) != 0) #undef INTERNAL_SYSCALL_ERRNO #define INTERNAL_SYSCALL_ERRNO(val, err) (-(val)) -#define inline_syscall0(string,name,dummy...) \ +#define inline_syscall0(string,err,name,dummy...) 
\ ({ \ register long __o0 __asm__ ("o0"); \ - register long __g1 __asm__ ("g1") = name; \ - __asm __volatile (string : "=r" (__g1), "=r" (__o0) : \ - "0" (__g1) : \ + err = name; \ + __asm __volatile (string : "=r" (err), "=r" (__o0) : \ + "0" (err) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) -#define inline_syscall1(string,name,arg1) \ +#define inline_syscall1(string,err,name,arg1) \ ({ \ register long __o0 __asm__ ("o0") = (long)(arg1); \ - register long __g1 __asm__ ("g1") = name; \ - __asm __volatile (string : "=r" (__g1), "=r" (__o0) : \ - "0" (__g1), "1" (__o0) : \ + err = name; \ + __asm __volatile (string : "=r" (err), "=r" (__o0) : \ + "0" (err), "1" (__o0) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) -#define inline_syscall2(string,name,arg1,arg2) \ +#define inline_syscall2(string,err,name,arg1,arg2) \ ({ \ register long __o0 __asm__ ("o0") = (long)(arg1); \ register long __o1 __asm__ ("o1") = (long)(arg2); \ - register long __g1 __asm__ ("g1") = name; \ - __asm __volatile (string : "=r" (__g1), "=r" (__o0) : \ - "0" (__g1), "1" (__o0), "r" (__o1) : \ + err = name; \ + __asm __volatile (string : "=r" (err), "=r" (__o0) : \ + "0" (err), "1" (__o0), "r" (__o1) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) -#define inline_syscall3(string,name,arg1,arg2,arg3) \ +#define inline_syscall3(string,err,name,arg1,arg2,arg3) \ ({ \ register long __o0 __asm__ ("o0") = (long)(arg1); \ register long __o1 __asm__ ("o1") = (long)(arg2); \ register long __o2 __asm__ ("o2") = (long)(arg3); \ - register long __g1 __asm__ ("g1") = name; \ - __asm __volatile (string : "=r" (__g1), "=r" (__o0) : \ - "0" (__g1), "1" (__o0), "r" (__o1), \ + err = name; \ + __asm __volatile (string : "=r" (err), "=r" (__o0) : \ + "0" (err), "1" (__o0), "r" (__o1), \ "r" (__o2) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) -#define inline_syscall4(string,name,arg1,arg2,arg3,arg4) \ +#define inline_syscall4(string,err,name,arg1,arg2,arg3,arg4) \ ({ \ register long __o0 __asm__ ("o0") = (long)(arg1); \ register long __o1 __asm__ ("o1") = (long)(arg2); \ register long __o2 __asm__ ("o2") = (long)(arg3); \ register long __o3 __asm__ ("o3") = (long)(arg4); \ - register long __g1 __asm__ ("g1") = name; \ - __asm __volatile (string : "=r" (__g1), "=r" (__o0) : \ - "0" (__g1), "1" (__o0), "r" (__o1), \ + err = name; \ + __asm __volatile (string : "=r" (err), "=r" (__o0) : \ + "0" (err), "1" (__o0), "r" (__o1), \ "r" (__o2), "r" (__o3) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) -#define inline_syscall5(string,name,arg1,arg2,arg3,arg4,arg5) \ +#define inline_syscall5(string,err,name,arg1,arg2,arg3,arg4,arg5) \ ({ \ register long __o0 __asm__ ("o0") = (long)(arg1); \ register long __o1 __asm__ ("o1") = (long)(arg2); \ register long __o2 __asm__ ("o2") = (long)(arg3); \ register long __o3 __asm__ ("o3") = (long)(arg4); \ register long __o4 __asm__ ("o4") = (long)(arg5); \ - register long __g1 __asm__ ("g1") = name; \ - __asm __volatile (string : "=r" (__g1), "=r" (__o0) : \ - "0" (__g1), "1" (__o0), "r" (__o1), \ + err = name; \ + __asm __volatile (string : "=r" (err), "=r" (__o0) : \ + "0" (err), "1" (__o0), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ __o0; \ }) -#define inline_syscall6(string,name,arg1,arg2,arg3,arg4,arg5,arg6) \ +#define inline_syscall6(string,err,name,arg1,arg2,arg3,arg4,arg5,arg6) \ ({ \ register long __o0 __asm__ ("o0") = (long)(arg1); \ register long __o1 __asm__ ("o1") = (long)(arg2); \ @@ -123,9 +131,9 @@ register long __o3 __asm__ ("o3") = (long)(arg4); \ register long __o4 __asm__ ("o4") = (long)(arg5); \ 
register long __o5 __asm__ ("o5") = (long)(arg6); \ - register long __g1 __asm__ ("g1") = name; \ - __asm __volatile (string : "=r" (__g1), "=r" (__o0) : \ - "0" (__g1), "1" (__o0), "r" (__o1), \ + err = name; \ + __asm __volatile (string : "=r" (err), "=r" (__o0) : \ + "0" (err), "1" (__o0), "r" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4), \ "r" (__o5) : \ __SYSCALL_CLOBBERS); \ @@ -140,11 +148,20 @@ register long __o3 __asm__ ("o3") = (long)(arg4); \ register long __o4 __asm__ ("o4") = (long)(arg5); \ register long __g1 __asm__ ("g1") = __NR_clone; \ - __asm __volatile (__CLONE_SYSCALL_STRING : \ + __asm __volatile (__SYSCALL_STRING : \ "=r" (__g1), "=r" (__o0), "=r" (__o1) : \ "0" (__g1), "1" (__o0), "2" (__o1), \ "r" (__o2), "r" (__o3), "r" (__o4) : \ __SYSCALL_CLOBBERS); \ + if (__builtin_expect (INTERNAL_SYSCALL_ERROR_P (__o0, __g1), 0)) \ + { \ + __set_errno (INTERNAL_SYSCALL_ERRNO (__o0, __g1)); \ + __o0 = -1L; \ + } \ + else \ + { \ + __o0 &= (__o1 - 1); \ + } \ __o0; \ }) diff --git a/libc/sysdeps/unix/sysv/linux/sys/ptrace.h b/libc/sysdeps/unix/sysv/linux/sys/ptrace.h index 08658f976..910f2942e 100644 --- a/libc/sysdeps/unix/sysv/linux/sys/ptrace.h +++ b/libc/sysdeps/unix/sysv/linux/sys/ptrace.h @@ -1,5 +1,5 @@ /* `ptrace' debugger support interface. Linux version. - Copyright (C) 1996-1999,2000,2006,2007 Free Software Foundation, Inc. + Copyright (C) 1996-1999,2000,2006,2007,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -125,13 +125,40 @@ enum __ptrace_request #define PT_GETSIGINFO PTRACE_GETSIGINFO /* Set new siginfo for process. */ - PTRACE_SETSIGINFO = 0x4203 + PTRACE_SETSIGINFO = 0x4203, #define PT_SETSIGINFO PTRACE_SETSIGINFO + + /* Get register content. */ + PTRACE_GETREGSET = 0x4204, +#define PTRACE_GETREGSET PTRACE_GETREGSET + + /* Set register content. */ + PTRACE_SETREGSET = 0x4205, +#define PTRACE_SETREGSET PTRACE_SETREGSET + + /* Like PTRACE_ATTACH, but do not force tracee to trap and do not affect + signal or group stop state. */ + PTRACE_SEIZE = 0x4206, +#define PTRACE_SEIZE PTRACE_SEIZE + + /* Trap seized tracee. */ + PTRACE_INTERRUPT = 0x4207, +#define PTRACE_INTERRUPT PTRACE_INTERRUPT + + /* Wait for next group event. */ + PTRACE_LISTEN = 0x4208 }; +/* Flag for PTRACE_LISTEN. */ +enum __ptrace_flags +{ + PTRACE_SEIZE_DEVEL = 0x80000000 +}; + /* Options set using PTRACE_SETOPTIONS. */ -enum __ptrace_setoptions { +enum __ptrace_setoptions +{ PTRACE_O_TRACESYSGOOD = 0x00000001, PTRACE_O_TRACEFORK = 0x00000002, PTRACE_O_TRACEVFORK = 0x00000004, @@ -143,7 +170,8 @@ enum __ptrace_setoptions { }; /* Wait extended result codes for the above trace options. */ -enum __ptrace_eventcodes { +enum __ptrace_eventcodes +{ PTRACE_EVENT_FORK = 1, PTRACE_EVENT_VFORK = 2, PTRACE_EVENT_CLONE = 3, diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/bits/libc-vdso.h b/libc/sysdeps/unix/sysv/linux/x86_64/bits/libc-vdso.h index d7123c9bb..f9bf84e5b 100644 --- a/libc/sysdeps/unix/sysv/linux/x86_64/bits/libc-vdso.h +++ b/libc/sysdeps/unix/sysv/linux/x86_64/bits/libc-vdso.h @@ -1,5 +1,5 @@ /* Resolve function pointers to VDSO functions. - Copyright (C) 2005, 2007 Free Software Foundation, Inc. + Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. 
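The sparc sysdep.h rewrite above changes the syscall convention visible to C: the kernel leaves a positive error number in %o0 with the carry flag set, and the new __SYSCALL_STRING negates it and sets %g1 to 1, so INTERNAL_SYSCALL_ERROR_P only has to test the %g1 flag and INTERNAL_SYSCALL_ERRNO simply negates the value back. A hypothetical libc-internal call site, assuming glibc's internal <sysdep.h> and __set_errno, would look roughly like this (mirroring the body of the new INLINE_SYSCALL):

#include <errno.h>
#include <sysdep.h>

long
my_close (int fd)
{
  INTERNAL_SYSCALL_DECL (err);              /* binds err to %g1 */
  long result = INTERNAL_SYSCALL (close, err, 1, fd);
  if (INTERNAL_SYSCALL_ERROR_P (result, err))
    {
      __set_errno (INTERNAL_SYSCALL_ERRNO (result, err));
      return -1;
    }
  return result;
}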
The GNU C Library is free software; you can redistribute it and/or @@ -25,9 +25,6 @@ #ifdef SHARED -extern long int (*__vdso_gettimeofday) (struct timeval *, void *) - attribute_hidden; - extern long int (*__vdso_clock_gettime) (clockid_t, struct timespec *); #endif diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/bits/sem.h b/libc/sysdeps/unix/sysv/linux/x86_64/bits/sem.h index 9b1d993ee..7153e2143 100644 --- a/libc/sysdeps/unix/sysv/linux/x86_64/bits/sem.h +++ b/libc/sysdeps/unix/sysv/linux/x86_64/bits/sem.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2002 Free Software Foundation, Inc. +/* Copyright (C) 2002, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -40,9 +40,13 @@ struct semid_ds { struct ipc_perm sem_perm; /* operation permission struct */ __time_t sem_otime; /* last semop() time */ +#if __WORDSIZE == 32 unsigned long int __unused1; +#endif __time_t sem_ctime; /* last time changed by semctl() */ +#if __WORDSIZE == 32 unsigned long int __unused2; +#endif unsigned long int sem_nsems; /* number of semaphores in set */ unsigned long int __unused3; unsigned long int __unused4; diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/gettimeofday.S b/libc/sysdeps/unix/sysv/linux/x86_64/gettimeofday.S deleted file mode 100644 index f618e738b..000000000 --- a/libc/sysdeps/unix/sysv/linux/x86_64/gettimeofday.S +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (C) 2002, 2003, 2007 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include -#define _ERRNO_H 1 -#include - -/* For the calculation see asm/vsyscall.h. */ -#define VSYSCALL_ADDR_vgettimeofday 0xffffffffff600000 - - -ENTRY (__gettimeofday) - /* Align stack. */ - sub $0x8, %rsp - cfi_adjust_cfa_offset(8) -#ifdef SHARED - movq __vdso_gettimeofday(%rip), %rax - PTR_DEMANGLE (%rax) -#else - movq $VSYSCALL_ADDR_vgettimeofday, %rax -#endif - callq *%rax - /* Check error return. */ - cmpl $-4095, %eax - jae SYSCALL_ERROR_LABEL - -L(pseudo_end): - add $0x8, %rsp - cfi_adjust_cfa_offset(-8) - ret -PSEUDO_END(__gettimeofday) - -strong_alias (__gettimeofday, __gettimeofday_internal) -weak_alias (__gettimeofday, gettimeofday) diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/gettimeofday.c b/libc/sysdeps/unix/sysv/linux/x86_64/gettimeofday.c new file mode 100644 index 000000000..3aba81ceb --- /dev/null +++ b/libc/sysdeps/unix/sysv/linux/x86_64/gettimeofday.c @@ -0,0 +1,51 @@ +/* Copyright (C) 2002, 2003, 2007, 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include + + +#define VSYSCALL_ADDR_vgettimeofday 0xffffffffff600000ul + + +#ifdef SHARED +void *gettimeofday_ifunc (void) __asm__ ("__gettimeofday"); + +void * +gettimeofday_ifunc (void) +{ + PREPARE_VERSION (linux26, "LINUX_2.6", 61765110); + + /* If the vDSO is not available we fall back on the old vsyscall. */ + return (_dl_vdso_vsym ("gettimeofday", &linux26) + ?: (void *) VSYSCALL_ADDR_vgettimeofday); +} +__asm (".type __gettimeofday, %gnu_indirect_function"); +#else +# include +# include +# include + +int +__gettimeofday (struct timeval *tv, struct timezone *tz) +{ + return INLINE_SYSCALL (gettimeofday, 2, tv, tz); +} +#endif + +weak_alias (__gettimeofday, gettimeofday) +strong_alias (__gettimeofday, __gettimeofday_internal) diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/init-first.c b/libc/sysdeps/unix/sysv/linux/x86_64/init-first.c index e676f623e..25cf08bd7 100644 --- a/libc/sysdeps/unix/sysv/linux/x86_64/init-first.c +++ b/libc/sysdeps/unix/sysv/linux/x86_64/init-first.c @@ -20,15 +20,11 @@ # include # include -long int (*__vdso_gettimeofday) (struct timeval *, void *) attribute_hidden; - long int (*__vdso_clock_gettime) (clockid_t, struct timespec *) __attribute__ ((nocommon)); strong_alias (__vdso_clock_gettime, __GI___vdso_clock_gettime attribute_hidden) -long int (*__vdso_getcpu) (unsigned *, unsigned *, void *); - -long int (*__vdso_time) (time_t *) attribute_hidden; +long int (*__vdso_getcpu) (unsigned *, unsigned *, void *) attribute_hidden; static inline void @@ -36,15 +32,7 @@ _libc_vdso_platform_setup (void) { PREPARE_VERSION (linux26, "LINUX_2.6", 61765110); - void *p = _dl_vdso_vsym ("gettimeofday", &linux26); - /* If the vDSO is not available we fall back on the old vsyscall. */ -#define VSYSCALL_ADDR_vgettimeofday 0xffffffffff600000ul - if (p == NULL) - p = (void *) VSYSCALL_ADDR_vgettimeofday; - PTR_MANGLE (p); - __vdso_gettimeofday = p; - - p = _dl_vdso_vsym ("clock_gettime", &linux26); + void *p = _dl_vdso_vsym ("clock_gettime", &linux26); PTR_MANGLE (p); __GI___vdso_clock_gettime = p; @@ -55,14 +43,6 @@ _libc_vdso_platform_setup (void) p = (void *) VSYSCALL_ADDR_vgetcpu; PTR_MANGLE (p); __vdso_getcpu = p; - - p = _dl_vdso_vsym ("time", &linux26); - /* If the vDSO is not available we fall back on the old vsyscall. 
*/ -#define VSYSCALL_ADDR_vtime 0xffffffffff600400 - if (p == NULL) - p = (void *) VSYSCALL_ADDR_vtime; - PTR_MANGLE (p); - __vdso_time = p; } # define VDSO_SETUP _libc_vdso_platform_setup diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/sched_getcpu.S b/libc/sysdeps/unix/sysv/linux/x86_64/sched_getcpu.S index 8ec7d3fcd..246c95504 100644 --- a/libc/sysdeps/unix/sysv/linux/x86_64/sched_getcpu.S +++ b/libc/sysdeps/unix/sysv/linux/x86_64/sched_getcpu.S @@ -20,6 +20,7 @@ #include #define _ERRNO_H 1 #include +#include /* For the calculation see asm/vsyscall.h. */ #define VSYSCALL_ADDR_vgetcpu 0xffffffffff600800 @@ -38,10 +39,26 @@ ENTRY (sched_getcpu) #ifdef SHARED movq __vdso_getcpu(%rip), %rax PTR_DEMANGLE (%rax) + callq *%rax #else +# ifdef __NR_getcpu + movl $__NR_getcpu, %eax + syscall +# ifndef __ASSUME_GETCPU_SYSCALL + cmpq $-ENOSYS, %rax + jne 1f +# endif +# endif +# ifndef __ASSUME_GETCPU_SYSCALL movq $VSYSCALL_ADDR_vgetcpu, %rax -#endif callq *%rax +1: +# else +# ifndef __NR_getcpu +# error "cannot happen" +# endif +# endif +#endif cmpq $-4095, %rax jae SYSCALL_ERROR_LABEL diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/time.S b/libc/sysdeps/unix/sysv/linux/x86_64/time.S deleted file mode 100644 index 66d7498ce..000000000 --- a/libc/sysdeps/unix/sysv/linux/x86_64/time.S +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright (C) 2001,02, 2003, 2011 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include -#define _ERRNO_H 1 -#include - -/* For the calculation see asm/vsyscall.h. */ -#define VSYSCALL_ADDR_vtime 0xffffffffff600400 - - -/* Return the current time as a `time_t' and also put it in *T if T is - not NULL. Time is represented as seconds from Jan 1 00:00:00 1970. */ - -ENTRY (time) - /* Align stack. */ - sub $0x8, %rsp - cfi_adjust_cfa_offset(8) - -#ifdef SHARED - movq __vdso_time(%rip), %rax - PTR_DEMANGLE (%rax) -#else - movq $VSYSCALL_ADDR_vtime, %rax -#endif - callq *%rax - - add $0x8, %rsp - cfi_adjust_cfa_offset(-8) - ret -PSEUDO_END_NOERRNO(time) -libc_hidden_def (time) diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/time.c b/libc/sysdeps/unix/sysv/linux/x86_64/time.c new file mode 100644 index 000000000..c1c1a7526 --- /dev/null +++ b/libc/sysdeps/unix/sysv/linux/x86_64/time.c @@ -0,0 +1,49 @@ +/* Copyright (C) 2001,02, 2003, 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
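The gettimeofday.c added above and the time.c that follows replace the old assembly wrappers, which dispatched through a mangled function pointer set up in init-first.c, with STT_GNU_IFUNC resolvers: the symbol is resolved once, at relocation time, to either the vDSO entry or the legacy vsyscall address. Outside libc the same pattern is usually spelled with GCC's ifunc attribute (GNU toolchain on ELF targets); the sketch below uses hypothetical names and a stand-in probe where libc calls _dl_vdso_vsym:

#include <stdio.h>
#include <sys/time.h>

/* Fallback path, always available; stands in for the syscall route. */
static int
gtod_syscall (struct timeval *tv, struct timezone *tz)
{
  return gettimeofday (tv, tz);
}

/* Hypothetical probe; pretend the vDSO symbol is absent. */
static void *
probe_vdso_gettimeofday (void)
{
  return NULL;
}

/* Resolver: runs once when the symbol's relocation is processed. */
static void *
resolve_gtod (void)
{
  void *vdso = probe_vdso_gettimeofday ();
  return vdso != NULL ? vdso : (void *) gtod_syscall;
}

/* Callers bind to whatever the resolver returned. */
int my_gettimeofday (struct timeval *tv, struct timezone *tz)
     __attribute__ ((ifunc ("resolve_gtod")));

int
main (void)
{
  struct timeval tv;
  my_gettimeofday (&tv, NULL);
  printf ("%ld\n", (long) tv.tv_sec);
  return 0;
}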
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include + + +#define VSYSCALL_ADDR_vtime 0xffffffffff600400 + + +#ifdef SHARED +void *time_ifunc (void) __asm__ ("time"); + +void * +time_ifunc (void) +{ + PREPARE_VERSION (linux26, "LINUX_2.6", 61765110); + + /* If the vDSO is not available we fall back on the old vsyscall. */ + return _dl_vdso_vsym ("time", &linux26) ?: (void *) VSYSCALL_ADDR_vtime; +} +__asm (".type time, %gnu_indirect_function"); +#else +# include +# include + +time_t +time (time_t *t) +{ + INTERNAL_SYSCALL_DECL (err); + return INTERNAL_SYSCALL (time, err, 1, t); +} +#endif + +strong_alias (time, __GI_time) diff --git a/libc/sysdeps/wordsize-64/scandirat.c b/libc/sysdeps/wordsize-64/scandirat.c new file mode 100644 index 000000000..02b8fdee0 --- /dev/null +++ b/libc/sysdeps/wordsize-64/scandirat.c @@ -0,0 +1,6 @@ +#define scandirat64 scandirat64_renamed + +#include "../../dirent/scandirat.c" + +#undef scandirat64 +weak_alias (scandirat, scandirat64) diff --git a/libc/sysdeps/wordsize-64/scandirat64.c b/libc/sysdeps/wordsize-64/scandirat64.c new file mode 100644 index 000000000..fb938654a --- /dev/null +++ b/libc/sysdeps/wordsize-64/scandirat64.c @@ -0,0 +1 @@ +/* Defined in scandirat.c. */ diff --git a/libc/sysdeps/x86_64/dl-trampoline.S b/libc/sysdeps/x86_64/dl-trampoline.S index 1b97929aa..258c60945 100644 --- a/libc/sysdeps/x86_64/dl-trampoline.S +++ b/libc/sysdeps/x86_64/dl-trampoline.S @@ -27,8 +27,9 @@ .align 16 cfi_startproc _dl_runtime_resolve: + cfi_adjust_cfa_offset(16) # Incorporate PLT subq $56,%rsp - cfi_adjust_cfa_offset(72) # Incorporate PLT + cfi_adjust_cfa_offset(56) movq %rax,(%rsp) # Preserve registers otherwise clobbered. movq %rcx, 8(%rsp) movq %rdx, 16(%rsp) @@ -141,8 +142,9 @@ L(have_avx): movq %r11,%rbx # Restore rbx xorl %eax, %eax // AVX and XSAVE supported? - testl $((1 << 28) | (1 << 27)), %ecx - je 2f + andl $((1 << 28) | (1 << 27)), %ecx + cmpl $((1 << 28) | (1 << 27)), %ecx + jne 2f xorl %ecx, %ecx // Get XFEATURE_ENABLED_MASK xgetbv @@ -150,20 +152,22 @@ L(have_avx): cmpl $0x6, %eax // Nonzero if SSE and AVX state saving is enabled. sete %al -2: movl %eax, L(have_avx)(%rip) +2: leal -1(%eax,%eax), %eax + movl %eax, L(have_avx)(%rip) cmpl $0, %eax 1: js L(no_avx) # define RESTORE_AVX +# define MORE_CODE # include "dl-trampoline.h" .align 16 L(no_avx): # endif -# undef RESTORE_AVX -# include "dl-trampoline.h" +# undef RESTORE_AVX +# include "dl-trampoline.h" cfi_endproc .size _dl_runtime_profile, .-_dl_runtime_profile @@ -183,11 +187,20 @@ _dl_x86_64_save_sse: movl $1, %eax cpuid movq %r11,%rbx # Restore rbx - movl $1, %eax - testl $(1 << 28), %ecx + xorl %eax, %eax + // AVX and XSAVE supported? + andl $((1 << 28) | (1 << 27)), %ecx + cmpl $((1 << 28) | (1 << 27)), %ecx jne 2f - negl %eax -2: movl %eax, L(have_avx)(%rip) + xorl %ecx, %ecx + // Get XFEATURE_ENABLED_MASK + xgetbv + andl $0x6, %eax + cmpl $0x6, %eax + // Nonzero if SSE and AVX state saving is enabled. 
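The sched_getcpu hunk above now prefers the real getcpu system call, guarded by the new __ASSUME_GETCPU_SYSCALL from kernel-features.h, and only falls back to the legacy vsyscall when the kernel might predate Linux 3.1. The same preference order expressed in C, as an illustrative sketch:

#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Try the getcpu syscall first; fall back to sched_getcpu (which may
   still go through the vsyscall) only if the kernel lacks it. */
static int
current_cpu (void)
{
#ifdef __NR_getcpu
  unsigned int cpu, node;
  if (syscall (__NR_getcpu, &cpu, &node, NULL) == 0)
    return (int) cpu;
  if (errno != ENOSYS)
    return -1;
#endif
  return sched_getcpu ();
}

int
main (void)
{
  printf ("running on cpu %d\n", current_cpu ());
  return 0;
}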
+ sete %al +2: leal -1(%eax,%eax), %eax + movl %eax, L(have_avx)(%rip) cmpl $0, %eax 1: js L(no_avx5) diff --git a/libc/sysdeps/x86_64/dl-trampoline.h b/libc/sysdeps/x86_64/dl-trampoline.h index 5d49ed440..1c3957983 100644 --- a/libc/sysdeps/x86_64/dl-trampoline.h +++ b/libc/sysdeps/x86_64/dl-trampoline.h @@ -1,6 +1,6 @@ /* Partial PLT profile trampoline to save and restore x86-64 vector registers. - Copyright (C) 2009 Free Software Foundation, Inc. + Copyright (C) 2009, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -195,14 +195,14 @@ _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now, so we just need to allocate the sizeof(La_x86_64_retval) space on the stack, since the alignment has already been taken care of. */ -# ifdef RESTORE_AVX +#ifdef RESTORE_AVX /* sizeof(La_x86_64_retval). Need extra space for 2 SSE registers to detect if xmm0/xmm1 registers are changed by audit module. */ subq $(LRV_SIZE + XMM_SIZE*2), %rsp -# else +#else subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval) -# endif +#endif movq %rsp, %rcx # La_x86_64_retval argument to %rcx. /* Fill in the La_x86_64_retval structure. */ @@ -212,7 +212,7 @@ movaps %xmm0, LRV_XMM0_OFFSET(%rcx) movaps %xmm1, LRV_XMM1_OFFSET(%rcx) -# ifdef RESTORE_AVX +#ifdef RESTORE_AVX /* This is to support AVX audit modules. */ vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx) vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx) @@ -221,14 +221,14 @@ by audit module. */ vmovdqa %xmm0, (LRV_SIZE)(%rcx) vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx) -# endif +#endif fstpt LRV_ST0_OFFSET(%rcx) fstpt LRV_ST1_OFFSET(%rcx) movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx. movq 40(%rbx), %rsi # Copy args pushed by PLT in register. - movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index + movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index call _dl_call_pltexit /* Restore return registers. */ @@ -238,7 +238,7 @@ movaps LRV_XMM0_OFFSET(%rsp), %xmm0 movaps LRV_XMM1_OFFSET(%rsp), %xmm1 -# ifdef RESTORE_AVX +#ifdef RESTORE_AVX /* Check if xmm0/xmm1 registers are changed by audit module. 
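The dl-trampoline changes above tighten the AVX test: CPUID bit 28 alone is not enough, because the OS must also have enabled XSAVE (bit 27) and set the XMM and YMM bits in XCR0 before %ymm registers may be touched and restored. The equivalent check in C, as a sketch; the bit masks are taken from the assembly above, everything else is illustrative:

#include <cpuid.h>

/* Nonzero when AVX is safe to use: CPU support plus OS-enabled
   XSAVE state for XMM and YMM. */
static int
avx_usable (void)
{
  unsigned int eax, ebx, ecx, edx;
  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;
  /* bit 27 = OSXSAVE, bit 28 = AVX, as in the assembly. */
  unsigned int need = (1u << 27) | (1u << 28);
  if ((ecx & need) != need)
    return 0;
  unsigned int xcr0_lo, xcr0_hi;
  /* xgetbv with %ecx = 0 reads XCR0. */
  __asm__ ("xgetbv" : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));
  return (xcr0_lo & 0x6) == 0x6;   /* XMM (bit 1) and YMM (bit 2) state. */
}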
*/ vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2 vpmovmskb %xmm2, %esi @@ -253,7 +253,7 @@ vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1 1: -# endif +#endif fldt LRV_ST1_OFFSET(%rsp) fldt LRV_ST0_OFFSET(%rsp) @@ -267,3 +267,10 @@ # (eats the reloc index and link_map) cfi_adjust_cfa_offset(-48) retq + +#ifdef MORE_CODE + cfi_adjust_cfa_offset(48) + cfi_rel_offset(%rbx, 0) + cfi_def_cfa_register(%rbx) +# undef MORE_CODE +#endif diff --git a/libc/sysdeps/x86_64/l10nflist.c b/libc/sysdeps/x86_64/l10nflist.c new file mode 100644 index 000000000..2e0837233 --- /dev/null +++ b/libc/sysdeps/x86_64/l10nflist.c @@ -0,0 +1,13 @@ +#ifdef __POPCNT__ +# include + +static inline unsigned int +pop (unsigned int x) +{ + return _mm_popcnt_u32 (x); +} +# define ARCH_POP 1 + +#endif + +#include diff --git a/libc/sysdeps/x86_64/multiarch/Makefile b/libc/sysdeps/x86_64/multiarch/Makefile index c959dd195..a5254dc93 100644 --- a/libc/sysdeps/x86_64/multiarch/Makefile +++ b/libc/sysdeps/x86_64/multiarch/Makefile @@ -14,7 +14,8 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \ strcpy-sse2-unaligned strncpy-sse2-unaligned \ stpcpy-sse2-unaligned stpncpy-sse2-unaligned \ strcat-sse2-unaligned strncat-sse2-unaligned \ - strcat-ssse3 strncat-ssse3 strlen-sse2-pminub + strcat-ssse3 strncat-ssse3 strlen-sse2-pminub \ + strrchr-sse2-no-bsf strchr-sse2-no-bsf ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift CFLAGS-varshift.c += -msse4 diff --git a/libc/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S b/libc/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S new file mode 100644 index 000000000..9e9c91680 --- /dev/null +++ b/libc/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S @@ -0,0 +1,281 @@ +/* strchr with SSE2 without bsf + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef NOT_IN_libc + +# include +# include "asm-syntax.h" + + .text +ENTRY (__strchr_sse2_no_bsf) + movd %esi, %xmm1 + movq %rdi, %rcx + punpcklbw %xmm1, %xmm1 + andq $~15, %rdi + pxor %xmm2, %xmm2 + punpcklbw %xmm1, %xmm1 + orl $0xffffffff, %esi + movdqa (%rdi), %xmm0 + pshufd $0, %xmm1, %xmm1 + subq %rdi, %rcx + movdqa %xmm0, %xmm3 + leaq 16(%rdi), %rdi + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm3 + shl %cl, %esi + pmovmskb %xmm0, %eax + pmovmskb %xmm3, %edx + andl %esi, %eax + andl %esi, %edx + test %eax, %eax + jnz L(matches) + test %edx, %edx + jnz L(return_null) + +L(loop): + movdqa (%rdi), %xmm0 + leaq 16(%rdi), %rdi + movdqa %xmm0, %xmm3 + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm3 + pmovmskb %xmm0, %eax + pmovmskb %xmm3, %edx + or %eax, %edx + jz L(loop) + + pmovmskb %xmm3, %edx + test %eax, %eax + jnz L(matches) + +/* Return NULL. 
*/ + .p2align 4 +L(return_null): + xor %rax, %rax + ret + +L(matches): + /* There is a match. First find where NULL is. */ + leaq -16(%rdi), %rdi + test %edx, %edx + jz L(match_case1) + + .p2align 4 +L(match_case2): + test %al, %al + jz L(match_high_case2) + + mov %al, %cl + and $15, %cl + jnz L(match_case2_4) + + mov %dl, %ch + and $15, %ch + jnz L(return_null) + + test $0x10, %al + jnz L(Exit5) + test $0x10, %dl + jnz L(return_null) + test $0x20, %al + jnz L(Exit6) + test $0x20, %dl + jnz L(return_null) + test $0x40, %al + jnz L(Exit7) + test $0x40, %dl + jnz L(return_null) + lea 7(%rdi), %rax + ret + + .p2align 4 +L(match_case2_4): + test $0x01, %al + jnz L(Exit1) + test $0x01, %dl + jnz L(return_null) + test $0x02, %al + jnz L(Exit2) + test $0x02, %dl + jnz L(return_null) + test $0x04, %al + jnz L(Exit3) + test $0x04, %dl + jnz L(return_null) + lea 3(%rdi), %rax + ret + + .p2align 4 +L(match_high_case2): + test %dl, %dl + jnz L(return_null) + + mov %ah, %cl + and $15, %cl + jnz L(match_case2_12) + + mov %dh, %ch + and $15, %ch + jnz L(return_null) + + test $0x10, %ah + jnz L(Exit13) + test $0x10, %dh + jnz L(return_null) + test $0x20, %ah + jnz L(Exit14) + test $0x20, %dh + jnz L(return_null) + test $0x40, %ah + jnz L(Exit15) + test $0x40, %dh + jnz L(return_null) + lea 15(%rdi), %rax + ret + + .p2align 4 +L(match_case2_12): + test $0x01, %ah + jnz L(Exit9) + test $0x01, %dh + jnz L(return_null) + test $0x02, %ah + jnz L(Exit10) + test $0x02, %dh + jnz L(return_null) + test $0x04, %ah + jnz L(Exit11) + test $0x04, %dh + jnz L(return_null) + lea 11(%rdi), %rax + ret + + .p2align 4 +L(match_case1): + test %al, %al + jz L(match_high_case1) + + test $0x01, %al + jnz L(Exit1) + test $0x02, %al + jnz L(Exit2) + test $0x04, %al + jnz L(Exit3) + test $0x08, %al + jnz L(Exit4) + test $0x10, %al + jnz L(Exit5) + test $0x20, %al + jnz L(Exit6) + test $0x40, %al + jnz L(Exit7) + lea 7(%rdi), %rax + ret + + .p2align 4 +L(match_high_case1): + test $0x01, %ah + jnz L(Exit9) + test $0x02, %ah + jnz L(Exit10) + test $0x04, %ah + jnz L(Exit11) + test $0x08, %ah + jnz L(Exit12) + test $0x10, %ah + jnz L(Exit13) + test $0x20, %ah + jnz L(Exit14) + test $0x40, %ah + jnz L(Exit15) + lea 15(%rdi), %rax + ret + + .p2align 4 +L(Exit1): + lea (%rdi), %rax + ret + + .p2align 4 +L(Exit2): + lea 1(%rdi), %rax + ret + + .p2align 4 +L(Exit3): + lea 2(%rdi), %rax + ret + + .p2align 4 +L(Exit4): + lea 3(%rdi), %rax + ret + + .p2align 4 +L(Exit5): + lea 4(%rdi), %rax + ret + + .p2align 4 +L(Exit6): + lea 5(%rdi), %rax + ret + + .p2align 4 +L(Exit7): + lea 6(%rdi), %rax + ret + + .p2align 4 +L(Exit9): + lea 8(%rdi), %rax + ret + + .p2align 4 +L(Exit10): + lea 9(%rdi), %rax + ret + + .p2align 4 +L(Exit11): + lea 10(%rdi), %rax + ret + + .p2align 4 +L(Exit12): + lea 11(%rdi), %rax + ret + + .p2align 4 +L(Exit13): + lea 12(%rdi), %rax + ret + + .p2align 4 +L(Exit14): + lea 13(%rdi), %rax + ret + + .p2align 4 +L(Exit15): + lea 14(%rdi), %rax + ret + +END (__strchr_sse2_no_bsf) +#endif diff --git a/libc/sysdeps/x86_64/multiarch/strchr.S b/libc/sysdeps/x86_64/multiarch/strchr.S index 71845a35f..97a6057bb 100644 --- a/libc/sysdeps/x86_64/multiarch/strchr.S +++ b/libc/sysdeps/x86_64/multiarch/strchr.S @@ -33,7 +33,11 @@ ENTRY(strchr) testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) jz 2f leaq __strchr_sse42(%rip), %rax -2: ret + ret +2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip) + jz 3f + leaq __strchr_sse2_no_bsf(%rip), %rax +3: ret END(strchr) diff --git 
a/libc/sysdeps/x86_64/multiarch/strlen.S b/libc/sysdeps/x86_64/multiarch/strlen.S index d78970742..43e2100f4 100644 --- a/libc/sysdeps/x86_64/multiarch/strlen.S +++ b/libc/sysdeps/x86_64/multiarch/strlen.S @@ -1,5 +1,5 @@ /* strlen(str) -- determine the length of the string STR. - Copyright (C) 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. Contributed by Ulrich Drepper . This file is part of the GNU C Library. diff --git a/libc/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S b/libc/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S new file mode 100644 index 000000000..bbc94c3e8 --- /dev/null +++ b/libc/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S @@ -0,0 +1,556 @@ +/* strrchr with SSE2 without bsf and bsr + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#if defined SHARED && !defined NOT_IN_libc + +# include +# include "asm-syntax.h" + + .text +ENTRY (__strrchr_sse2_no_bsf) + + movd %rsi, %xmm1 + pxor %xmm2, %xmm2 + mov %rdi, %rcx + punpcklbw %xmm1, %xmm1 + punpcklbw %xmm1, %xmm1 + /* ECX has OFFSET. */ + and $63, %rcx + cmp $48, %rcx + pshufd $0, %xmm1, %xmm1 + ja L(crosscache) + +/* unaligned string. */ + movdqu (%rdi), %xmm0 + pcmpeqb %xmm0, %xmm2 + pcmpeqb %xmm1, %xmm0 + /* Find where NULL is. */ + pmovmskb %xmm2, %rcx + /* Check if there is a match. */ + pmovmskb %xmm0, %rax + add $16, %rdi + + test %rax, %rax + jnz L(unaligned_match1) + + test %rcx, %rcx + jnz L(return_null) + + and $-16, %rdi + xor %r8, %r8 + jmp L(loop) + + .p2align 4 +L(unaligned_match1): + test %rcx, %rcx + jnz L(prolog_find_zero_1) + + mov %rax, %r8 + mov %rdi, %rsi + and $-16, %rdi + jmp L(loop) + + .p2align 4 +L(crosscache): +/* Hancle unaligned string. */ + and $15, %rcx + and $-16, %rdi + pxor %xmm3, %xmm3 + movdqa (%rdi), %xmm0 + pcmpeqb %xmm0, %xmm3 + pcmpeqb %xmm1, %xmm0 + /* Find where NULL is. */ + pmovmskb %xmm3, %rdx + /* Check if there is a match. */ + pmovmskb %xmm0, %rax + /* Remove the leading bytes. */ + shr %cl, %rdx + shr %cl, %rax + add $16, %rdi + + test %rax, %rax + jnz L(unaligned_match) + + test %rdx, %rdx + jnz L(return_null) + + xor %r8, %r8 + jmp L(loop) + + .p2align 4 +L(unaligned_match): + test %rdx, %rdx + jnz L(prolog_find_zero) + + mov %rax, %r8 + lea (%rdi, %rcx), %rsi + +/* Loop start on aligned string. 
*/ + .p2align 4 +L(loop): + movdqa (%rdi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %rdi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %rcx + pmovmskb %xmm0, %rax + or %rax, %rcx + jnz L(matches) + + movdqa (%rdi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %rdi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %rcx + pmovmskb %xmm0, %rax + or %rax, %rcx + jnz L(matches) + + movdqa (%rdi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %rdi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %rcx + pmovmskb %xmm0, %rax + or %rax, %rcx + jnz L(matches) + + movdqa (%rdi), %xmm0 + pcmpeqb %xmm0, %xmm2 + add $16, %rdi + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm2, %rcx + pmovmskb %xmm0, %rax + or %rax, %rcx + jz L(loop) + +L(matches): + test %rax, %rax + jnz L(match) +L(return_value): + test %r8, %r8 + jz L(return_null) + mov %r8, %rax + mov %rsi, %rdi + jmp L(match_exit) + + .p2align 4 +L(match): + pmovmskb %xmm2, %rcx + test %rcx, %rcx + jnz L(find_zero) + mov %rax, %r8 + mov %rdi, %rsi + jmp L(loop) + + .p2align 4 +L(find_zero): + test %cl, %cl + jz L(find_zero_high) + mov %cl, %dl + and $15, %dl + jz L(find_zero_8) + test $0x01, %cl + jnz L(FindZeroExit1) + test $0x02, %cl + jnz L(FindZeroExit2) + test $0x04, %cl + jnz L(FindZeroExit3) + and $1 << 4 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(find_zero_8): + test $0x10, %cl + jnz L(FindZeroExit5) + test $0x20, %cl + jnz L(FindZeroExit6) + test $0x40, %cl + jnz L(FindZeroExit7) + and $1 << 8 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(find_zero_high): + mov %ch, %dh + and $15, %dh + jz L(find_zero_high_8) + test $0x01, %ch + jnz L(FindZeroExit9) + test $0x02, %ch + jnz L(FindZeroExit10) + test $0x04, %ch + jnz L(FindZeroExit11) + and $1 << 12 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(find_zero_high_8): + test $0x10, %ch + jnz L(FindZeroExit13) + test $0x20, %ch + jnz L(FindZeroExit14) + test $0x40, %ch + jnz L(FindZeroExit15) + and $1 << 16 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(FindZeroExit1): + and $1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(FindZeroExit2): + and $1 << 2 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(FindZeroExit3): + and $1 << 3 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(FindZeroExit5): + and $1 << 5 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(FindZeroExit6): + and $1 << 6 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(FindZeroExit7): + and $1 << 7 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(FindZeroExit9): + and $1 << 9 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(FindZeroExit10): + and $1 << 10 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(FindZeroExit11): + and $1 << 11 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(FindZeroExit13): + and $1 << 13 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(FindZeroExit14): + and $1 << 14 - 1, %rax + jz L(return_value) + jmp L(match_exit) + + .p2align 4 +L(FindZeroExit15): + and $1 << 15 - 1, %rax + jz L(return_value) + + .p2align 4 +L(match_exit): + test %ah, %ah + jnz L(match_exit_high) + mov %al, %dl + and $15 << 4, %dl + jnz L(match_exit_8) + test $0x08, %al + jnz L(Exit4) + test $0x04, %al + jnz L(Exit3) + test $0x02, %al + jnz L(Exit2) + lea -16(%rdi), %rax + ret + + .p2align 4 +L(match_exit_8): + test $0x80, %al + jnz L(Exit8) + test $0x40, %al + jnz L(Exit7) + test $0x20, %al + 
jnz L(Exit6) + lea -12(%rdi), %rax + ret + + .p2align 4 +L(match_exit_high): + mov %ah, %dh + and $15 << 4, %dh + jnz L(match_exit_high_8) + test $0x08, %ah + jnz L(Exit12) + test $0x04, %ah + jnz L(Exit11) + test $0x02, %ah + jnz L(Exit10) + lea -8(%rdi), %rax + ret + + .p2align 4 +L(match_exit_high_8): + test $0x80, %ah + jnz L(Exit16) + test $0x40, %ah + jnz L(Exit15) + test $0x20, %ah + jnz L(Exit14) + lea -4(%rdi), %rax + ret + + .p2align 4 +L(Exit2): + lea -15(%rdi), %rax + ret + + .p2align 4 +L(Exit3): + lea -14(%rdi), %rax + ret + + .p2align 4 +L(Exit4): + lea -13(%rdi), %rax + ret + + .p2align 4 +L(Exit6): + lea -11(%rdi), %rax + ret + + .p2align 4 +L(Exit7): + lea -10(%rdi), %rax + ret + + .p2align 4 +L(Exit8): + lea -9(%rdi), %rax + ret + + .p2align 4 +L(Exit10): + lea -7(%rdi), %rax + ret + + .p2align 4 +L(Exit11): + lea -6(%rdi), %rax + ret + + .p2align 4 +L(Exit12): + lea -5(%rdi), %rax + ret + + .p2align 4 +L(Exit14): + lea -3(%rdi), %rax + ret + + .p2align 4 +L(Exit15): + lea -2(%rdi), %rax + ret + + .p2align 4 +L(Exit16): + lea -1(%rdi), %rax + ret + +/* Return NULL. */ + .p2align 4 +L(return_null): + xor %rax, %rax + ret + + .p2align 4 +L(prolog_find_zero): + add %rcx, %rdi + mov %rdx, %rcx +L(prolog_find_zero_1): + test %cl, %cl + jz L(prolog_find_zero_high) + mov %cl, %dl + and $15, %dl + jz L(prolog_find_zero_8) + test $0x01, %cl + jnz L(PrologFindZeroExit1) + test $0x02, %cl + jnz L(PrologFindZeroExit2) + test $0x04, %cl + jnz L(PrologFindZeroExit3) + and $1 << 4 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(prolog_find_zero_8): + test $0x10, %cl + jnz L(PrologFindZeroExit5) + test $0x20, %cl + jnz L(PrologFindZeroExit6) + test $0x40, %cl + jnz L(PrologFindZeroExit7) + and $1 << 8 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(prolog_find_zero_high): + mov %ch, %dh + and $15, %dh + jz L(prolog_find_zero_high_8) + test $0x01, %ch + jnz L(PrologFindZeroExit9) + test $0x02, %ch + jnz L(PrologFindZeroExit10) + test $0x04, %ch + jnz L(PrologFindZeroExit11) + and $1 << 12 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(prolog_find_zero_high_8): + test $0x10, %ch + jnz L(PrologFindZeroExit13) + test $0x20, %ch + jnz L(PrologFindZeroExit14) + test $0x40, %ch + jnz L(PrologFindZeroExit15) + and $1 << 16 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(PrologFindZeroExit1): + and $1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(PrologFindZeroExit2): + and $1 << 2 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(PrologFindZeroExit3): + and $1 << 3 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(PrologFindZeroExit5): + and $1 << 5 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(PrologFindZeroExit6): + and $1 << 6 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(PrologFindZeroExit7): + and $1 << 7 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(PrologFindZeroExit9): + and $1 << 9 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(PrologFindZeroExit10): + and $1 << 10 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(PrologFindZeroExit11): + and $1 << 11 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(PrologFindZeroExit13): + and $1 << 13 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + + .p2align 4 +L(PrologFindZeroExit14): + and $1 << 14 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + 
ret + + .p2align 4 +L(PrologFindZeroExit15): + and $1 << 15 - 1, %rax + jnz L(match_exit) + xor %rax, %rax + ret + +END (__strrchr_sse2_no_bsf) +#endif diff --git a/libc/sysdeps/x86_64/multiarch/strrchr.S b/libc/sysdeps/x86_64/multiarch/strrchr.S index 0d17fdb58..4a746c79c 100644 --- a/libc/sysdeps/x86_64/multiarch/strrchr.S +++ b/libc/sysdeps/x86_64/multiarch/strrchr.S @@ -35,7 +35,11 @@ ENTRY(strrchr) testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) jz 2f leaq __strrchr_sse42(%rip), %rax -2: ret + ret +2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip) + jz 3f + leaq __strrchr_sse2_no_bsf(%rip), %rax +3: ret END(strrchr) /* diff --git a/libc/sysdeps/x86_64/wcscmp.S b/libc/sysdeps/x86_64/wcscmp.S new file mode 100644 index 000000000..14dcab8c8 --- /dev/null +++ b/libc/sysdeps/x86_64/wcscmp.S @@ -0,0 +1,936 @@ +/* Optimized wcscmp for x86-64 with SSE2. + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include + +/* Define multiple versions only for the definition in libc and for + the DSO. In static binaries we need wcscmp before the initialization + happened. */ + + .text +ENTRY (wcscmp) +/* + * This implementation uses SSE to compare up to 16 bytes at a time. +*/ + mov %esi, %eax + mov %edi, %edx + pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ + mov %al, %ch + mov %dl, %cl + and $63, %eax /* rsi alignment in cache line */ + and $63, %edx /* rdi alignment in cache line */ + and $15, %cl + jz L(continue_00) + cmp $16, %edx + jb L(continue_0) + cmp $32, %edx + jb L(continue_16) + cmp $48, %edx + jb L(continue_32) + +L(continue_48): + and $15, %ch + jz L(continue_48_00) + cmp $16, %eax + jb L(continue_0_48) + cmp $32, %eax + jb L(continue_16_48) + cmp $48, %eax + jb L(continue_32_48) + + .p2align 4 +L(continue_48_48): + mov (%rsi), %ecx + cmp %ecx, (%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 4(%rsi), %ecx + cmp %ecx, 4(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%rsi), %ecx + cmp %ecx, 8(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%rsi), %ecx + cmp %ecx, 12(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 16(%rdi), %xmm1 + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rdi), %xmm1 + movdqu 32(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqu 48(%rdi), %xmm1 + movdqu 48(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %rsi + add $64, %rdi + jmp L(continue_48_48) + +L(continue_0): + and $15, %ch + jz L(continue_0_00) + cmp $16, %eax + jb L(continue_0_0) + cmp $32, %eax + jb L(continue_0_16) + cmp $48, %eax + jb L(continue_0_32) + + .p2align 4 +L(continue_0_48): + mov (%rsi), %ecx + cmp %ecx, (%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 4(%rsi), %ecx + cmp %ecx, 4(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%rsi), %ecx + cmp %ecx, 8(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%rsi), %ecx + cmp %ecx, 12(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 16(%rdi), %xmm1 + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rdi), %xmm1 + movdqu 32(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + mov 48(%rsi), %ecx + cmp %ecx, 48(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 52(%rsi), %ecx + cmp %ecx, 52(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 56(%rsi), %ecx + cmp %ecx, 56(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 60(%rsi), %ecx + cmp %ecx, 60(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + add $64, %rsi + add $64, %rdi + jmp L(continue_0_48) + + .p2align 4 +L(continue_00): + and $15, %ch + jz L(continue_00_00) + cmp $16, %eax + jb L(continue_00_0) + cmp $32, %eax + jb L(continue_00_16) + cmp $48, %eax + jb L(continue_00_32) + + .p2align 4 +L(continue_00_48): + pcmpeqd (%rdi), %xmm0 + mov (%rdi), %eax + pmovmskb %xmm0, %ecx + test %ecx, %ecx + jnz L(less4_double_words1) + + sub (%rsi), %eax + jnz L(return) + + mov 4(%rdi), %eax + sub 4(%rsi), %eax + jnz L(return) + + mov 8(%rdi), %eax + sub 8(%rsi), %eax + jnz L(return) + + mov 12(%rdi), %eax + sub 12(%rsi), %eax + jnz L(return) + + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ + pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqu 48(%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd 48(%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %rsi + add $64, %rdi + jmp L(continue_00_48) + + .p2align 4 +L(continue_32): + and $15, %ch + jz L(continue_32_00) + cmp $16, %eax + jb L(continue_0_32) + cmp $32, %eax + jb L(continue_16_32) + cmp $48, %eax + jb L(continue_32_32) + + .p2align 4 +L(continue_32_48): + mov (%rsi), %ecx + cmp %ecx, (%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 4(%rsi), %ecx + cmp %ecx, 4(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%rsi), %ecx + cmp %ecx, 8(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%rsi), %ecx + cmp %ecx, 12(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 16(%rsi), %ecx + cmp %ecx, 16(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 20(%rsi), %ecx + cmp %ecx, 20(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 24(%rsi), %ecx + cmp %ecx, 24(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 28(%rsi), %ecx + cmp %ecx, 28(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 32(%rdi), %xmm1 + movdqu 32(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqu 48(%rdi), %xmm1 + movdqu 48(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %rsi + add $64, %rdi + jmp L(continue_32_48) + + .p2align 4 +L(continue_16): + and $15, %ch + jz L(continue_16_00) + cmp $16, %eax + jb L(continue_0_16) + cmp $32, %eax + jb L(continue_16_16) + cmp $48, %eax + jb L(continue_16_32) + + .p2align 4 +L(continue_16_48): + mov (%rsi), %ecx + cmp %ecx, (%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 4(%rsi), %ecx + cmp %ecx, 4(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%rsi), %ecx + cmp %ecx, 8(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%rsi), %ecx + cmp %ecx, 12(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 16(%rdi), %xmm1 + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + mov 32(%rsi), %ecx + cmp %ecx, 32(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 36(%rsi), %ecx + cmp %ecx, 36(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 40(%rsi), %ecx + cmp %ecx, 40(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 44(%rsi), %ecx + cmp %ecx, 44(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 48(%rdi), %xmm1 + movdqu 48(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %rsi + add $64, %rdi + jmp L(continue_16_48) + + .p2align 4 +L(continue_00_00): + movdqa (%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqa 16(%rdi), %xmm3 + pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%rsi), %xmm3 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm3 /* packed sub of comparison results*/ + pmovmskb %xmm3, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqa 32(%rdi), %xmm5 + pcmpeqd %xmm5, %xmm0 /* Any null double_word? */ + pcmpeqd 32(%rsi), %xmm5 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm5 /* packed sub of comparison results*/ + pmovmskb %xmm5, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqa 48(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %rsi + add $64, %rdi + jmp L(continue_00_00) + + .p2align 4 +L(continue_00_32): + movdqu (%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %rsi + add $16, %rdi + jmp L(continue_00_48) + + .p2align 4 +L(continue_00_16): + movdqu (%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ + pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + add $32, %rsi + add $32, %rdi + jmp L(continue_00_48) + + .p2align 4 +L(continue_00_0): + movdqu (%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + add $48, %rsi + add $48, %rdi + jmp L(continue_00_48) + + .p2align 4 +L(continue_48_00): + pcmpeqd (%rsi), %xmm0 + mov (%rdi), %eax + pmovmskb %xmm0, %ecx + test %ecx, %ecx + jnz L(less4_double_words1) + + sub (%rsi), %eax + jnz L(return) + + mov 4(%rdi), %eax + sub 4(%rsi), %eax + jnz L(return) + + mov 8(%rdi), %eax + sub 8(%rsi), %eax + jnz L(return) + + mov 12(%rdi), %eax + sub 12(%rsi), %eax + jnz L(return) + + movdqu 16(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqu 48(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %rsi + add $64, %rdi + jmp L(continue_48_00) + + .p2align 4 +L(continue_32_00): + movdqu (%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %rsi + add $16, %rdi + jmp L(continue_48_00) + + .p2align 4 +L(continue_16_00): + movdqu (%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + add $32, %rsi + add $32, %rdi + jmp L(continue_48_00) + + .p2align 4 +L(continue_0_00): + movdqu (%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + add $48, %rsi + add $48, %rdi + jmp L(continue_48_00) + + .p2align 4 +L(continue_32_32): + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %rsi + add $16, %rdi + jmp L(continue_48_48) + + .p2align 4 +L(continue_16_16): + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rdi), %xmm3 + movdqu 16(%rsi), %xmm4 + pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm3 /* packed sub of comparison results*/ + pmovmskb %xmm3, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + add $32, %rsi + add $32, %rdi + jmp L(continue_48_48) + + .p2align 4 +L(continue_0_0): + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rdi), %xmm3 + movdqu 16(%rsi), %xmm4 + pcmpeqd %xmm3, %xmm0 /* Any null double_word? 
*/ + pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm3 /* packed sub of comparison results*/ + pmovmskb %xmm3, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rdi), %xmm1 + movdqu 32(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + add $48, %rsi + add $48, %rdi + jmp L(continue_48_48) + + .p2align 4 +L(continue_0_16): + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rdi), %xmm1 + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + add $32, %rsi + add $32, %rdi + jmp L(continue_32_48) + + .p2align 4 +L(continue_0_32): + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %rsi + add $16, %rdi + jmp L(continue_16_48) + + .p2align 4 +L(continue_16_32): + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %rsi + add $16, %rdi + jmp L(continue_32_48) + + .p2align 4 +L(less4_double_words1): + cmp (%rsi), %eax + jne L(nequal) + test %eax, %eax + jz L(equal) + + mov 4(%rsi), %ecx + cmp %ecx, 4(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%rsi), %ecx + cmp %ecx, 8(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%rsi), %edx + mov 12(%rdi), %eax + sub %edx, %eax + ret + + .p2align 4 +L(less4_double_words): + test %dl, %dl + jz L(next_two_double_words) + and $15, %dl + jz L(second_double_word) + mov (%rdi), %eax + sub (%rsi), %eax + ret + + .p2align 4 +L(second_double_word): + mov 4(%rdi), %eax + sub 4(%rsi), %eax + ret + + .p2align 4 +L(next_two_double_words): + and $15, %dh + jz L(fourth_double_word) + mov 8(%rdi), %eax + sub 8(%rsi), %eax + ret + + .p2align 4 +L(fourth_double_word): + mov 12(%rdi), %eax + sub 12(%rsi), %eax + ret + + .p2align 4 +L(less4_double_words_16): + test %dl, %dl + jz L(next_two_double_words_16) + and $15, %dl + jz L(second_double_word_16) + mov 16(%rdi), %eax + sub 16(%rsi), %eax + ret + + .p2align 4 +L(second_double_word_16): + mov 20(%rdi), %eax + sub 20(%rsi), %eax + ret + + .p2align 4 +L(next_two_double_words_16): + and $15, %dh + jz L(fourth_double_word_16) + mov 24(%rdi), %eax + sub 24(%rsi), %eax + ret + + .p2align 4 +L(fourth_double_word_16): + mov 28(%rdi), %eax + sub 28(%rsi), %eax + ret + + .p2align 4 +L(less4_double_words_32): + test %dl, %dl + jz L(next_two_double_words_32) + and $15, %dl + jz L(second_double_word_32) + mov 32(%rdi), %eax + sub 32(%rsi), %eax + ret + + .p2align 4 +L(second_double_word_32): + mov 36(%rdi), %eax + sub 36(%rsi), %eax + ret + + .p2align 4 +L(next_two_double_words_32): + and $15, %dh + jz L(fourth_double_word_32) + mov 40(%rdi), %eax + sub 40(%rsi), %eax + ret + + .p2align 4 +L(fourth_double_word_32): + mov 44(%rdi), %eax + sub 44(%rsi), %eax + ret + + .p2align 4 +L(less4_double_words_48): + test %dl, %dl + jz L(next_two_double_words_48) + and $15, %dl + jz L(second_double_word_48) + mov 48(%rdi), %eax + sub 48(%rsi), %eax + ret + + .p2align 4 +L(second_double_word_48): + mov 52(%rdi), %eax + sub 52(%rsi), %eax + ret + + .p2align 4 +L(next_two_double_words_48): + and $15, %dh + jz L(fourth_double_word_48) + mov 56(%rdi), %eax + sub 56(%rsi), %eax + ret + + .p2align 4 +L(fourth_double_word_48): + mov 60(%rdi), %eax + sub 60(%rsi), %eax + ret + + .p2align 4 +L(return): + ret + + .p2align 4 +L(nequal): + mov $1, %eax + ja L(nequal_bigger) + neg %eax + +L(nequal_bigger): + ret + + .p2align 4 +L(equal): + xor %rax, %rax + ret + +END (wcscmp) +libc_hidden_def (wcscmp) -- cgit v1.2.3
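
[Editor's note, not part of the patch.]  For readers who do not want to trace
the assembly: the new sysdeps/x86_64/wcscmp.S above compares both strings in
16-byte blocks (four wide characters, assuming glibc's 4-byte wchar_t on
x86-64) using pcmpeqd/pmovmskb.  Roughly, the many L(continue_*) variants only
decide, from each pointer's offset within its 64-byte cache line, which
stretches of a block must fall back to scalar 4-byte compares because a
16-byte load from either string would cross that pointer's next cache-line
boundary; everything else is compared 16 bytes at a time with movdqu/movdqa.
The C below is a minimal scalar sketch of that block-compare strategy, not the
implementation: the name wcscmp_ref is hypothetical, the mismatch case here
uses a plain signed comparison rather than the assembly's flag-based +/-1
result, and only the sign of the return value is meant to be meaningful.

#include <wchar.h>

/* Hypothetical reference routine, for illustration only.  It walks both
   strings four wchar_t (one 16-byte block) at a time, returning at the
   first mismatch or terminating null.  The SSE2 code performs the four
   comparisons of a block with a single pcmpeqd and inspects the pmovmskb
   result instead of testing each element separately.  */
int
wcscmp_ref (const wchar_t *s1, const wchar_t *s2)
{
  for (;;)
    {
      for (int i = 0; i < 4; i++)       /* one 16-byte block */
        {
          wchar_t c1 = s1[i];
          wchar_t c2 = s2[i];
          if (c1 != c2)
            return c1 > c2 ? 1 : -1;    /* mismatch: cf. the L(nequal) path */
          if (c1 == L'\0')
            return 0;                   /* both strings ended: cf. L(equal) */
        }
      s1 += 4;                          /* advance to the next block */
      s2 += 4;
    }
}

Keeping every 16-byte load inside one 64-byte cache line also keeps it inside
one page, so the vector reads can never fault on memory past the terminating
null.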