diff options
author | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2011-11-17 21:56:08 +0000 |
---|---|---|
committer | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2011-11-17 21:56:08 +0000 |
commit | 913a0f6c362c8c9aac72f800485678845a60ed06 (patch) | |
tree | 227afb2373db8f9494b69c20069cce6a03ab0914 /libc/sysdeps | |
parent | 86abb02796d5bfc0c71d46ad9923ff8737e03280 (diff) |
Merge changes between r15584 and r15868 from /fsf/trunk.
git-svn-id: svn://svn.eglibc.org/trunk@15869 7b3dc134-2b1b-0410-93df-9e9f96275f8d
Diffstat (limited to 'libc/sysdeps')
63 files changed, 2036 insertions, 637 deletions
diff --git a/libc/sysdeps/generic/dwarf2.h b/libc/sysdeps/generic/dwarf2.h index 9fca4c00e..26ceef68c 100644 --- a/libc/sysdeps/generic/dwarf2.h +++ b/libc/sysdeps/generic/dwarf2.h @@ -1,6 +1,6 @@ /* Declarations and definitions of codes relating to the DWARF2 symbolic debugging information format. - Copyright (C) 1992, 1993, 1995, 1996, 1997, 2000 + Copyright (C) 1992, 1993, 1995, 1996, 1997, 2000, 2011 Free Software Foundation, Inc. Contributed by Gary Funck (gary@intrepid.com). Derived from the DWARF 1 implementation written by Ron Guilmette (rfg@monkeys.com). @@ -22,6 +22,9 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ +#ifndef _DWARF2_H +#define _DWARF2_H 1 + /* This file is derived from the DWARF specification (a public document) Revision 2.0.0 (July 27, 1993) developed by the UNIX International Programming Languages Special Interest Group (UI/PLSIG) and distributed @@ -31,6 +34,7 @@ /* This file is shared between GCC and GDB, and should not contain prototypes. */ +#ifndef __ASSEMBLER__ /* Tag names and codes. */ enum dwarf_tag @@ -560,6 +564,7 @@ enum dwarf_macinfo_record_type DW_MACINFO_vendor_ext = 255 }; +#endif /* !ASSEMBLER */ /* @@@ For use with GNU frame unwind information. */ @@ -583,3 +588,5 @@ enum dwarf_macinfo_record_type #define DW_EH_PE_aligned 0x50 #define DW_EH_PE_indirect 0x80 + +#endif /* dwarf2.h */ diff --git a/libc/sysdeps/generic/sysdep.h b/libc/sysdeps/generic/sysdep.h index 54884d9af..eecbd731f 100644 --- a/libc/sysdeps/generic/sysdep.h +++ b/libc/sysdeps/generic/sysdep.h @@ -1,5 +1,6 @@ /* Generic asm macros used on many machines. - Copyright (C) 1991,92,93,96,98,2002,2003,2009 Free Software Foundation, Inc. + Copyright (C) 1991-1993,96,98,2002,2003,2009,2011 + Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,19 +21,19 @@ #ifndef C_LABEL /* Define a macro we can use to construct the asm name for a C symbol. */ -#ifdef NO_UNDERSCORES -#ifdef __STDC__ -#define C_LABEL(name) name##: -#else -#define C_LABEL(name) name/**/: -#endif -#else -#ifdef __STDC__ -#define C_LABEL(name) _##name##: -#else -#define C_LABEL(name) _/**/name/**/: -#endif -#endif +# ifdef NO_UNDERSCORES +# ifdef __STDC__ +# define C_LABEL(name) name##: +# else +# define C_LABEL(name) name/**/: +# endif +# else +# ifdef __STDC__ +# define C_LABEL(name) _##name##: +# else +# define C_LABEL(name) _/**/name/**/: +# endif +# endif #endif @@ -40,15 +41,17 @@ /* Mark the end of function named SYM. This is used on some platforms to generate correct debugging information. */ # ifndef END -# define END(sym) +# define END(sym) # endif # ifndef JUMPTARGET -# define JUMPTARGET(sym) sym +# define JUMPTARGET(sym) sym # endif +#endif /* Makros to generate eh_frame unwind information. */ -# ifdef HAVE_ASM_CFI_DIRECTIVES +#ifdef HAVE_ASM_CFI_DIRECTIVES +# ifdef __ASSEMBLER__ # define cfi_startproc .cfi_startproc # define cfi_endproc .cfi_endproc # define cfi_def_cfa(reg, off) .cfi_def_cfa reg, off @@ -67,29 +70,9 @@ # define cfi_window_save .cfi_window_save # define cfi_personality(enc, exp) .cfi_personality enc, exp # define cfi_lsda(enc, exp) .cfi_lsda enc, exp -# else -# define cfi_startproc -# define cfi_endproc -# define cfi_def_cfa(reg, off) -# define cfi_def_cfa_register(reg) -# define cfi_def_cfa_offset(off) -# define cfi_adjust_cfa_offset(off) -# define cfi_offset(reg, off) -# define cfi_rel_offset(reg, off) -# define cfi_register(r1, r2) -# define cfi_return_column(reg) -# define cfi_restore(reg) -# define cfi_same_value(reg) -# define cfi_undefined(reg) -# define cfi_remember_state -# define cfi_restore_state -# define cfi_window_save -# define cfi_personality(enc, exp) -# define cfi_lsda(enc, exp) -# endif -#else /* ! ASSEMBLER */ -# ifdef HAVE_ASM_CFI_DIRECTIVES +# else /* ! ASSEMBLER */ + # define CFI_STRINGIFY(Name) CFI_STRINGIFY2 (Name) # define CFI_STRINGIFY2(Name) #Name # define CFI_STARTPROC ".cfi_startproc" @@ -124,43 +107,27 @@ ".cfi_personality " CFI_STRINGIFY(enc) "," CFI_STRINGIFY(exp) # define CFI_LSDA(enc, exp) \ ".cfi_lsda " CFI_STRINGIFY(enc) "," CFI_STRINGIFY(exp) -# else -# define CFI_STARTPROC -# define CFI_ENDPROC -# define CFI_DEF_CFA(reg, off) -# define CFI_DEF_CFA_REGISTER(reg) -# define CFI_DEF_CFA_OFFSET(off) -# define CFI_ADJUST_CFA_OFFSET(off) -# define CFI_OFFSET(reg, off) -# define CFI_REL_OFFSET(reg, off) -# define CFI_REGISTER(r1, r2) -# define CFI_RETURN_COLUMN(reg) -# define CFI_RESTORE(reg) -# define CFI_UNDEFINED(reg) -# define CFI_REMEMBER_STATE -# define CFI_RESTORE_STATE -# define CFI_WINDOW_SAVE -# define CFI_PERSONALITY(enc, exp) -# define CFI_LSDA(enc, exp) # endif -#endif /* __ASSEMBLER__ */ +#else + +# define CFI_STARTPROC +# define CFI_ENDPROC +# define CFI_DEF_CFA(reg, off) +# define CFI_DEF_CFA_REGISTER(reg) +# define CFI_DEF_CFA_OFFSET(off) +# define CFI_ADJUST_CFA_OFFSET(off) +# define CFI_OFFSET(reg, off) +# define CFI_REL_OFFSET(reg, off) +# define CFI_REGISTER(r1, r2) +# define CFI_RETURN_COLUMN(reg) +# define CFI_RESTORE(reg) +# define CFI_UNDEFINED(reg) +# define CFI_REMEMBER_STATE +# define CFI_RESTORE_STATE +# define CFI_WINDOW_SAVE +# define CFI_PERSONALITY(enc, exp) +# define CFI_LSDA(enc, exp) +#endif -/* Values used for encoding parameter of cfi_personality and cfi_lsda. */ -#define DW_EH_PE_absptr 0x00 -#define DW_EH_PE_omit 0xff -#define DW_EH_PE_uleb128 0x01 -#define DW_EH_PE_udata2 0x02 -#define DW_EH_PE_udata4 0x03 -#define DW_EH_PE_udata8 0x04 -#define DW_EH_PE_sleb128 0x09 -#define DW_EH_PE_sdata2 0x0a -#define DW_EH_PE_sdata4 0x0b -#define DW_EH_PE_sdata8 0x0c -#define DW_EH_PE_signed 0x08 -#define DW_EH_PE_pcrel 0x10 -#define DW_EH_PE_textrel 0x20 -#define DW_EH_PE_datarel 0x30 -#define DW_EH_PE_funcrel 0x40 -#define DW_EH_PE_aligned 0x50 -#define DW_EH_PE_indirect 0x80 +#include "dwarf2.h" diff --git a/libc/sysdeps/i386/i686/multiarch/Makefile b/libc/sysdeps/i386/i686/multiarch/Makefile index 5f1853877..426b718e4 100644 --- a/libc/sysdeps/i386/i686/multiarch/Makefile +++ b/libc/sysdeps/i386/i686/multiarch/Makefile @@ -4,6 +4,7 @@ gen-as-const-headers += ifunc-defines.sym endif ifeq ($(subdir),string) +gen-as-const-headers += locale-defines.sym sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \ memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \ @@ -15,11 +16,13 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \ strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \ strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \ - wcscmp-sse2 wcscmp-c memchr-sse2 memchr-sse2-bsf \ + memchr-sse2 memchr-sse2-bsf \ memrchr-sse2 memrchr-sse2-bsf memrchr-c \ rawmemchr-sse2 rawmemchr-sse2-bsf \ - strnlen-sse2 strnlen-c wcslen-sse2 wcslen-c \ - wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c + strnlen-sse2 strnlen-c \ + strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \ + strncase_l-c strncase-c strncase_l-ssse3 \ + strcasecmp_l-sse4 strncase_l-sse4 ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c CFLAGS-varshift.c += -msse4 @@ -32,6 +35,11 @@ CFLAGS-strcasestr-nonascii.c += -msse4 endif endif +ifeq ($(subdir),wcsmbs) +sysdep_routines += wcscmp-sse2 wcscmp-c wcslen-sse2 wcslen-c \ + wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c +endif + ifeq (mathyes,$(subdir)$(config-cflags-avx)) libm-sysdep_routines += s_fma-fma s_fmaf-fma CFLAGS-s_fma-fma.c += -mavx -mfpmath=sse diff --git a/libc/sysdeps/i386/i686/multiarch/locale-defines.sym b/libc/sysdeps/i386/i686/multiarch/locale-defines.sym new file mode 100644 index 000000000..aebff9a4f --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/locale-defines.sym @@ -0,0 +1,11 @@ +#include <locale/localeinfo.h> +#include <langinfo.h> +#include <stddef.h> + +-- + +LOCALE_T___LOCALES offsetof (struct __locale_struct, __locales) +LC_CTYPE +_NL_CTYPE_NONASCII_CASE +LOCALE_DATA_VALUES offsetof (struct __locale_data, values) +SIZEOF_VALUES sizeof (((struct __locale_data *) 0)->values[0]) diff --git a/libc/sysdeps/i386/i686/multiarch/strcasecmp-c.c b/libc/sysdeps/i386/i686/multiarch/strcasecmp-c.c new file mode 100644 index 000000000..753c6ec84 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strcasecmp-c.c @@ -0,0 +1,12 @@ +#include <string.h> + +extern __typeof (strcasecmp) __strcasecmp_nonascii; + +#define __strcasecmp __strcasecmp_nonascii +#include <string/strcasecmp.c> + +strong_alias (__strcasecmp_nonascii, __strcasecmp_ia32) + +/* The needs of strcasecmp in libc are minimal, no need to go through + the IFUNC. */ +strong_alias (__strcasecmp_nonascii, __GI___strcasecmp) diff --git a/libc/sysdeps/i386/i686/multiarch/strcasecmp.S b/libc/sysdeps/i386/i686/multiarch/strcasecmp.S new file mode 100644 index 000000000..97603d884 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strcasecmp.S @@ -0,0 +1,68 @@ +/* Entry point for multi-version x86 strcasecmp. + Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <init-arch.h> + +#ifdef SHARED + .text +ENTRY(__strcasecmp) + .type __strcasecmp, @gnu_indirect_function + pushl %ebx + cfi_adjust_cfa_offset (4) + cfi_rel_offset (ebx, 0) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) + jne 1f + call __init_cpu_features +1: leal __strcasecmp_ia32@GOTOFF(%ebx), %eax + testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __strcasecmp_ssse3@GOTOFF(%ebx), %eax + testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __strcasecmp_sse4_2@GOTOFF(%ebx), %eax +2: popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + ret +END(__strcasecmp) +#else + .text +ENTRY(__strcasecmp) + .type __strcasecmp, @gnu_indirect_function + cmpl $0, KIND_OFFSET+__cpu_features + jne 1f + call __init_cpu_features +1: leal __strcasecmp_ia32, %eax + testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features + jz 2f + leal __strcasecmp_ssse3, %eax +#if 0 + // XXX Temporarily + testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features + jz 2f + leal __strcasecmp_sse4_2, %eax +#endif +2: ret +END(__strcasecmp) +#endif + +weak_alias (__strcasecmp, strcasecmp) diff --git a/libc/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c b/libc/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c new file mode 100644 index 000000000..d10e87256 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c @@ -0,0 +1,11 @@ +#include <string.h> + +extern __typeof (strcasecmp_l) __strcasecmp_l_nonascii; + +#define __strcasecmp_l __strcasecmp_l_nonascii +#define USE_IN_EXTENDED_LOCALE_MODEL 1 +#include <string/strcasecmp.c> + +/* The needs of strcasecmp in libc are minimal, no need to go through + the IFUNC. */ +strong_alias (__strcasecmp_l_nonascii, __GI___strcasecmp_l) diff --git a/libc/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S b/libc/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S new file mode 100644 index 000000000..411d4153f --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S @@ -0,0 +1,2 @@ +#define USE_AS_STRCASECMP_L 1 +#include "strcmp-sse4.S" diff --git a/libc/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S b/libc/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S new file mode 100644 index 000000000..a22b93c51 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S @@ -0,0 +1,2 @@ +#define USE_AS_STRCASECMP_L 1 +#include "strcmp-ssse3.S" diff --git a/libc/sysdeps/i386/i686/multiarch/strcasecmp_l.S b/libc/sysdeps/i386/i686/multiarch/strcasecmp_l.S new file mode 100644 index 000000000..1322bd86d --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strcasecmp_l.S @@ -0,0 +1,5 @@ +#define STRCMP __strcasecmp_l +#define USE_AS_STRCASECMP_L +#include "strcmp.S" + +weak_alias (__strcasecmp_l, strcasecmp_l) diff --git a/libc/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/libc/sysdeps/i386/i686/multiarch/strcmp-sse4.S index 0de0a113c..c9e0317b6 100644 --- a/libc/sysdeps/i386/i686/multiarch/strcmp-sse4.S +++ b/libc/sysdeps/i386/i686/multiarch/strcmp-sse4.S @@ -1,5 +1,5 @@ /* strcmp with SSE4.2 - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -34,33 +34,184 @@ #define PUSH(REG) pushl REG; CFI_PUSH (REG) #define POP(REG) popl REG; CFI_POP (REG) -#ifndef USE_AS_STRNCMP +#ifdef USE_AS_STRNCMP # ifndef STRCMP -# define STRCMP __strcmp_sse4_2 +# define STRCMP __strncmp_sse4_2 # endif -# define STR1 4 +# define STR1 8 # define STR2 STR1+4 -# define RETURN ret; .p2align 4 -#else +# define CNT STR2+4 +# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) +# define REM %ebp +#elif defined USE_AS_STRCASECMP_L +# include "locale-defines.h" # ifndef STRCMP -# define STRCMP __strncmp_sse4_2 +# define STRCMP __strcasecmp_l_sse4_2 +# endif +# ifdef PIC +# define STR1 12 +# else +# define STR1 8 +# endif +# define STR2 STR1+4 +# define LOCALE 12 /* Loaded before the adjustement. */ +# ifdef PIC +# define RETURN POP (%edi); POP (%ebx); ret; \ + .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi) +# else +# define RETURN POP (%edi); ret; .p2align 4; CFI_PUSH (%edi) +# endif +# define NONASCII __strcasecmp_nonascii +#elif defined USE_AS_STRNCASECMP_L +# include "locale-defines.h" +# ifndef STRCMP +# define STRCMP __strncasecmp_l_sse4_2 +# endif +# ifdef PIC +# define STR1 16 +# else +# define STR1 12 # endif -# define STR1 8 # define STR2 STR1+4 # define CNT STR2+4 -# define RETURN POP (%ebp); ret; .p2align 4; CFI_PUSH (%ebp) +# define LOCALE 16 /* Loaded before the adjustement. */ +# ifdef PIC +# define RETURN POP (%edi); POP (REM); POP (%ebx); ret; \ + .p2align 4; \ + CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi) +# else +# define RETURN POP (%edi); POP (REM); ret; \ + .p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi) +# endif +# define REM %ebp +# define NONASCII __strncasecmp_nonascii +#else +# ifndef STRCMP +# define STRCMP __strcmp_sse4_2 +# endif +# define STR1 4 +# define STR2 STR1+4 +# define RETURN ret; .p2align 4 #endif .section .text.sse4.2,"ax",@progbits -ENTRY (STRCMP) -#ifdef USE_AS_STRNCMP - PUSH (%ebp) + +#ifdef USE_AS_STRCASECMP_L +ENTRY (__strcasecmp_sse4_2) +# ifdef PIC + PUSH (%ebx) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax +# ifdef NO_TLS_DIRECT_SEG_REFS + addl %gs:0, %eax + movl (%eax), %eax +# else + movl %gs:(%eax), %eax +# endif +# else +# ifdef NO_TLS_DIRECT_SEG_REFS + movl %gs:0, %eax + movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax +# else + movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax +# endif +# endif +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) + jne __strcasecmp_nonascii + jmp L(ascii) +END (__strcasecmp_sse4_2) +#endif + +#ifdef USE_AS_STRNCASECMP_L +ENTRY (__strncasecmp_sse4_2) +# ifdef PIC + PUSH (%ebx) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax +# ifdef NO_TLS_DIRECT_SEG_REFS + addl %gs:0, %eax + movl (%eax), %eax +# else + movl %gs:(%eax), %eax +# endif +# else +# ifdef NO_TLS_DIRECT_SEG_REFS + movl %gs:0, %eax + movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax +# else + movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax +# endif +# endif +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) + jne __strncasecmp_nonascii + jmp L(ascii) +END (__strncasecmp_sse4_2) +#endif + + ENTRY (STRCMP) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movl LOCALE(%esp), %eax +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) + jne NONASCII + +# ifdef PIC + PUSH (%ebx) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx +# endif +L(ascii): + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +.Lbelowupper: + .quad 0x4040404040404040 + .quad 0x4040404040404040 +.Ltopupper: + .quad 0x5b5b5b5b5b5b5b5b + .quad 0x5b5b5b5b5b5b5b5b +.Ltouppermask: + .quad 0x2020202020202020 + .quad 0x2020202020202020 + .previous + +# ifdef PIC +# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx) +# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx) +# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx) +# else +# define UCLOW_reg .Lbelowupper +# define UCHIGH_reg .Ltopupper +# define LCQWORD_reg .Ltouppermask +# endif +#endif + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + PUSH (REM) +#endif +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + PUSH (%edi) #endif mov STR1(%esp), %edx mov STR2(%esp), %eax -#ifdef USE_AS_STRNCMP - movl CNT(%esp), %ebp - test %ebp, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + movl CNT(%esp), REM + test REM, REM je L(eq) #endif mov %dx, %cx @@ -72,10 +223,40 @@ ENTRY (STRCMP) and $0xfff, %ecx cmp $0xff0, %ecx ja L(first4bytes) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# define TOLOWER(reg1, reg2) \ + movdqa reg1, %xmm3; \ + movdqa UCHIGH_reg, %xmm4; \ + movdqa reg2, %xmm5; \ + movdqa UCHIGH_reg, %xmm6; \ + pcmpgtb UCLOW_reg, %xmm3; \ + pcmpgtb reg1, %xmm4; \ + pcmpgtb UCLOW_reg, %xmm5; \ + pcmpgtb reg2, %xmm6; \ + pand %xmm4, %xmm3; \ + pand %xmm6, %xmm5; \ + pand LCQWORD_reg, %xmm3; \ + pand LCQWORD_reg, %xmm5; \ + por %xmm3, reg1; \ + por %xmm5, reg2 + + movdqu (%eax), %xmm1 + TOLOWER (%xmm2, %xmm1) + movd %xmm2, %ecx + movd %xmm1, %edi + movdqa %xmm2, %xmm3 + movdqa %xmm1, %xmm4 + cmpl %edi, %ecx +#else +# define TOLOWER(reg1, reg) + movd %xmm2, %ecx cmp (%eax), %ecx +#endif jne L(less4bytes) +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L movdqu (%eax), %xmm1 +#endif pxor %xmm2, %xmm1 pxor %xmm0, %xmm0 ptest %xmm1, %xmm0 @@ -84,113 +265,210 @@ ENTRY (STRCMP) ptest %xmm2, %xmm0 jnc L(less16bytes) -#ifdef USE_AS_STRNCMP - sub $16, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $16, REM jbe L(eq) #endif add $16, %edx add $16, %eax L(first4bytes): movzbl (%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl (%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, (%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $1, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $1, REM je L(eq) #endif movzbl 1(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 1(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 1(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $2, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $2, REM je L(eq) #endif movzbl 2(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 2(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 2(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $3, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $3, REM je L(eq) #endif movzbl 3(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 3(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 3(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $4, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $4, REM je L(eq) #endif movzbl 4(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 4(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 4(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $5, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $5, REM je L(eq) #endif movzbl 5(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 5(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 5(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $6, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $6, REM je L(eq) #endif movzbl 6(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 6(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 6(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $7, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $7, REM je L(eq) #endif movzbl 7(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 7(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 7(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - sub $8, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $8, REM je L(eq) #endif add $8, %eax add $8, %edx - PUSH (%ebx) +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L PUSH (%edi) +#endif PUSH (%esi) -#ifdef USE_AS_STRNCMP +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cfi_remember_state #endif mov %edx, %edi mov %eax, %esi xorl %eax, %eax L(check_offset): - movl %edi, %ebx + movl %edi, %edx movl %esi, %ecx - andl $0xfff, %ebx + andl $0xfff, %edx andl $0xfff, %ecx - cmpl %ebx, %ecx - cmovl %ebx, %ecx + cmpl %edx, %ecx + cmovl %edx, %ecx lea -0xff0(%ecx), %edx sub %edx, %edi sub %edx, %esi @@ -199,11 +477,12 @@ L(check_offset): L(loop): movdqu (%esi,%edx), %xmm2 movdqu (%edi,%edx), %xmm1 + TOLOWER (%xmm2, %xmm1) pcmpistri $0x1a, %xmm2, %xmm1 jbe L(end) -#ifdef USE_AS_STRNCMP - sub $16, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $16, REM jbe L(more16byteseq) #endif @@ -211,13 +490,22 @@ L(loop): jle L(loop) L(crosspage): movzbl (%edi,%edx), %eax - movzbl (%esi,%edx), %ebx - subl %ebx, %eax + movzbl (%esi,%edx), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx +# endif +#endif + subl %ecx, %eax jne L(ret) - testl %ebx, %ebx + testl %ecx, %ecx je L(ret) -#ifdef USE_AS_STRNCMP - sub $1, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $1, REM jbe L(more16byteseq) #endif inc %edx @@ -230,30 +518,44 @@ L(crosspage): .p2align 4 L(end): jnc L(ret) -#ifdef USE_AS_STRNCMP - sub %ecx, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub %ecx, REM jbe L(more16byteseq) #endif - lea (%ecx,%edx), %ebx - movzbl (%edi,%ebx), %eax - movzbl (%esi,%ebx), %ecx + lea (%ecx,%edx), %ecx + movzbl (%edi,%ecx), %eax + movzbl (%esi,%ecx), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx +# endif +#endif subl %ecx, %eax L(ret): POP (%esi) POP (%edi) +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + POP (REM) +#endif +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC POP (%ebx) -#ifdef USE_AS_STRNCMP - POP (%ebp) +# endif #endif ret .p2align 4 -#ifdef USE_AS_STRNCMP +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cfi_restore_state L(more16byteseq): POP (%esi) +# ifdef USE_AS_STRNCMP POP (%edi) - POP (%ebx) +# endif #endif L(eq): xorl %eax, %eax @@ -269,27 +571,45 @@ L(neq_bigger): L(less16bytes): add $0xfefefeff, %ecx jnc L(less4bytes) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movd %xmm3, %edi + xor %edi, %ecx +#else xor (%edx), %ecx +#endif or $0xfefefeff, %ecx add $1, %ecx jnz L(less4bytes) -#ifdef USE_AS_STRNCMP - cmp $4, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $4, REM jbe L(eq) #endif +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + psrldq $4, %xmm3 + psrldq $4, %xmm4 + movd %xmm3, %ecx + movd %xmm4, %edi + cmp %edi, %ecx + mov %ecx, %edi +#else mov 4(%edx), %ecx cmp 4(%eax), %ecx +#endif jne L(more4bytes) add $0xfefefeff, %ecx jnc L(more4bytes) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + xor %edi, %ecx +#else xor 4(%edx), %ecx +#endif or $0xfefefeff, %ecx add $1, %ecx jnz L(more4bytes) -#ifdef USE_AS_STRNCMP - sub $8, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $8, REM jbe L(eq) #endif @@ -298,80 +618,176 @@ L(less16bytes): L(less4bytes): movzbl (%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl (%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, (%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $1, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $1, REM je L(eq) #endif movzbl 1(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 1(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 1(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $2, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $2, REM je L(eq) #endif movzbl 2(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 2(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 2(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $3, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $3, REM je L(eq) #endif movzbl 3(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 3(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 3(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) L(more4bytes): -#ifdef USE_AS_STRNCMP - cmp $4, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $4, REM je L(eq) #endif movzbl 4(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 4(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 4(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $5, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $5, REM je L(eq) #endif movzbl 5(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 5(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 5(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $6, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $6, REM je L(eq) #endif movzbl 6(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 6(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 6(%edx) +#endif jne L(neq) cmpl $0, %ecx je L(eq) -#ifdef USE_AS_STRNCMP - cmp $7, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $7, REM je L(eq) #endif movzbl 7(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 7(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else cmpb %cl, 7(%edx) +#endif jne L(neq) jmp L(eq) diff --git a/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S index 607b69b6f..cbba46550 100644 --- a/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S +++ b/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S @@ -1,5 +1,5 @@ /* strcmp with SSSE3 - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -34,43 +34,201 @@ #define PUSH(REG) pushl REG; CFI_PUSH (REG) #define POP(REG) popl REG; CFI_POP (REG) -#ifndef USE_AS_STRNCMP +#ifdef USE_AS_STRNCMP # ifndef STRCMP -# define STRCMP __strcmp_ssse3 +# define STRCMP __strncmp_ssse3 # endif -# define STR1 4 +# define STR1 8 # define STR2 STR1+4 -# define RETURN ret; .p2align 4 +# define CNT STR2+4 +# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) +# define UPDATE_STRNCMP_COUNTER \ + /* calculate left number to compare */ \ + mov $16, %esi; \ + sub %ecx, %esi; \ + cmp %esi, REM; \ + jbe L(more8byteseq); \ + sub %esi, REM +# define FLAGS %ebx +# define REM %ebp +#elif defined USE_AS_STRCASECMP_L +# include "locale-defines.h" +# ifndef STRCMP +# define STRCMP __strcasecmp_l_ssse3 +# endif +# ifdef PIC +# define STR1 8 +# else +# define STR1 4 +# endif +# define STR2 STR1+4 +# define LOCALE 12 /* Loaded before the adjustement. */ +# ifdef PIC +# define RETURN POP (%ebx); ret; .p2align 4; CFI_PUSH (%ebx) +# else +# define RETURN ret; .p2align 4 +# endif # define UPDATE_STRNCMP_COUNTER -#else +# define FLAGS (%esp) +# define NONASCII __strcasecmp_nonascii +#elif defined USE_AS_STRNCASECMP_L +# include "locale-defines.h" # ifndef STRCMP -# define STRCMP __strncmp_ssse3 +# define STRCMP __strncasecmp_l_ssse3 +# endif +# ifdef PIC +# define STR1 12 +# else +# define STR1 8 # endif -# define STR1 8 # define STR2 STR1+4 # define CNT STR2+4 -# define RETURN POP (%ebp); ret; .p2align 4; CFI_PUSH (%ebp) +# define LOCALE 16 /* Loaded before the adjustement. */ +# ifdef PIC +# define RETURN POP (REM); POP (%ebx); ret; \ + .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (REM) +# else +# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) +# endif # define UPDATE_STRNCMP_COUNTER \ /* calculate left number to compare */ \ mov $16, %esi; \ sub %ecx, %esi; \ - cmp %esi, %ebp; \ + cmp %esi, REM; \ jbe L(more8byteseq); \ - sub %esi, %ebp + sub %esi, REM +# define FLAGS (%esp) +# define REM %ebp +# define NONASCII __strncasecmp_nonascii +#else +# ifndef STRCMP +# define STRCMP __strcmp_ssse3 +# endif +# define STR1 4 +# define STR2 STR1+4 +# define RETURN ret; .p2align 4 +# define UPDATE_STRNCMP_COUNTER +# define FLAGS %ebx #endif .section .text.ssse3,"ax",@progbits + +#ifdef USE_AS_STRCASECMP_L +ENTRY (__strcasecmp_ssse3) +# ifdef PIC + PUSH (%ebx) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax +# ifdef NO_TLS_DIRECT_SEG_REFS + addl %gs:0, %eax + movl (%eax), %eax +# else + movl %gs:(%eax), %eax +# endif +# else +# ifdef NO_TLS_DIRECT_SEG_REFS + movl %gs:0, %eax + movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax +# else + movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax +# endif +# endif +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) + jne __strcasecmp_nonascii + jmp L(ascii) +END (__strcasecmp_ssse3) +#endif + +#ifdef USE_AS_STRNCASECMP_L +ENTRY (__strncasecmp_ssse3) +# ifdef PIC + PUSH (%ebx) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax +# ifdef NO_TLS_DIRECT_SEG_REFS + addl %gs:0, %eax + movl (%eax), %eax +# else + movl %gs:(%eax), %eax +# endif +# else +# ifdef NO_TLS_DIRECT_SEG_REFS + movl %gs:0, %eax + movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax +# else + movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax +# endif +# endif +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) + jne __strncasecmp_nonascii + jmp L(ascii) +END (__strncasecmp_ssse3) +#endif + ENTRY (STRCMP) -#ifdef USE_AS_STRNCMP - PUSH (%ebp) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movl LOCALE(%esp), %eax +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) + jne NONASCII + +# ifdef PIC + PUSH (%ebx) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx +# endif +L(ascii): + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +.Lbelowupper: + .quad 0x4040404040404040 + .quad 0x4040404040404040 +.Ltopupper: + .quad 0x5b5b5b5b5b5b5b5b + .quad 0x5b5b5b5b5b5b5b5b +.Ltouppermask: + .quad 0x2020202020202020 + .quad 0x2020202020202020 + .previous + +# ifdef PIC +# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx) +# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx) +# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx) +# else +# define UCLOW_reg .Lbelowupper +# define UCHIGH_reg .Ltopupper +# define LCQWORD_reg .Ltouppermask +# endif #endif + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + PUSH (REM) +#endif + movl STR1(%esp), %edx movl STR2(%esp), %eax -#ifdef USE_AS_STRNCMP - movl CNT(%esp), %ebp - cmp $16, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + movl CNT(%esp), REM + cmp $16, REM jb L(less16bytes_sncmp) -#else +#elif !defined USE_AS_STRCASECMP_L movzbl (%eax), %ecx cmpb %cl, (%edx) jne L(neq) @@ -135,15 +293,35 @@ ENTRY (STRCMP) movlpd (%edx), %xmm2 movhpd 8(%eax), %xmm1 movhpd 8(%edx), %xmm2 +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# define TOLOWER(reg1, reg2) \ + movdqa reg1, %xmm5; \ + movdqa reg2, %xmm7; \ + movdqa UCHIGH_reg, %xmm6; \ + pcmpgtb UCLOW_reg, %xmm5; \ + pcmpgtb UCLOW_reg, %xmm7; \ + pcmpgtb reg1, %xmm6; \ + pand %xmm6, %xmm5; \ + movdqa UCHIGH_reg, %xmm6; \ + pcmpgtb reg2, %xmm6; \ + pand %xmm6, %xmm7; \ + pand LCQWORD_reg, %xmm5; \ + por %xmm5, reg1; \ + pand LCQWORD_reg, %xmm7; \ + por %xmm7, reg2 + TOLOWER (%xmm1, %xmm2) +#else +# define TOLOWER(reg1, reg2) +#endif pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 psubb %xmm0, %xmm1 pmovmskb %xmm1, %ecx sub $0xffff, %ecx jnz L(less16bytes) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(eq) #endif add $16, %eax @@ -151,10 +329,16 @@ ENTRY (STRCMP) L(crosspage): - PUSH (%ebx) +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + PUSH (FLAGS) +#endif PUSH (%edi) PUSH (%esi) -#ifdef USE_AS_STRNCMP +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + pushl $0 + cfi_adjust_cfa_offset (4) +#endif +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cfi_remember_state #endif @@ -164,11 +348,13 @@ L(crosspage): and $0xf, %edi xor %ecx, %eax xor %edi, %edx - xor %ebx, %ebx +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + xor FLAGS, FLAGS +#endif cmp %edi, %ecx je L(ashr_0) ja L(bigger) - or $0x20, %ebx + orl $0x20, FLAGS xchg %edx, %eax xchg %ecx, %edi L(bigger): @@ -218,7 +404,13 @@ L(ashr_0): movdqa (%eax), %xmm1 pxor %xmm0, %xmm0 pcmpeqb %xmm1, %xmm0 +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movdqa (%edx), %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm2, %xmm1 +#else pcmpeqb (%edx), %xmm1 +#endif psubb %xmm0, %xmm1 pmovmskb %xmm1, %edi shr %cl, %esi @@ -227,23 +419,29 @@ L(ashr_0): mov %ecx, %edi jne L(less32bytes) UPDATE_STRNCMP_COUNTER - mov $0x10, %ebx + movl $0x10, FLAGS mov $0x10, %ecx pxor %xmm0, %xmm0 .p2align 4 L(loop_ashr_0): movdqa (%eax, %ecx), %xmm1 +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L movdqa (%edx, %ecx), %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 +#else + pcmpeqb %xmm1, %xmm0 + pcmpeqb (%edx, %ecx), %xmm1 +#endif psubb %xmm0, %xmm1 pmovmskb %xmm1, %esi sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -262,6 +460,7 @@ L(ashr_1): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $15, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -276,7 +475,7 @@ L(ashr_1): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $1, %ebx + orl $1, FLAGS lea 1(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -292,6 +491,7 @@ L(gobble_ashr_1): movdqa %xmm2, %xmm4 palignr $1, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -299,9 +499,9 @@ L(gobble_ashr_1): pmovmskb %xmm1, %esi sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif @@ -316,6 +516,7 @@ L(gobble_ashr_1): movdqa %xmm2, %xmm4 palignr $1, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -324,9 +525,9 @@ L(gobble_ashr_1): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -340,8 +541,8 @@ L(nibble_ashr_1): test $0xfffe, %esi jnz L(ashr_1_exittail) -#ifdef USE_AS_STRNCMP - cmp $15, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $15, REM jbe L(ashr_1_exittail) #endif pxor %xmm0, %xmm0 @@ -368,6 +569,7 @@ L(ashr_2): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $14, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -382,7 +584,7 @@ L(ashr_2): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $2, %ebx + orl $2, FLAGS lea 2(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -398,6 +600,7 @@ L(gobble_ashr_2): movdqa %xmm2, %xmm4 palignr $2, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -406,9 +609,9 @@ L(gobble_ashr_2): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -422,6 +625,7 @@ L(gobble_ashr_2): movdqa %xmm2, %xmm4 palignr $2, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -430,9 +634,9 @@ L(gobble_ashr_2): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -446,8 +650,8 @@ L(nibble_ashr_2): test $0xfffc, %esi jnz L(ashr_2_exittail) -#ifdef USE_AS_STRNCMP - cmp $14, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $14, REM jbe L(ashr_2_exittail) #endif @@ -475,6 +679,7 @@ L(ashr_3): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $13, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -489,7 +694,7 @@ L(ashr_3): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $3, %ebx + orl $3, FLAGS lea 3(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -505,6 +710,7 @@ L(gobble_ashr_3): movdqa %xmm2, %xmm4 palignr $3, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -513,9 +719,9 @@ L(gobble_ashr_3): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -529,6 +735,7 @@ L(gobble_ashr_3): movdqa %xmm2, %xmm4 palignr $3, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -537,9 +744,9 @@ L(gobble_ashr_3): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -553,8 +760,8 @@ L(nibble_ashr_3): test $0xfff8, %esi jnz L(ashr_3_exittail) -#ifdef USE_AS_STRNCMP - cmp $13, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $13, REM jbe L(ashr_3_exittail) #endif pxor %xmm0, %xmm0 @@ -581,6 +788,7 @@ L(ashr_4): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $12, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -595,7 +803,7 @@ L(ashr_4): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $4, %ebx + orl $4, FLAGS lea 4(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -611,6 +819,7 @@ L(gobble_ashr_4): movdqa %xmm2, %xmm4 palignr $4, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -619,9 +828,9 @@ L(gobble_ashr_4): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif @@ -636,6 +845,7 @@ L(gobble_ashr_4): movdqa %xmm2, %xmm4 palignr $4, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -644,9 +854,9 @@ L(gobble_ashr_4): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif @@ -661,8 +871,8 @@ L(nibble_ashr_4): test $0xfff0, %esi jnz L(ashr_4_exittail) -#ifdef USE_AS_STRNCMP - cmp $12, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $12, REM jbe L(ashr_4_exittail) #endif @@ -690,6 +900,7 @@ L(ashr_5): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $11, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -704,7 +915,7 @@ L(ashr_5): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $5, %ebx + orl $5, FLAGS lea 5(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -720,6 +931,7 @@ L(gobble_ashr_5): movdqa %xmm2, %xmm4 palignr $5, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -728,9 +940,9 @@ L(gobble_ashr_5): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -744,6 +956,7 @@ L(gobble_ashr_5): movdqa %xmm2, %xmm4 palignr $5, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -752,9 +965,9 @@ L(gobble_ashr_5): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -768,8 +981,8 @@ L(nibble_ashr_5): test $0xffe0, %esi jnz L(ashr_5_exittail) -#ifdef USE_AS_STRNCMP - cmp $11, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $11, REM jbe L(ashr_5_exittail) #endif pxor %xmm0, %xmm0 @@ -797,6 +1010,7 @@ L(ashr_6): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $10, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -811,7 +1025,7 @@ L(ashr_6): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $6, %ebx + orl $6, FLAGS lea 6(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -827,6 +1041,7 @@ L(gobble_ashr_6): movdqa %xmm2, %xmm4 palignr $6, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -835,9 +1050,9 @@ L(gobble_ashr_6): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif @@ -852,6 +1067,7 @@ L(gobble_ashr_6): movdqa %xmm2, %xmm4 palignr $6, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -859,9 +1075,9 @@ L(gobble_ashr_6): pmovmskb %xmm1, %esi sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif @@ -876,8 +1092,8 @@ L(nibble_ashr_6): test $0xffc0, %esi jnz L(ashr_6_exittail) -#ifdef USE_AS_STRNCMP - cmp $10, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $10, REM jbe L(ashr_6_exittail) #endif pxor %xmm0, %xmm0 @@ -905,6 +1121,7 @@ L(ashr_7): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $9, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -919,7 +1136,7 @@ L(ashr_7): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $7, %ebx + orl $7, FLAGS lea 8(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -935,6 +1152,7 @@ L(gobble_ashr_7): movdqa %xmm2, %xmm4 palignr $7, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -943,9 +1161,9 @@ L(gobble_ashr_7): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif @@ -960,6 +1178,7 @@ L(gobble_ashr_7): movdqa %xmm2, %xmm4 palignr $7, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -968,9 +1187,9 @@ L(gobble_ashr_7): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif @@ -985,8 +1204,8 @@ L(nibble_ashr_7): test $0xff80, %esi jnz L(ashr_7_exittail) -#ifdef USE_AS_STRNCMP - cmp $9, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $9, REM jbe L(ashr_7_exittail) #endif pxor %xmm0, %xmm0 @@ -1014,6 +1233,7 @@ L(ashr_8): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $8, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -1028,7 +1248,7 @@ L(ashr_8): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $8, %ebx + orl $8, FLAGS lea 8(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -1044,6 +1264,7 @@ L(gobble_ashr_8): movdqa %xmm2, %xmm4 palignr $8, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1052,9 +1273,9 @@ L(gobble_ashr_8): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1068,6 +1289,7 @@ L(gobble_ashr_8): movdqa %xmm2, %xmm4 palignr $8, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1076,9 +1298,9 @@ L(gobble_ashr_8): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1092,8 +1314,8 @@ L(nibble_ashr_8): test $0xff00, %esi jnz L(ashr_8_exittail) -#ifdef USE_AS_STRNCMP - cmp $8, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $8, REM jbe L(ashr_8_exittail) #endif pxor %xmm0, %xmm0 @@ -1121,6 +1343,7 @@ L(ashr_9): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $7, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -1135,7 +1358,7 @@ L(ashr_9): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $9, %ebx + orl $9, FLAGS lea 9(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -1151,6 +1374,7 @@ L(gobble_ashr_9): movdqa %xmm2, %xmm4 palignr $9, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1159,9 +1383,9 @@ L(gobble_ashr_9): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1175,6 +1399,7 @@ L(gobble_ashr_9): movdqa %xmm2, %xmm4 palignr $9, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1183,9 +1408,9 @@ L(gobble_ashr_9): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1199,8 +1424,8 @@ L(nibble_ashr_9): test $0xfe00, %esi jnz L(ashr_9_exittail) -#ifdef USE_AS_STRNCMP - cmp $7, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $7, REM jbe L(ashr_9_exittail) #endif pxor %xmm0, %xmm0 @@ -1227,6 +1452,7 @@ L(ashr_10): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $6, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -1241,7 +1467,7 @@ L(ashr_10): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $10, %ebx + orl $10, FLAGS lea 10(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -1257,6 +1483,7 @@ L(gobble_ashr_10): movdqa %xmm2, %xmm4 palignr $10, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1265,9 +1492,9 @@ L(gobble_ashr_10): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1281,6 +1508,7 @@ L(gobble_ashr_10): movdqa %xmm2, %xmm4 palignr $10, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1289,9 +1517,9 @@ L(gobble_ashr_10): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1305,8 +1533,8 @@ L(nibble_ashr_10): test $0xfc00, %esi jnz L(ashr_10_exittail) -#ifdef USE_AS_STRNCMP - cmp $6, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $6, REM jbe L(ashr_10_exittail) #endif pxor %xmm0, %xmm0 @@ -1333,6 +1561,7 @@ L(ashr_11): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $5, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -1347,7 +1576,7 @@ L(ashr_11): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $11, %ebx + orl $11, FLAGS lea 11(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -1363,6 +1592,7 @@ L(gobble_ashr_11): movdqa %xmm2, %xmm4 palignr $11, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1371,9 +1601,9 @@ L(gobble_ashr_11): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1387,6 +1617,7 @@ L(gobble_ashr_11): movdqa %xmm2, %xmm4 palignr $11, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1395,9 +1626,9 @@ L(gobble_ashr_11): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1411,8 +1642,8 @@ L(nibble_ashr_11): test $0xf800, %esi jnz L(ashr_11_exittail) -#ifdef USE_AS_STRNCMP - cmp $5, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $5, REM jbe L(ashr_11_exittail) #endif pxor %xmm0, %xmm0 @@ -1439,6 +1670,7 @@ L(ashr_12): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $4, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -1453,7 +1685,7 @@ L(ashr_12): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $12, %ebx + orl $12, FLAGS lea 12(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -1469,6 +1701,7 @@ L(gobble_ashr_12): movdqa %xmm2, %xmm4 palignr $12, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1477,9 +1710,9 @@ L(gobble_ashr_12): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif @@ -1494,6 +1727,7 @@ L(gobble_ashr_12): movdqa %xmm2, %xmm4 palignr $12, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1502,9 +1736,9 @@ L(gobble_ashr_12): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1518,8 +1752,8 @@ L(nibble_ashr_12): test $0xf000, %esi jnz L(ashr_12_exittail) -#ifdef USE_AS_STRNCMP - cmp $4, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $4, REM jbe L(ashr_12_exittail) #endif pxor %xmm0, %xmm0 @@ -1546,6 +1780,7 @@ L(ashr_13): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -1560,7 +1795,7 @@ L(ashr_13): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $13, %ebx + orl $13, FLAGS lea 13(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -1576,6 +1811,7 @@ L(gobble_ashr_13): movdqa %xmm2, %xmm4 palignr $13, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1584,9 +1820,9 @@ L(gobble_ashr_13): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1600,6 +1836,7 @@ L(gobble_ashr_13): movdqa %xmm2, %xmm4 palignr $13, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1608,9 +1845,9 @@ L(gobble_ashr_13): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1624,8 +1861,8 @@ L(nibble_ashr_13): test $0xe000, %esi jnz L(ashr_13_exittail) -#ifdef USE_AS_STRNCMP - cmp $3, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $3, REM jbe L(ashr_13_exittail) #endif pxor %xmm0, %xmm0 @@ -1652,6 +1889,7 @@ L(ashr_14): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $2, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -1666,7 +1904,7 @@ L(ashr_14): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $14, %ebx + orl $14, FLAGS lea 14(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -1682,6 +1920,7 @@ L(gobble_ashr_14): movdqa %xmm2, %xmm4 palignr $14, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1690,9 +1929,9 @@ L(gobble_ashr_14): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1706,6 +1945,7 @@ L(gobble_ashr_14): movdqa %xmm2, %xmm4 palignr $14, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1714,9 +1954,9 @@ L(gobble_ashr_14): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1730,8 +1970,8 @@ L(nibble_ashr_14): test $0xc000, %esi jnz L(ashr_14_exittail) -#ifdef USE_AS_STRNCMP - cmp $2, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $2, REM jbe L(ashr_14_exittail) #endif pxor %xmm0, %xmm0 @@ -1759,6 +1999,7 @@ L(ashr_15): movdqa (%eax), %xmm1 pcmpeqb %xmm1, %xmm0 pslldq $1, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm2 psubb %xmm0, %xmm2 pmovmskb %xmm2, %edi @@ -1773,7 +2014,7 @@ L(ashr_15): movdqa (%edx), %xmm3 pxor %xmm0, %xmm0 mov $16, %ecx - or $15, %ebx + orl $15, FLAGS lea 15(%edx), %edi and $0xfff, %edi sub $0x1000, %edi @@ -1789,6 +2030,7 @@ L(gobble_ashr_15): movdqa %xmm2, %xmm4 palignr $15, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1797,9 +2039,9 @@ L(gobble_ashr_15): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1813,6 +2055,7 @@ L(gobble_ashr_15): movdqa %xmm2, %xmm4 palignr $15, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm1, %xmm0 pcmpeqb %xmm2, %xmm1 @@ -1821,9 +2064,9 @@ L(gobble_ashr_15): sub $0xffff, %esi jnz L(exit) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM jbe L(more8byteseq) #endif add $16, %ecx @@ -1837,8 +2080,8 @@ L(nibble_ashr_15): test $0x8000, %esi jnz L(ashr_15_exittail) -#ifdef USE_AS_STRNCMP - cmp $1, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $1, REM jbe L(ashr_15_exittail) #endif pxor %xmm0, %xmm0 @@ -1854,27 +2097,34 @@ L(ashr_15_exittail): .p2align 4 L(aftertail): + TOLOWER (%xmm1, %xmm3) pcmpeqb %xmm3, %xmm1 psubb %xmm0, %xmm1 pmovmskb %xmm1, %esi not %esi L(exit): - mov %ebx, %edi + mov FLAGS, %edi and $0x1f, %edi lea -16(%edi, %ecx), %edi L(less32bytes): add %edi, %edx add %ecx, %eax - test $0x20, %ebx + testl $0x20, FLAGS jz L(ret2) xchg %eax, %edx .p2align 4 L(ret2): mov %esi, %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + addl $4, %esp + cfi_adjust_cfa_offset (-4) +#endif POP (%esi) POP (%edi) - POP (%ebx) +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + POP (FLAGS) +#endif L(less16bytes): test %cl, %cl jz L(2next_8_bytes) @@ -1899,100 +2149,179 @@ L(less16bytes): test $0x40, %cl jnz L(Byte6) -#ifdef USE_AS_STRNCMP - cmp $7, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $7, REM jbe L(eq) #endif movzx 7(%eax), %ecx movzx 7(%edx), %eax +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif sub %ecx, %eax RETURN L(Byte0): -#ifdef USE_AS_STRNCMP - cmp $0, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $0, REM jbe L(eq) #endif movzx (%eax), %ecx movzx (%edx), %eax +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + sub %ecx, %eax RETURN L(Byte1): -#ifdef USE_AS_STRNCMP - cmp $1, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $1, REM jbe L(eq) #endif movzx 1(%eax), %ecx movzx 1(%edx), %eax +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + sub %ecx, %eax RETURN L(Byte2): -#ifdef USE_AS_STRNCMP - cmp $2, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $2, REM jbe L(eq) #endif movzx 2(%eax), %ecx movzx 2(%edx), %eax +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + sub %ecx, %eax RETURN L(Byte3): -#ifdef USE_AS_STRNCMP - cmp $3, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $3, REM jbe L(eq) #endif movzx 3(%eax), %ecx movzx 3(%edx), %eax +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + sub %ecx, %eax RETURN L(Byte4): -#ifdef USE_AS_STRNCMP - cmp $4, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $4, REM jbe L(eq) #endif movzx 4(%eax), %ecx movzx 4(%edx), %eax +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + sub %ecx, %eax RETURN L(Byte5): -#ifdef USE_AS_STRNCMP - cmp $5, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $5, REM jbe L(eq) #endif movzx 5(%eax), %ecx movzx 5(%edx), %eax +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + sub %ecx, %eax RETURN L(Byte6): -#ifdef USE_AS_STRNCMP - cmp $6, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $6, REM jbe L(eq) #endif movzx 6(%eax), %ecx movzx 6(%edx), %eax +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + sub %ecx, %eax RETURN L(2next_8_bytes): add $8, %eax add $8, %edx -#ifdef USE_AS_STRNCMP - cmp $8, %ebp - lea -8(%ebp), %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $8, REM + lea -8(REM), REM jbe L(eq) #endif @@ -2017,196 +2346,455 @@ L(2next_8_bytes): test $0x40, %ch jnz L(Byte6) -#ifdef USE_AS_STRNCMP - cmp $7, %ebp +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $7, REM jbe L(eq) #endif movzx 7(%eax), %ecx movzx 7(%edx), %eax +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + sub %ecx, %eax RETURN +#ifdef USE_AS_STRNCMP +L(neq_sncmp): +#endif L(neq): mov $1, %eax ja L(neq_bigger) neg %eax L(neq_bigger): -#ifdef USE_AS_STRNCMP - POP (%ebp) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + addl $4, %esp + cfi_adjust_cfa_offset (-4) +#endif +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + POP (REM) +#endif +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + POP (%ebx) +# endif #endif ret -#ifdef USE_AS_STRNCMP +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L .p2align 4 cfi_restore_state L(more8byteseq): + +# ifdef USE_AS_STRNCASECMP_L + addl $4, %esp + cfi_adjust_cfa_offset (-4) +# endif POP (%esi) POP (%edi) - POP (%ebx) +# ifdef USE_AS_STRNCMP + POP (FLAGS) +# endif #endif +#ifdef USE_AS_STRNCMP +L(eq_sncmp): +#endif L(eq): -#ifdef USE_AS_STRNCMP - POP (%ebp) +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + POP (REM) +#endif +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + POP (%ebx) +# endif #endif xorl %eax, %eax ret -#ifdef USE_AS_STRNCMP +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L .p2align 4 - CFI_PUSH (%ebp) +# if defined USE_AS_STRNCASECMP_L && defined PIC + CFI_PUSH (%ebx) +# endif + CFI_PUSH (REM) L(less16bytes_sncmp): - test %ebp, %ebp - jz L(eq) +# ifdef USE_AS_STRNCASECMP_L + PUSH (%esi) +# endif + test REM, REM + jz L(eq_sncmp) movzbl (%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl (%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, (%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl je L(eq) - cmp $1, %ebp - je L(eq) + cmp $1, REM + je L(eq_sncmp) movzbl 1(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 1(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 1(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $2, %ebp - je L(eq) + cmp $2, REM + je L(eq_sncmp) movzbl 2(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 2(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 2(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $3, %ebp - je L(eq) + cmp $3, REM + je L(eq_sncmp) movzbl 3(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 3(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 3(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $4, %ebp - je L(eq) + cmp $4, REM + je L(eq_sncmp) movzbl 4(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 4(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 4(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $5, %ebp - je L(eq) + cmp $5, REM + je L(eq_sncmp) movzbl 5(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 5(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 5(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $6, %ebp - je L(eq) + cmp $6, REM + je L(eq_sncmp) movzbl 6(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 6(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 6(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $7, %ebp - je L(eq) + cmp $7, REM + je L(eq_sncmp) movzbl 7(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 7(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 7(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $8, %ebp - je L(eq) + cmp $8, REM + je L(eq_sncmp) movzbl 8(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 8(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 8(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $9, %ebp - je L(eq) + cmp $9, REM + je L(eq_sncmp) movzbl 9(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 9(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 9(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $10, %ebp - je L(eq) + cmp $10, REM + je L(eq_sncmp) movzbl 10(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 10(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 10(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $11, %ebp - je L(eq) + cmp $11, REM + je L(eq_sncmp) movzbl 11(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 11(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 11(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $12, %ebp - je L(eq) + cmp $12, REM + je L(eq_sncmp) movzbl 12(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 12(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 12(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $13, %ebp - je L(eq) + cmp $13, REM + je L(eq_sncmp) movzbl 13(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 13(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 13(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $14, %ebp - je L(eq) + cmp $14, REM + je L(eq_sncmp) movzbl 14(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 14(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 14(%edx) - jne L(neq) +# endif + jne L(neq_sncmp) test %cl, %cl - je L(eq) + je L(eq_sncmp) - cmp $15, %ebp - je L(eq) + cmp $15, REM + je L(eq_sncmp) movzbl 15(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 15(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else cmpb %cl, 15(%edx) - jne L(neq) - test %cl, %cl - je L(eq) +# endif + jne L(neq_sncmp) - POP (%ebp) +# ifdef USE_AS_STRNCASECMP_L +L(eq_sncmp): + POP (%esi) +# endif + POP (REM) +# if defined USE_AS_STRNCASECMP_L && defined PIC + POP (%ebx) +# endif xor %eax, %eax ret + +# ifdef USE_AS_STRNCASECMP_L + .p2align 4 +# ifdef PIC + CFI_PUSH (%ebx) +# endif + CFI_PUSH (REM) + CFI_PUSH (%esi) +L(neq_sncmp): + mov $1, %eax + mov $-1, %edx + cmovna %edx, %eax + POP (%esi) + POP (REM) +# ifdef PIC + POP (%ebx) +# endif + ret +# endif #endif END (STRCMP) diff --git a/libc/sysdeps/i386/i686/multiarch/strcmp.S b/libc/sysdeps/i386/i686/multiarch/strcmp.S index 7136d47e8..28e2d6154 100644 --- a/libc/sysdeps/i386/i686/multiarch/strcmp.S +++ b/libc/sysdeps/i386/i686/multiarch/strcmp.S @@ -1,5 +1,5 @@ /* Multiple versions of strcmp - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010, 2011 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -21,18 +21,30 @@ #include <sysdep.h> #include <init-arch.h> -#ifndef USE_AS_STRNCMP -# define STRCMP strcmp -# define __GI_STRCMP __GI_strcmp -# define __STRCMP_IA32 __strcmp_ia32 -# define __STRCMP_SSSE3 __strcmp_ssse3 -# define __STRCMP_SSE4_2 __strcmp_sse4_2 -#else +#ifdef USE_AS_STRNCMP # define STRCMP strncmp # define __GI_STRCMP __GI_strncmp # define __STRCMP_IA32 __strncmp_ia32 # define __STRCMP_SSSE3 __strncmp_ssse3 # define __STRCMP_SSE4_2 __strncmp_sse4_2 +#elif defined USE_AS_STRCASECMP_L +# define STRCMP __strcasecmp_l +# define __GI_STRCMP __GI_strcasecmp_l +# define __STRCMP_IA32 __strcasecmp_l_ia32 +# define __STRCMP_SSSE3 __strcasecmp_l_ssse3 +# define __STRCMP_SSE4_2 __strcasecmp_l_sse4_2 +#elif defined USE_AS_STRNCASECMP_L +# define STRCMP __strncasecmp_l +# define __GI_STRCMP __GI_strncasecmp_l +# define __STRCMP_IA32 __strncasecmp_l_ia32 +# define __STRCMP_SSSE3 __strncasecmp_l_ssse3 +# define __STRCMP_SSE4_2 __strncasecmp_l_sse4_2 +#else +# define STRCMP strcmp +# define __GI_STRCMP __GI_strcmp +# define __STRCMP_IA32 __strcmp_ia32 +# define __STRCMP_SSSE3 __strcmp_ssse3 +# define __STRCMP_SSE4_2 __strcmp_sse4_2 #endif /* Define multiple versions only for the definition in libc. Don't diff --git a/libc/sysdeps/i386/i686/multiarch/strncase-c.c b/libc/sysdeps/i386/i686/multiarch/strncase-c.c new file mode 100644 index 000000000..76581eb62 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strncase-c.c @@ -0,0 +1,8 @@ +#include <string.h> + +extern __typeof (strncasecmp) __strncasecmp_nonascii; + +#define __strncasecmp __strncasecmp_nonascii +#include <string/strncase.c> + +strong_alias (__strncasecmp_nonascii, __strncasecmp_ia32) diff --git a/libc/sysdeps/i386/i686/multiarch/strncase.S b/libc/sysdeps/i386/i686/multiarch/strncase.S new file mode 100644 index 000000000..d20532f99 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strncase.S @@ -0,0 +1,68 @@ +/* Entry point for multi-version x86 strncasecmp. + Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <init-arch.h> + +#ifdef SHARED + .text +ENTRY(__strncasecmp) + .type __strncasecmp, @gnu_indirect_function + pushl %ebx + cfi_adjust_cfa_offset (4) + cfi_rel_offset (ebx, 0) + call __i686.get_pc_thunk.bx + addl $_GLOBAL_OFFSET_TABLE_, %ebx + cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) + jne 1f + call __init_cpu_features +1: leal __strncasecmp_ia32@GOTOFF(%ebx), %eax + testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __strncasecmp_ssse3@GOTOFF(%ebx), %eax + testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __strncasecmp_sse4_2@GOTOFF(%ebx), %eax +2: popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + ret +END(__strncasecmp) +#else + .text +ENTRY(__strncasecmp) + .type __strncasecmp, @gnu_indirect_function + cmpl $0, KIND_OFFSET+__cpu_features + jne 1f + call __init_cpu_features +1: leal __strncasecmp_ia32, %eax + testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features + jz 2f + leal __strncasecmp_ssse3, %eax +#if 0 + // XXX Temporarily + testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features + jz 2f + leal __strncasecmp_sse4_2, %eax +#endif +2: ret +END(__strncasecmp) +#endif + +weak_alias (__strncasecmp, strncasecmp) diff --git a/libc/sysdeps/i386/i686/multiarch/strncase_l-c.c b/libc/sysdeps/i386/i686/multiarch/strncase_l-c.c new file mode 100644 index 000000000..0c68b8d1c --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strncase_l-c.c @@ -0,0 +1,11 @@ +#include <string.h> + +extern __typeof (strncasecmp_l) __strncasecmp_l_nonascii; + +#define __strncasecmp_l __strncasecmp_l_nonascii +#define USE_IN_EXTENDED_LOCALE_MODEL 1 +#include <string/strncase.c> + +/* The needs of strcasecmp in libc are minimal, no need to go through + the IFUNC. */ +strong_alias (__strncasecmp_l_nonascii, __GI___strncasecmp_l) diff --git a/libc/sysdeps/i386/i686/multiarch/strncase_l-sse4.S b/libc/sysdeps/i386/i686/multiarch/strncase_l-sse4.S new file mode 100644 index 000000000..557210832 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strncase_l-sse4.S @@ -0,0 +1,2 @@ +#define USE_AS_STRNCASECMP_L 1 +#include "strcmp-sse4.S" diff --git a/libc/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S b/libc/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S new file mode 100644 index 000000000..d438a1ae3 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S @@ -0,0 +1,2 @@ +#define USE_AS_STRNCASECMP_L 1 +#include "strcmp-ssse3.S" diff --git a/libc/sysdeps/i386/i686/multiarch/strncase_l.S b/libc/sysdeps/i386/i686/multiarch/strncase_l.S new file mode 100644 index 000000000..a808c8cd7 --- /dev/null +++ b/libc/sysdeps/i386/i686/multiarch/strncase_l.S @@ -0,0 +1,5 @@ +#define STRCMP __strncasecmp_l +#define USE_AS_STRNCASECMP_L +#include "strcmp.S" + +weak_alias (__strncasecmp_l, strncasecmp_l) diff --git a/libc/sysdeps/i386/i686/multiarch/strnlen-c.c b/libc/sysdeps/i386/i686/multiarch/strnlen-c.c index 567af2c81..f02465d6a 100644 --- a/libc/sysdeps/i386/i686/multiarch/strnlen-c.c +++ b/libc/sysdeps/i386/i686/multiarch/strnlen-c.c @@ -1,6 +1,6 @@ -#ifndef NOT_IN_libc -# define STRNLEN __strnlen_ia32 -# undef libc_hidden_builtin_def +#define STRNLEN __strnlen_ia32 +#ifdef SHARED +# undef libc_hidden_def # define libc_hidden_def(name) \ __hidden_ver1 (__strnlen_ia32, __GI_strnlen, __strnlen_ia32); #endif diff --git a/libc/sysdeps/i386/i686/multiarch/wcscmp-c.c b/libc/sysdeps/i386/i686/multiarch/wcscmp-c.c index 9592455d0..165c56afb 100644 --- a/libc/sysdeps/i386/i686/multiarch/wcscmp-c.c +++ b/libc/sysdeps/i386/i686/multiarch/wcscmp-c.c @@ -1,10 +1,12 @@ -#ifndef NOT_IN_libc - -# define WCSCMP __wcscmp_ia32 +#include <wchar.h> +#define WCSCMP __wcscmp_ia32 +#ifdef SHARED # undef libc_hidden_def # define libc_hidden_def(name) \ __hidden_ver1 (__wcscmp_ia32, __GI_wcscmp, __wcscmp_ia32); #endif +extern __typeof (wcscmp) __wcscmp_ia32; + #include "wcsmbs/wcscmp.c" diff --git a/libc/sysdeps/i386/i686/multiarch/wcslen-c.c b/libc/sysdeps/i386/i686/multiarch/wcslen-c.c index 49f32a25e..8cebfea0e 100644 --- a/libc/sysdeps/i386/i686/multiarch/wcslen-c.c +++ b/libc/sysdeps/i386/i686/multiarch/wcslen-c.c @@ -1,5 +1,9 @@ +#include <wchar.h> + #ifndef NOT_IN_libc # define WCSLEN __wcslen_ia32 #endif +extern __typeof (wcslen) __wcslen_ia32; + #include "wcsmbs/wcslen.c" diff --git a/libc/sysdeps/i386/i686/multiarch/wmemcmp-c.c b/libc/sysdeps/i386/i686/multiarch/wmemcmp-c.c index 94ff6151f..bd37660fc 100644 --- a/libc/sysdeps/i386/i686/multiarch/wmemcmp-c.c +++ b/libc/sysdeps/i386/i686/multiarch/wmemcmp-c.c @@ -1,5 +1,9 @@ +#include <wchar.h> + #ifndef NOT_IN_libc # define WMEMCMP __wmemcmp_ia32 #endif +extern __typeof (wmemcmp) __wmemcmp_ia32; + #include "wcsmbs/wmemcmp.c" diff --git a/libc/sysdeps/ieee754/dbl-64/mpsqrt.c b/libc/sysdeps/ieee754/dbl-64/mpsqrt.c index d1a80f909..f007cab5d 100644 --- a/libc/sysdeps/ieee754/dbl-64/mpsqrt.c +++ b/libc/sysdeps/ieee754/dbl-64/mpsqrt.c @@ -51,7 +51,7 @@ static double fastiroot(double); void SECTION __mpsqrt(mp_no *x, mp_no *y, int p) { - int i,m,ex,ey; + int i,m,ey; double dx,dy; mp_no mphalf = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, @@ -66,7 +66,7 @@ __mpsqrt(mp_no *x, mp_no *y, int p) { mphalf.e =0; mphalf.d[0] =ONE; mphalf.d[1] =HALFRAD; mp3halfs.e=1; mp3halfs.d[0]=ONE; mp3halfs.d[1]=ONE; mp3halfs.d[2]=HALFRAD; - ex=EX; ey=EX/2; __cpy(x,&mpxn,p); mpxn.e -= (ey+ey); + ey=EX/2; __cpy(x,&mpxn,p); mpxn.e -= (ey+ey); __mp_dbl(&mpxn,&dx,p); dy=fastiroot(dx); __dbl_mp(dy,&mpu,p); __mul(&mpxn,&mphalf,&mpz,p); diff --git a/libc/sysdeps/ieee754/dbl-64/s_sin.c b/libc/sysdeps/ieee754/dbl-64/s_sin.c index 6f19f158f..5183e55e8 100644 --- a/libc/sysdeps/ieee754/dbl-64/s_sin.c +++ b/libc/sysdeps/ieee754/dbl-64/s_sin.c @@ -138,7 +138,7 @@ __sin(double x){ cor=(ssn+s*ccs-sn*c)+cs*s; res=sn+cor; cor=(sn-res)+cor; - return (res==res+1.025*cor)? res : slow1(x); + return (res==res+1.096*cor)? res : slow1(x); } /* else if (k < 0x3feb6000) */ /*----------------------- 0.855469 <|x|<2.426265 ----------------------*/ diff --git a/libc/sysdeps/ieee754/ldbl-96/e_hypotl.c b/libc/sysdeps/ieee754/ldbl-96/e_hypotl.c index a59320b06..5e0b37ec0 100644 --- a/libc/sysdeps/ieee754/ldbl-96/e_hypotl.c +++ b/libc/sysdeps/ieee754/ldbl-96/e_hypotl.c @@ -70,7 +70,8 @@ k=0; if(__builtin_expect(ea > 0x5f3f,0)) { /* a>2**8000 */ if(ea == 0x7fff) { /* Inf or NaN */ - u_int32_t exp,high,low; + u_int32_t exp __attribute__ ((unused)); + u_int32_t high,low; w = a+b; /* for sNaN */ GET_LDOUBLE_WORDS(exp,high,low,a); if(((high&0x7fffffff)|low)==0) w = a; @@ -85,7 +86,8 @@ } if(__builtin_expect(eb < 0x20bf, 0)) { /* b < 2**-8000 */ if(eb == 0) { /* subnormal b or 0 */ - u_int32_t exp,high,low; + u_int32_t exp __attribute__ ((unused)); + u_int32_t high,low; GET_LDOUBLE_WORDS(exp,high,low,b); if((high|low)==0) return a; SET_LDOUBLE_WORDS(t1, 0x7ffd, 0, 0); /* t1=2^16382 */ diff --git a/libc/sysdeps/ieee754/ldbl-96/e_j0l.c b/libc/sysdeps/ieee754/ldbl-96/e_j0l.c index abf4f109f..325408d5d 100644 --- a/libc/sysdeps/ieee754/ldbl-96/e_j0l.c +++ b/libc/sysdeps/ieee754/ldbl-96/e_j0l.c @@ -108,9 +108,9 @@ __ieee754_j0l (long double x) { long double z, s, c, ss, cc, r, u, v; int32_t ix; - u_int32_t se, i0, i1; + u_int32_t se; - GET_LDOUBLE_WORDS (se, i0, i1, x); + GET_LDOUBLE_EXP (se, x); ix = se & 0x7fff; if (__builtin_expect (ix >= 0x7fff, 0)) return one / (x * x); diff --git a/libc/sysdeps/ieee754/ldbl-96/e_j1l.c b/libc/sysdeps/ieee754/ldbl-96/e_j1l.c index 369fd830f..d7fcc9b44 100644 --- a/libc/sysdeps/ieee754/ldbl-96/e_j1l.c +++ b/libc/sysdeps/ieee754/ldbl-96/e_j1l.c @@ -110,9 +110,9 @@ __ieee754_j1l (long double x) { long double z, c, r, s, ss, cc, u, v, y; int32_t ix; - u_int32_t se, i0, i1; + u_int32_t se; - GET_LDOUBLE_WORDS (se, i0, i1, x); + GET_LDOUBLE_EXP (se, x); ix = se & 0x7fff; if (__builtin_expect (ix >= 0x7fff, 0)) return one / x; diff --git a/libc/sysdeps/posix/getaddrinfo.c b/libc/sysdeps/posix/getaddrinfo.c index a5aafe93a..1a023f91f 100644 --- a/libc/sysdeps/posix/getaddrinfo.c +++ b/libc/sysdeps/posix/getaddrinfo.c @@ -1958,7 +1958,7 @@ gaiconf_init (void) size_t nscopelist = 0; bool scopelist_nullbits = false; - FILE *fp = fopen (GAICONF_FNAME, "rc"); + FILE *fp = fopen (GAICONF_FNAME, "rce"); if (fp != NULL) { struct stat64 st; @@ -2386,7 +2386,7 @@ getaddrinfo (const char *name, const char *service, || (hints->ai_family == PF_INET6 && ! seen_ipv6)) { /* We cannot possibly return a valid answer. */ - free (in6ai); + __free_in6ai (in6ai); return EAI_NONAME; } } @@ -2400,7 +2400,7 @@ getaddrinfo (const char *name, const char *service, { if (hints->ai_flags & AI_NUMERICSERV) { - free (in6ai); + __free_in6ai (in6ai); return EAI_NONAME; } @@ -2422,7 +2422,7 @@ getaddrinfo (const char *name, const char *service, if (last_i != 0) { freeaddrinfo (p); - free (in6ai); + __free_in6ai (in6ai); return -(last_i & GAIH_EAI); } @@ -2434,7 +2434,7 @@ getaddrinfo (const char *name, const char *service, } else { - free (in6ai); + __free_in6ai (in6ai); return EAI_FAMILY; } @@ -2622,7 +2622,7 @@ getaddrinfo (const char *name, const char *service, p->ai_canonname = canonname; } - free (in6ai); + __free_in6ai (in6ai); if (p) { diff --git a/libc/sysdeps/powerpc/fpu/e_sqrt.c b/libc/sysdeps/powerpc/fpu/e_sqrt.c index f9ded2571..d59bd08d5 100644 --- a/libc/sysdeps/powerpc/fpu/e_sqrt.c +++ b/libc/sysdeps/powerpc/fpu/e_sqrt.c @@ -154,6 +154,7 @@ __slow_ieee754_sqrt (double x) return f_wash (x); } +#undef __ieee754_sqrt double __ieee754_sqrt (double x) { diff --git a/libc/sysdeps/powerpc/fpu/e_sqrtf.c b/libc/sysdeps/powerpc/fpu/e_sqrtf.c index 965faee84..9c6b860c9 100644 --- a/libc/sysdeps/powerpc/fpu/e_sqrtf.c +++ b/libc/sysdeps/powerpc/fpu/e_sqrtf.c @@ -130,7 +130,7 @@ __slow_ieee754_sqrtf (float x) return f_washf (x); } - +#undef __ieee754_sqrtf float __ieee754_sqrtf (float x) { diff --git a/libc/sysdeps/powerpc/fpu/math_private.h b/libc/sysdeps/powerpc/fpu/math_private.h index 90021c6d3..c4dd217d1 100644 --- a/libc/sysdeps/powerpc/fpu/math_private.h +++ b/libc/sysdeps/powerpc/fpu/math_private.h @@ -1,5 +1,5 @@ /* Private inline math functions for powerpc. - Copyright (C) 2006 + Copyright (C) 2006, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -25,12 +25,145 @@ #include <ldsodefs.h> #include <dl-procinfo.h> +#include <math/math_private.h> + # if __WORDSIZE == 64 || defined _ARCH_PWR4 # define __CPU_HAS_FSQRT 1 + +#ifndef __ieee754_sqrt +# define __ieee754_sqrt(x) \ + ({ double __z; \ + __asm __volatile ( \ + " fsqrt %0,%1\n" \ + : "=f" (__z) \ + : "f"(x)); \ + __z; }) +#endif +#ifndef __ieee754_sqrtf +# define __ieee754_sqrtf(x) \ + ({ float __z; \ + __asm __volatile ( \ + " fsqrts %0,%1\n" \ + : "=f" (__z) \ + : "f"(x)); \ + __z; }) +#endif + # else # define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0) +# endif // __WORDSIZE == 64 || defined _ARCH_PWR4 + + +#if defined _ARCH_PWR5X + +# ifndef __round +# define __round(x) \ + ({ double __z; \ + __asm __volatile ( \ + " frin %0,%1\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif +# ifndef __roundf +# define __roundf(x) \ + ({ float __z; \ + __asm __volatile ( \ + " frin %0,%1\n" \ + " frsp %0,%0\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif + +# ifndef __trunc +# define __trunc(x) \ + ({ double __z; \ + __asm __volatile ( \ + " friz %0,%1\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif +# ifndef __truncf +# define __truncf(x) \ + ({ float __z; \ + __asm __volatile ( \ + " friz %0,%1\n" \ + " frsp %0,%0\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif + +# ifndef __ceil +# define __ceil(x) \ + ({ double __z; \ + __asm __volatile ( \ + " frip %0,%1\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif +# ifndef __ceilf +# define __ceilf(x) \ + ({ float __z; \ + __asm __volatile ( \ + " frip %0,%1\n" \ + " frsp %0,%0\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) # endif +# ifndef __floor +# define __floor(x) \ + ({ double __z; \ + __asm __volatile ( \ + " frim %0,%1\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif +# ifndef __floorf +# define __floorf(x) \ + ({ float __z; \ + __asm __volatile ( \ + " frim %0,%1\n" \ + " frsp %0,%0\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif + +#endif /* defined _ARCH_PWR5X */ + + +#if defined _ARCH_PWR6 + +# ifndef __copysign +# define __copysign(x, y) \ + ({ double __z; \ + __asm __volatile ( \ + " fcpsgn %0,%1,%2\n" \ + : "=f" (__z) \ + : "f" (y), "f" (x)); \ + __z; }) +# endif +# ifndef __copysignf +# define __copysignf(x, y) \ + ({ float __z; \ + __asm __volatile ( \ + " fcpsgn %0,%1,%2\n" \ + " frsp %0,%0\n" \ + : "=f" (__z) \ + : "f" (y), "f" (x)); \ + __z; }) +# endif + +#endif /* defined _ARCH_PWR6 */ + + # ifndef __LIBC_INTERNAL_MATH_INLINES extern double __slow_ieee754_sqrt (double); __inline double @@ -78,6 +211,4 @@ __ieee754_sqrtf (float __x) } #endif /* __LIBC_INTERNAL_MATH_INLINES */ -#include <math/math_private.h> - #endif /* _PPC_MATH_PRIVATE_H_ */ diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c b/libc/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c index 314abba3b..66d04ceb7 100644 --- a/libc/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c +++ b/libc/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c @@ -20,6 +20,7 @@ #include <math.h> #include <math_private.h> +#undef __ieee754_sqrt double __ieee754_sqrt (double x) { diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c b/libc/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c index 715721439..847a2e4ad 100644 --- a/libc/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c +++ b/libc/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c @@ -20,6 +20,7 @@ #include <math.h> #include <math_private.h> +#undef __ieee754_sqrtf float __ieee754_sqrtf (float x) { diff --git a/libc/sysdeps/unix/clock_gettime.c b/libc/sysdeps/unix/clock_gettime.c index d467f2b78..b4355d463 100644 --- a/libc/sysdeps/unix/clock_gettime.c +++ b/libc/sysdeps/unix/clock_gettime.c @@ -113,7 +113,7 @@ clock_gettime (clockid_t clock_id, struct timespec *tp) default: #ifdef SYSDEP_GETTIME_CPU - retval = SYSDEP_GETTIME_CPU (clock_id, tp); + SYSDEP_GETTIME_CPU (clock_id, tp); #endif #if HP_TIMING_AVAIL if ((clock_id & ((1 << CLOCK_IDFIELD_SIZE) - 1)) diff --git a/libc/sysdeps/unix/sysv/linux/Makefile b/libc/sysdeps/unix/sysv/linux/Makefile index 254d809e8..bd4fae2e5 100644 --- a/libc/sysdeps/unix/sysv/linux/Makefile +++ b/libc/sysdeps/unix/sysv/linux/Makefile @@ -162,6 +162,6 @@ CFLAGS-mq_receive.c += -fexceptions endif ifeq ($(subdir),nscd) -sysdep-CFLAGS += -DHAVE_EPOLL -DHAVE_SENDFILE -DHAVE_INOTIFY +sysdep-CFLAGS += -DHAVE_EPOLL -DHAVE_SENDFILE -DHAVE_INOTIFY -DHAVE_NETLINK CFLAGS-gai.c += -DNEED_NETLINK endif diff --git a/libc/sysdeps/unix/sysv/linux/Versions b/libc/sysdeps/unix/sysv/linux/Versions index 3a3e8e8c2..7f2f1b9ff 100644 --- a/libc/sysdeps/unix/sysv/linux/Versions +++ b/libc/sysdeps/unix/sysv/linux/Versions @@ -163,6 +163,9 @@ libc { sendmmsg; } + GLIBC_2.15 { + process_vm_readv; process_vm_writev; + } GLIBC_PRIVATE { # functions used in other libraries __syscall_rt_sigqueueinfo; diff --git a/libc/sysdeps/unix/sysv/linux/bits/uio.h b/libc/sysdeps/unix/sysv/linux/bits/uio.h index 6a283ed77..b2d77195c 100644 --- a/libc/sysdeps/unix/sysv/linux/bits/uio.h +++ b/libc/sysdeps/unix/sysv/linux/bits/uio.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997, 2006 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 2006, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -48,3 +48,28 @@ struct iovec }; #endif + +#if defined _SYS_UIO_H && !defined _BITS_UIO_H_FOR_SYS_UIO_H +#define _BITS_UIO_H_FOR_SYS_UIO_H 1 + +__BEGIN_DECLS + +/* Read from another process' address space. */ +extern ssize_t process_vm_readv (pid_t __pid, __const struct iovec *__lvec, + unsigned long int __liovcnt, + __const struct iovec *__rvec, + unsigned long int __riovcnt, + unsigned long int __flags) + __THROW; + +/* Write to another process' address space. */ +extern ssize_t process_vm_writev (pid_t __pid, __const struct iovec *__lvec, + unsigned long int __liovcnt, + __const struct iovec *__rvec, + unsigned long int __riovcnt, + unsigned long int __flags) + __THROW; + +__END_DECLS + +#endif diff --git a/libc/sysdeps/unix/sysv/linux/check_pf.c b/libc/sysdeps/unix/sysv/linux/check_pf.c index c053adcda..0738a7012 100644 --- a/libc/sysdeps/unix/sysv/linux/check_pf.c +++ b/libc/sysdeps/unix/sysv/linux/check_pf.c @@ -1,5 +1,5 @@ /* Determine protocol families for which interfaces exist. Linux version. - Copyright (C) 2003, 2006, 2007, 2008, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2003, 2006-2008, 2010, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -33,6 +33,9 @@ #include <not-cancel.h> #include <kernel-features.h> +#include <bits/libc-lock.h> +#include <atomic.h> +#include <nscd/nscd-client.h> #ifndef IFA_F_HOMEADDRESS @@ -43,9 +46,42 @@ #endif -static int -make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6, - struct in6addrinfo **in6ai, size_t *in6ailen) +struct cached_data +{ + uint32_t timestamp; + uint32_t usecnt; + bool seen_ipv4; + bool seen_ipv6; + size_t in6ailen; + struct in6addrinfo in6ai[0]; +}; + +static struct cached_data noai6ai_cached = + { + .usecnt = 1, /* Make sure we never try to delete this entry. */ + .in6ailen = 0 + }; + +static struct cached_data *cache; +__libc_lock_define_initialized (static, lock); + + +#ifdef IS_IN_nscd +static uint32_t nl_timestamp; + +uint32_t +__bump_nl_timestamp (void) +{ + if (atomic_increment_val (&nl_timestamp) == 0) + atomic_increment (&nl_timestamp); + + return nl_timestamp; +} +#endif + + +static struct cached_data * +make_request (int fd, pid_t pid) { struct req { @@ -99,9 +135,6 @@ make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6, sizeof (nladdr))) < 0) goto out_fail; - *seen_ipv4 = false; - *seen_ipv6 = false; - bool done = false; struct in6ailist { @@ -109,6 +142,8 @@ make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6, struct in6ailist *next; } *in6ailist = NULL; size_t in6ailistlen = 0; + bool seen_ipv4 = false; + bool seen_ipv6 = false; do { @@ -172,12 +207,12 @@ make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6, { if (*(const in_addr_t *) address != htonl (INADDR_LOOPBACK)) - *seen_ipv4 = true; + seen_ipv4 = true; } else { if (!IN6_IS_ADDR_LOOPBACK (address)) - *seen_ipv6 = true; + seen_ipv6 = true; } } @@ -211,32 +246,47 @@ make_request (int fd, pid_t pid, bool *seen_ipv4, bool *seen_ipv6, } while (! done); - close_not_cancel_no_status (fd); - - if (*seen_ipv6 && in6ailist != NULL) + struct cached_data *result; + if (seen_ipv6 && in6ailist != NULL) { - *in6ai = malloc (in6ailistlen * sizeof (**in6ai)); - if (*in6ai == NULL) + result = malloc (sizeof (*result) + + in6ailistlen * sizeof (struct in6addrinfo)); + if (result == NULL) goto out_fail; - *in6ailen = in6ailistlen; +#ifdef IS_IN_nscd + result->timestamp = nl_timestamp; +#else + result->timestamp = __nscd_get_nl_timestamp (); +#endif + result->usecnt = 2; + result->seen_ipv4 = seen_ipv4; + result->seen_ipv6 = true; + result->in6ailen = in6ailistlen; do { - (*in6ai)[--in6ailistlen] = in6ailist->info; + result->in6ai[--in6ailistlen] = in6ailist->info; in6ailist = in6ailist->next; } while (in6ailist != NULL); } + else + { + atomic_add (&noai6ai_cached.usecnt, 2); + noai6ai_cached.seen_ipv4 = seen_ipv4; + noai6ai_cached.seen_ipv6 = seen_ipv6; + result = &noai6ai_cached; + } if (use_malloc) free (buf); - return 0; + return result; out_fail: if (use_malloc) free (buf); - return -1; + return NULL; } @@ -260,24 +310,66 @@ __check_pf (bool *seen_ipv4, bool *seen_ipv6, if (! __no_netlink_support) { - int fd = __socket (PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + struct cached_data *olddata = NULL; + struct cached_data *data = NULL; - struct sockaddr_nl nladdr; - memset (&nladdr, '\0', sizeof (nladdr)); - nladdr.nl_family = AF_NETLINK; + __libc_lock_lock (lock); - socklen_t addr_len = sizeof (nladdr); +#ifdef IS_IN_nscd +# define cache_valid() nl_timestamp != 0 && cache->timestamp == nl_timestamp +#else +# define cache_valid() \ + ({ uint32_t val = __nscd_get_nl_timestamp (); \ + val != 0 && cache->timestamp == val; }) +#endif + if (cache != NULL && cache_valid ()) + { + data = cache; + atomic_increment (&cache->usecnt); + } + else + { + int fd = __socket (PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (fd >= 0 - && __bind (fd, (struct sockaddr *) &nladdr, sizeof (nladdr)) == 0 - && __getsockname (fd, (struct sockaddr *) &nladdr, &addr_len) == 0 - && make_request (fd, nladdr.nl_pid, seen_ipv4, seen_ipv6, - in6ai, in6ailen) == 0) - /* It worked. */ - return; + if (__builtin_expect (fd >= 0, 1)) + { + struct sockaddr_nl nladdr; + memset (&nladdr, '\0', sizeof (nladdr)); + nladdr.nl_family = AF_NETLINK; - if (fd >= 0) - __close (fd); + socklen_t addr_len = sizeof (nladdr); + + if(__bind (fd, (struct sockaddr *) &nladdr, sizeof (nladdr)) == 0 + && __getsockname (fd, (struct sockaddr *) &nladdr, + &addr_len) == 0) + data = make_request (fd, nladdr.nl_pid); + + close_not_cancel_no_status (fd); + } + + if (data != NULL) + { + olddata = cache; + cache = data; + } + } + + __libc_lock_unlock (lock); + + if (data != NULL) + { + /* It worked. */ + *seen_ipv4 = data->seen_ipv4; + *seen_ipv6 = data->seen_ipv6; + *in6ailen = data->in6ailen; + *in6ai = data->in6ai; + + if (olddata != NULL && olddata->usecnt > 0 + && atomic_add_zero (&olddata->usecnt, -1)) + free (olddata); + + return; + } #if __ASSUME_NETLINK_SUPPORT == 0 /* Remember that there is no netlink support. */ @@ -315,3 +407,26 @@ __check_pf (bool *seen_ipv4, bool *seen_ipv6, (void) freeifaddrs (ifa); #endif } + + +void +__free_in6ai (struct in6addrinfo *ai) +{ + if (ai != NULL) + { + struct cached_data *data = + (struct cached_data *) ((char *) ai + - offsetof (struct cached_data, in6ai)); + + if (atomic_add_zero (&data->usecnt, -1)) + { + __libc_lock_lock (lock); + + if (data->usecnt == 0) + /* Still unused. */ + free (data); + + __libc_lock_unlock (lock); + } + } +} diff --git a/libc/sysdeps/unix/sysv/linux/clock_gettime.c b/libc/sysdeps/unix/sysv/linux/clock_gettime.c index 0ae45de37..ad0fe1e8b 100644 --- a/libc/sysdeps/unix/sysv/linux/clock_gettime.c +++ b/libc/sysdeps/unix/sysv/linux/clock_gettime.c @@ -45,8 +45,8 @@ /* This means the REALTIME and MONOTONIC clock are definitely supported in the kernel. */ -# define SYSDEP_GETTIME \ - SYSDEP_GETTIME_CPUTIME \ +# define SYSDEP_GETTIME \ + SYSDEP_GETTIME_CPUTIME; \ case CLOCK_REALTIME: \ case CLOCK_MONOTONIC: \ retval = SYSCALL_GETTIME (clock_id, tp); \ @@ -82,8 +82,8 @@ maybe_syscall_gettime (clockid_t clock_id, struct timespec *tp) /* The REALTIME and MONOTONIC clock might be available. Try the syscall first. */ -# define SYSDEP_GETTIME \ - SYSDEP_GETTIME_CPUTIME \ +# define SYSDEP_GETTIME \ + SYSDEP_GETTIME_CPUTIME; \ case CLOCK_REALTIME: \ case CLOCK_MONOTONIC: \ case CLOCK_MONOTONIC_RAW: \ @@ -100,7 +100,7 @@ maybe_syscall_gettime (clockid_t clock_id, struct timespec *tp) __set_errno (retval); \ retval = -1; \ } \ - break; + break #endif #ifdef __NR_clock_gettime @@ -110,7 +110,9 @@ maybe_syscall_gettime (clockid_t clock_id, struct timespec *tp) # if __ASSUME_POSIX_CPU_TIMERS > 0 -# define SYSDEP_GETTIME_CPU SYSCALL_GETTIME +# define SYSDEP_GETTIME_CPU(clock_id, tp) \ + retval = SYSCALL_GETTIME (clock_id, tp); \ + break # define SYSDEP_GETTIME_CPUTIME /* Default catches them too. */ # else @@ -164,7 +166,7 @@ maybe_syscall_gettime_cpu (clockid_t clock_id, struct timespec *tp) return e; } -# define SYSDEP_GETTIME_CPU \ +# define SYSDEP_GETTIME_CPU(clock_id, tp) \ retval = maybe_syscall_gettime_cpu (clock_id, tp); \ if (retval == 0) \ break; \ @@ -199,7 +201,7 @@ maybe_syscall_gettime_cputime (clockid_t clock_id, struct timespec *tp) break; \ } \ retval = hp_timing_gettime (clock_id, tp); \ - break; + break # if !HP_TIMING_AVAIL # define hp_timing_gettime(clock_id, tp) (__set_errno (EINVAL), -1) # endif diff --git a/libc/sysdeps/unix/sysv/linux/getsysstats.c b/libc/sysdeps/unix/sysv/linux/getsysstats.c index 7feb7a180..bde3fa899 100644 --- a/libc/sysdeps/unix/sysv/linux/getsysstats.c +++ b/libc/sysdeps/unix/sysv/linux/getsysstats.c @@ -296,7 +296,7 @@ phys_pages_info (const char *format) long int result = -1; /* If we haven't found an appropriate entry return 1. */ - FILE *fp = fopen ("/proc/meminfo", "rc"); + FILE *fp = fopen ("/proc/meminfo", "rce"); if (fp != NULL) { /* No threads use this stream. */ diff --git a/libc/sysdeps/unix/sysv/linux/internal_statvfs.c b/libc/sysdeps/unix/sysv/linux/internal_statvfs.c index 2ddec1edb..a0bad02d7 100644 --- a/libc/sysdeps/unix/sysv/linux/internal_statvfs.c +++ b/libc/sysdeps/unix/sysv/linux/internal_statvfs.c @@ -237,9 +237,13 @@ INTERNAL_STATVFS (const char *name, struct STATVFS *buf, buf->f_files = fsbuf->f_files; buf->f_ffree = fsbuf->f_ffree; if (sizeof (buf->f_fsid) == sizeof (fsbuf->f_fsid)) + /* The shifting uses 'unsigned long long int' even though the target + field might only have 32 bits. This is OK since the 'if' branch + is not used in this case but the compiler would still generate + warnings. */ buf->f_fsid = ((fsbuf->f_fsid.__val[0] - & ((1UL << (8 * sizeof (fsbuf->f_fsid.__val[0]))) - 1)) - | ((unsigned long int) fsbuf->f_fsid.__val[1] + & ((1ULL << (8 * sizeof (fsbuf->f_fsid.__val[0]))) - 1)) + | ((unsigned long long int) fsbuf->f_fsid.__val[1] << (8 * (sizeof (buf->f_fsid) - sizeof (fsbuf->f_fsid.__val[0]))))); else diff --git a/libc/sysdeps/unix/sysv/linux/readonly-area.c b/libc/sysdeps/unix/sysv/linux/readonly-area.c index 69e926a7a..ce8ac94c4 100644 --- a/libc/sysdeps/unix/sysv/linux/readonly-area.c +++ b/libc/sysdeps/unix/sysv/linux/readonly-area.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2004, 2005 Free Software Foundation, Inc. +/* Copyright (C) 2004, 2005, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -32,7 +32,7 @@ __readonly_area (const char *ptr, size_t size) { const void *ptr_end = ptr + size; - FILE *fp = fopen ("/proc/self/maps", "rc"); + FILE *fp = fopen ("/proc/self/maps", "rce"); if (fp == NULL) { /* It is the system administrator's choice to not have /proc diff --git a/libc/sysdeps/unix/sysv/linux/syscalls.list b/libc/sysdeps/unix/sysv/linux/syscalls.list index 2bed9e99e..f6e6653d3 100644 --- a/libc/sysdeps/unix/sysv/linux/syscalls.list +++ b/libc/sysdeps/unix/sysv/linux/syscalls.list @@ -109,3 +109,6 @@ name_to_handle_at EXTRA name_to_handle_at i:isppi name_to_handle_at open_by_handle_at EXTRA open_by_handle_at Ci:ipi open_by_handle_at setns EXTRA setns i:ii setns + +process_vm_readv EXTRA process_vm_readv i:ipipii process_vm_readv +process_vm_writev EXTRA process_vm_writev i:ipipii process_vm_writev diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/clock_gettime.c b/libc/sysdeps/unix/sysv/linux/x86_64/clock_gettime.c index 7802701e2..f7121106b 100644 --- a/libc/sysdeps/unix/sysv/linux/x86_64/clock_gettime.c +++ b/libc/sysdeps/unix/sysv/linux/x86_64/clock_gettime.c @@ -3,8 +3,14 @@ #ifdef SHARED # define SYSCALL_GETTIME(id, tp) \ ({ long int (*f) (clockid_t, struct timespec *) = __vdso_clock_gettime; \ + long int v_ret; \ PTR_DEMANGLE (f); \ - f (id, tp); }) + v_ret = f (id, tp); \ + if (INTERNAL_SYSCALL_ERROR_P (v_ret, )) { \ + __set_errno (INTERNAL_SYSCALL_ERRNO (v_ret, )); \ + v_ret = -1; \ + } \ + v_ret; }) # define INTERNAL_GETTIME(id, tp) \ ({ long int (*f) (clockid_t, struct timespec *) = __vdso_clock_gettime; \ PTR_DEMANGLE (f); \ diff --git a/libc/sysdeps/x86_64/fpu/multiarch/Makefile b/libc/sysdeps/x86_64/fpu/multiarch/Makefile index f97ce1699..be689036d 100644 --- a/libc/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/libc/sysdeps/x86_64/fpu/multiarch/Makefile @@ -36,29 +36,18 @@ endif ifeq ($(config-cflags-avx),yes) libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \ - e_asin-avx e_atan2-avx s_sin-avx s_tan-avx \ + e_atan2-avx s_sin-avx s_tan-avx \ mplog-avx mpa-avx slowexp-avx \ - sincos32-avx doasin-avx dosincos-avx \ - brandred-avx mpexp-avx \ - mpatan2-avx mpatan-avx mpsqrt-avx mptan-avx + mpexp-avx -CFLAGS-brandred-avx.c = -mavx -CFLAGS-doasin-avx.c = -mavx -CFLAGS-dosincos-avx.c = -mavx -CFLAGS-e_asin-avx.c = -mavx CFLAGS-e_atan2-avx.c = -mavx CFLAGS-e_exp-avx.c = -mavx CFLAGS-e_log-avx.c = -mavx CFLAGS-mpa-avx.c = -mavx -CFLAGS-mpatan-avx.c = -mavx -CFLAGS-mpatan2-avx.c = -mavx CFLAGS-mpexp-avx.c = -mavx CFLAGS-mplog-avx.c = -mavx -CFLAGS-mpsqrt-avx.c = -mavx -CFLAGS-mptan-avx.c = -mavx CFLAGS-s_atan-avx.c = -mavx CFLAGS-s_sin-avx.c = -mavx -CFLAGS-sincos32-avx.c = -mavx CFLAGS-slowexp-avx.c = -mavx CFLAGS-s_tan-avx.c = -mavx endif diff --git a/libc/sysdeps/x86_64/fpu/multiarch/brandred-avx.c b/libc/sysdeps/x86_64/fpu/multiarch/brandred-avx.c deleted file mode 100644 index 2f6edffad..000000000 --- a/libc/sysdeps/x86_64/fpu/multiarch/brandred-avx.c +++ /dev/null @@ -1,4 +0,0 @@ -#define __branred __branred_avx -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/branred.c> diff --git a/libc/sysdeps/x86_64/fpu/multiarch/doasin-avx.c b/libc/sysdeps/x86_64/fpu/multiarch/doasin-avx.c deleted file mode 100644 index 86781c156..000000000 --- a/libc/sysdeps/x86_64/fpu/multiarch/doasin-avx.c +++ /dev/null @@ -1,4 +0,0 @@ -#define __doasin __doasin_avx -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/doasin.c> diff --git a/libc/sysdeps/x86_64/fpu/multiarch/dosincos-avx.c b/libc/sysdeps/x86_64/fpu/multiarch/dosincos-avx.c deleted file mode 100644 index ffa18743d..000000000 --- a/libc/sysdeps/x86_64/fpu/multiarch/dosincos-avx.c +++ /dev/null @@ -1,6 +0,0 @@ -#define __docos __docos_avx -#define __dubcos __dubcos_avx -#define __dubsin __dubsin_avx -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/dosincos.c> diff --git a/libc/sysdeps/x86_64/fpu/multiarch/e_asin-avx.c b/libc/sysdeps/x86_64/fpu/multiarch/e_asin-avx.c deleted file mode 100644 index 7f89e4f14..000000000 --- a/libc/sysdeps/x86_64/fpu/multiarch/e_asin-avx.c +++ /dev/null @@ -1,11 +0,0 @@ -#define __ieee754_acos __ieee754_acos_avx -#define __ieee754_asin __ieee754_asin_avx -#define __cos32 __cos32_avx -#define __doasin __doasin_avx -#define __docos __docos_avx -#define __dubcos __dubcos_avx -#define __dubsin __dubsin_avx -#define __sin32 __sin32_avx -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/e_asin.c> diff --git a/libc/sysdeps/x86_64/fpu/multiarch/e_asin.c b/libc/sysdeps/x86_64/fpu/multiarch/e_asin.c index bb711b1ff..e742a9c13 100644 --- a/libc/sysdeps/x86_64/fpu/multiarch/e_asin.c +++ b/libc/sysdeps/x86_64/fpu/multiarch/e_asin.c @@ -1,29 +1,18 @@ -#if defined HAVE_FMA4_SUPPORT || defined HAVE_AVX_SUPPORT +#ifdef HAVE_FMA4_SUPPORT # include <init-arch.h> # include <math_private.h> extern double __ieee754_acos_sse2 (double); extern double __ieee754_asin_sse2 (double); -extern double __ieee754_acos_avx (double); -extern double __ieee754_asin_avx (double); -# ifdef HAVE_FMA4_SUPPORT extern double __ieee754_acos_fma4 (double); extern double __ieee754_asin_fma4 (double); -# else -# undef HAS_FMA4 -# define HAS_FMA4 0 -# define __ieee754_acos_fma4 ((void *) 0) -# define __ieee754_asin_fma4 ((void *) 0) -# endif libm_ifunc (__ieee754_acos, - HAS_FMA4 ? __ieee754_acos_fma4 - : (HAS_AVX ? __ieee754_acos_avx : __ieee754_acos_sse2)); + HAS_FMA4 ? __ieee754_acos_fma4 : __ieee754_acos_sse2); strong_alias (__ieee754_acos, __acos_finite) libm_ifunc (__ieee754_asin, - HAS_FMA4 ? __ieee754_asin_fma4 - : (HAS_AVX ? __ieee754_asin_avx : __ieee754_asin_sse2)); + HAS_FMA4 ? __ieee754_asin_fma4 : __ieee754_asin_sse2); strong_alias (__ieee754_asin, __asin_finite) # define __ieee754_acos __ieee754_acos_sse2 diff --git a/libc/sysdeps/x86_64/fpu/multiarch/e_atan2-avx.c b/libc/sysdeps/x86_64/fpu/multiarch/e_atan2-avx.c index 13155c87d..3012afac3 100644 --- a/libc/sysdeps/x86_64/fpu/multiarch/e_atan2-avx.c +++ b/libc/sysdeps/x86_64/fpu/multiarch/e_atan2-avx.c @@ -2,7 +2,6 @@ #define __add __add_avx #define __dbl_mp __dbl_mp_avx #define __dvd __dvd_avx -#define __mpatan2 __mpatan2_avx #define __mul __mul_avx #define __sub __sub_avx #define SECTION __attribute__ ((section (".text.avx"))) diff --git a/libc/sysdeps/x86_64/fpu/multiarch/mpatan-avx.c b/libc/sysdeps/x86_64/fpu/multiarch/mpatan-avx.c deleted file mode 100644 index e26e1fa3e..000000000 --- a/libc/sysdeps/x86_64/fpu/multiarch/mpatan-avx.c +++ /dev/null @@ -1,10 +0,0 @@ -#define __mpatan __mpatan_avx -#define __add __add_avx -#define __dvd __dvd_avx -#define __mpsqrt __mpsqrt_avx -#define __mul __mul_avx -#define __sub __sub_avx -#define AVOID_MPATAN_H 1 -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/mpatan.c> diff --git a/libc/sysdeps/x86_64/fpu/multiarch/mpatan2-avx.c b/libc/sysdeps/x86_64/fpu/multiarch/mpatan2-avx.c deleted file mode 100644 index fa824d69b..000000000 --- a/libc/sysdeps/x86_64/fpu/multiarch/mpatan2-avx.c +++ /dev/null @@ -1,9 +0,0 @@ -#define __mpatan2 __mpatan2_avx -#define __add __add_avx -#define __dvd __dvd_avx -#define __mpatan __mpatan_avx -#define __mpsqrt __mpsqrt_avx -#define __mul __mul_avx -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/mpatan2.c> diff --git a/libc/sysdeps/x86_64/fpu/multiarch/mpsqrt-avx.c b/libc/sysdeps/x86_64/fpu/multiarch/mpsqrt-avx.c deleted file mode 100644 index 26f2fb30f..000000000 --- a/libc/sysdeps/x86_64/fpu/multiarch/mpsqrt-avx.c +++ /dev/null @@ -1,8 +0,0 @@ -#define __mpsqrt __mpsqrt_avx -#define __dbl_mp __dbl_mp_avx -#define __mul __mul_avx -#define __sub __sub_avx -#define AVOID_MPSQRT_H 1 -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/mpsqrt.c> diff --git a/libc/sysdeps/x86_64/fpu/multiarch/mptan-avx.c b/libc/sysdeps/x86_64/fpu/multiarch/mptan-avx.c deleted file mode 100644 index 02514b783..000000000 --- a/libc/sysdeps/x86_64/fpu/multiarch/mptan-avx.c +++ /dev/null @@ -1,7 +0,0 @@ -#define __mptan __mptan_avx -#define __c32 __c32_avx -#define __dvd __dvd_avx -#define __mpranred __mpranred_avx -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/mptan.c> diff --git a/libc/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c b/libc/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c index 8e7022f51..b5cb9c3a7 100644 --- a/libc/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c +++ b/libc/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c @@ -1,7 +1,6 @@ #define atan __atan_avx #define __add __add_avx #define __dbl_mp __dbl_mp_avx -#define __mpatan __mpatan_avx #define __mul __mul_avx #define __sub __sub_avx #define SECTION __attribute__ ((section (".text.avx"))) diff --git a/libc/sysdeps/x86_64/fpu/multiarch/s_sin-avx.c b/libc/sysdeps/x86_64/fpu/multiarch/s_sin-avx.c index c0ccf4be7..e1c6de025 100644 --- a/libc/sysdeps/x86_64/fpu/multiarch/s_sin-avx.c +++ b/libc/sysdeps/x86_64/fpu/multiarch/s_sin-avx.c @@ -1,12 +1,5 @@ #define __cos __cos_avx #define __sin __sin_avx -#define __branred __branred_avx -#define __docos __docos_avx -#define __dubsin __dubsin_avx -#define __mpcos __mpcos_avx -#define __mpcos1 __mpcos1_avx -#define __mpsin __mpsin_avx -#define __mpsin1 __mpsin1_avx #define SECTION __attribute__ ((section (".text.avx"))) #include <sysdeps/ieee754/dbl-64/s_sin.c> diff --git a/libc/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c b/libc/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c index f31ff65b6..53de5d3c9 100644 --- a/libc/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c +++ b/libc/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c @@ -1,8 +1,5 @@ #define tan __tan_avx -#define __branred __branred_avx #define __dbl_mp __dbl_mp_avx -#define __mpranred __mpranred_avx -#define __mptan __mptan_avx #define __sub __sub_avx #define SECTION __attribute__ ((section (".text.avx"))) diff --git a/libc/sysdeps/x86_64/fpu/multiarch/sincos32-avx.c b/libc/sysdeps/x86_64/fpu/multiarch/sincos32-avx.c deleted file mode 100644 index 5af51ab38..000000000 --- a/libc/sysdeps/x86_64/fpu/multiarch/sincos32-avx.c +++ /dev/null @@ -1,15 +0,0 @@ -#define __cos32 __cos32_avx -#define __sin32 __sin32_avx -#define __c32 __c32_avx -#define __mpsin __mpsin_avx -#define __mpsin1 __mpsin1_avx -#define __mpcos __mpcos_avx -#define __mpcos1 __mpcos1_avx -#define __mpranred __mpranred_avx -#define __add __add_avx -#define __dbl_mp __dbl_mp_avx -#define __mul __mul_avx -#define __sub __sub_avx -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/sincos32.c> diff --git a/libc/sysdeps/x86_64/multiarch/strcmp-sse42.S b/libc/sysdeps/x86_64/multiarch/strcmp-sse42.S index b93eda13b..7a50ff05d 100644 --- a/libc/sysdeps/x86_64/multiarch/strcmp-sse42.S +++ b/libc/sysdeps/x86_64/multiarch/strcmp-sse42.S @@ -104,7 +104,7 @@ STRCMP_SSE42: # else movq (%rdx), %rax # endif - testl $0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) jne __strcasecmp_l_nonascii #endif #ifdef USE_AS_STRNCASECMP_L @@ -115,7 +115,7 @@ STRCMP_SSE42: # else movq (%rcx), %rax # endif - testl $0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) jne __strncasecmp_l_nonascii #endif diff --git a/libc/sysdeps/x86_64/multiarch/strstr.c b/libc/sysdeps/x86_64/multiarch/strstr.c index b408b752f..36dc6762c 100644 --- a/libc/sysdeps/x86_64/multiarch/strstr.c +++ b/libc/sysdeps/x86_64/multiarch/strstr.c @@ -1,5 +1,5 @@ /* strstr with SSE4.2 intrinsics - Copyright (C) 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -88,14 +88,12 @@ cross to next page. */ static inline __m128i -__m128i_strloadu (const unsigned char * p) +__m128i_strloadu (const unsigned char * p, __m128i zero) { - int offset = ((size_t) p & (16 - 1)); - - if (offset && (int) ((size_t) p & 0xfff) > 0xff0) + if (__builtin_expect ((int) ((size_t) p & 0xfff) > 0xff0, 0)) { + size_t offset = ((size_t) p & (16 - 1)); __m128i a = _mm_load_si128 ((__m128i *) (p - offset)); - __m128i zero = _mm_setzero_si128 (); int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (a, zero)); if ((bmsk >> offset) != 0) return __m128i_shift_right (a, offset); @@ -106,24 +104,22 @@ __m128i_strloadu (const unsigned char * p) #if defined USE_AS_STRCASESTR && !defined STRCASESTR_NONASCII /* Similar to __m128i_strloadu. Convert to lower case for POSIX/C - locale. */ + locale and other which have single-byte letters only in the ASCII + range. */ static inline __m128i -__m128i_strloadu_tolower (const unsigned char *p, __m128i rangeuc, - __m128i u2ldelta) +__m128i_strloadu_tolower (const unsigned char *p, __m128i zero, __m128i uclow, + __m128i uchigh, __m128i lcqword) { - __m128i frag = __m128i_strloadu (p); + __m128i frag = __m128i_strloadu (p, zero); -#define UCLOW 0x4040404040404040ULL -#define UCHIGH 0x5b5b5b5b5b5b5b5bULL -#define LCQWORD 0x2020202020202020ULL /* Compare if 'Z' > bytes. Inverted way to get a mask for byte <= 'Z'. */ - __m128i r2 = _mm_cmpgt_epi8 (_mm_set1_epi64x (UCHIGH), frag); + __m128i r2 = _mm_cmpgt_epi8 (uchigh, frag); /* Compare if bytes are > 'A' - 1. */ - __m128i r1 = _mm_cmpgt_epi8 (frag, _mm_set1_epi64x (UCLOW)); + __m128i r1 = _mm_cmpgt_epi8 (frag, uclow); /* Mask byte == ff if byte(r2) <= 'Z' and byte(r1) > 'A' - 1. */ __m128i mask = _mm_and_si128 (r2, r1); /* Apply lowercase bit 6 mask for above mask bytes == ff. */ - return _mm_or_si128 (frag, _mm_and_si128 (mask, _mm_set1_epi64x (LCQWORD))); + return _mm_or_si128 (frag, _mm_and_si128 (mask, lcqword)); } #endif @@ -190,14 +186,18 @@ STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2) != 0, 0)) return __strcasestr_sse42_nonascii (s1, s2); - const __m128i rangeuc = _mm_set_epi64x (0x0, 0x5a41); - const __m128i u2ldelta = _mm_set1_epi64x (0xe0e0e0e0e0e0e0e0); -# define strloadu(p) __m128i_strloadu_tolower (p, rangeuc, u2ldelta) + const __m128i uclow = _mm_set1_epi8 (0x40); + const __m128i uchigh = _mm_set1_epi8 (0x5b); + const __m128i lcqword = _mm_set1_epi8 (0x20); + const __m128i zero = _mm_setzero_si128 (); +# define strloadu(p) __m128i_strloadu_tolower (p, zero, uclow, uchigh, lcqword) # else # define strloadu __m128i_strloadu_tolower +# define zero _mm_setzero_si128 () # endif #else -# define strloadu __m128i_strloadu +# define strloadu(p) __m128i_strloadu (p, zero) + const __m128i zero = _mm_setzero_si128 (); #endif /* p1 > 1 byte long. Load up to 16 bytes of fragment. */ @@ -208,7 +208,7 @@ STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2) /* p2 is > 1 byte long. */ frag2 = strloadu (p2); else - frag2 = _mm_insert_epi8 (_mm_setzero_si128 (), LOADBYTE (p2[0]), 0); + frag2 = _mm_insert_epi8 (zero, LOADBYTE (p2[0]), 0); /* Unsigned bytes, equal order, does frag2 has null? */ int cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c); @@ -217,8 +217,7 @@ STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2) int cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c); if (cmp_s & cmp_c) { - int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (frag2, - _mm_setzero_si128 ())); + int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (frag2, zero)); int len; __asm ("bsfl %[bmsk], %[len]" : [len] "=r" (len) : [bmsk] "r" (bmsk)); @@ -344,7 +343,6 @@ re_trace: /* Handle both zero and sign flag set and s1 is shorter in length. */ - __m128i zero = _mm_setzero_si128 (); int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag2)); int bmsk1 = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag1)); int len; diff --git a/libc/sysdeps/x86_64/strcmp.S b/libc/sysdeps/x86_64/strcmp.S index 165073e90..6e5bdaaf2 100644 --- a/libc/sysdeps/x86_64/strcmp.S +++ b/libc/sysdeps/x86_64/strcmp.S @@ -1,5 +1,5 @@ /* Highly optimized version for x86-64. - Copyright (C) 1999, 2000, 2002, 2003, 2005, 2009, 2010 + Copyright (C) 1999, 2000, 2002, 2003, 2005, 2009, 2010, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Based on i686 version contributed by Ulrich Drepper @@ -157,7 +157,7 @@ END (BP_SYM (STRCMP)) # else movq (%rdx), %rax # endif - testl $0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) jne __strcasecmp_l_nonascii # elif defined USE_AS_STRNCASECMP_L /* We have to fall back on the C implementation for locales @@ -167,7 +167,7 @@ END (BP_SYM (STRCMP)) # else movq (%rcx), %rax # endif - testl $0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) jne __strncasecmp_l_nonascii # endif |