diff options
author | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2011-03-21 17:49:12 +0000 |
---|---|---|
committer | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2011-03-21 17:49:12 +0000 |
commit | 5c8cf8f50bb4598c4ecf2aa860aa7bf0ef0c3df7 (patch) | |
tree | 2f079ca4472be83c8bdca9215aa6c8d85f8f5875 /libc/sysdeps | |
parent | f9a421a6b2e3ba46cc615e128091e5eacabb1dac (diff) |
Merge changes between r13053 and r13227 from /fsf/trunk.
ports:
* sysdeps/arm/eabi/bits/predefs.h: Correct license reference in
last paragraph of license notice.
git-svn-id: svn://svn.eglibc.org/trunk@13228 7b3dc134-2b1b-0410-93df-9e9f96275f8d
Diffstat (limited to 'libc/sysdeps')
-rw-r--r-- | libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S | 5 | ||||
-rw-r--r-- | libc/sysdeps/sparc/sparc64/elf/configure | 55 | ||||
-rw-r--r-- | libc/sysdeps/sparc/sparc64/elf/configure.in | 89 | ||||
-rw-r--r-- | libc/sysdeps/unix/sysv/linux/i386/sysconf.c | 63 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/cacheinfo.c | 98 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/memset.S | 68 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/multiarch/init-arch.c | 12 | ||||
-rw-r--r-- | libc/sysdeps/x86_64/multiarch/strcmp.S | 45 |
8 files changed, 311 insertions, 124 deletions
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S index 12bf473a8..0f71a987d 100644 --- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S +++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S @@ -1,5 +1,5 @@ /* isnanf(). PowerPC32 version. - Copyright (C) 2008 Free Software Foundation, Inc. + Copyright (C) 2008, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -39,8 +39,7 @@ EALIGN (__isnanf, 4, 0) L(NaN): li r3,1 /* else return 1 */ blr - END (__isnan) + END (__isnanf) hidden_def (__isnanf) weak_alias (__isnanf, isnanf) - diff --git a/libc/sysdeps/sparc/sparc64/elf/configure b/libc/sysdeps/sparc/sparc64/elf/configure index 7962ff52d..f213438aa 100644 --- a/libc/sysdeps/sparc/sparc64/elf/configure +++ b/libc/sysdeps/sparc/sparc64/elf/configure @@ -1,12 +1,61 @@ # This file is generated from configure.in by Autoconf. DO NOT EDIT! # Local configure fragment for sysdeps/sparc/sparc64/elf. -cat >>confdefs.h <<\_ACEOF +if test "$usetls" != no; then +# Check for support of thread-local storage handling in assembler and linker. +{ $as_echo "$as_me:$LINENO: checking for sparc64 TLS support" >&5 +$as_echo_n "checking for sparc64 TLS support... " >&6; } +if test "${libc_cv_sparc64_tls+set}" = set; then + $as_echo_n "(cached) " >&6 +else + cat > conftest.s <<\EOF + .section ".tdata", "awT", @progbits + .globl foo +foo: .word 1 + .section ".tbss", "awT", @nobits + .globl bar +bar: .skip 4 + .text + .globl _start +_start: sethi %tgd_hi22(foo), %l1 + add %l1, %tgd_lo10(foo), %l1 + add %l7, %l1, %o0, %tgd_add(foo) + call __tls_get_addr, %tgd_call(foo) + sethi %tldm_hi22(bar), %l1 + add %l1, %tldm_lo10(bar), %l1 + add %l7, %l1, %o0, %tldm_add(bar) + call __tls_get_addr, %tldm_call(bar) + sethi %tldo_hix22(bar), %l1 + xor %l1, %tldo_lox10(bar), %l1 + add %o0, %l1, %l1, %tldo_add(bar) + sethi %tie_hi22(foo), %l1 + add %l1, %tie_lo10(foo), %l1 + ldx [%l7 + %l1], %l1, %tie_ldx(foo) + add %g7, %l1, %l1, %tie_add(foo) + sethi %tle_hix22(foo), %l1 + xor %l1, %tle_lox10(foo), %l1 +EOF +if { ac_try='${CC-cc} -o conftest.bin $CFLAGS $LDFLAGS conftest.s -nostdlib -nostartfiles 1>&5' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + libc_cv_sparc64_tls=yes +else + libc_cv_sparc64_tls=no +fi +rm -f conftest* +fi +{ $as_echo "$as_me:$LINENO: result: $libc_cv_sparc64_tls" >&5 +$as_echo "$libc_cv_sparc64_tls" >&6; } +if test $libc_cv_sparc64_tls = yes; then + cat >>confdefs.h <<\_ACEOF #define HAVE_TLS_SUPPORT 1 _ACEOF -libc_cv_sparc64_tls=yes - +fi +fi # Check for broken WDISP22 in the linker. { $as_echo "$as_me:$LINENO: checking for sparc64 ld WDISP22 handling" >&5 diff --git a/libc/sysdeps/sparc/sparc64/elf/configure.in b/libc/sysdeps/sparc/sparc64/elf/configure.in index f6281655a..4f7597835 100644 --- a/libc/sysdeps/sparc/sparc64/elf/configure.in +++ b/libc/sysdeps/sparc/sparc64/elf/configure.in @@ -1,52 +1,49 @@ GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. # Local configure fragment for sysdeps/sparc/sparc64/elf. -AC_DEFINE(HAVE_TLS_SUPPORT) -libc_cv_sparc64_tls=yes - -dnl if test "$usetls" != no; then -dnl # Check for support of thread-local storage handling in assembler and linker. -dnl AC_CACHE_CHECK(for sparc64 TLS support, libc_cv_sparc64_tls, [dnl -dnl changequote(,)dnl -dnl cat > conftest.s <<\EOF -dnl .section ".tdata", "awT", @progbits -dnl .globl foo -dnl foo: .word 1 -dnl .section ".tbss", "awT", @nobits -dnl .globl bar -dnl bar: .skip 4 -dnl .text -dnl .globl main -dnl main: sethi %tgd_hi22(foo), %l1 -dnl add %l1, %tgd_lo10(foo), %l1 -dnl add %l7, %l1, %o0, %tgd_add(foo) -dnl call __tls_get_addr, %tgd_call(foo) -dnl sethi %tldm_hi22(bar), %l1 -dnl add %l1, %tldm_lo10(bar), %l1 -dnl add %l7, %l1, %o0, %tldm_add(bar) -dnl call __tls_get_addr, %tldm_call(bar) -dnl sethi %tldo_hix22(bar), %l1 -dnl xor %l1, %tldo_lox10(bar), %l1 -dnl add %o0, %l1, %l1, %tldo_add(bar) -dnl sethi %tie_hi22(foo), %l1 -dnl add %l1, %tie_lo10(foo), %l1 -dnl ldx [%l7 + %l1], %l1, %tie_ldx(foo) -dnl add %g7, %l1, %l1, %tie_add(foo) -dnl sethi %tle_hix22(foo), %l1 -dnl xor %l1, %tle_lox10(foo), %l1 -dnl EOF -dnl changequote([,])dnl -dnl dnl -dnl if AC_TRY_COMMAND(${CC-cc} -o conftest.bin $CFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then -dnl libc_cv_sparc64_tls=yes -dnl else -dnl libc_cv_sparc64_tls=no -dnl fi -dnl rm -f conftest*]) -dnl if test $libc_cv_sparc64_tls = yes; then -dnl AC_DEFINE(HAVE_TLS_SUPPORT) -dnl fi -dnl fi +if test "$usetls" != no; then +# Check for support of thread-local storage handling in assembler and linker. +AC_CACHE_CHECK(for sparc64 TLS support, libc_cv_sparc64_tls, [dnl +changequote(,)dnl +cat > conftest.s <<\EOF + .section ".tdata", "awT", @progbits + .globl foo +foo: .word 1 + .section ".tbss", "awT", @nobits + .globl bar +bar: .skip 4 + .text + .globl _start +_start: sethi %tgd_hi22(foo), %l1 + add %l1, %tgd_lo10(foo), %l1 + add %l7, %l1, %o0, %tgd_add(foo) + call __tls_get_addr, %tgd_call(foo) + sethi %tldm_hi22(bar), %l1 + add %l1, %tldm_lo10(bar), %l1 + add %l7, %l1, %o0, %tldm_add(bar) + call __tls_get_addr, %tldm_call(bar) + sethi %tldo_hix22(bar), %l1 + xor %l1, %tldo_lox10(bar), %l1 + add %o0, %l1, %l1, %tldo_add(bar) + sethi %tie_hi22(foo), %l1 + add %l1, %tie_lo10(foo), %l1 + ldx [%l7 + %l1], %l1, %tie_ldx(foo) + add %g7, %l1, %l1, %tie_add(foo) + sethi %tle_hix22(foo), %l1 + xor %l1, %tle_lox10(foo), %l1 +EOF +changequote([,])dnl +dnl +if AC_TRY_COMMAND(${CC-cc} -o conftest.bin $CFLAGS $LDFLAGS conftest.s -nostdlib -nostartfiles 1>&AS_MESSAGE_LOG_FD); then + libc_cv_sparc64_tls=yes +else + libc_cv_sparc64_tls=no +fi +rm -f conftest*]) +if test $libc_cv_sparc64_tls = yes; then + AC_DEFINE(HAVE_TLS_SUPPORT) +fi +fi # Check for broken WDISP22 in the linker. AC_CACHE_CHECK(for sparc64 ld WDISP22 handling, libc_cv_sparc64_wdisp22, [dnl diff --git a/libc/sysdeps/unix/sysv/linux/i386/sysconf.c b/libc/sysdeps/unix/sysv/linux/i386/sysconf.c index ff3cf9f7c..4ea1a2bf5 100644 --- a/libc/sysdeps/unix/sysv/linux/i386/sysconf.c +++ b/libc/sysdeps/unix/sysv/linux/i386/sysconf.c @@ -1,5 +1,5 @@ /* Get file-specific information about a file. Linux version. - Copyright (C) 2003, 2004, 2006, 2007, 2009 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2006, 2007, 2009, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -186,6 +186,55 @@ intel_check_word (int name, unsigned int value, bool *has_level_2, /* No need to look further. */ break; } + else if (byte == 0xff) + { + /* CPUID leaf 0x4 contains all the information. We need to + iterate over it. */ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + unsigned int round = 0; + while (1) + { + asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" + : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx) + : "0" (4), "2" (round)); + + enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f; + if (type == null) + /* That was the end. */ + break; + + unsigned int level = (eax >> 5) & 0x7; + + if ((level == 1 && type == data + && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE)) + || (level == 1 && type == inst + && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE)) + || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE)) + || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE)) + || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE))) + { + unsigned int offset = M(name) - folded_rel_name; + + if (offset == 0) + /* Cache size. */ + return (((ebx >> 22) + 1) + * (((ebx >> 12) & 0x3ff) + 1) + * ((ebx & 0xfff) + 1) + * (ecx + 1)); + if (offset == 1) + return (ebx >> 22) + 1; + + assert (offset == 2); + return (ebx & 0xfff) + 1; + } + } + /* There is no other cache information anywhere else. */ + break; + } else { if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE)) @@ -358,11 +407,11 @@ handle_amd (int name) case _SC_LEVEL2_CACHE_ASSOC: ecx >>= 12; switch (ecx & 0xf) - { - case 0: - case 1: - case 2: - case 4: + { + case 0: + case 1: + case 2: + case 4: return ecx & 0xf; case 6: return 8; @@ -372,7 +421,7 @@ handle_amd (int name) return (ecx << 6) & 0x3fffc00; default: return 0; - } + } case _SC_LEVEL2_CACHE_LINESIZE: return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff; default: diff --git a/libc/sysdeps/x86_64/cacheinfo.c b/libc/sysdeps/x86_64/cacheinfo.c index eae54e725..fdd6427e1 100644 --- a/libc/sysdeps/x86_64/cacheinfo.c +++ b/libc/sysdeps/x86_64/cacheinfo.c @@ -1,5 +1,5 @@ /* x86_64 cache info. - Copyright (C) 2003, 2004, 2006, 2007, 2009 Free Software Foundation, Inc. + Copyright (C) 2003,2004,2006,2007,2009,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -181,6 +181,55 @@ intel_check_word (int name, unsigned int value, bool *has_level_2, /* No need to look further. */ break; } + else if (byte == 0xff) + { + /* CPUID leaf 0x4 contains all the information. We need to + iterate over it. */ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + unsigned int round = 0; + while (1) + { + asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" + : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx) + : "0" (4), "2" (round)); + + enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f; + if (type == null) + /* That was the end. */ + break; + + unsigned int level = (eax >> 5) & 0x7; + + if ((level == 1 && type == data + && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE)) + || (level == 1 && type == inst + && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE)) + || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE)) + || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE)) + || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE))) + { + unsigned int offset = M(name) - folded_rel_name; + + if (offset == 0) + /* Cache size. */ + return (((ebx >> 22) + 1) + * (((ebx >> 12) & 0x3ff) + 1) + * ((ebx & 0xfff) + 1) + * (ecx + 1)); + if (offset == 1) + return (ebx >> 22) + 1; + + assert (offset == 2); + return (ebx & 0xfff) + 1; + } + } + /* There is no other cache information anywhere else. */ + break; + } else { if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE)) @@ -352,11 +401,11 @@ handle_amd (int name) case _SC_LEVEL2_CACHE_ASSOC: switch ((ecx >> 12) & 0xf) - { - case 0: - case 1: - case 2: - case 4: + { + case 0: + case 1: + case 2: + case 4: return (ecx >> 12) & 0xf; case 6: return 8; @@ -376,7 +425,7 @@ handle_amd (int name) return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff); default: return 0; - } + } /* NOTREACHED */ case _SC_LEVEL2_CACHE_LINESIZE: @@ -521,10 +570,10 @@ init_cacheinfo (void) shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid); if (shared <= 0) - { + { /* Try L2 otherwise. */ - level = 2; - shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid); + level = 2; + shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid); } unsigned int ebx_1; @@ -540,7 +589,7 @@ init_cacheinfo (void) #ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION /* Intel prefers SSSE3 instructions for memory/string routines - if they are avaiable. */ + if they are available. */ if ((ecx & 0x200)) __x86_64_preferred_memory_instruction = 3; else @@ -550,7 +599,7 @@ init_cacheinfo (void) /* Figure out the number of logical threads that share the highest cache level. */ if (max_cpuid >= 4) - { + { int i = 0; /* Query until desired cache level is enumerated. */ @@ -565,7 +614,7 @@ init_cacheinfo (void) if ((eax & 0x1f) == 0) goto intel_bug_no_cache_info; } - while (((eax >> 5) & 0x7) != level); + while (((eax >> 5) & 0x7) != level); threads = (eax >> 14) & 0x3ff; @@ -602,7 +651,7 @@ init_cacheinfo (void) threads += 1; } else - { + { intel_bug_no_cache_info: /* Assume that all logical threads share the highest cache level. */ @@ -612,7 +661,7 @@ init_cacheinfo (void) /* Cap usage of highest cache level to the number of supported threads. */ if (shared > 0 && threads > 0) - shared /= threads; + shared /= threads; } /* This spells out "AuthenticAMD". */ else if (is_amd) @@ -621,6 +670,25 @@ init_cacheinfo (void) long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE); shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); +#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION +# ifdef USE_MULTIARCH + eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; + ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx; + ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; + edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx; +# else + __cpuid (1, eax, ebx, ecx, edx); +# endif + + /* AMD prefers SSSE3 instructions for memory/string routines + if they are avaiable, otherwise it prefers integer + instructions. */ + if ((ecx & 0x200)) + __x86_64_preferred_memory_instruction = 3; + else + __x86_64_preferred_memory_instruction = 0; +#endif + /* Get maximum extended function. */ __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx); diff --git a/libc/sysdeps/x86_64/memset.S b/libc/sysdeps/x86_64/memset.S index f6eb71fc7..d43c7f68b 100644 --- a/libc/sysdeps/x86_64/memset.S +++ b/libc/sysdeps/x86_64/memset.S @@ -1,6 +1,6 @@ /* memset/bzero -- set memory area to CH/0 Optimized version for x86-64. - Copyright (C) 2002-2005, 2007, 2008 Free Software Foundation, Inc. + Copyright (C) 2002-2005, 2007, 2008, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -23,7 +23,7 @@ #define __STOS_LOWER_BOUNDARY $8192 #define __STOS_UPPER_BOUNDARY $65536 - .text + .text #if !defined NOT_IN_libc && !defined USE_MULTIARCH ENTRY(__bzero) mov %rsi,%rdx /* Adjust parameter. */ @@ -417,7 +417,7 @@ L(P4Q0): mov %edx,-0x4(%rdi) retq .balign 16 -#if defined(USE_EXTRA_TABLE) +#ifdef USE_EXTRA_TABLE L(P5QI): mov %rdx,-0x95(%rdi) #endif L(P5QH): mov %rdx,-0x8d(%rdi) @@ -596,6 +596,8 @@ L(A6Q0): mov %dx,-0x6(%rdi) jmp L(aligned_now) L(SSE_pre): +#else +L(aligned_now): #endif #if !defined USE_MULTIARCH || defined USE_SSE2 # fill RegXMM0 with the pattern @@ -606,16 +608,16 @@ L(SSE_pre): jge L(byte32sse2_pre) add %r8,%rdi -#ifndef PIC +# ifndef PIC lea L(SSExDx)(%rip),%r9 jmpq *(%r9,%r8,8) -#else +# else lea L(SSE0Q0)(%rip),%r9 lea L(SSExDx)(%rip),%rcx movswq (%rcx,%r8,2),%rcx lea (%rcx,%r9,1),%r9 jmpq *%r9 -#endif +# endif L(SSE0QB): movdqa %xmm0,-0xb0(%rdi) L(SSE0QA): movdqa %xmm0,-0xa0(%rdi) @@ -881,16 +883,16 @@ L(byte32sse2): lea 0x80(%rdi),%rdi jge L(byte32sse2) add %r8,%rdi -#ifndef PIC +# ifndef PIC lea L(SSExDx)(%rip),%r11 jmpq *(%r11,%r8,8) -#else +# else lea L(SSE0Q0)(%rip),%r11 lea L(SSExDx)(%rip),%rcx movswq (%rcx,%r8,2),%rcx lea (%rcx,%r11,1),%r11 jmpq *%r11 -#endif +# endif .balign 16 L(sse2_nt_move_pre): @@ -916,20 +918,20 @@ L(sse2_nt_move): jge L(sse2_nt_move) sfence add %r8,%rdi -#ifndef PIC +# ifndef PIC lea L(SSExDx)(%rip),%r11 jmpq *(%r11,%r8,8) -#else +# else lea L(SSE0Q0)(%rip),%r11 lea L(SSExDx)(%rip),%rcx movswq (%rcx,%r8,2),%rcx lea (%rcx,%r11,1),%r11 jmpq *%r11 -#endif +# endif .pushsection .rodata .balign 16 -#ifndef PIC +# ifndef PIC L(SSExDx): .quad L(SSE0Q0), L(SSE1Q0), L(SSE2Q0), L(SSE3Q0) .quad L(SSE4Q0), L(SSE5Q0), L(SSE6Q0), L(SSE7Q0) @@ -979,7 +981,7 @@ L(SSExDx): .quad L(SSE4QB), L(SSE5QB), L(SSE6QB), L(SSE7QB) .quad L(SSE8QB), L(SSE9QB), L(SSE10QB), L(SSE11QB) .quad L(SSE12QB), L(SSE13QB), L(SSE14QB), L(SSE15QB) -#else +# else L(SSExDx): .short L(SSE0Q0) -L(SSE0Q0) .short L(SSE1Q0) -L(SSE0Q0) @@ -1196,14 +1198,14 @@ L(SSExDx): .short L(SSE13QB)-L(SSE0Q0) .short L(SSE14QB)-L(SSE0Q0) .short L(SSE15QB)-L(SSE0Q0) -#endif +# endif .popsection #endif /* !defined USE_MULTIARCH || defined USE_SSE2 */ .balign 16 +#ifndef USE_MULTIARCH L(aligned_now): -#ifndef USE_MULTIARCH cmpl $0x1,__x86_64_preferred_memory_instruction(%rip) jg L(SSE_pre) #endif /* USE_MULTIARCH */ @@ -1246,17 +1248,17 @@ L(8byte_move_loop): L(8byte_move_skip): andl $127,%r8d - lea (%rdi,%r8,1),%rdi + lea (%rdi,%r8,1),%rdi #ifndef PIC - lea L(setPxQx)(%rip),%r11 - jmpq *(%r11,%r8,8) # old scheme remained for nonPIC + lea L(setPxQx)(%rip),%r11 + jmpq *(%r11,%r8,8) # old scheme remained for nonPIC #else - lea L(Got0)(%rip),%r11 + lea L(Got0)(%rip),%r11 lea L(setPxQx)(%rip),%rcx movswq (%rcx,%r8,2),%rcx - lea (%rcx,%r11,1),%r11 - jmpq *%r11 + lea (%rcx,%r11,1),%r11 + jmpq *%r11 #endif .balign 16 @@ -1290,16 +1292,16 @@ L(8byte_stos_skip): ja L(8byte_nt_move) andl $7,%r8d - lea (%rdi,%r8,1),%rdi + lea (%rdi,%r8,1),%rdi #ifndef PIC - lea L(setPxQx)(%rip),%r11 - jmpq *(%r11,%r8,8) # old scheme remained for nonPIC + lea L(setPxQx)(%rip),%r11 + jmpq *(%r11,%r8,8) # old scheme remained for nonPIC #else - lea L(Got0)(%rip),%r11 + lea L(Got0)(%rip),%r11 lea L(setPxQx)(%rip),%rcx movswq (%rcx,%r8,2),%rcx lea (%rcx,%r11,1),%r11 - jmpq *%r11 + jmpq *%r11 #endif .balign 16 @@ -1338,16 +1340,16 @@ L(8byte_nt_move_loop): L(8byte_nt_move_skip): andl $127,%r8d - lea (%rdi,%r8,1),%rdi + lea (%rdi,%r8,1),%rdi #ifndef PIC - lea L(setPxQx)(%rip),%r11 - jmpq *(%r11,%r8,8) # old scheme remained for nonPIC + lea L(setPxQx)(%rip),%r11 + jmpq *(%r11,%r8,8) # old scheme remained for nonPIC #else - lea L(Got0)(%rip),%r11 + lea L(Got0)(%rip),%r11 lea L(setPxQx)(%rip),%rcx movswq (%rcx,%r8,2),%rcx - lea (%rcx,%r11,1),%r11 - jmpq *%r11 + lea (%rcx,%r11,1),%r11 + jmpq *%r11 #endif END (memset) diff --git a/libc/sysdeps/x86_64/multiarch/init-arch.c b/libc/sysdeps/x86_64/multiarch/init-arch.c index f0d2bb7d1..34ec2df2d 100644 --- a/libc/sysdeps/x86_64/multiarch/init-arch.c +++ b/libc/sysdeps/x86_64/multiarch/init-arch.c @@ -1,6 +1,6 @@ /* Initialize CPU feature data. This file is part of the GNU C Library. - Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc. Contributed by Ulrich Drepper <drepper@redhat.com>. The GNU C Library is free software; you can redistribute it and/or @@ -60,7 +60,7 @@ __init_cpu_features (void) get_common_indeces (&family, &model); /* Intel processors prefer SSE instruction for memory/string - routines if they are avaiable. */ + routines if they are available. */ __cpu_features.feature[index_Prefer_SSE_for_memop] |= bit_Prefer_SSE_for_memop; @@ -107,6 +107,14 @@ __init_cpu_features (void) kind = arch_kind_amd; get_common_indeces (&family, &model); + + unsigned int ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; + + /* AMD processors prefer SSE instructions for memory/string routines + if they are available, otherwise they prefer integer instructions. */ + if ((ecx & 0x200)) + __cpu_features.feature[index_Prefer_SSE_for_memop] + |= bit_Prefer_SSE_for_memop; } else kind = arch_kind_other; diff --git a/libc/sysdeps/x86_64/multiarch/strcmp.S b/libc/sysdeps/x86_64/multiarch/strcmp.S index 185928957..8879855d9 100644 --- a/libc/sysdeps/x86_64/multiarch/strcmp.S +++ b/libc/sysdeps/x86_64/multiarch/strcmp.S @@ -452,6 +452,7 @@ LABEL(loop_ashr_1_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_1_use_sse4_2) +LABEL(nibble_ashr_1_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $1, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -499,7 +500,7 @@ LABEL(nibble_ashr_1_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $14, %ecx - ja LABEL(loop_ashr_1_use_sse4_2) + ja LABEL(nibble_ashr_1_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -544,6 +545,7 @@ LABEL(loop_ashr_2_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_2_use_sse4_2) +LABEL(nibble_ashr_2_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $2, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -591,7 +593,7 @@ LABEL(nibble_ashr_2_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $13, %ecx - ja LABEL(loop_ashr_2_use_sse4_2) + ja LABEL(nibble_ashr_2_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -636,6 +638,7 @@ LABEL(loop_ashr_3_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_3_use_sse4_2) +LABEL(nibble_ashr_3_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $3, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -683,7 +686,7 @@ LABEL(nibble_ashr_3_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $12, %ecx - ja LABEL(loop_ashr_3_use_sse4_2) + ja LABEL(nibble_ashr_3_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -729,6 +732,7 @@ LABEL(loop_ashr_4_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_4_use_sse4_2) +LABEL(nibble_ashr_4_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $4, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -776,7 +780,7 @@ LABEL(nibble_ashr_4_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $11, %ecx - ja LABEL(loop_ashr_4_use_sse4_2) + ja LABEL(nibble_ashr_4_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -822,6 +826,7 @@ LABEL(loop_ashr_5_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_5_use_sse4_2) +LABEL(nibble_ashr_5_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $5, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -870,7 +875,7 @@ LABEL(nibble_ashr_5_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $10, %ecx - ja LABEL(loop_ashr_5_use_sse4_2) + ja LABEL(nibble_ashr_5_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -916,6 +921,7 @@ LABEL(loop_ashr_6_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_6_use_sse4_2) +LABEL(nibble_ashr_6_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $6, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -963,7 +969,7 @@ LABEL(nibble_ashr_6_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $9, %ecx - ja LABEL(loop_ashr_6_use_sse4_2) + ja LABEL(nibble_ashr_6_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -1009,6 +1015,7 @@ LABEL(loop_ashr_7_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_7_use_sse4_2) +LABEL(nibble_ashr_7_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $7, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -1056,7 +1063,7 @@ LABEL(nibble_ashr_7_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $8, %ecx - ja LABEL(loop_ashr_7_use_sse4_2) + ja LABEL(nibble_ashr_7_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -1102,6 +1109,7 @@ LABEL(loop_ashr_8_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_8_use_sse4_2) +LABEL(nibble_ashr_8_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $8, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -1149,7 +1157,7 @@ LABEL(nibble_ashr_8_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $7, %ecx - ja LABEL(loop_ashr_8_use_sse4_2) + ja LABEL(nibble_ashr_8_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -1195,6 +1203,7 @@ LABEL(loop_ashr_9_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_9_use_sse4_2) +LABEL(nibble_ashr_9_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $9, -16(%rdi, %rdx), %xmm0 @@ -1243,7 +1252,7 @@ LABEL(nibble_ashr_9_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $6, %ecx - ja LABEL(loop_ashr_9_use_sse4_2) + ja LABEL(nibble_ashr_9_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -1289,6 +1298,7 @@ LABEL(loop_ashr_10_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_10_use_sse4_2) +LABEL(nibble_ashr_10_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $10, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -1336,7 +1346,7 @@ LABEL(nibble_ashr_10_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $5, %ecx - ja LABEL(loop_ashr_10_use_sse4_2) + ja LABEL(nibble_ashr_10_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -1382,6 +1392,7 @@ LABEL(loop_ashr_11_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_11_use_sse4_2) +LABEL(nibble_ashr_11_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $11, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -1429,7 +1440,7 @@ LABEL(nibble_ashr_11_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $4, %ecx - ja LABEL(loop_ashr_11_use_sse4_2) + ja LABEL(nibble_ashr_11_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -1475,6 +1486,7 @@ LABEL(loop_ashr_12_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_12_use_sse4_2) +LABEL(nibble_ashr_12_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $12, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -1522,7 +1534,7 @@ LABEL(nibble_ashr_12_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $3, %ecx - ja LABEL(loop_ashr_12_use_sse4_2) + ja LABEL(nibble_ashr_12_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -1569,6 +1581,7 @@ LABEL(loop_ashr_13_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_13_use_sse4_2) +LABEL(nibble_ashr_13_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $13, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -1616,7 +1629,7 @@ LABEL(nibble_ashr_13_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $2, %ecx - ja LABEL(loop_ashr_13_use_sse4_2) + ja LABEL(nibble_ashr_13_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -1663,6 +1676,7 @@ LABEL(loop_ashr_14_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_14_use_sse4_2) +LABEL(nibble_ashr_14_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $14, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -1710,7 +1724,7 @@ LABEL(nibble_ashr_14_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $1, %ecx - ja LABEL(loop_ashr_14_use_sse4_2) + ja LABEL(nibble_ashr_14_use_sse4_2_restart) jmp LABEL(nibble_ashr_use_sse4_2_exit) @@ -1759,6 +1773,7 @@ LABEL(loop_ashr_15_use_sse4_2): add $16, %r10 jg LABEL(nibble_ashr_15_use_sse4_2) +LABEL(nibble_ashr_15_use_sse4_2_restart): movdqa (%rdi, %rdx), %xmm0 palignr $15, -16(%rdi, %rdx), %xmm0 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L @@ -1806,7 +1821,7 @@ LABEL(nibble_ashr_15_use_sse4_2): jae LABEL(nibble_ashr_use_sse4_2_exit) # endif cmp $0, %ecx - ja LABEL(loop_ashr_15_use_sse4_2) + ja LABEL(nibble_ashr_15_use_sse4_2_restart) LABEL(nibble_ashr_use_sse4_2_exit): # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L |