summary refs log tree commit diff
path: root/libc/sysdeps
diff options
context:
space:
mode:
author joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> 2011-03-21 17:49:12 +0000
committer joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> 2011-03-21 17:49:12 +0000
commit 5c8cf8f50bb4598c4ecf2aa860aa7bf0ef0c3df7 (patch)
tree 2f079ca4472be83c8bdca9215aa6c8d85f8f5875 /libc/sysdeps
parent f9a421a6b2e3ba46cc615e128091e5eacabb1dac (diff)
Merge changes between r13053 and r13227 from /fsf/trunk.
ports: * sysdeps/arm/eabi/bits/predefs.h: Correct license reference in last paragraph of license notice. git-svn-id: svn://svn.eglibc.org/trunk@13228 7b3dc134-2b1b-0410-93df-9e9f96275f8d
Diffstat (limited to 'libc/sysdeps')
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S 5
-rw-r--r-- libc/sysdeps/sparc/sparc64/elf/configure 55
-rw-r--r-- libc/sysdeps/sparc/sparc64/elf/configure.in 89
-rw-r--r-- libc/sysdeps/unix/sysv/linux/i386/sysconf.c 63
-rw-r--r-- libc/sysdeps/x86_64/cacheinfo.c 98
-rw-r--r-- libc/sysdeps/x86_64/memset.S 68
-rw-r--r-- libc/sysdeps/x86_64/multiarch/init-arch.c 12
-rw-r--r-- libc/sysdeps/x86_64/multiarch/strcmp.S 45
8 files changed, 311 insertions, 124 deletions
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S
index 12bf473a8..0f71a987d 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S
@@ -1,5 +1,5 @@
/* isnanf(). PowerPC32 version.
- Copyright (C) 2008 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -39,8 +39,7 @@ EALIGN (__isnanf, 4, 0)
L(NaN):
li r3,1 /* else return 1 */
blr
- END (__isnan)
+ END (__isnanf)
hidden_def (__isnanf)
weak_alias (__isnanf, isnanf)
-
diff --git a/libc/sysdeps/sparc/sparc64/elf/configure b/libc/sysdeps/sparc/sparc64/elf/configure
index 7962ff52d..f213438aa 100644
--- a/libc/sysdeps/sparc/sparc64/elf/configure
+++ b/libc/sysdeps/sparc/sparc64/elf/configure
@@ -1,12 +1,61 @@
# This file is generated from configure.in by Autoconf. DO NOT EDIT!
# Local configure fragment for sysdeps/sparc/sparc64/elf.
-cat >>confdefs.h <<\_ACEOF
+if test "$usetls" != no; then
+# Check for support of thread-local storage handling in assembler and linker.
+{ $as_echo "$as_me:$LINENO: checking for sparc64 TLS support" >&5
+$as_echo_n "checking for sparc64 TLS support... " >&6; }
+if test "${libc_cv_sparc64_tls+set}" = set; then
+ $as_echo_n "(cached) " >&6
+else
+ cat > conftest.s <<\EOF
+ .section ".tdata", "awT", @progbits
+ .globl foo
+foo: .word 1
+ .section ".tbss", "awT", @nobits
+ .globl bar
+bar: .skip 4
+ .text
+ .globl _start
+_start: sethi %tgd_hi22(foo), %l1
+ add %l1, %tgd_lo10(foo), %l1
+ add %l7, %l1, %o0, %tgd_add(foo)
+ call __tls_get_addr, %tgd_call(foo)
+ sethi %tldm_hi22(bar), %l1
+ add %l1, %tldm_lo10(bar), %l1
+ add %l7, %l1, %o0, %tldm_add(bar)
+ call __tls_get_addr, %tldm_call(bar)
+ sethi %tldo_hix22(bar), %l1
+ xor %l1, %tldo_lox10(bar), %l1
+ add %o0, %l1, %l1, %tldo_add(bar)
+ sethi %tie_hi22(foo), %l1
+ add %l1, %tie_lo10(foo), %l1
+ ldx [%l7 + %l1], %l1, %tie_ldx(foo)
+ add %g7, %l1, %l1, %tie_add(foo)
+ sethi %tle_hix22(foo), %l1
+ xor %l1, %tle_lox10(foo), %l1
+EOF
+if { ac_try='${CC-cc} -o conftest.bin $CFLAGS $LDFLAGS conftest.s -nostdlib -nostartfiles 1>&5'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ libc_cv_sparc64_tls=yes
+else
+ libc_cv_sparc64_tls=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:$LINENO: result: $libc_cv_sparc64_tls" >&5
+$as_echo "$libc_cv_sparc64_tls" >&6; }
+if test $libc_cv_sparc64_tls = yes; then
+ cat >>confdefs.h <<\_ACEOF
#define HAVE_TLS_SUPPORT 1
_ACEOF
-libc_cv_sparc64_tls=yes
-
+fi
+fi
# Check for broken WDISP22 in the linker.
{ $as_echo "$as_me:$LINENO: checking for sparc64 ld WDISP22 handling" >&5
diff --git a/libc/sysdeps/sparc/sparc64/elf/configure.in b/libc/sysdeps/sparc/sparc64/elf/configure.in
index f6281655a..4f7597835 100644
--- a/libc/sysdeps/sparc/sparc64/elf/configure.in
+++ b/libc/sysdeps/sparc/sparc64/elf/configure.in
@@ -1,52 +1,49 @@
GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
# Local configure fragment for sysdeps/sparc/sparc64/elf.
-AC_DEFINE(HAVE_TLS_SUPPORT)
-libc_cv_sparc64_tls=yes
-
-dnl if test "$usetls" != no; then
-dnl # Check for support of thread-local storage handling in assembler and linker.
-dnl AC_CACHE_CHECK(for sparc64 TLS support, libc_cv_sparc64_tls, [dnl
-dnl changequote(,)dnl
-dnl cat > conftest.s <<\EOF
-dnl .section ".tdata", "awT", @progbits
-dnl .globl foo
-dnl foo: .word 1
-dnl .section ".tbss", "awT", @nobits
-dnl .globl bar
-dnl bar: .skip 4
-dnl .text
-dnl .globl main
-dnl main: sethi %tgd_hi22(foo), %l1
-dnl add %l1, %tgd_lo10(foo), %l1
-dnl add %l7, %l1, %o0, %tgd_add(foo)
-dnl call __tls_get_addr, %tgd_call(foo)
-dnl sethi %tldm_hi22(bar), %l1
-dnl add %l1, %tldm_lo10(bar), %l1
-dnl add %l7, %l1, %o0, %tldm_add(bar)
-dnl call __tls_get_addr, %tldm_call(bar)
-dnl sethi %tldo_hix22(bar), %l1
-dnl xor %l1, %tldo_lox10(bar), %l1
-dnl add %o0, %l1, %l1, %tldo_add(bar)
-dnl sethi %tie_hi22(foo), %l1
-dnl add %l1, %tie_lo10(foo), %l1
-dnl ldx [%l7 + %l1], %l1, %tie_ldx(foo)
-dnl add %g7, %l1, %l1, %tie_add(foo)
-dnl sethi %tle_hix22(foo), %l1
-dnl xor %l1, %tle_lox10(foo), %l1
-dnl EOF
-dnl changequote([,])dnl
-dnl dnl
-dnl if AC_TRY_COMMAND(${CC-cc} -o conftest.bin $CFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
-dnl libc_cv_sparc64_tls=yes
-dnl else
-dnl libc_cv_sparc64_tls=no
-dnl fi
-dnl rm -f conftest*])
-dnl if test $libc_cv_sparc64_tls = yes; then
-dnl AC_DEFINE(HAVE_TLS_SUPPORT)
-dnl fi
-dnl fi
+if test "$usetls" != no; then
+# Check for support of thread-local storage handling in assembler and linker.
+AC_CACHE_CHECK(for sparc64 TLS support, libc_cv_sparc64_tls, [dnl
+changequote(,)dnl
+cat > conftest.s <<\EOF
+ .section ".tdata", "awT", @progbits
+ .globl foo
+foo: .word 1
+ .section ".tbss", "awT", @nobits
+ .globl bar
+bar: .skip 4
+ .text
+ .globl _start
+_start: sethi %tgd_hi22(foo), %l1
+ add %l1, %tgd_lo10(foo), %l1
+ add %l7, %l1, %o0, %tgd_add(foo)
+ call __tls_get_addr, %tgd_call(foo)
+ sethi %tldm_hi22(bar), %l1
+ add %l1, %tldm_lo10(bar), %l1
+ add %l7, %l1, %o0, %tldm_add(bar)
+ call __tls_get_addr, %tldm_call(bar)
+ sethi %tldo_hix22(bar), %l1
+ xor %l1, %tldo_lox10(bar), %l1
+ add %o0, %l1, %l1, %tldo_add(bar)
+ sethi %tie_hi22(foo), %l1
+ add %l1, %tie_lo10(foo), %l1
+ ldx [%l7 + %l1], %l1, %tie_ldx(foo)
+ add %g7, %l1, %l1, %tie_add(foo)
+ sethi %tle_hix22(foo), %l1
+ xor %l1, %tle_lox10(foo), %l1
+EOF
+changequote([,])dnl
+dnl
+if AC_TRY_COMMAND(${CC-cc} -o conftest.bin $CFLAGS $LDFLAGS conftest.s -nostdlib -nostartfiles 1>&AS_MESSAGE_LOG_FD); then
+ libc_cv_sparc64_tls=yes
+else
+ libc_cv_sparc64_tls=no
+fi
+rm -f conftest*])
+if test $libc_cv_sparc64_tls = yes; then
+ AC_DEFINE(HAVE_TLS_SUPPORT)
+fi
+fi
# Check for broken WDISP22 in the linker.
AC_CACHE_CHECK(for sparc64 ld WDISP22 handling, libc_cv_sparc64_wdisp22, [dnl
diff --git a/libc/sysdeps/unix/sysv/linux/i386/sysconf.c b/libc/sysdeps/unix/sysv/linux/i386/sysconf.c
index ff3cf9f7c..4ea1a2bf5 100644
--- a/libc/sysdeps/unix/sysv/linux/i386/sysconf.c
+++ b/libc/sysdeps/unix/sysv/linux/i386/sysconf.c
@@ -1,5 +1,5 @@
/* Get file-specific information about a file. Linux version.
- Copyright (C) 2003, 2004, 2006, 2007, 2009 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2006, 2007, 2009, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -186,6 +186,55 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
/* No need to look further. */
break;
}
+ else if (byte == 0xff)
+ {
+ /* CPUID leaf 0x4 contains all the information. We need to
+ iterate over it. */
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ unsigned int round = 0;
+ while (1)
+ {
+ asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+ : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
+ : "0" (4), "2" (round));
+
+ enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
+ if (type == null)
+ /* That was the end. */
+ break;
+
+ unsigned int level = (eax >> 5) & 0x7;
+
+ if ((level == 1 && type == data
+ && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
+ || (level == 1 && type == inst
+ && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
+ || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
+ || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
+ || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
+ {
+ unsigned int offset = M(name) - folded_rel_name;
+
+ if (offset == 0)
+ /* Cache size. */
+ return (((ebx >> 22) + 1)
+ * (((ebx >> 12) & 0x3ff) + 1)
+ * ((ebx & 0xfff) + 1)
+ * (ecx + 1));
+ if (offset == 1)
+ return (ebx >> 22) + 1;
+
+ assert (offset == 2);
+ return (ebx & 0xfff) + 1;
+ }
+ }
+ /* There is no other cache information anywhere else. */
+ break;
+ }
else
{
if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
@@ -358,11 +407,11 @@ handle_amd (int name)
case _SC_LEVEL2_CACHE_ASSOC:
ecx >>= 12;
switch (ecx & 0xf)
- {
- case 0:
- case 1:
- case 2:
- case 4:
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
return ecx & 0xf;
case 6:
return 8;
@@ -372,7 +421,7 @@ handle_amd (int name)
return (ecx << 6) & 0x3fffc00;
default:
return 0;
- }
+ }
case _SC_LEVEL2_CACHE_LINESIZE:
return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
default:
diff --git a/libc/sysdeps/x86_64/cacheinfo.c b/libc/sysdeps/x86_64/cacheinfo.c
index eae54e725..fdd6427e1 100644
--- a/libc/sysdeps/x86_64/cacheinfo.c
+++ b/libc/sysdeps/x86_64/cacheinfo.c
@@ -1,5 +1,5 @@
/* x86_64 cache info.
- Copyright (C) 2003, 2004, 2006, 2007, 2009 Free Software Foundation, Inc.
+ Copyright (C) 2003,2004,2006,2007,2009,2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -181,6 +181,55 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
/* No need to look further. */
break;
}
+ else if (byte == 0xff)
+ {
+ /* CPUID leaf 0x4 contains all the information. We need to
+ iterate over it. */
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ unsigned int round = 0;
+ while (1)
+ {
+ asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+ : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
+ : "0" (4), "2" (round));
+
+ enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
+ if (type == null)
+ /* That was the end. */
+ break;
+
+ unsigned int level = (eax >> 5) & 0x7;
+
+ if ((level == 1 && type == data
+ && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
+ || (level == 1 && type == inst
+ && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
+ || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
+ || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
+ || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
+ {
+ unsigned int offset = M(name) - folded_rel_name;
+
+ if (offset == 0)
+ /* Cache size. */
+ return (((ebx >> 22) + 1)
+ * (((ebx >> 12) & 0x3ff) + 1)
+ * ((ebx & 0xfff) + 1)
+ * (ecx + 1));
+ if (offset == 1)
+ return (ebx >> 22) + 1;
+
+ assert (offset == 2);
+ return (ebx & 0xfff) + 1;
+ }
+ }
+ /* There is no other cache information anywhere else. */
+ break;
+ }
else
{
if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
@@ -352,11 +401,11 @@ handle_amd (int name)
case _SC_LEVEL2_CACHE_ASSOC:
switch ((ecx >> 12) & 0xf)
- {
- case 0:
- case 1:
- case 2:
- case 4:
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
return (ecx >> 12) & 0xf;
case 6:
return 8;
@@ -376,7 +425,7 @@ handle_amd (int name)
return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
default:
return 0;
- }
+ }
/* NOTREACHED */
case _SC_LEVEL2_CACHE_LINESIZE:
@@ -521,10 +570,10 @@ init_cacheinfo (void)
shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
if (shared <= 0)
- {
+ {
/* Try L2 otherwise. */
- level = 2;
- shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
+ level = 2;
+ shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
}
unsigned int ebx_1;
@@ -540,7 +589,7 @@ init_cacheinfo (void)
#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
/* Intel prefers SSSE3 instructions for memory/string routines
- if they are avaiable. */
+ if they are available. */
if ((ecx & 0x200))
__x86_64_preferred_memory_instruction = 3;
else
@@ -550,7 +599,7 @@ init_cacheinfo (void)
/* Figure out the number of logical threads that share the
highest cache level. */
if (max_cpuid >= 4)
- {
+ {
int i = 0;
/* Query until desired cache level is enumerated. */
@@ -565,7 +614,7 @@ init_cacheinfo (void)
if ((eax & 0x1f) == 0)
goto intel_bug_no_cache_info;
}
- while (((eax >> 5) & 0x7) != level);
+ while (((eax >> 5) & 0x7) != level);
threads = (eax >> 14) & 0x3ff;
@@ -602,7 +651,7 @@ init_cacheinfo (void)
threads += 1;
}
else
- {
+ {
intel_bug_no_cache_info:
/* Assume that all logical threads share the highest cache level. */
@@ -612,7 +661,7 @@ init_cacheinfo (void)
/* Cap usage of highest cache level to the number of supported
threads. */
if (shared > 0 && threads > 0)
- shared /= threads;
+ shared /= threads;
}
/* This spells out "AuthenticAMD". */
else if (is_amd)
@@ -621,6 +670,25 @@ init_cacheinfo (void)
long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
+#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
+# ifdef USE_MULTIARCH
+ eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
+ ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
+ ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+ edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
+# else
+ __cpuid (1, eax, ebx, ecx, edx);
+# endif
+
+ /* AMD prefers SSSE3 instructions for memory/string routines
+ if they are available, otherwise it prefers integer
+ instructions. */
+ if ((ecx & 0x200))
+ __x86_64_preferred_memory_instruction = 3;
+ else
+ __x86_64_preferred_memory_instruction = 0;
+#endif
+
/* Get maximum extended function. */
__cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
diff --git a/libc/sysdeps/x86_64/memset.S b/libc/sysdeps/x86_64/memset.S
index f6eb71fc7..d43c7f68b 100644
--- a/libc/sysdeps/x86_64/memset.S
+++ b/libc/sysdeps/x86_64/memset.S
@@ -1,6 +1,6 @@
/* memset/bzero -- set memory area to CH/0
Optimized version for x86-64.
- Copyright (C) 2002-2005, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 2002-2005, 2007, 2008, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -23,7 +23,7 @@
#define __STOS_LOWER_BOUNDARY $8192
#define __STOS_UPPER_BOUNDARY $65536
- .text
+ .text
#if !defined NOT_IN_libc && !defined USE_MULTIARCH
ENTRY(__bzero)
mov %rsi,%rdx /* Adjust parameter. */
@@ -417,7 +417,7 @@ L(P4Q0): mov %edx,-0x4(%rdi)
retq
.balign 16
-#if defined(USE_EXTRA_TABLE)
+#ifdef USE_EXTRA_TABLE
L(P5QI): mov %rdx,-0x95(%rdi)
#endif
L(P5QH): mov %rdx,-0x8d(%rdi)
@@ -596,6 +596,8 @@ L(A6Q0): mov %dx,-0x6(%rdi)
jmp L(aligned_now)
L(SSE_pre):
+#else
+L(aligned_now):
#endif
#if !defined USE_MULTIARCH || defined USE_SSE2
# fill RegXMM0 with the pattern
@@ -606,16 +608,16 @@ L(SSE_pre):
jge L(byte32sse2_pre)
add %r8,%rdi
-#ifndef PIC
+# ifndef PIC
lea L(SSExDx)(%rip),%r9
jmpq *(%r9,%r8,8)
-#else
+# else
lea L(SSE0Q0)(%rip),%r9
lea L(SSExDx)(%rip),%rcx
movswq (%rcx,%r8,2),%rcx
lea (%rcx,%r9,1),%r9
jmpq *%r9
-#endif
+# endif
L(SSE0QB): movdqa %xmm0,-0xb0(%rdi)
L(SSE0QA): movdqa %xmm0,-0xa0(%rdi)
@@ -881,16 +883,16 @@ L(byte32sse2):
lea 0x80(%rdi),%rdi
jge L(byte32sse2)
add %r8,%rdi
-#ifndef PIC
+# ifndef PIC
lea L(SSExDx)(%rip),%r11
jmpq *(%r11,%r8,8)
-#else
+# else
lea L(SSE0Q0)(%rip),%r11
lea L(SSExDx)(%rip),%rcx
movswq (%rcx,%r8,2),%rcx
lea (%rcx,%r11,1),%r11
jmpq *%r11
-#endif
+# endif
.balign 16
L(sse2_nt_move_pre):
@@ -916,20 +918,20 @@ L(sse2_nt_move):
jge L(sse2_nt_move)
sfence
add %r8,%rdi
-#ifndef PIC
+# ifndef PIC
lea L(SSExDx)(%rip),%r11
jmpq *(%r11,%r8,8)
-#else
+# else
lea L(SSE0Q0)(%rip),%r11
lea L(SSExDx)(%rip),%rcx
movswq (%rcx,%r8,2),%rcx
lea (%rcx,%r11,1),%r11
jmpq *%r11
-#endif
+# endif
.pushsection .rodata
.balign 16
-#ifndef PIC
+# ifndef PIC
L(SSExDx):
.quad L(SSE0Q0), L(SSE1Q0), L(SSE2Q0), L(SSE3Q0)
.quad L(SSE4Q0), L(SSE5Q0), L(SSE6Q0), L(SSE7Q0)
@@ -979,7 +981,7 @@ L(SSExDx):
.quad L(SSE4QB), L(SSE5QB), L(SSE6QB), L(SSE7QB)
.quad L(SSE8QB), L(SSE9QB), L(SSE10QB), L(SSE11QB)
.quad L(SSE12QB), L(SSE13QB), L(SSE14QB), L(SSE15QB)
-#else
+# else
L(SSExDx):
.short L(SSE0Q0) -L(SSE0Q0)
.short L(SSE1Q0) -L(SSE0Q0)
@@ -1196,14 +1198,14 @@ L(SSExDx):
.short L(SSE13QB)-L(SSE0Q0)
.short L(SSE14QB)-L(SSE0Q0)
.short L(SSE15QB)-L(SSE0Q0)
-#endif
+# endif
.popsection
#endif /* !defined USE_MULTIARCH || defined USE_SSE2 */
.balign 16
+#ifndef USE_MULTIARCH
L(aligned_now):
-#ifndef USE_MULTIARCH
cmpl $0x1,__x86_64_preferred_memory_instruction(%rip)
jg L(SSE_pre)
#endif /* USE_MULTIARCH */
@@ -1246,17 +1248,17 @@ L(8byte_move_loop):
L(8byte_move_skip):
andl $127,%r8d
- lea (%rdi,%r8,1),%rdi
+ lea (%rdi,%r8,1),%rdi
#ifndef PIC
- lea L(setPxQx)(%rip),%r11
- jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
+ lea L(setPxQx)(%rip),%r11
+ jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
#else
- lea L(Got0)(%rip),%r11
+ lea L(Got0)(%rip),%r11
lea L(setPxQx)(%rip),%rcx
movswq (%rcx,%r8,2),%rcx
- lea (%rcx,%r11,1),%r11
- jmpq *%r11
+ lea (%rcx,%r11,1),%r11
+ jmpq *%r11
#endif
.balign 16
@@ -1290,16 +1292,16 @@ L(8byte_stos_skip):
ja L(8byte_nt_move)
andl $7,%r8d
- lea (%rdi,%r8,1),%rdi
+ lea (%rdi,%r8,1),%rdi
#ifndef PIC
- lea L(setPxQx)(%rip),%r11
- jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
+ lea L(setPxQx)(%rip),%r11
+ jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
#else
- lea L(Got0)(%rip),%r11
+ lea L(Got0)(%rip),%r11
lea L(setPxQx)(%rip),%rcx
movswq (%rcx,%r8,2),%rcx
lea (%rcx,%r11,1),%r11
- jmpq *%r11
+ jmpq *%r11
#endif
.balign 16
@@ -1338,16 +1340,16 @@ L(8byte_nt_move_loop):
L(8byte_nt_move_skip):
andl $127,%r8d
- lea (%rdi,%r8,1),%rdi
+ lea (%rdi,%r8,1),%rdi
#ifndef PIC
- lea L(setPxQx)(%rip),%r11
- jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
+ lea L(setPxQx)(%rip),%r11
+ jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
#else
- lea L(Got0)(%rip),%r11
+ lea L(Got0)(%rip),%r11
lea L(setPxQx)(%rip),%rcx
movswq (%rcx,%r8,2),%rcx
- lea (%rcx,%r11,1),%r11
- jmpq *%r11
+ lea (%rcx,%r11,1),%r11
+ jmpq *%r11
#endif
END (memset)
diff --git a/libc/sysdeps/x86_64/multiarch/init-arch.c b/libc/sysdeps/x86_64/multiarch/init-arch.c
index f0d2bb7d1..34ec2df2d 100644
--- a/libc/sysdeps/x86_64/multiarch/init-arch.c
+++ b/libc/sysdeps/x86_64/multiarch/init-arch.c
@@ -1,6 +1,6 @@
/* Initialize CPU feature data.
This file is part of the GNU C Library.
- Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@redhat.com>.
The GNU C Library is free software; you can redistribute it and/or
@@ -60,7 +60,7 @@ __init_cpu_features (void)
get_common_indeces (&family, &model);
/* Intel processors prefer SSE instruction for memory/string
- routines if they are avaiable. */
+ routines if they are available. */
__cpu_features.feature[index_Prefer_SSE_for_memop]
|= bit_Prefer_SSE_for_memop;
@@ -107,6 +107,14 @@ __init_cpu_features (void)
kind = arch_kind_amd;
get_common_indeces (&family, &model);
+
+ unsigned int ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+
+ /* AMD processors prefer SSE instructions for memory/string routines
+ if they are available, otherwise they prefer integer instructions. */
+ if ((ecx & 0x200))
+ __cpu_features.feature[index_Prefer_SSE_for_memop]
+ |= bit_Prefer_SSE_for_memop;
}
else
kind = arch_kind_other;
diff --git a/libc/sysdeps/x86_64/multiarch/strcmp.S b/libc/sysdeps/x86_64/multiarch/strcmp.S
index 185928957..8879855d9 100644
--- a/libc/sysdeps/x86_64/multiarch/strcmp.S
+++ b/libc/sysdeps/x86_64/multiarch/strcmp.S
@@ -452,6 +452,7 @@ LABEL(loop_ashr_1_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_1_use_sse4_2)
+LABEL(nibble_ashr_1_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $1, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -499,7 +500,7 @@ LABEL(nibble_ashr_1_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $14, %ecx
- ja LABEL(loop_ashr_1_use_sse4_2)
+ ja LABEL(nibble_ashr_1_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -544,6 +545,7 @@ LABEL(loop_ashr_2_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_2_use_sse4_2)
+LABEL(nibble_ashr_2_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $2, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -591,7 +593,7 @@ LABEL(nibble_ashr_2_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $13, %ecx
- ja LABEL(loop_ashr_2_use_sse4_2)
+ ja LABEL(nibble_ashr_2_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -636,6 +638,7 @@ LABEL(loop_ashr_3_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_3_use_sse4_2)
+LABEL(nibble_ashr_3_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $3, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -683,7 +686,7 @@ LABEL(nibble_ashr_3_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $12, %ecx
- ja LABEL(loop_ashr_3_use_sse4_2)
+ ja LABEL(nibble_ashr_3_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -729,6 +732,7 @@ LABEL(loop_ashr_4_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_4_use_sse4_2)
+LABEL(nibble_ashr_4_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $4, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -776,7 +780,7 @@ LABEL(nibble_ashr_4_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $11, %ecx
- ja LABEL(loop_ashr_4_use_sse4_2)
+ ja LABEL(nibble_ashr_4_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -822,6 +826,7 @@ LABEL(loop_ashr_5_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_5_use_sse4_2)
+LABEL(nibble_ashr_5_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $5, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -870,7 +875,7 @@ LABEL(nibble_ashr_5_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $10, %ecx
- ja LABEL(loop_ashr_5_use_sse4_2)
+ ja LABEL(nibble_ashr_5_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -916,6 +921,7 @@ LABEL(loop_ashr_6_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_6_use_sse4_2)
+LABEL(nibble_ashr_6_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $6, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -963,7 +969,7 @@ LABEL(nibble_ashr_6_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $9, %ecx
- ja LABEL(loop_ashr_6_use_sse4_2)
+ ja LABEL(nibble_ashr_6_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -1009,6 +1015,7 @@ LABEL(loop_ashr_7_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_7_use_sse4_2)
+LABEL(nibble_ashr_7_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $7, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -1056,7 +1063,7 @@ LABEL(nibble_ashr_7_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $8, %ecx
- ja LABEL(loop_ashr_7_use_sse4_2)
+ ja LABEL(nibble_ashr_7_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -1102,6 +1109,7 @@ LABEL(loop_ashr_8_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_8_use_sse4_2)
+LABEL(nibble_ashr_8_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $8, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -1149,7 +1157,7 @@ LABEL(nibble_ashr_8_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $7, %ecx
- ja LABEL(loop_ashr_8_use_sse4_2)
+ ja LABEL(nibble_ashr_8_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -1195,6 +1203,7 @@ LABEL(loop_ashr_9_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_9_use_sse4_2)
+LABEL(nibble_ashr_9_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $9, -16(%rdi, %rdx), %xmm0
@@ -1243,7 +1252,7 @@ LABEL(nibble_ashr_9_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $6, %ecx
- ja LABEL(loop_ashr_9_use_sse4_2)
+ ja LABEL(nibble_ashr_9_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -1289,6 +1298,7 @@ LABEL(loop_ashr_10_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_10_use_sse4_2)
+LABEL(nibble_ashr_10_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $10, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -1336,7 +1346,7 @@ LABEL(nibble_ashr_10_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $5, %ecx
- ja LABEL(loop_ashr_10_use_sse4_2)
+ ja LABEL(nibble_ashr_10_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -1382,6 +1392,7 @@ LABEL(loop_ashr_11_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_11_use_sse4_2)
+LABEL(nibble_ashr_11_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $11, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -1429,7 +1440,7 @@ LABEL(nibble_ashr_11_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $4, %ecx
- ja LABEL(loop_ashr_11_use_sse4_2)
+ ja LABEL(nibble_ashr_11_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -1475,6 +1486,7 @@ LABEL(loop_ashr_12_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_12_use_sse4_2)
+LABEL(nibble_ashr_12_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $12, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -1522,7 +1534,7 @@ LABEL(nibble_ashr_12_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $3, %ecx
- ja LABEL(loop_ashr_12_use_sse4_2)
+ ja LABEL(nibble_ashr_12_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -1569,6 +1581,7 @@ LABEL(loop_ashr_13_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_13_use_sse4_2)
+LABEL(nibble_ashr_13_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $13, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -1616,7 +1629,7 @@ LABEL(nibble_ashr_13_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $2, %ecx
- ja LABEL(loop_ashr_13_use_sse4_2)
+ ja LABEL(nibble_ashr_13_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -1663,6 +1676,7 @@ LABEL(loop_ashr_14_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_14_use_sse4_2)
+LABEL(nibble_ashr_14_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $14, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -1710,7 +1724,7 @@ LABEL(nibble_ashr_14_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $1, %ecx
- ja LABEL(loop_ashr_14_use_sse4_2)
+ ja LABEL(nibble_ashr_14_use_sse4_2_restart)
jmp LABEL(nibble_ashr_use_sse4_2_exit)
@@ -1759,6 +1773,7 @@ LABEL(loop_ashr_15_use_sse4_2):
add $16, %r10
jg LABEL(nibble_ashr_15_use_sse4_2)
+LABEL(nibble_ashr_15_use_sse4_2_restart):
movdqa (%rdi, %rdx), %xmm0
palignr $15, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -1806,7 +1821,7 @@ LABEL(nibble_ashr_15_use_sse4_2):
jae LABEL(nibble_ashr_use_sse4_2_exit)
# endif
cmp $0, %ecx
- ja LABEL(loop_ashr_15_use_sse4_2)
+ ja LABEL(nibble_ashr_15_use_sse4_2_restart)
LABEL(nibble_ashr_use_sse4_2_exit):
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L