From 6d5d61ad6033044a890eeecdcc60427f5e52d03d Mon Sep 17 00:00:00 2001 From: joseph Date: Thu, 24 Jun 2010 15:01:34 +0000 Subject: Merge changes between r10687 and r10807 from /fsf/trunk. git-svn-id: svn://svn.eglibc.org/trunk@10808 7b3dc134-2b1b-0410-93df-9e9f96275f8d --- libc/ChangeLog | 60 ++ libc/NEWS | 8 +- libc/dlfcn/Makefile | 26 +- libc/elf/dl-reloc.c | 6 +- libc/libio/stdio.h | 16 +- libc/manual/libc.texinfo | 4 +- libc/nis/nss_nis/nis-initgroups.c | 2 +- libc/nptl/ChangeLog | 5 + .../nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.S | 4 +- libc/posix/getopt.h | 8 +- libc/posix/group_member.c | 9 +- libc/sysdeps/powerpc/powerpc32/power7/memcmp.S | 988 +++++++++++++++++++++ libc/sysdeps/powerpc/powerpc32/power7/strncmp.S | 177 ++++ libc/sysdeps/powerpc/powerpc64/power7/memcmp.S | 984 ++++++++++++++++++++ libc/sysdeps/powerpc/powerpc64/power7/strncmp.S | 181 ++++ libc/sysdeps/unix/sysv/linux/getlogin.c | 5 +- libc/sysdeps/unix/sysv/linux/getlogin_r.c | 32 +- libc/sysdeps/unix/sysv/linux/i386/bits/fcntl.h | 2 + libc/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h | 2 + libc/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h | 2 + libc/sysdeps/unix/sysv/linux/s390/bits/fcntl.h | 2 + libc/sysdeps/unix/sysv/linux/sh/bits/fcntl.h | 2 + libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h | 2 + libc/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h | 2 + libc/wcsmbs/wchar.h | 16 +- 25 files changed, 2478 insertions(+), 67 deletions(-) create mode 100644 libc/sysdeps/powerpc/powerpc32/power7/memcmp.S create mode 100644 libc/sysdeps/powerpc/powerpc32/power7/strncmp.S create mode 100644 libc/sysdeps/powerpc/powerpc64/power7/memcmp.S create mode 100644 libc/sysdeps/powerpc/powerpc64/power7/strncmp.S (limited to 'libc') diff --git a/libc/ChangeLog b/libc/ChangeLog index d5f9d3033..85b6e69d6 100644 --- a/libc/ChangeLog +++ b/libc/ChangeLog @@ -1,3 +1,63 @@ +2010-06-21 Andreas Schwab + + * sysdeps/unix/sysv/linux/getlogin_r.c (__getlogin_r_loginuid): + Restore proper fallback handling. 
+ +2010-06-19 Ulrich Drepper + + [BZ #11701] + * posix/group_member.c (__group_member): Correct checking loop. + + * sysdeps/unix/sysv/linux/getlogin_r.c (__getlogin_r_loginuid): Handle + OOM in getpwuid_r correctly. Return error number when the caller + should return, otherwise -1. + (getlogin_r): Adjust to return also for result of __getlogin_r_loginuid + call returning > 0 value. + * sysdeps/unix/sysv/linux/getlogin.c (getlogin): Likewise. + +2010-06-07 Andreas Schwab + + * dlfcn/Makefile: Remove explicit dependencies on libc.so and + libc_nonshared.a from targets in modules-names. + +2010-06-02 Kirill A. Shutemov + + * elf/dl-reloc.c: Flush cache after solving TEXTRELs if arch + requires it. + +2010-06-10 Luis Machado + + * sysdeps/powerpc/powerpc32/power7/memcmp.S: New file. + * sysdeps/powerpc/powerpc64/power7/memcmp.S: New file. + * sysdeps/powerpc/powerpc32/power7/strncmp.S: New file. + * sysdeps/powerpc/powerpc64/power7/strncmp.S: New file. + +2010-06-02 Andreas Schwab + + * nis/nss_nis/nis-initgroups.c (get_uid): Properly resize buffer. + +2010-06-14 Ulrich Drepper + + * sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h: Define F_SETPIPE_SZ + and F_GETPIPE_SZ. + * sysdeps/unix/sysv/linux/i386/bits/fcntl.h: Likewise. + * sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h: Likewise. + * sysdeps/unix/sysv/linux/s390/bits/fcntl.h: Likewise. + * sysdeps/unix/sysv/linux/sparc/bits/fcntl.h: Likewise. + * sysdeps/unix/sysv/linux/sh/bits/fcntl.h: Likewise. + * sysdeps/unix/sysv/linux/ia64/bits/fcntl.h: Likewise. + +2010-06-14 Roland McGrath + + * manual/libc.texinfo (@copying): Change to GFDL v1.3. + +2010-06-07 Jakub Jelinek + + * libio/stdio.h (sscanf, vsscanf): Use __REDIRECT_NTH instead of + __REDIRECT followed by __THROW. + * wcsmbs/wchar.h (swscanf, vswscanf): Likewise. + * posix/getopt.h (getopt): Likewise.
+ 2010-06-02 Emilio Pozuelo Monfort * hurd/lookup-at.c (__file_name_lookup_at): Accept diff --git a/libc/NEWS b/libc/NEWS index 4d869d2c8..aafc4b055 100644 --- a/libc/NEWS +++ b/libc/NEWS @@ -1,4 +1,4 @@ -GNU C Library NEWS -- history of user-visible changes. 2010-5-19 +GNU C Library NEWS -- history of user-visible changes. 2010-6-19 Copyright (C) 1992-2009, 2010 Free Software Foundation, Inc. See the end for copying conditions. @@ -7,7 +7,11 @@ using `glibc' in the "product" field. Version 2.13 -* POWER7 optimizations: memset +* The following bugs are resolved with this release: + + 11640, 11701 + +* POWER7 optimizations: memset, memcmp, strncmp Version 2.12 diff --git a/libc/dlfcn/Makefile b/libc/dlfcn/Makefile index 1d110768c..f67466e56 100644 --- a/libc/dlfcn/Makefile +++ b/libc/dlfcn/Makefile @@ -1,4 +1,4 @@ -# Copyright (C) 1995-2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# Copyright (C) 1995-2006, 2010 Free Software Foundation, Inc. # This file is part of the GNU C Library. 
# The GNU C Library is free software; you can redistribute it and/or @@ -95,9 +95,9 @@ LDFLAGS-glreflib3.so = -Wl,-rpath,: LDFLAGS-default = $(LDFLAGS-rdynamic) $(objpfx)default: $(libdl) $(objpfx)defaultmod1.so $(objpfx)defaultmod2.so -$(objpfx)defaultmod1.so: $(libdl) $(common-objpfx)libc_nonshared.a +$(objpfx)defaultmod1.so: $(libdl) LDFLAGS-defaultmod2.so = $(LDFLAGS-Bsymbolic) -$(objpfx)defaultmod2.so: $(libdl) $(common-objpfx)libc_nonshared.a +$(objpfx)defaultmod2.so: $(libdl) $(objpfx)errmsg1: $(libdl) $(objpfx)errmsg1.out: $(objpfx)errmsg1 $(objpfx)errmsg1mod.so @@ -108,8 +108,6 @@ $(objpfx)tstatexit.out: $(objpfx)tstatexit $(objpfx)modatexit.so $(objpfx)tstcxaatexit: $(libdl) $(objpfx)tstcxaatexit.out: $(objpfx)tstcxaatexit $(objpfx)modcxaatexit.so -$(objpfx)modatexit.so: $(common-objpfx)libc.so $(common-objpfx)libc_nonshared.a - $(objpfx)tststatic: $(objpfx)libdl.a $(objpfx)tststatic.out: $(objpfx)tststatic $(objpfx)modstatic.so @@ -117,36 +115,24 @@ $(objpfx)tststatic2: $(objpfx)libdl.a $(objpfx)tststatic2.out: $(objpfx)tststatic2 $(objpfx)modstatic.so \ $(objpfx)modstatic2.so -$(objpfx)modstatic2.so: $(libdl) $(common-objpfx)libc.so \ - $(common-objpfx)libc_nonshared.a +$(objpfx)modstatic2.so: $(libdl) $(objpfx)bug-dlopen1: $(libdl) $(objpfx)bug-dlsym1: $(libdl) $(objpfx)bug-dlsym1-lib2.so $(objpfx)bug-dlsym1.out: $(objpfx)bug-dlsym1-lib1.so \ $(objpfx)bug-dlsym1-lib2.so -$(objpfx)bug-dlsym1-lib1.so: $(objpfx)bug-dlsym1-lib2.so \ - $(common-objpfx)libc.so \ - $(common-objpfx)libc_nonshared.a -$(objpfx)bug-dlsym1-lib2.so: $(common-objpfx)libc.so \ - $(common-objpfx)libc_nonshared.a +$(objpfx)bug-dlsym1-lib1.so: $(objpfx)bug-dlsym1-lib2.so $(objpfx)bug-atexit1: $(libdl) $(objpfx)bug-atexit1.out: $(objpfx)bug-atexit1-lib.so -$(objpfx)bug-atexit1-lib.so: $(common-objpfx)libc.so \ - $(common-objpfx)libc_nonshared.a $(objpfx)bug-atexit2: $(libdl) $(objpfx)bug-atexit2.out: $(objpfx)bug-atexit2-lib.so -$(objpfx)bug-atexit2-lib.so: $(common-objpfx)libc.so \ - 
$(common-objpfx)libc_nonshared.a -LDLIBS-bug-atexit3-lib.so = -lstdc++ -lgcc_eh $(common-objpfx)elf/ld.so \ - $(common-objpfx)libc_nonshared.a +LDLIBS-bug-atexit3-lib.so = -lstdc++ -lgcc_eh $(objpfx)bug-atexit3: $(libdl) $(objpfx)bug-atexit3.out: $(objpfx)bug-atexit3-lib.so -$(objpfx)bug-atexit3-lib.so: $(common-objpfx)libc.so \ - $(common-objpfx)libc_nonshared.a # Depend on libc.so so a DT_NEEDED is generated in the shared objects. diff --git a/libc/elf/dl-reloc.c b/libc/elf/dl-reloc.c index 2754791eb..b7ba8511e 100644 --- a/libc/elf/dl-reloc.c +++ b/libc/elf/dl-reloc.c @@ -1,5 +1,5 @@ /* Relocate a shared object and resolve its references to other loaded objects. - Copyright (C) 1995-2006, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 1995-2006, 2008, 2009, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -319,6 +319,10 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], goto call_error; } +#ifdef CLEAR_CACHE + CLEAR_CACHE (textrels->start, textrels->start + textrels->len); +#endif + textrels = textrels->next; } diff --git a/libc/libio/stdio.h b/libc/libio/stdio.h index 85542b1cf..bf16b3fff 100644 --- a/libc/libio/stdio.h +++ b/libc/libio/stdio.h @@ -447,9 +447,9 @@ extern int __REDIRECT (fscanf, (FILE *__restrict __stream, __isoc99_fscanf) __wur; extern int __REDIRECT (scanf, (__const char *__restrict __format, ...), __isoc99_scanf) __wur; -extern int __REDIRECT (sscanf, (__const char *__restrict __s, - __const char *__restrict __format, ...), - __isoc99_sscanf) __THROW; +extern int __REDIRECT_NTH (sscanf, (__const char *__restrict __s, + __const char *__restrict __format, ...), + __isoc99_sscanf); # else extern int __isoc99_fscanf (FILE *__restrict __stream, __const char *__restrict __format, ...) 
__wur; @@ -501,11 +501,11 @@ extern int __REDIRECT (vfscanf, extern int __REDIRECT (vscanf, (__const char *__restrict __format, _G_va_list __arg), __isoc99_vscanf) __attribute__ ((__format__ (__scanf__, 1, 0))) __wur; -extern int __REDIRECT (vsscanf, - (__const char *__restrict __s, - __const char *__restrict __format, _G_va_list __arg), - __isoc99_vsscanf) - __THROW __attribute__ ((__format__ (__scanf__, 2, 0))); +extern int __REDIRECT_NTH (vsscanf, + (__const char *__restrict __s, + __const char *__restrict __format, + _G_va_list __arg), __isoc99_vsscanf) + __attribute__ ((__format__ (__scanf__, 2, 0))); # else extern int __isoc99_vfscanf (FILE *__restrict __s, __const char *__restrict __format, diff --git a/libc/manual/libc.texinfo b/libc/manual/libc.texinfo index 85f2f20b2..3f6447d34 100644 --- a/libc/manual/libc.texinfo +++ b/libc/manual/libc.texinfo @@ -33,10 +33,10 @@ of @cite{The GNU C Library Reference Manual}, for version @value{VERSION} @value{PKGVERSION}. Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2001, 2002, -2003, 2007, 2008 Free Software Foundation, Inc. +2003, 2007, 2008, 2010 Free Software Foundation, Inc. 
Permission is granted to copy, distribute and/or modify this document -under the terms of the GNU Free Documentation License, Version 1.2 or +under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with the Invariant Sections being ``Free Software Needs Free Documentation'' and ``GNU Lesser General Public License'', the Front-Cover texts being diff --git a/libc/nis/nss_nis/nis-initgroups.c b/libc/nis/nss_nis/nis-initgroups.c index a5a3ba614..85b9eeecc 100644 --- a/libc/nis/nss_nis/nis-initgroups.c +++ b/libc/nis/nss_nis/nis-initgroups.c @@ -139,7 +139,7 @@ get_uid (const char *user, uid_t *uidp) if (r != ERANGE) break; - extend_alloca (buf, buflen, 2 * buflen); + buf = extend_alloca (buf, buflen, 2 * buflen); } return 1; diff --git a/libc/nptl/ChangeLog b/libc/nptl/ChangeLog index e69e44f7c..49ead511b 100644 --- a/libc/nptl/ChangeLog +++ b/libc/nptl/ChangeLog @@ -1,3 +1,8 @@ +2010-06-01 Takashi Yoshii + + * sysdeps/unix/sysv/linux/sh/lowlevellock.S: Fix incorrect + location of ifndef __ASSUME_FUTEX_CLOCK_REALTIME. + 2010-04-09 Ulrich Drepper [BZ #11390] diff --git a/libc/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.S b/libc/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.S index 49b4e6d6e..0d02083c4 100644 --- a/libc/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.S +++ b/libc/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 +/* Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -282,6 +282,7 @@ __lll_timedlock_wait: rts mov.l @r15+, r12 +# ifndef __ASSUME_FUTEX_CLOCK_REALTIME .align 2 # ifdef PIC .Lgot: @@ -293,7 +294,6 @@ __lll_timedlock_wait: .long __have_futex_clock_realtime # endif -# ifndef __ASSUME_FUTEX_CLOCK_REALTIME .Lreltmo: /* Check for a valid timeout value. 
*/ mov.l @(4,r6), r1 diff --git a/libc/posix/getopt.h b/libc/posix/getopt.h index ff0251dec..6e2fa2718 100644 --- a/libc/posix/getopt.h +++ b/libc/posix/getopt.h @@ -1,5 +1,5 @@ /* Declarations for getopt. - Copyright (C) 1989-1994,1996-1999,2001,2003,2004,2009 + Copyright (C) 1989-1994,1996-1999,2001,2003,2004,2009,2010 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -158,9 +158,9 @@ extern int getopt (int ___argc, char *const *___argv, const char *__shortopts) additional functionality can be disable at runtime. This redirection helps to also do this at runtime. */ # ifdef __REDIRECT - extern int __REDIRECT (getopt, (int ___argc, char *const *___argv, - const char *__shortopts), - __posix_getopt) __THROW; + extern int __REDIRECT_NTH (getopt, (int ___argc, char *const *___argv, + const char *__shortopts), + __posix_getopt); # else extern int __posix_getopt (int ___argc, char *const *___argv, const char *__shortopts) __THROW; diff --git a/libc/posix/group_member.c b/libc/posix/group_member.c index 7bd9c46ba..892755e95 100644 --- a/libc/posix/group_member.c +++ b/libc/posix/group_member.c @@ -1,5 +1,5 @@ /* `group_member' -- test if process is in a given group. - Copyright (C) 1995, 1997, 2002 Free Software Foundation, Inc. + Copyright (C) 1995, 1997, 2002, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -39,10 +39,11 @@ __group_member (gid) groups = __alloca (size * sizeof *groups); n = __getgroups (size, groups); size *= 2; - } while (n == size / 2); + } + while (n == size / 2); - while (n >= 0) - if (groups[n--] == gid) + while (n-- > 0) + if (groups[n] == gid) return 1; return 0; diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S b/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S new file mode 100644 index 000000000..d529b492f --- /dev/null +++ b/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S @@ -0,0 +1,988 @@ +/* Optimized memcmp implementation for POWER7/PowerPC32. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA + 02110-1301 USA. 
*/ + +#include <sysdep.h> +#include <bp-sym.h> +#include <bp-asm.h> + +/* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + + .machine power7 +EALIGN (BP_SYM(memcmp),4,0) + CALL_MCOUNT + +#define rTMP r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r8 /* next word in s1 */ +#define rWORD4 r9 /* next word in s2 */ +#define rWORD5 r10 /* next word in s1 */ +#define rWORD6 r11 /* next word in s2 */ +#define rBITDIF r12 /* bits that differ in s1 & s2 words */ +#define rWORD7 r30 /* next word in s1 */ +#define rWORD8 r31 /* next word in s2 */ + + xor rTMP,rSTR2,rSTR1 + cmplwi cr6,rN,0 + cmplwi cr1,rN,12 + clrlwi. rTMP,rTMP,30 + clrlwi rBITDIF,rSTR1,30 + cmplwi cr5,rBITDIF,0 + beq- cr6,L(zeroLength) + dcbt 0,rSTR1 + dcbt 0,rSTR2 + + /* If less than 12 bytes, use the byte loop. Longer compares whose + operands differ in alignment branch to L(unaligned) below. */ + + blt cr1,L(bytealigned) + stwu 1,-64(1) + cfi_adjust_cfa_offset(64) + stw r31,48(1) + cfi_offset(31,(48-64)) + stw r30,44(1) + cfi_offset(30,(44-64)) + bne L(unaligned) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. rBITDIF contains the low order + 2 bits of rSTR1 and cr5 contains the result of the logical compare + of rBITDIF to 0. If rBITDIF == 0 then we are already word + aligned and can perform the word aligned loop. + + Otherwise we know the two strings have the same alignment (but not + yet word aligned). So we force the string addresses to the next lower + word boundary and special case this first word using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (word aligned) compare loop, starting at the second word, + we need to adjust the length (rN) and special case the loop + versioning for the first word.
This ensures that the loop count is + correct and the first word (shifted) is in the expected register pair. */ + .align 4 +L(samealignment): + clrrwi rSTR1,rSTR1,2 + clrrwi rSTR2,rSTR2,2 + beq cr5,L(Waligned) + add rN,rN,rBITDIF + slwi r11,rBITDIF,3 + srwi rTMP,rN,4 /* Divide by 16 */ + andi. rBITDIF,rN,12 /* Get the word remainder */ + lwz rWORD1,0(rSTR1) + lwz rWORD2,0(rSTR2) + cmplwi cr1,rBITDIF,8 + cmplwi cr7,rN,16 + clrlwi rN,rN,30 + beq L(dPs4) + mtctr rTMP + bgt cr1,L(dPs3) + beq cr1,L(dPs2) + +/* Remainder is 4 */ + .align 3 +L(dsP1): + slw rWORD5,rWORD1,r11 + slw rWORD6,rWORD2,r11 + cmplw cr5,rWORD5,rWORD6 + blt cr7,L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ + lwz rWORD1,4(rSTR1) + lwz rWORD2,4(rSTR2) + cmplw cr0,rWORD1,rWORD2 + b L(dP1e) +/* Remainder is 8 */ + .align 4 +L(dPs2): + slw rWORD5,rWORD1,r11 + slw rWORD6,rWORD2,r11 + cmplw cr6,rWORD5,rWORD6 + blt cr7,L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. */ + lwz rWORD7,4(rSTR1) + lwz rWORD8,4(rSTR2) + cmplw cr5,rWORD7,rWORD8 + b L(dP2e) +/* Remainder is 12 */ + .align 4 +L(dPs3): + slw rWORD3,rWORD1,r11 + slw rWORD4,rWORD2,r11 + cmplw cr1,rWORD3,rWORD4 + b L(dP3e) +/* Count is a multiple of 16, remainder is 0 */ + .align 4 +L(dPs4): + mtctr rTMP + slw rWORD1,rWORD1,r11 + slw rWORD2,rWORD2,r11 + cmplw cr0,rWORD1,rWORD2 + b L(dP4e) + +/* At this point we know both strings are word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(Waligned): + andi. rBITDIF,rN,12 /* Get the word remainder */ + srwi rTMP,rN,4 /* Divide by 16 */ + cmplwi cr1,rBITDIF,8 + cmplwi cr7,rN,16 + clrlwi rN,rN,30 + beq L(dP4) + bgt cr1,L(dP3) + beq cr1,L(dP2) + +/* Remainder is 4 */ + .align 4 +L(dP1): + mtctr rTMP +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers.
This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. */ + lwz rWORD5,0(rSTR1) + lwz rWORD6,0(rSTR2) + cmplw cr5,rWORD5,rWORD6 + blt cr7,L(dP1x) + lwz rWORD1,4(rSTR1) + lwz rWORD2,4(rSTR2) + cmplw cr0,rWORD1,rWORD2 +L(dP1e): + lwz rWORD3,8(rSTR1) + lwz rWORD4,8(rSTR2) + cmplw cr1,rWORD3,rWORD4 + lwz rWORD5,12(rSTR1) + lwz rWORD6,12(rSTR2) + cmplw cr6,rWORD5,rWORD6 + bne cr5,L(dLcr5) + bne cr0,L(dLcr0) + + lwzu rWORD7,16(rSTR1) + lwzu rWORD8,16(rSTR2) + bne cr1,L(dLcr1) + cmplw cr5,rWORD7,rWORD8 + bdnz L(dLoop) + bne cr6,L(dLcr6) + lwz r30,44(1) + lwz r31,48(1) + .align 3 +L(dP1x): + slwi. r12,rN,3 + bne cr5,L(dLcr5) + subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */ + lwz 1,0(1) + bne L(d00) + li rRTN,0 + blr + +/* Remainder is 8 */ + .align 4 +L(dP2): + mtctr rTMP + lwz rWORD5,0(rSTR1) + lwz rWORD6,0(rSTR2) + cmplw cr6,rWORD5,rWORD6 + blt cr7,L(dP2x) + lwz rWORD7,4(rSTR1) + lwz rWORD8,4(rSTR2) + cmplw cr5,rWORD7,rWORD8 +L(dP2e): + lwz rWORD1,8(rSTR1) + lwz rWORD2,8(rSTR2) + cmplw cr0,rWORD1,rWORD2 + lwz rWORD3,12(rSTR1) + lwz rWORD4,12(rSTR2) + cmplw cr1,rWORD3,rWORD4 + addi rSTR1,rSTR1,4 + addi rSTR2,rSTR2,4 + bne cr6,L(dLcr6) + bne cr5,L(dLcr5) + b L(dLoop2) +/* Again we are on a early exit path (16-23 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP2x): + lwz rWORD3,4(rSTR1) + lwz rWORD4,4(rSTR2) + cmplw cr5,rWORD3,rWORD4 + slwi. r12,rN,3 + bne cr6,L(dLcr6) + addi rSTR1,rSTR1,4 + addi rSTR2,rSTR2,4 + bne cr5,L(dLcr5) + subfic rN,r12,32 /* Shift count is 32 - (rN * 8). 
*/ + lwz 1,0(1) + bne L(d00) + li rRTN,0 + blr + +/* Remainder is 12 */ + .align 4 +L(dP3): + mtctr rTMP + lwz rWORD3,0(rSTR1) + lwz rWORD4,0(rSTR2) + cmplw cr1,rWORD3,rWORD4 +L(dP3e): + lwz rWORD5,4(rSTR1) + lwz rWORD6,4(rSTR2) + cmplw cr6,rWORD5,rWORD6 + blt cr7,L(dP3x) + lwz rWORD7,8(rSTR1) + lwz rWORD8,8(rSTR2) + cmplw cr5,rWORD7,rWORD8 + lwz rWORD1,12(rSTR1) + lwz rWORD2,12(rSTR2) + cmplw cr0,rWORD1,rWORD2 + addi rSTR1,rSTR1,8 + addi rSTR2,rSTR2,8 + bne cr1,L(dLcr1) + bne cr6,L(dLcr6) + b L(dLoop1) +/* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP3x): + lwz rWORD1,8(rSTR1) + lwz rWORD2,8(rSTR2) + cmplw cr5,rWORD1,rWORD2 + slwi. r12,rN,3 + bne cr1,L(dLcr1) + addi rSTR1,rSTR1,8 + addi rSTR2,rSTR2,8 + bne cr6,L(dLcr6) + subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */ + bne cr5,L(dLcr5) + lwz 1,0(1) + bne L(d00) + li rRTN,0 + blr + +/* Count is a multiple of 16, remainder is 0 */ + .align 4 +L(dP4): + mtctr rTMP + lwz rWORD1,0(rSTR1) + lwz rWORD2,0(rSTR2) + cmplw cr0,rWORD1,rWORD2 +L(dP4e): + lwz rWORD3,4(rSTR1) + lwz rWORD4,4(rSTR2) + cmplw cr1,rWORD3,rWORD4 + lwz rWORD5,8(rSTR1) + lwz rWORD6,8(rSTR2) + cmplw cr6,rWORD5,rWORD6 + lwzu rWORD7,12(rSTR1) + lwzu rWORD8,12(rSTR2) + cmplw cr5,rWORD7,rWORD8 + bne cr0,L(dLcr0) + bne cr1,L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(dLoop): + lwz rWORD1,4(rSTR1) + lwz rWORD2,4(rSTR2) + cmplw cr1,rWORD3,rWORD4 + bne cr6,L(dLcr6) +L(dLoop1): + lwz rWORD3,8(rSTR1) + lwz rWORD4,8(rSTR2) + cmplw cr6,rWORD5,rWORD6 + bne cr5,L(dLcr5) +L(dLoop2): + lwz rWORD5,12(rSTR1) + lwz rWORD6,12(rSTR2) + cmplw cr5,rWORD7,rWORD8 + bne cr0,L(dLcr0) +L(dLoop3): + lwzu rWORD7,16(rSTR1) + lwzu rWORD8,16(rSTR2) + bne cr1,L(dLcr1) + cmplw cr0,rWORD1,rWORD2 + bdnz L(dLoop) + +L(dL4): + cmplw cr1,rWORD3,rWORD4 + bne cr6,L(dLcr6) + cmplw cr6,rWORD5,rWORD6 + bne 
cr5,L(dLcr5) + cmplw cr5,rWORD7,rWORD8 +L(d44): + bne cr0,L(dLcr0) +L(d34): + bne cr1,L(dLcr1) +L(d24): + bne cr6,L(dLcr6) +L(d14): + slwi. r12,rN,3 + bne cr5,L(dLcr5) +L(d04): + lwz r30,44(1) + lwz r31,48(1) + lwz 1,0(1) + subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */ + beq L(zeroLength) +/* At this point we have a remainder of 1 to 3 bytes to compare. Since + we are aligned it is safe to load the whole word, and use + shift right to eliminate bits beyond the compare length. */ +L(d00): + lwz rWORD1,4(rSTR1) + lwz rWORD2,4(rSTR2) + srw rWORD1,rWORD1,rN + srw rWORD2,rWORD2,rN + cmplw rWORD1,rWORD2 + li rRTN,0 + beqlr + li rRTN,1 + bgtlr + li rRTN,-1 + blr + + .align 4 +L(dLcr0): + lwz r30,44(1) + lwz r31,48(1) + li rRTN,1 + lwz 1,0(1) + bgtlr cr0 + li rRTN,-1 + blr + .align 4 +L(dLcr1): + lwz r30,44(1) + lwz r31,48(1) + li rRTN,1 + lwz 1,0(1) + bgtlr cr1 + li rRTN,-1 + blr + .align 4 +L(dLcr6): + lwz r30,44(1) + lwz r31,48(1) + li rRTN,1 + lwz 1,0(1) + bgtlr cr6 + li rRTN,-1 + blr + .align 4 +L(dLcr5): + lwz r30,44(1) + lwz r31,48(1) +L(dLcr5x): + li rRTN,1 + lwz 1,0(1) + bgtlr cr5 + li rRTN,-1 + blr + + .align 4 +L(bytealigned): + cfi_adjust_cfa_offset(-64) + mtctr rN + +/* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) is 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. Effectively + the first iteration of the loop only serves to load operands and + branches based on compares are delayed until the next loop. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. 
*/ + lbz rWORD1,0(rSTR1) + lbz rWORD2,0(rSTR2) + bdz L(b11) + cmplw cr0,rWORD1,rWORD2 + lbz rWORD3,1(rSTR1) + lbz rWORD4,1(rSTR2) + bdz L(b12) + cmplw cr1,rWORD3,rWORD4 + lbzu rWORD5,2(rSTR1) + lbzu rWORD6,2(rSTR2) + bdz L(b13) + .align 4 +L(bLoop): + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + bne cr0,L(bLcr0) + + cmplw cr6,rWORD5,rWORD6 + bdz L(b3i) + + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + bne cr1,L(bLcr1) + + cmplw cr0,rWORD1,rWORD2 + bdz L(b2i) + + lbzu rWORD5,1(rSTR1) + lbzu rWORD6,1(rSTR2) + bne cr6,L(bLcr6) + + cmplw cr1,rWORD3,rWORD4 + bdnz L(bLoop) + +/* We are speculatively loading bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to + prevent these speculative loads from causing a segfault. In this + case the loop will exit early (before all the pending bytes are + tested). In this case we must complete the pending operations + before returning. */ +L(b1i): + bne cr0,L(bLcr0) + bne cr1,L(bLcr1) + b L(bx56) + .align 4 +L(b2i): + bne cr6,L(bLcr6) + bne cr0,L(bLcr0) + b L(bx34) + .align 4 +L(b3i): + bne cr1,L(bLcr1) + bne cr6,L(bLcr6) + b L(bx12) + .align 4 +L(bLcr0): + li rRTN,1 + bgtlr cr0 + li rRTN,-1 + blr +L(bLcr1): + li rRTN,1 + bgtlr cr1 + li rRTN,-1 + blr +L(bLcr6): + li rRTN,1 + bgtlr cr6 + li rRTN,-1 + blr + +L(b13): + bne cr0,L(bx12) + bne cr1,L(bx34) +L(bx56): + sub rRTN,rWORD5,rWORD6 + blr + nop +L(b12): + bne cr0,L(bx12) +L(bx34): + sub rRTN,rWORD3,rWORD4 + blr + +L(b11): +L(bx12): + sub rRTN,rWORD1,rWORD2 + blr + + .align 4 +L(zeroLengthReturn): + +L(zeroLength): + li rRTN,0 + blr + + cfi_adjust_cfa_offset(64) + .align 4 +/* At this point we know the strings have different alignment and the + compare length is at least 8 bytes. rBITDIF contains the low order + 2 bits of rSTR1 and cr5 contains the result of the logical compare + of rBITDIF to 0. If rBITDIF == 0 then rSTR1 is word aligned and can + perform the Wunaligned loop.
+ + Otherwise we know that rSTR1 is not already word aligned. + So we can force the string addresses to the next lower word + boundary and special case this first word using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (Wunaligned) compare loop, starting at the second word, + we need to adjust the length (rN) and special case the loop + versioning for the first W. This ensures that the loop count is + correct and the first W (shifted) is in the expected register pair. */ +#define rSHL r29 /* Unaligned shift left count. */ +#define rSHR r28 /* Unaligned shift right count. */ +#define rB r27 /* Left rotation temp for rWORD2. */ +#define rD r26 /* Left rotation temp for rWORD4. */ +#define rF r25 /* Left rotation temp for rWORD6. */ +#define rH r24 /* Left rotation temp for rWORD8. */ +#define rA r0 /* Right rotation temp for rWORD2. */ +#define rC r12 /* Right rotation temp for rWORD4. */ +#define rE r0 /* Right rotation temp for rWORD6. */ +#define rG r12 /* Right rotation temp for rWORD8. */ +L(unaligned): + stw r29,40(r1) + cfi_offset(r29,(40-64)) + clrlwi rSHL,rSTR2,30 + stw r28,36(r1) + cfi_offset(r28,(36-64)) + beq cr5,L(Wunaligned) + stw r27,32(r1) + cfi_offset(r27,(32-64)) +/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 W. */ + sub r27,rSTR2,rBITDIF +/* But do not attempt to address the W before that W that contains + the actual start of rSTR2. */ + clrrwi rSTR2,rSTR2,2 + stw r26,28(r1) + cfi_offset(r26,(28-64)) +/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (W aligned) start of rSTR1. */ + clrlwi rSHL,r27,30 + clrrwi rSTR1,rSTR1,2 + stw r25,24(r1) + cfi_offset(r25,(24-64)) + slwi rSHL,rSHL,3 + cmplw cr5,r27,rSTR2 + add rN,rN,rBITDIF + slwi r11,rBITDIF,3 + stw r24,20(r1) + cfi_offset(r24,(20-64)) + subfic rSHR,rSHL,32 + srwi rTMP,rN,4 /* Divide by 16 */ + andi.
rBITDIF,rN,12 /* Get the W remainder */ +/* We normally need to load 2 Ws to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a W where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. */ + li rWORD8,0 + blt cr5,L(dus0) + lwz rWORD8,0(rSTR2) + la rSTR2,4(rSTR2) + slw rWORD8,rWORD8,rSHL + +L(dus0): + lwz rWORD1,0(rSTR1) + lwz rWORD2,0(rSTR2) + cmplwi cr1,rBITDIF,8 + cmplwi cr7,rN,16 + srw rG,rWORD2,rSHR + clrlwi rN,rN,30 + beq L(duPs4) + mtctr rTMP + or rWORD8,rG,rWORD8 + bgt cr1,L(duPs3) + beq cr1,L(duPs2) + +/* Remainder is 4 */ + .align 4 +L(dusP1): + slw rB,rWORD2,rSHL + slw rWORD7,rWORD1,r11 + slw rWORD8,rWORD8,r11 + bge cr7,L(duP1e) +/* At this point we exit early with the first word compare + complete and remainder of 0 to 3 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + cmplw cr5,rWORD7,rWORD8 + slwi. rN,rN,3 + bne cr5,L(duLcr5) + cmplw cr7,rN,rSHR + beq L(duZeroReturn) + li rA,0 + ble cr7,L(dutrim) + lwz rWORD2,4(rSTR2) + srw rA,rWORD2,rSHR + b L(dutrim) +/* Remainder is 8 */ + .align 4 +L(duPs2): + slw rH,rWORD2,rSHL + slw rWORD5,rWORD1,r11 + slw rWORD6,rWORD8,r11 + b L(duP2e) +/* Remainder is 12 */ + .align 4 +L(duPs3): + slw rF,rWORD2,rSHL + slw rWORD3,rWORD1,r11 + slw rWORD4,rWORD8,r11 + b L(duP3e) +/* Count is a multiple of 16, remainder is 0 */ + .align 4 +L(duPs4): + mtctr rTMP + or rWORD8,rG,rWORD8 + slw rD,rWORD2,rSHL + slw rWORD1,rWORD1,r11 + slw rWORD2,rWORD8,r11 + b L(duP4e) + +/* At this point we know rSTR1 is word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(Wunaligned): + stw r27,32(r1) + cfi_offset(r27,(32-64)) + clrrwi rSTR2,rSTR2,2 + stw r26,28(r1) + cfi_offset(r26,(28-64)) + srwi rTMP,rN,4 /* Divide by 16 */ + stw r25,24(r1) + cfi_offset(r25,(24-64)) + andi. 
rBITDIF,rN,12 /* Get the W remainder */ + stw r24,20(r1) + cfi_offset(r24,(20-64)) /* Must match the stw slot above. */ + slwi rSHL,rSHL,3 + lwz rWORD6,0(rSTR2) + lwzu rWORD8,4(rSTR2) + cmplwi cr1,rBITDIF,8 + cmplwi cr7,rN,16 + clrlwi rN,rN,30 + subfic rSHR,rSHL,32 + slw rH,rWORD6,rSHL + beq L(duP4) + mtctr rTMP + bgt cr1,L(duP3) + beq cr1,L(duP2) + +/* Remainder is 4 */ + .align 4 +L(duP1): + srw rG,rWORD8,rSHR + lwz rWORD7,0(rSTR1) + slw rB,rWORD8,rSHL + or rWORD8,rG,rH + blt cr7,L(duP1x) +L(duP1e): + lwz rWORD1,4(rSTR1) + lwz rWORD2,4(rSTR2) + cmplw cr5,rWORD7,rWORD8 + srw rA,rWORD2,rSHR + slw rD,rWORD2,rSHL + or rWORD2,rA,rB + lwz rWORD3,8(rSTR1) + lwz rWORD4,8(rSTR2) + cmplw cr0,rWORD1,rWORD2 + srw rC,rWORD4,rSHR + slw rF,rWORD4,rSHL + bne cr5,L(duLcr5) + or rWORD4,rC,rD + lwz rWORD5,12(rSTR1) + lwz rWORD6,12(rSTR2) + cmplw cr1,rWORD3,rWORD4 + srw rE,rWORD6,rSHR + slw rH,rWORD6,rSHL + bne cr0,L(duLcr0) + or rWORD6,rE,rF + cmplw cr6,rWORD5,rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first word compare + complete and remainder of 0 to 3 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +L(duP1x): + cmplw cr5,rWORD7,rWORD8 + slwi.
rN,rN,3 + bne cr5,L(duLcr5) + cmplw cr7,rN,rSHR + beq L(duZeroReturn) + li rA,0 + ble cr7,L(dutrim) + lwz rWORD2,4(rSTR2) /* 32-bit load of the next rSTR2 W. */ + srw rA,rWORD2,rSHR + b L(dutrim) +/* Remainder is 8 */ + .align 4 +L(duP2): + srw rE,rWORD8,rSHR + lwz rWORD5,0(rSTR1) + or rWORD6,rE,rH + slw rH,rWORD8,rSHL +L(duP2e): + lwz rWORD7,4(rSTR1) + lwz rWORD8,4(rSTR2) + cmplw cr6,rWORD5,rWORD6 + srw rG,rWORD8,rSHR + slw rB,rWORD8,rSHL + or rWORD8,rG,rH + blt cr7,L(duP2x) + lwz rWORD1,8(rSTR1) + lwz rWORD2,8(rSTR2) + cmplw cr5,rWORD7,rWORD8 + bne cr6,L(duLcr6) + srw rA,rWORD2,rSHR + slw rD,rWORD2,rSHL + or rWORD2,rA,rB + lwz rWORD3,12(rSTR1) + lwz rWORD4,12(rSTR2) + cmplw cr0,rWORD1,rWORD2 + bne cr5,L(duLcr5) + srw rC,rWORD4,rSHR + slw rF,rWORD4,rSHL + or rWORD4,rC,rD + addi rSTR1,rSTR1,4 + addi rSTR2,rSTR2,4 + cmplw cr1,rWORD3,rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmplw cr5,rWORD7,rWORD8 + addi rSTR1,rSTR1,4 + addi rSTR2,rSTR2,4 + bne cr6,L(duLcr6) + slwi. rN,rN,3 + bne cr5,L(duLcr5) + cmplw cr7,rN,rSHR + beq L(duZeroReturn) + li rA,0 + ble cr7,L(dutrim) + lwz rWORD2,4(rSTR2) + srw rA,rWORD2,rSHR + b L(dutrim) + +/* Remainder is 12 */ + .align 4 +L(duP3): + srw rC,rWORD8,rSHR + lwz rWORD3,0(rSTR1) + slw rF,rWORD8,rSHL + or rWORD4,rC,rH +L(duP3e): + lwz rWORD5,4(rSTR1) + lwz rWORD6,4(rSTR2) + cmplw cr1,rWORD3,rWORD4 + srw rE,rWORD6,rSHR + slw rH,rWORD6,rSHL + or rWORD6,rE,rF + lwz rWORD7,8(rSTR1) + lwz rWORD8,8(rSTR2) + cmplw cr6,rWORD5,rWORD6 + bne cr1,L(duLcr1) + srw rG,rWORD8,rSHR + slw rB,rWORD8,rSHL + or rWORD8,rG,rH + blt cr7,L(duP3x) + lwz rWORD1,12(rSTR1) + lwz rWORD2,12(rSTR2) + cmplw cr5,rWORD7,rWORD8 + bne cr6,L(duLcr6) + srw rA,rWORD2,rSHR + slw rD,rWORD2,rSHL + or rWORD2,rA,rB + addi rSTR1,rSTR1,8 + addi rSTR2,rSTR2,8 + cmplw cr0,rWORD1,rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): + addi rSTR1,rSTR1,8 + addi rSTR2,rSTR2,8 + bne cr1,L(duLcr1) + cmplw cr5,rWORD7,rWORD8 + bne cr6,L(duLcr6) + slwi.
rN,rN,3 + bne cr5,L(duLcr5) + cmplw cr7,rN,rSHR + beq L(duZeroReturn) + li rA,0 + ble cr7,L(dutrim) + lwz rWORD2,4(rSTR2) + srw rA,rWORD2,rSHR + b L(dutrim) + +/* Count is a multiple of 16, remainder is 0 */ + .align 4 +L(duP4): + mtctr rTMP + srw rA,rWORD8,rSHR + lwz rWORD1,0(rSTR1) + slw rD,rWORD8,rSHL + or rWORD2,rA,rH +L(duP4e): + lwz rWORD3,4(rSTR1) + lwz rWORD4,4(rSTR2) + cmplw cr0,rWORD1,rWORD2 + srw rC,rWORD4,rSHR + slw rF,rWORD4,rSHL + or rWORD4,rC,rD + lwz rWORD5,8(rSTR1) + lwz rWORD6,8(rSTR2) + cmplw cr1,rWORD3,rWORD4 + bne cr0,L(duLcr0) + srw rE,rWORD6,rSHR + slw rH,rWORD6,rSHL + or rWORD6,rE,rF + lwzu rWORD7,12(rSTR1) + lwzu rWORD8,12(rSTR2) + cmplw cr6,rWORD5,rWORD6 + bne cr1,L(duLcr1) + srw rG,rWORD8,rSHR + slw rB,rWORD8,rSHL + or rWORD8,rG,rH + cmplw cr5,rWORD7,rWORD8 + bdz L(du24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(duLoop): + lwz rWORD1,4(rSTR1) + lwz rWORD2,4(rSTR2) + cmplw cr1,rWORD3,rWORD4 + bne cr6,L(duLcr6) + srw rA,rWORD2,rSHR + slw rD,rWORD2,rSHL + or rWORD2,rA,rB +L(duLoop1): + lwz rWORD3,8(rSTR1) + lwz rWORD4,8(rSTR2) + cmplw cr6,rWORD5,rWORD6 + bne cr5,L(duLcr5) + srw rC,rWORD4,rSHR + slw rF,rWORD4,rSHL + or rWORD4,rC,rD +L(duLoop2): + lwz rWORD5,12(rSTR1) + lwz rWORD6,12(rSTR2) + cmplw cr5,rWORD7,rWORD8 + bne cr0,L(duLcr0) + srw rE,rWORD6,rSHR + slw rH,rWORD6,rSHL + or rWORD6,rE,rF +L(duLoop3): + lwzu rWORD7,16(rSTR1) + lwzu rWORD8,16(rSTR2) + cmplw cr0,rWORD1,rWORD2 + bne cr1,L(duLcr1) + srw rG,rWORD8,rSHR + slw rB,rWORD8,rSHL + or rWORD8,rG,rH + bdnz L(duLoop) + +L(duL4): + bne cr1,L(duLcr1) + cmplw cr1,rWORD3,rWORD4 + bne cr6,L(duLcr6) + cmplw cr6,rWORD5,rWORD6 + bne cr5,L(duLcr5) + cmplw cr5,rWORD7,rWORD8 +L(du44): + bne cr0,L(duLcr0) +L(du34): + bne cr1,L(duLcr1) +L(du24): + bne cr6,L(duLcr6) +L(du14): + slwi. rN,rN,3 + bne cr5,L(duLcr5) +/* At this point we have a remainder of 1 to 3 bytes to compare. We use + shift right to eliminate bits beyond the compare length. 
+ + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rB). */ + cmplw cr7,rN,rSHR + beq L(duZeroReturn) + li rA,0 + ble cr7,L(dutrim) + lwz rWORD2,4(rSTR2) + srw rA,rWORD2,rSHR + .align 4 +L(dutrim): + lwz rWORD1,4(rSTR1) + lwz r31,48(1) + subfic rN,rN,32 /* Shift count is 32 - (rN * 8). */ + or rWORD2,rA,rB + lwz r30,44(1) + lwz r29,40(r1) + srw rWORD1,rWORD1,rN + srw rWORD2,rWORD2,rN + lwz r28,36(r1) + lwz r27,32(r1) + cmplw rWORD1,rWORD2 + li rRTN,0 + beq L(dureturn26) + li rRTN,1 + bgt L(dureturn26) + li rRTN,-1 + b L(dureturn26) + .align 4 +L(duLcr0): + lwz r31,48(1) + lwz r30,44(1) + li rRTN,1 + bgt cr0,L(dureturn29) + lwz r29,40(r1) + lwz r28,36(r1) + li rRTN,-1 + b L(dureturn27) + .align 4 +L(duLcr1): + lwz r31,48(1) + lwz r30,44(1) + li rRTN,1 + bgt cr1,L(dureturn29) + lwz r29,40(r1) + lwz r28,36(r1) + li rRTN,-1 + b L(dureturn27) + .align 4 +L(duLcr6): + lwz r31,48(1) + lwz r30,44(1) + li rRTN,1 + bgt cr6,L(dureturn29) + lwz r29,40(r1) + lwz r28,36(r1) + li rRTN,-1 + b L(dureturn27) + .align 4 +L(duLcr5): + lwz r31,48(1) + lwz r30,44(1) + li rRTN,1 + bgt cr5,L(dureturn29) + lwz r29,40(r1) + lwz r28,36(r1) + li rRTN,-1 + b L(dureturn27) + .align 3 +L(duZeroReturn): + li rRTN,0 + .align 4 +L(dureturn): + lwz r31,48(1) + lwz r30,44(1) +L(dureturn29): + lwz r29,40(r1) + lwz r28,36(r1) +L(dureturn27): + lwz r27,32(r1) +L(dureturn26): + lwz r26,28(r1) +L(dureturn25): + lwz r25,24(r1) + lwz r24,20(r1) + lwz 1,0(1) + blr +END (BP_SYM (memcmp)) +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp,bcmp) diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S b/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S new file mode 100644 index 000000000..ba72d0a02 --- /dev/null +++ b/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S @@ -0,0 +1,177 @@ 
+/* Optimized strncmp implementation for POWER7/PowerPC32. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA + 02110-1301 USA. */ + +#include <sysdep.h> +#include <bp-sym.h> +#include <bp-asm.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strncmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + +EALIGN (BP_SYM(strncmp),4,0) + +#define rTMP r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +/* Note: The Bounded pointer support in this code is broken. This code + was inherited from PPC32 and that support was never completed. + Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r10 /* s1 byte in the byte loop (same register as rNEG) */ +#define rWORD4 r11 /* s2 byte in the byte loop (same register as rBITDIF) */ +#define rFEFE r8 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ + + dcbt 0,rSTR1 + or rTMP,rSTR2,rSTR1 + lis r7F7F,0x7f7f + dcbt 0,rSTR2 + clrlwi. 
rTMP,rTMP,30 + cmplwi cr1,rN,0 + lis rFEFE,-0x101 + bne L(unaligned) +/* We are word aligned, so set up for two loops: first a word + loop, then fall into the byte loop if there is any residual. */ + srwi. rTMP,rN,2 + clrlwi rN,rN,30 + addi rFEFE,rFEFE,-0x101 + addi r7F7F,r7F7F,0x7f7f + cmplwi cr1,rN,0 + beq L(unaligned) + + mtctr rTMP + lwz rWORD1,0(rSTR1) + lwz rWORD2,0(rSTR2) + b L(g1) + +L(g0): + lwzu rWORD1,4(rSTR1) + bne cr1,L(different) + lwzu rWORD2,4(rSTR2) +L(g1): add rTMP,rFEFE,rWORD1 + nor rNEG,r7F7F,rWORD1 + bdz L(tail) + and. rTMP,rTMP,rNEG + cmpw cr1,rWORD1,rWORD2 + beq L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + +L(endstring): + and rTMP,r7F7F,rWORD1 + beq cr1,L(equal) + add rTMP,rTMP,r7F7F + xor. rBITDIF,rWORD1,rWORD2 + + andc rNEG,rNEG,rTMP + blt L(highbit) + cntlzw rBITDIF,rBITDIF + cntlzw rNEG,rNEG + addi rNEG,rNEG,7 + cmpw cr1,rNEG,rBITDIF + sub rRTN,rWORD1,rWORD2 + blt cr1,L(equal) + srawi rRTN,rRTN,31 + ori rRTN,rRTN,1 + blr +L(equal): + li rRTN,0 + blr + +L(different): + lwzu rWORD1,-4(rSTR1) + xor. rBITDIF,rWORD1,rWORD2 + sub rRTN,rWORD1,rWORD2 + blt L(highbit) + srawi rRTN,rRTN,31 + ori rRTN,rRTN,1 + blr +L(highbit): + srwi rWORD2,rWORD2,24 + srwi rWORD1,rWORD1,24 + sub rRTN,rWORD1,rWORD2 + blr + + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. 
rTMP,rTMP,rNEG + cmpw cr1,rWORD1,rWORD2 + bne L(endstring) + addi rSTR1,rSTR1,4 + bne cr1,L(different) + addi rSTR2,rSTR2,4 + cmplwi cr1,rN,0 +L(unaligned): + mtctr rN + ble cr1,L(ux) +L(uz): + lbz rWORD1,0(rSTR1) + lbz rWORD2,0(rSTR2) + .align 4 +L(u1): + cmpwi cr1,rWORD1,0 + bdz L(u4) + cmpw rWORD1,rWORD2 + beq cr1,L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + bne L(u4) + cmpwi cr1,rWORD3,0 + bdz L(u3) + cmpw rWORD3,rWORD4 + beq cr1,L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + bne L(u3) + cmpwi cr1,rWORD1,0 + bdz L(u4) + cmpw rWORD1,rWORD2 + beq cr1,L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + bne L(u4) + cmpwi cr1,rWORD3,0 + bdz L(u3) + cmpw rWORD3,rWORD4 + beq cr1,L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + beq L(u1) + +L(u3): sub rRTN,rWORD3,rWORD4 + blr +L(u4): sub rRTN,rWORD1,rWORD2 + blr +L(ux): + li rRTN,0 + blr +END (BP_SYM (strncmp)) +libc_hidden_builtin_def (strncmp) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S b/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S new file mode 100644 index 000000000..f1afffb4e --- /dev/null +++ b/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S @@ -0,0 +1,984 @@ +/* Optimized memcmp implementation for POWER7/PowerPC64. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA + 02110-1301 USA. */ + +#include <sysdep.h> +#include <bp-sym.h> +#include <bp-asm.h> + +/* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + + .machine power7 +EALIGN (BP_SYM(memcmp),4,0) + CALL_MCOUNT 3 + +#define rTMP r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +/* Note: The Bounded pointer support in this code is broken. This code + was inherited from PPC32 and that support was never completed. + Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r8 /* next word in s1 */ +#define rWORD4 r9 /* next word in s2 */ +#define rWORD5 r10 /* next word in s1 */ +#define rWORD6 r11 /* next word in s2 */ +#define rBITDIF r12 /* bits that differ in s1 & s2 words */ +#define rWORD7 r30 /* next word in s1 */ +#define rWORD8 r31 /* next word in s2 */ + + xor rTMP,rSTR2,rSTR1 + cmpldi cr6,rN,0 + cmpldi cr1,rN,12 + clrldi. rTMP,rTMP,61 + clrldi rBITDIF,rSTR1,61 + cmpldi cr5,rBITDIF,0 + beq- cr6,L(zeroLength) + dcbt 0,rSTR1 + dcbt 0,rSTR2 +/* If the length is less than 12 bytes, use the simple + byte loop. */ + blt cr1,L(bytealigned) + std rWORD8,-8(r1) + cfi_offset(rWORD8,-8) + std rWORD7,-16(r1) + cfi_offset(rWORD7,-16) + bne L(unaligned) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. rBITDIF contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of rBITDIF to 0. If rBITDIF == 0 then we are already double word + aligned and can perform the DWaligned loop. + + Otherwise we know the two strings have the same alignment (but not + yet DW). 
So we can force the string addresses to the next lower DW + boundary and special case this first DW word using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DWaligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair. */ + .align 4 +L(samealignment): + clrrdi rSTR1,rSTR1,3 + clrrdi rSTR2,rSTR2,3 + beq cr5,L(DWaligned) + add rN,rN,rBITDIF + sldi r11,rBITDIF,3 + srdi rTMP,rN,5 /* Divide by 32 */ + andi. rBITDIF,rN,24 /* Get the DW remainder */ + ld rWORD1,0(rSTR1) + ld rWORD2,0(rSTR2) + cmpldi cr1,rBITDIF,16 + cmpldi cr7,rN,32 + clrldi rN,rN,61 + beq L(dPs4) + mtctr rTMP + bgt cr1,L(dPs3) + beq cr1,L(dPs2) + +/* Remainder is 8 */ + .align 3 +L(dsP1): + sld rWORD5,rWORD1,r11 + sld rWORD6,rWORD2,r11 + cmpld cr5,rWORD5,rWORD6 + blt cr7,L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ + ld rWORD1,8(rSTR1) + ld rWORD2,8(rSTR2) + cmpld cr0,rWORD1,rWORD2 + b L(dP1e) +/* Remainder is 16 */ + .align 4 +L(dPs2): + sld rWORD5,rWORD1,r11 + sld rWORD6,rWORD2,r11 + cmpld cr6,rWORD5,rWORD6 + blt cr7,L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. */ + ld rWORD7,8(rSTR1) + ld rWORD8,8(rSTR2) + cmpld cr5,rWORD7,rWORD8 + b L(dP2e) +/* Remainder is 24 */ + .align 4 +L(dPs3): + sld rWORD3,rWORD1,r11 + sld rWORD4,rWORD2,r11 + cmpld cr1,rWORD3,rWORD4 + b L(dP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dPs4): + mtctr rTMP + sld rWORD1,rWORD1,r11 + sld rWORD2,rWORD2,r11 + cmpld cr0,rWORD1,rWORD2 + b L(dP4e) + +/* At this point we know both strings are double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWaligned): + andi. 
rBITDIF,rN,24 /* Get the DW remainder */ + srdi rTMP,rN,5 /* Divide by 32 */ + cmpldi cr1,rBITDIF,16 + cmpldi cr7,rN,32 + clrldi rN,rN,61 + beq L(dP4) + bgt cr1,L(dP3) + beq cr1,L(dP2) + +/* Remainder is 8 */ + .align 4 +L(dP1): + mtctr rTMP +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. */ + ld rWORD5,0(rSTR1) + ld rWORD6,0(rSTR2) + cmpld cr5,rWORD5,rWORD6 + blt cr7,L(dP1x) + ld rWORD1,8(rSTR1) + ld rWORD2,8(rSTR2) + cmpld cr0,rWORD1,rWORD2 +L(dP1e): + ld rWORD3,16(rSTR1) + ld rWORD4,16(rSTR2) + cmpld cr1,rWORD3,rWORD4 + ld rWORD5,24(rSTR1) + ld rWORD6,24(rSTR2) + cmpld cr6,rWORD5,rWORD6 + bne cr5,L(dLcr5) + bne cr0,L(dLcr0) + + ldu rWORD7,32(rSTR1) + ldu rWORD8,32(rSTR2) + bne cr1,L(dLcr1) + cmpld cr5,rWORD7,rWORD8 + bdnz L(dLoop) + bne cr6,L(dLcr6) + ld rWORD8,-8(r1) + ld rWORD7,-16(r1) + .align 3 +L(dP1x): + sldi. r12,rN,3 + bne cr5,L(dLcr5) + subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + li rRTN,0 + blr + +/* Remainder is 16 */ + .align 4 +L(dP2): + mtctr rTMP + ld rWORD5,0(rSTR1) + ld rWORD6,0(rSTR2) + cmpld cr6,rWORD5,rWORD6 + blt cr7,L(dP2x) + ld rWORD7,8(rSTR1) + ld rWORD8,8(rSTR2) + cmpld cr5,rWORD7,rWORD8 +L(dP2e): + ld rWORD1,16(rSTR1) + ld rWORD2,16(rSTR2) + cmpld cr0,rWORD1,rWORD2 + ld rWORD3,24(rSTR1) + ld rWORD4,24(rSTR2) + cmpld cr1,rWORD3,rWORD4 + addi rSTR1,rSTR1,8 + addi rSTR2,rSTR2,8 + bne cr6,L(dLcr6) + bne cr5,L(dLcr5) + b L(dLoop2) +/* Again we are on an early exit path (16-23 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP2x): + ld rWORD3,8(rSTR1) + ld rWORD4,8(rSTR2) + cmpld cr5,rWORD3,rWORD4 + sldi. 
r12,rN,3 + bne cr6,L(dLcr6) + addi rSTR1,rSTR1,8 + addi rSTR2,rSTR2,8 + bne cr5,L(dLcr5) + subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + li rRTN,0 + blr + +/* Remainder is 24 */ + .align 4 +L(dP3): + mtctr rTMP + ld rWORD3,0(rSTR1) + ld rWORD4,0(rSTR2) + cmpld cr1,rWORD3,rWORD4 +L(dP3e): + ld rWORD5,8(rSTR1) + ld rWORD6,8(rSTR2) + cmpld cr6,rWORD5,rWORD6 + blt cr7,L(dP3x) + ld rWORD7,16(rSTR1) + ld rWORD8,16(rSTR2) + cmpld cr5,rWORD7,rWORD8 + ld rWORD1,24(rSTR1) + ld rWORD2,24(rSTR2) + cmpld cr0,rWORD1,rWORD2 + addi rSTR1,rSTR1,16 + addi rSTR2,rSTR2,16 + bne cr1,L(dLcr1) + bne cr6,L(dLcr6) + b L(dLoop1) +/* Again we are on an early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP3x): + ld rWORD1,16(rSTR1) + ld rWORD2,16(rSTR2) + cmpld cr5,rWORD1,rWORD2 + sldi. r12,rN,3 + bne cr1,L(dLcr1) + addi rSTR1,rSTR1,16 + addi rSTR2,rSTR2,16 + bne cr6,L(dLcr6) + subfic rN,r12,64 /* Shift count is 64 - (rN * 8). 
*/ + bne cr5,L(dLcr5) + bne L(d00) + li rRTN,0 + blr + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dP4): + mtctr rTMP + ld rWORD1,0(rSTR1) + ld rWORD2,0(rSTR2) + cmpld cr0,rWORD1,rWORD2 +L(dP4e): + ld rWORD3,8(rSTR1) + ld rWORD4,8(rSTR2) + cmpld cr1,rWORD3,rWORD4 + ld rWORD5,16(rSTR1) + ld rWORD6,16(rSTR2) + cmpld cr6,rWORD5,rWORD6 + ldu rWORD7,24(rSTR1) + ldu rWORD8,24(rSTR2) + cmpld cr5,rWORD7,rWORD8 + bne cr0,L(dLcr0) + bne cr1,L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(dLoop): + ld rWORD1,8(rSTR1) + ld rWORD2,8(rSTR2) + cmpld cr1,rWORD3,rWORD4 + bne cr6,L(dLcr6) +L(dLoop1): + ld rWORD3,16(rSTR1) + ld rWORD4,16(rSTR2) + cmpld cr6,rWORD5,rWORD6 + bne cr5,L(dLcr5) +L(dLoop2): + ld rWORD5,24(rSTR1) + ld rWORD6,24(rSTR2) + cmpld cr5,rWORD7,rWORD8 + bne cr0,L(dLcr0) +L(dLoop3): + ldu rWORD7,32(rSTR1) + ldu rWORD8,32(rSTR2) + bne cr1,L(dLcr1) + cmpld cr0,rWORD1,rWORD2 + bdnz L(dLoop) + +L(dL4): + cmpld cr1,rWORD3,rWORD4 + bne cr6,L(dLcr6) + cmpld cr6,rWORD5,rWORD6 + bne cr5,L(dLcr5) + cmpld cr5,rWORD7,rWORD8 +L(d44): + bne cr0,L(dLcr0) +L(d34): + bne cr1,L(dLcr1) +L(d24): + bne cr6,L(dLcr6) +L(d14): + sldi. r12,rN,3 + bne cr5,L(dLcr5) +L(d04): + ld rWORD8,-8(r1) + ld rWORD7,-16(r1) + subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */ + beq L(zeroLength) +/* At this point we have a remainder of 1 to 7 bytes to compare. Since + we are aligned it is safe to load the whole double word, and use + shift right double to eliminate bits beyond the compare length. 
*/ +L(d00): + ld rWORD1,8(rSTR1) + ld rWORD2,8(rSTR2) + srd rWORD1,rWORD1,rN + srd rWORD2,rWORD2,rN + cmpld cr5,rWORD1,rWORD2 + bne cr5,L(dLcr5x) + li rRTN,0 + blr + .align 4 +L(dLcr0): + ld rWORD8,-8(r1) + ld rWORD7,-16(r1) + li rRTN,1 + bgtlr cr0 + li rRTN,-1 + blr + .align 4 +L(dLcr1): + ld rWORD8,-8(r1) + ld rWORD7,-16(r1) + li rRTN,1 + bgtlr cr1 + li rRTN,-1 + blr + .align 4 +L(dLcr6): + ld rWORD8,-8(r1) + ld rWORD7,-16(r1) + li rRTN,1 + bgtlr cr6 + li rRTN,-1 + blr + .align 4 +L(dLcr5): + ld rWORD8,-8(r1) + ld rWORD7,-16(r1) +L(dLcr5x): + li rRTN,1 + bgtlr cr5 + li rRTN,-1 + blr + + .align 4 +L(bytealigned): + mtctr rN + beq cr6,L(zeroLength) + +/* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) are 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. Effectively + the first iteration of the loop only serves to load operands and + branches based on compares are delayed until the next loop. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. */ + + lbz rWORD1,0(rSTR1) + lbz rWORD2,0(rSTR2) + bdz L(b11) + cmpld cr0,rWORD1,rWORD2 + lbz rWORD3,1(rSTR1) + lbz rWORD4,1(rSTR2) + bdz L(b12) + cmpld cr1,rWORD3,rWORD4 + lbzu rWORD5,2(rSTR1) + lbzu rWORD6,2(rSTR2) + bdz L(b13) + .align 4 +L(bLoop): + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + bne cr0,L(bLcr0) + + cmpld cr6,rWORD5,rWORD6 + bdz L(b3i) + + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + bne cr1,L(bLcr1) + + cmpld cr0,rWORD1,rWORD2 + bdz L(b2i) + + lbzu rWORD5,1(rSTR1) + lbzu rWORD6,1(rSTR2) + bne cr6,L(bLcr6) + + cmpld cr1,rWORD3,rWORD4 + bdnz L(bLoop) + +/* We speculatively load bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to + prevent these speculative loads from causing a segfault. 
In this + case the loop will exit early (before all the pending bytes are + tested). In this case we must complete the pending operations + before returning. */ +L(b1i): + bne cr0,L(bLcr0) + bne cr1,L(bLcr1) + b L(bx56) + .align 4 +L(b2i): + bne cr6,L(bLcr6) + bne cr0,L(bLcr0) + b L(bx34) + .align 4 +L(b3i): + bne cr1,L(bLcr1) + bne cr6,L(bLcr6) + b L(bx12) + .align 4 +L(bLcr0): + li rRTN,1 + bgtlr cr0 + li rRTN,-1 + blr +L(bLcr1): + li rRTN,1 + bgtlr cr1 + li rRTN,-1 + blr +L(bLcr6): + li rRTN,1 + bgtlr cr6 + li rRTN,-1 + blr + +L(b13): + bne cr0,L(bx12) + bne cr1,L(bx34) +L(bx56): + sub rRTN,rWORD5,rWORD6 + blr + nop +L(b12): + bne cr0,L(bx12) +L(bx34): + sub rRTN,rWORD3,rWORD4 + blr +L(b11): +L(bx12): + sub rRTN,rWORD1,rWORD2 + blr + .align 4 +L(zeroLengthReturn): + ld rWORD8,-8(r1) + ld rWORD7,-16(r1) +L(zeroLength): + li rRTN,0 + blr + + .align 4 +/* At this point we know the strings have different alignment and the + compare length is at least 8 bytes. rBITDIF contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of rBITDIF to 0. If rBITDIF == 0 then rSTR1 is double word + aligned and can perform the DWunaligned loop. + + Otherwise we know that rSTR1 is not DW aligned yet. + So we can force the string addresses to the next lower DW + boundary and special case this first DW word using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DWaligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair. */ +#define rSHL r29 /* Unaligned shift left count. */ +#define rSHR r28 /* Unaligned shift right count. */ +#define rB r27 /* Left rotation temp for rWORD2. */ +#define rD r26 /* Left rotation temp for rWORD4. */ +#define rF r25 /* Left rotation temp for rWORD6. 
*/ +#define rH r24 /* Left rotation temp for rWORD8. */ +#define rA r0 /* Right rotation temp for rWORD2. */ +#define rC r12 /* Right rotation temp for rWORD4. */ +#define rE r0 /* Right rotation temp for rWORD6. */ +#define rG r12 /* Right rotation temp for rWORD8. */ +L(unaligned): + std r29,-24(r1) + cfi_offset(r29,-24) + clrldi rSHL,rSTR2,61 + beq cr6,L(duzeroLength) + std r28,-32(r1) + cfi_offset(r28,-32) + beq cr5,L(DWunaligned) + std r27,-40(r1) + cfi_offset(r27,-40) +/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 DW. */ + sub r27,rSTR2,rBITDIF +/* But do not attempt to address the DW before the DW that contains + the actual start of rSTR2. */ + clrrdi rSTR2,rSTR2,3 + std r26,-48(r1) + cfi_offset(r26,-48) +/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (DW aligned) start of rSTR1. */ + clrldi rSHL,r27,61 + clrrdi rSTR1,rSTR1,3 + std r25,-56(r1) + cfi_offset(r25,-56) + sldi rSHL,rSHL,3 + cmpld cr5,r27,rSTR2 + add rN,rN,rBITDIF + sldi r11,rBITDIF,3 + std r24,-64(r1) + cfi_offset(r24,-64) + subfic rSHR,rSHL,64 + srdi rTMP,rN,5 /* Divide by 32 */ + andi. rBITDIF,rN,24 /* Get the DW remainder */ +/* We normally need to load 2 DWs to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a DW where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. 
*/ + li rWORD8,0 + blt cr5,L(dus0) + ld rWORD8,0(rSTR2) + la rSTR2,8(rSTR2) + sld rWORD8,rWORD8,rSHL + +L(dus0): + ld rWORD1,0(rSTR1) + ld rWORD2,0(rSTR2) + cmpldi cr1,rBITDIF,16 + cmpldi cr7,rN,32 + srd rG,rWORD2,rSHR + clrldi rN,rN,61 + beq L(duPs4) + mtctr rTMP + or rWORD8,rG,rWORD8 + bgt cr1,L(duPs3) + beq cr1,L(duPs2) + +/* Remainder is 8 */ + .align 4 +L(dusP1): + sld rB,rWORD2,rSHL + sld rWORD7,rWORD1,r11 + sld rWORD8,rWORD8,r11 + bge cr7,L(duP1e) +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + cmpld cr5,rWORD7,rWORD8 + sldi. rN,rN,3 + bne cr5,L(duLcr5) + cmpld cr7,rN,rSHR + beq L(duZeroReturn) + li rA,0 + ble cr7,L(dutrim) + ld rWORD2,8(rSTR2) + srd rA,rWORD2,rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duPs2): + sld rH,rWORD2,rSHL + sld rWORD5,rWORD1,r11 + sld rWORD6,rWORD8,r11 + b L(duP2e) +/* Remainder is 24 */ + .align 4 +L(duPs3): + sld rF,rWORD2,rSHL + sld rWORD3,rWORD1,r11 + sld rWORD4,rWORD8,r11 + b L(duP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duPs4): + mtctr rTMP + or rWORD8,rG,rWORD8 + sld rD,rWORD2,rSHL + sld rWORD1,rWORD1,r11 + sld rWORD2,rWORD8,r11 + b L(duP4e) + +/* At this point we know rSTR1 is double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWunaligned): + std r27,-40(r1) + cfi_offset(r27,-40) + clrrdi rSTR2,rSTR2,3 + std r26,-48(r1) + cfi_offset(r26,-48) + srdi rTMP,rN,5 /* Divide by 32 */ + std r25,-56(r1) + cfi_offset(r25,-56) + andi. 
rBITDIF,rN,24 /* Get the DW remainder */ + std r24,-64(r1) + cfi_offset(r24,-64) + sldi rSHL,rSHL,3 + ld rWORD6,0(rSTR2) + ldu rWORD8,8(rSTR2) + cmpldi cr1,rBITDIF,16 + cmpldi cr7,rN,32 + clrldi rN,rN,61 + subfic rSHR,rSHL,64 + sld rH,rWORD6,rSHL + beq L(duP4) + mtctr rTMP + bgt cr1,L(duP3) + beq cr1,L(duP2) + +/* Remainder is 8 */ + .align 4 +L(duP1): + srd rG,rWORD8,rSHR + ld rWORD7,0(rSTR1) + sld rB,rWORD8,rSHL + or rWORD8,rG,rH + blt cr7,L(duP1x) +L(duP1e): + ld rWORD1,8(rSTR1) + ld rWORD2,8(rSTR2) + cmpld cr5,rWORD7,rWORD8 + srd rA,rWORD2,rSHR + sld rD,rWORD2,rSHL + or rWORD2,rA,rB + ld rWORD3,16(rSTR1) + ld rWORD4,16(rSTR2) + cmpld cr0,rWORD1,rWORD2 + srd rC,rWORD4,rSHR + sld rF,rWORD4,rSHL + bne cr5,L(duLcr5) + or rWORD4,rC,rD + ld rWORD5,24(rSTR1) + ld rWORD6,24(rSTR2) + cmpld cr1,rWORD3,rWORD4 + srd rE,rWORD6,rSHR + sld rH,rWORD6,rSHL + bne cr0,L(duLcr0) + or rWORD6,rE,rF + cmpld cr6,rWORD5,rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +L(duP1x): + cmpld cr5,rWORD7,rWORD8 + sldi. 
rN,rN,3 + bne cr5,L(duLcr5) + cmpld cr7,rN,rSHR + beq L(duZeroReturn) + li rA,0 + ble cr7,L(dutrim) + ld rWORD2,8(rSTR2) + srd rA,rWORD2,rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duP2): + srd rE,rWORD8,rSHR + ld rWORD5,0(rSTR1) + or rWORD6,rE,rH + sld rH,rWORD8,rSHL +L(duP2e): + ld rWORD7,8(rSTR1) + ld rWORD8,8(rSTR2) + cmpld cr6,rWORD5,rWORD6 + srd rG,rWORD8,rSHR + sld rB,rWORD8,rSHL + or rWORD8,rG,rH + blt cr7,L(duP2x) + ld rWORD1,16(rSTR1) + ld rWORD2,16(rSTR2) + cmpld cr5,rWORD7,rWORD8 + bne cr6,L(duLcr6) + srd rA,rWORD2,rSHR + sld rD,rWORD2,rSHL + or rWORD2,rA,rB + ld rWORD3,24(rSTR1) + ld rWORD4,24(rSTR2) + cmpld cr0,rWORD1,rWORD2 + bne cr5,L(duLcr5) + srd rC,rWORD4,rSHR + sld rF,rWORD4,rSHL + or rWORD4,rC,rD + addi rSTR1,rSTR1,8 + addi rSTR2,rSTR2,8 + cmpld cr1,rWORD3,rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmpld cr5,rWORD7,rWORD8 + addi rSTR1,rSTR1,8 + addi rSTR2,rSTR2,8 + bne cr6,L(duLcr6) + sldi. rN,rN,3 + bne cr5,L(duLcr5) + cmpld cr7,rN,rSHR + beq L(duZeroReturn) + li rA,0 + ble cr7,L(dutrim) + ld rWORD2,8(rSTR2) + srd rA,rWORD2,rSHR + b L(dutrim) + +/* Remainder is 24 */ + .align 4 +L(duP3): + srd rC,rWORD8,rSHR + ld rWORD3,0(rSTR1) + sld rF,rWORD8,rSHL + or rWORD4,rC,rH +L(duP3e): + ld rWORD5,8(rSTR1) + ld rWORD6,8(rSTR2) + cmpld cr1,rWORD3,rWORD4 + srd rE,rWORD6,rSHR + sld rH,rWORD6,rSHL + or rWORD6,rE,rF + ld rWORD7,16(rSTR1) + ld rWORD8,16(rSTR2) + cmpld cr6,rWORD5,rWORD6 + bne cr1,L(duLcr1) + srd rG,rWORD8,rSHR + sld rB,rWORD8,rSHL + or rWORD8,rG,rH + blt cr7,L(duP3x) + ld rWORD1,24(rSTR1) + ld rWORD2,24(rSTR2) + cmpld cr5,rWORD7,rWORD8 + bne cr6,L(duLcr6) + srd rA,rWORD2,rSHR + sld rD,rWORD2,rSHL + or rWORD2,rA,rB + addi rSTR1,rSTR1,16 + addi rSTR2,rSTR2,16 + cmpld cr0,rWORD1,rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): + addi rSTR1,rSTR1,16 + addi rSTR2,rSTR2,16 + bne cr1,L(duLcr1) + cmpld cr5,rWORD7,rWORD8 + bne cr6,L(duLcr6) + sldi. 
rN,rN,3 + bne cr5,L(duLcr5) + cmpld cr7,rN,rSHR + beq L(duZeroReturn) + li rA,0 + ble cr7,L(dutrim) + ld rWORD2,8(rSTR2) + srd rA,rWORD2,rSHR + b L(dutrim) + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duP4): + mtctr rTMP + srd rA,rWORD8,rSHR + ld rWORD1,0(rSTR1) + sld rD,rWORD8,rSHL + or rWORD2,rA,rH +L(duP4e): + ld rWORD3,8(rSTR1) + ld rWORD4,8(rSTR2) + cmpld cr0,rWORD1,rWORD2 + srd rC,rWORD4,rSHR + sld rF,rWORD4,rSHL + or rWORD4,rC,rD + ld rWORD5,16(rSTR1) + ld rWORD6,16(rSTR2) + cmpld cr1,rWORD3,rWORD4 + bne cr0,L(duLcr0) + srd rE,rWORD6,rSHR + sld rH,rWORD6,rSHL + or rWORD6,rE,rF + ldu rWORD7,24(rSTR1) + ldu rWORD8,24(rSTR2) + cmpld cr6,rWORD5,rWORD6 + bne cr1,L(duLcr1) + srd rG,rWORD8,rSHR + sld rB,rWORD8,rSHL + or rWORD8,rG,rH + cmpld cr5,rWORD7,rWORD8 + bdz L(du24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(duLoop): + ld rWORD1,8(rSTR1) + ld rWORD2,8(rSTR2) + cmpld cr1,rWORD3,rWORD4 + bne cr6,L(duLcr6) + srd rA,rWORD2,rSHR + sld rD,rWORD2,rSHL + or rWORD2,rA,rB +L(duLoop1): + ld rWORD3,16(rSTR1) + ld rWORD4,16(rSTR2) + cmpld cr6,rWORD5,rWORD6 + bne cr5,L(duLcr5) + srd rC,rWORD4,rSHR + sld rF,rWORD4,rSHL + or rWORD4,rC,rD +L(duLoop2): + ld rWORD5,24(rSTR1) + ld rWORD6,24(rSTR2) + cmpld cr5,rWORD7,rWORD8 + bne cr0,L(duLcr0) + srd rE,rWORD6,rSHR + sld rH,rWORD6,rSHL + or rWORD6,rE,rF +L(duLoop3): + ldu rWORD7,32(rSTR1) + ldu rWORD8,32(rSTR2) + cmpld cr0,rWORD1,rWORD2 + bne- cr1,L(duLcr1) + srd rG,rWORD8,rSHR + sld rB,rWORD8,rSHL + or rWORD8,rG,rH + bdnz L(duLoop) + +L(duL4): + bne cr1,L(duLcr1) + cmpld cr1,rWORD3,rWORD4 + bne cr6,L(duLcr6) + cmpld cr6,rWORD5,rWORD6 + bne cr5,L(duLcr5) + cmpld cr5,rWORD7,rWORD8 +L(du44): + bne cr0,L(duLcr0) +L(du34): + bne cr1,L(duLcr1) +L(du24): + bne cr6,L(duLcr6) +L(du14): + sldi. rN,rN,3 + bne cr5,L(duLcr5) +/* At this point we have a remainder of 1 to 7 bytes to compare. We use + shift right double to elliminate bits beyond the compare length. 
+ This allows the use of double word subtract to compute the final + result. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rB). */ + cmpld cr7,rN,rSHR + beq L(duZeroReturn) + li rA,0 + ble cr7,L(dutrim) + ld rWORD2,8(rSTR2) + srd rA,rWORD2,rSHR + .align 4 +L(dutrim): + ld rWORD1,8(rSTR1) + ld rWORD8,-8(r1) + subfic rN,rN,64 /* Shift count is 64 - (rN * 8). */ + or rWORD2,rA,rB + ld rWORD7,-16(r1) + ld r29,-24(r1) + srd rWORD1,rWORD1,rN + srd rWORD2,rWORD2,rN + ld r28,-32(r1) + ld r27,-40(r1) + li rRTN,0 + cmpld cr0,rWORD1,rWORD2 + ld r26,-48(r1) + ld r25,-56(r1) + beq cr0,L(dureturn24) + li rRTN,1 + ld r24,-64(r1) + bgtlr cr0 + li rRTN,-1 + blr + .align 4 +L(duLcr0): + ld rWORD8,-8(r1) + ld rWORD7,-16(r1) + li rRTN,1 + bgt cr0,L(dureturn29) + ld r29,-24(r1) + ld r28,-32(r1) + li rRTN,-1 + b L(dureturn27) + .align 4 +L(duLcr1): + ld rWORD8,-8(r1) + ld rWORD7,-16(r1) + li rRTN,1 + bgt cr1,L(dureturn29) + ld r29,-24(r1) + ld r28,-32(r1) + li rRTN,-1 + b L(dureturn27) + .align 4 +L(duLcr6): + ld rWORD8,-8(r1) + ld rWORD7,-16(r1) + li rRTN,1 + bgt cr6,L(dureturn29) + ld r29,-24(r1) + ld r28,-32(r1) + li rRTN,-1 + b L(dureturn27) + .align 4 +L(duLcr5): + ld rWORD8,-8(r1) + ld rWORD7,-16(r1) + li rRTN,1 + bgt cr5,L(dureturn29) + ld r29,-24(r1) + ld r28,-32(r1) + li rRTN,-1 + b L(dureturn27) + .align 3 +L(duZeroReturn): + li rRTN,0 + .align 4 +L(dureturn): + ld rWORD8,-8(r1) + ld rWORD7,-16(r1) +L(dureturn29): + ld r29,-24(r1) + ld r28,-32(r1) +L(dureturn27): + ld r27,-40(r1) +L(dureturn26): + ld r26,-48(r1) +L(dureturn25): + ld r25,-56(r1) +L(dureturn24): + ld r24,-64(r1) + blr +L(duzeroLength): + li rRTN,0 + blr + +END (BP_SYM (memcmp)) +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp,bcmp) diff --git 
a/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S b/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S new file mode 100644 index 000000000..34f1e52df --- /dev/null +++ b/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S @@ -0,0 +1,181 @@ +/* Optimized strncmp implementation for POWER7/PowerPC64. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA + 02110-1301 USA. */ + +#include <sysdep.h> +#include <bp-sym.h> +#include <bp-asm.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strncmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + +EALIGN (BP_SYM(strncmp),4,0) + CALL_MCOUNT 3 + +#define rTMP r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +/* Note: The Bounded pointer support in this code is broken. This code + was inherited from PPC32 and that support was never completed. + Current PPC gcc does not support -fbounds-check or -fbounded-pointers. 
*/ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r10 +#define rWORD4 r11 +#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ + + dcbt 0,rSTR1 + or rTMP,rSTR2,rSTR1 + lis r7F7F,0x7f7f + dcbt 0,rSTR2 + clrldi. rTMP,rTMP,61 + cmpldi cr1,rN,0 + lis rFEFE,-0x101 + bne L(unaligned) +/* We are doubleword aligned so set up for two loops. First a double word + loop, then fall into the byte loop if any residual. */ + srdi. rTMP,rN,3 + clrldi rN,rN,61 + addi rFEFE,rFEFE,-0x101 + addi r7F7F,r7F7F,0x7f7f + cmpldi cr1,rN,0 + beq L(unaligned) + + mtctr rTMP + ld rWORD1,0(rSTR1) + ld rWORD2,0(rSTR2) + sldi rTMP,rFEFE,32 + insrdi r7F7F,r7F7F,32,0 + add rFEFE,rFEFE,rTMP + b L(g1) + +L(g0): + ldu rWORD1,8(rSTR1) + bne cr1,L(different) + ldu rWORD2,8(rSTR2) +L(g1): add rTMP,rFEFE,rWORD1 + nor rNEG,r7F7F,rWORD1 + bdz L(tail) + and. rTMP,rTMP,rNEG + cmpd cr1,rWORD1,rWORD2 + beq L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + +L(endstring): + and rTMP,r7F7F,rWORD1 + beq cr1,L(equal) + add rTMP,rTMP,r7F7F + xor. rBITDIF,rWORD1,rWORD2 + + andc rNEG,rNEG,rTMP + blt L(highbit) + cntlzd rBITDIF,rBITDIF + cntlzd rNEG,rNEG + addi rNEG,rNEG,7 + cmpd cr1,rNEG,rBITDIF + sub rRTN,rWORD1,rWORD2 + blt cr1,L(equal) + sradi rRTN,rRTN,63 + ori rRTN,rRTN,1 + blr +L(equal): + li rRTN,0 + blr + +L(different): + ldu rWORD1,-8(rSTR1) + xor. rBITDIF,rWORD1,rWORD2 + sub rRTN,rWORD1,rWORD2 + blt L(highbit) + sradi rRTN,rRTN,63 + ori rRTN,rRTN,1 + blr +L(highbit): + srdi rWORD2,rWORD2,56 + srdi rWORD1,rWORD1,56 + sub rRTN,rWORD1,rWORD2 + blr + + +/* Oh well. In this case, we just do a byte-by-byte comparison.
*/ + .align 4 +L(tail): + and. rTMP,rTMP,rNEG + cmpd cr1,rWORD1,rWORD2 + bne L(endstring) + addi rSTR1,rSTR1,8 + bne cr1,L(different) + addi rSTR2,rSTR2,8 + cmpldi cr1,rN,0 +L(unaligned): + mtctr rN + ble cr1,L(ux) +L(uz): + lbz rWORD1,0(rSTR1) + lbz rWORD2,0(rSTR2) + .align 4 +L(u1): + cmpdi cr1,rWORD1,0 + bdz L(u4) + cmpd rWORD1,rWORD2 + beq cr1,L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + bne L(u4) + cmpdi cr1,rWORD3,0 + bdz L(u3) + cmpd rWORD3,rWORD4 + beq cr1,L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + bne L(u3) + cmpdi cr1,rWORD1,0 + bdz L(u4) + cmpd rWORD1,rWORD2 + beq cr1,L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + bne L(u4) + cmpdi cr1,rWORD3,0 + bdz L(u3) + cmpd rWORD3,rWORD4 + beq cr1,L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + beq L(u1) + +L(u3): sub rRTN,rWORD3,rWORD4 + blr +L(u4): sub rRTN,rWORD1,rWORD2 + blr +L(ux): + li rRTN,0 + blr +END (BP_SYM (strncmp)) +libc_hidden_builtin_def (strncmp) diff --git a/libc/sysdeps/unix/sysv/linux/getlogin.c b/libc/sysdeps/unix/sysv/linux/getlogin.c index 4d15db093..58e37c4ab 100644 --- a/libc/sysdeps/unix/sysv/linux/getlogin.c +++ b/libc/sysdeps/unix/sysv/linux/getlogin.c @@ -32,8 +32,9 @@ char * getlogin (void) { - if (__getlogin_r_loginuid (name, sizeof (name)) == 0) - return name; + int res = __getlogin_r_loginuid (name, sizeof (name)); + if (res >= 0) + return res == 0 ? name : NULL; return getlogin_fd0 (); } diff --git a/libc/sysdeps/unix/sysv/linux/getlogin_r.c b/libc/sysdeps/unix/sysv/linux/getlogin_r.c index dad2671e8..7d4d6c0ed 100644 --- a/libc/sysdeps/unix/sysv/linux/getlogin_r.c +++ b/libc/sysdeps/unix/sysv/linux/getlogin_r.c @@ -27,6 +27,10 @@ static int getlogin_r_fd0 (char *name, size_t namesize); #undef getlogin_r +/* Try to determine login name from /proc/self/loginuid and return 0 + if successful. If /proc/self/loginuid cannot be read return -1. + Otherwise return the error number. 
*/ + int attribute_hidden __getlogin_r_loginuid (name, namesize) @@ -35,7 +39,7 @@ __getlogin_r_loginuid (name, namesize) { int fd = open_not_cancel_2 ("/proc/self/loginuid", O_RDONLY); if (fd == -1) - return 1; + return -1; /* We are reading a 32-bit number. 12 bytes are enough for the text representation. If not, something is wrong. */ @@ -51,37 +55,38 @@ __getlogin_r_loginuid (name, namesize) || (uidbuf[n] = '\0', uid = strtoul (uidbuf, &endp, 10), endp == uidbuf || *endp != '\0')) - return 1; + return -1; size_t buflen = 1024; char *buf = alloca (buflen); bool use_malloc = false; struct passwd pwd; struct passwd *tpwd; + int result = 0; int res; - while ((res = __getpwuid_r (uid, &pwd, buf, buflen, &tpwd)) != 0) + while ((res = __getpwuid_r (uid, &pwd, buf, buflen, &tpwd)) == ERANGE) if (__libc_use_alloca (2 * buflen)) - extend_alloca (buf, buflen, 2 * buflen); + buf = extend_alloca (buf, buflen, 2 * buflen); else { buflen *= 2; char *newp = realloc (use_malloc ? buf : NULL, buflen); if (newp == NULL) { - fail: - if (use_malloc) - free (buf); - return 1; + result = ENOMEM; + goto out; } buf = newp; use_malloc = true; } - if (tpwd == NULL) - goto fail; + if (res != 0) + { + result = -1; + goto out; + } - int result = 0; size_t needed = strlen (pwd.pw_name) + 1; if (needed > namesize) { @@ -109,8 +114,9 @@ getlogin_r (name, namesize) char *name; size_t namesize; { - if (__getlogin_r_loginuid (name, namesize) == 0) - return 0; + int res = __getlogin_r_loginuid (name, namesize); + if (res >= 0) + return res; return getlogin_r_fd0 (name, namesize); } diff --git a/libc/sysdeps/unix/sysv/linux/i386/bits/fcntl.h b/libc/sysdeps/unix/sysv/linux/i386/bits/fcntl.h index 669388954..944a3209b 100644 --- a/libc/sysdeps/unix/sysv/linux/i386/bits/fcntl.h +++ b/libc/sysdeps/unix/sysv/linux/i386/bits/fcntl.h @@ -102,6 +102,8 @@ # define F_SETLEASE 1024 /* Set a lease. */ # define F_GETLEASE 1025 /* Enquire what lease is active. 
*/ # define F_NOTIFY 1026 /* Request notfications on a directory. */ +# define F_SETPIPE_SZ 1031 /* Set pipe page size array. */ +# define F_GETPIPE_SZ 1032 /* Get pipe page size array. */ #endif #ifdef __USE_XOPEN2K8 # define F_DUPFD_CLOEXEC 1030 /* Duplicate file descriptor with diff --git a/libc/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h b/libc/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h index 33635fd9e..2189a9722 100644 --- a/libc/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h +++ b/libc/sysdeps/unix/sysv/linux/ia64/bits/fcntl.h @@ -99,6 +99,8 @@ # define F_SETLEASE 1024 /* Set a lease. */ # define F_GETLEASE 1025 /* Enquire what lease is active. */ # define F_NOTIFY 1026 /* Request notfications on a directory. */ +# define F_SETPIPE_SZ 1031 /* Set pipe page size array. */ +# define F_GETPIPE_SZ 1032 /* Get pipe page size array. */ #endif #ifdef __USE_XOPEN2K8 # define F_DUPFD_CLOEXEC 1030 /* Duplicate file descriptor with diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h b/libc/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h index fea347bfc..ea760f64a 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h +++ b/libc/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h @@ -102,6 +102,8 @@ # define F_SETLEASE 1024 /* Set a lease. */ # define F_GETLEASE 1025 /* Enquire what lease is active. */ # define F_NOTIFY 1026 /* Request notfications on a directory. */ +# define F_SETPIPE_SZ 1031 /* Set pipe page size array. */ +# define F_GETPIPE_SZ 1032 /* Get pipe page size array. */ #endif #ifdef __USE_XOPEN2K8 # define F_DUPFD_CLOEXEC 1030 /* Duplicate file descriptor with diff --git a/libc/sysdeps/unix/sysv/linux/s390/bits/fcntl.h b/libc/sysdeps/unix/sysv/linux/s390/bits/fcntl.h index aeb1e0fe9..05c7a3ba8 100644 --- a/libc/sysdeps/unix/sysv/linux/s390/bits/fcntl.h +++ b/libc/sysdeps/unix/sysv/linux/s390/bits/fcntl.h @@ -118,6 +118,8 @@ # define F_SETLEASE 1024 /* Set a lease. */ # define F_GETLEASE 1025 /* Enquire what lease is active.
*/ # define F_NOTIFY 1026 /* Request notfications on a directory. */ +# define F_SETPIPE_SZ 1031 /* Set pipe page size array. */ +# define F_GETPIPE_SZ 1032 /* Get pipe page size array. */ #endif #ifdef __USE_XOPEN2K8 # define F_DUPFD_CLOEXEC 1030 /* Duplicate file descriptor with diff --git a/libc/sysdeps/unix/sysv/linux/sh/bits/fcntl.h b/libc/sysdeps/unix/sysv/linux/sh/bits/fcntl.h index 2a4123c61..df21f6af4 100644 --- a/libc/sysdeps/unix/sysv/linux/sh/bits/fcntl.h +++ b/libc/sysdeps/unix/sysv/linux/sh/bits/fcntl.h @@ -103,6 +103,8 @@ # define F_SETLEASE 1024 /* Set a lease. */ # define F_GETLEASE 1025 /* Enquire what lease is active. */ # define F_NOTIFY 1026 /* Request notfications on a directory. */ +# define F_SETPIPE_SZ 1031 /* Set pipe page size array. */ +# define F_GETPIPE_SZ 1032 /* Get pipe page size array. */ #endif #ifdef __USE_XOPEN2K8 # define F_DUPFD_CLOEXEC 1030 /* Duplicate file descriptor with diff --git a/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h b/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h index 1dc45b7d8..bf8f3d60e 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h +++ b/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h @@ -101,6 +101,8 @@ # define F_SETLEASE 1024 /* Set a lease. */ # define F_GETLEASE 1025 /* Enquire what lease is active. */ # define F_NOTIFY 1026 /* Request notfications on a directory. */ +# define F_SETPIPE_SZ 1031 /* Set pipe page size array. */ +# define F_GETPIPE_SZ 1032 /* Get pipe page size array. */ #endif #ifdef __USE_XOPEN2K8 # define F_DUPFD_CLOEXEC 1030 /* Duplicate file descriptor with diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h b/libc/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h index aa04e0e49..ac029fdda 100644 --- a/libc/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h +++ b/libc/sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h @@ -117,6 +117,8 @@ # define F_SETLEASE 1024 /* Set a lease. */ # define F_GETLEASE 1025 /* Enquire what lease is active.
*/ # define F_NOTIFY 1026 /* Request notfications on a directory. */ +# define F_SETPIPE_SZ 1031 /* Set pipe page size array. */ +# define F_GETPIPE_SZ 1032 /* Get pipe page size array. */ #endif #ifdef __USE_XOPEN2K8 # define F_DUPFD_CLOEXEC 1030 /* Duplicate file descriptor with diff --git a/libc/wcsmbs/wchar.h b/libc/wcsmbs/wchar.h index 5a4e10e54..061b105be 100644 --- a/libc/wcsmbs/wchar.h +++ b/libc/wcsmbs/wchar.h @@ -658,10 +658,10 @@ extern int __REDIRECT (fwscanf, (__FILE *__restrict __stream, extern int __REDIRECT (wscanf, (__const wchar_t *__restrict __format, ...), __isoc99_wscanf) /* __attribute__ ((__format__ (__wscanf__, 1, 2))) */; -extern int __REDIRECT (swscanf, (__const wchar_t *__restrict __s, - __const wchar_t *__restrict __format, ...), - __isoc99_swscanf) - __THROW /* __attribute__ ((__format__ (__wscanf__, 2, 3))) */; +extern int __REDIRECT_NTH (swscanf, (__const wchar_t *__restrict __s, + __const wchar_t *__restrict __format, + ...), __isoc99_swscanf) + /* __attribute__ ((__format__ (__wscanf__, 2, 3))) */; # else extern int __isoc99_fwscanf (__FILE *__restrict __stream, __const wchar_t *__restrict __format, ...); @@ -712,10 +712,10 @@ extern int __REDIRECT (vfwscanf, (__FILE *__restrict __s, extern int __REDIRECT (vwscanf, (__const wchar_t *__restrict __format, __gnuc_va_list __arg), __isoc99_vwscanf) /* __attribute__ ((__format__ (__wscanf__, 1, 0))) */; -extern int __REDIRECT (vswscanf, (__const wchar_t *__restrict __s, - __const wchar_t *__restrict __format, - __gnuc_va_list __arg), __isoc99_vswscanf) - __THROW /* __attribute__ ((__format__ (__wscanf__, 2, 0))) */; +extern int __REDIRECT_NTH (vswscanf, (__const wchar_t *__restrict __s, + __const wchar_t *__restrict __format, + __gnuc_va_list __arg), __isoc99_vswscanf) + /* __attribute__ ((__format__ (__wscanf__, 2, 0))) */; # else extern int __isoc99_vfwscanf (__FILE *__restrict __s, __const wchar_t *__restrict __format, -- cgit v1.2.3