diff options
author | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2013-01-10 00:28:48 +0000 |
---|---|---|
committer | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2013-01-10 00:28:48 +0000 |
commit | 00ed3326c2fac75592e173e9d731452e9bb769ff (patch) | |
tree | 749aa90fd85da4971d09bc7cb69d79ac7e1ea580 /libc/sysdeps/powerpc/powerpc32/power6/memcpy.S | |
parent | 28c5710e0d8071ca674ea390549c1b9e969ccde2 (diff) |
Merge changes between r22062 and r22176 from /fsf/trunk.
git-svn-id: svn://svn.eglibc.org/trunk@22177 7b3dc134-2b1b-0410-93df-9e9f96275f8d
Diffstat (limited to 'libc/sysdeps/powerpc/powerpc32/power6/memcpy.S')
-rw-r--r-- | libc/sysdeps/powerpc/powerpc32/power6/memcpy.S | 18 |
1 files changed, 9 insertions, 9 deletions
```diff
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S b/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S
index 79358368d..203c979d1 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S
@@ -1,5 +1,5 @@
 /* Optimized memcpy implementation for PowerPC32 on POWER6.
-   Copyright (C) 2003, 2006, 2009, 2011 Free Software Foundation, Inc.
+   Copyright (C) 2003-2013 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -411,31 +411,31 @@ L(wdu):
      not. For power4, power5 and power6 machines there is penalty for
      unaligned loads (src) that cross 32-byte, cacheline, or page
      boundaries. So we want to use simple (unaligned) loads where
-     posible but avoid them where we know the load would span a 32-byte
+     possible but avoid them where we know the load would span a 32-byte
      boundary.
 
      At this point we know we have at least 29 (32-3) bytes to copy
      the src is unaligned. and we may cross at least one 32-byte
-     boundary. Also we have the following regester values:
+     boundary. Also we have the following register values:
      r3 == adjusted dst, word aligned
      r4 == unadjusted src
      r5 == unadjusted len
      r9 == adjusted Word length
      r10 == src alignment (1-3)
-     r12 == adjuested src, not aligned
+     r12 == adjusted src, not aligned
      r31 == adjusted len
 
-     First we need to copy word upto but not crossing the next 32-byte
+     First we need to copy word up to but not crossing the next 32-byte
      boundary. Then perform aligned loads just before and just after
-     the boundary and use shifts and or to gernerate the next aligned
+     the boundary and use shifts and or to generate the next aligned
      word for dst. If more then 32 bytes remain we copy (unaligned src)
      the next 7 words and repeat the loop until less then 32-bytes
-     remaim.
+     remain.
 
      Then if more then 4 bytes remain we again use aligned loads,
      shifts and or to generate the next dst word. We then process the
      remaining words using unaligned loads as needed. Finally we check
-     if there more then 0 bytes (1-3) bytes remainting and use
+     if there more then 0 bytes (1-3) bytes remaining and use
      halfword and or byte load/stores to complete the copy.
 */
 	mr 4,12 /* restore unaligned adjusted src ptr */
@@ -512,7 +512,7 @@ L(wdu_h32_4):
 	addi 3,3,4
 	.align 4
 L(wdu_h32_0):
-/* set up for 32-byte boundry crossing word move and possibly 32-byte
+/* set up for 32-byte boundary crossing word move and possibly 32-byte
    move loop. */
 	clrrwi 12,4,2
 	cmplwi cr5,31,32
```