aboutsummaryrefslogtreecommitdiff
path: root/libcpp
diff options
context:
space:
mode:
authorwschmidt <wschmidt@138bc75d-0d04-0410-961f-82ee72b054a4>2014-10-03 20:06:38 +0000
committerwschmidt <wschmidt@138bc75d-0d04-0410-961f-82ee72b054a4>2014-10-03 20:06:38 +0000
commite347736da8caf3acc8e58973c3d3655ad6765e1f (patch)
tree2cf47064ba3e0e7d55b6d9d03cbb18665094adac /libcpp
parent5e84569c0452c39bf60fbd066801edc1907a982f (diff)
2014-10-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* lex.c (search_line_fast): Add new version to be used for Power8 and later targets when Altivec is enabled. Restrict the existing Altivec version to big-endian systems so that lvsr is not used on little endian, where it is deprecated. Remove LE-specific code from the now-BE-only version. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@215873 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libcpp')
-rw-r--r--libcpp/ChangeLog8
-rw-r--r--libcpp/lex.c115
2 files changed, 112 insertions, 11 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 3b23708d39a..09304ef8496 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,11 @@
+2014-10-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * lex.c (search_line_fast): Add new version to be used for Power8
+ and later targets when Altivec is enabled. Restrict the existing
+ Altivec version to big-endian systems so that lvsr is not used on
+ little endian, where it is deprecated. Remove LE-specific code
+ from the now-BE-only version.
+
2014-10-02 Bernd Edlinger <bernd.edlinger@hotmail.de>
Jeff Law <law@redhat.com>
diff --git a/libcpp/lex.c b/libcpp/lex.c
index bdaa0705bc7..45eaca7ab4f 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -513,9 +513,111 @@ init_vectorized_lexer (void)
search_line_fast = impl;
}
-#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
+#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
-/* A vection of the fast scanner using AltiVec vectorized byte compares. */
+/* A vection of the fast scanner using AltiVec vectorized byte compares
+ and VSX unaligned loads (when VSX is available). This is otherwise
+ the same as the pre-GCC 5 version. */
+
+static const uchar *
+search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
+{
+ typedef __attribute__((altivec(vector))) unsigned char vc;
+
+ const vc repl_nl = {
+ '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
+ '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
+ };
+ const vc repl_cr = {
+ '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
+ '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
+ };
+ const vc repl_bs = {
+ '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
+ '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
+ };
+ const vc repl_qm = {
+ '?', '?', '?', '?', '?', '?', '?', '?',
+ '?', '?', '?', '?', '?', '?', '?', '?',
+ };
+ const vc zero = { 0 };
+
+ vc data, t;
+
+ /* Main loop processing 16 bytes at a time. */
+ do
+ {
+ vc m_nl, m_cr, m_bs, m_qm;
+
+ data = *((const vc *)s);
+ s += 16;
+
+ m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
+ m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
+ m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
+ m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
+ t = (m_nl | m_cr) | (m_bs | m_qm);
+
+ /* T now contains 0xff in bytes for which we matched one of the relevant
+ characters. We want to exit the loop if any byte in T is non-zero.
+ Below is the expansion of vec_any_ne(t, zero). */
+ }
+ while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
+
+ /* Restore s to to point to the 16 bytes we just processed. */
+ s -= 16;
+
+ {
+#define N (sizeof(vc) / sizeof(long))
+
+ union {
+ vc v;
+ /* Statically assert that N is 2 or 4. */
+ unsigned long l[(N == 2 || N == 4) ? N : -1];
+ } u;
+ unsigned long l, i = 0;
+
+ u.v = t;
+
+ /* Find the first word of T that is non-zero. */
+ switch (N)
+ {
+ case 4:
+ l = u.l[i++];
+ if (l != 0)
+ break;
+ s += sizeof(unsigned long);
+ l = u.l[i++];
+ if (l != 0)
+ break;
+ s += sizeof(unsigned long);
+ case 2:
+ l = u.l[i++];
+ if (l != 0)
+ break;
+ s += sizeof(unsigned long);
+ l = u.l[i];
+ }
+
+ /* L now contains 0xff in bytes for which we matched one of the
+ relevant characters. We can find the byte index by finding
+ its bit index and dividing by 8. */
+#ifdef __BIG_ENDIAN__
+ l = __builtin_clzl(l) >> 3;
+#else
+ l = __builtin_ctzl(l) >> 3;
+#endif
+ return s + l;
+
+#undef N
+ }
+}
+
+#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
+
+/* A vection of the fast scanner using AltiVec vectorized byte compares.
+ This cannot be used for little endian because vec_lvsl/lvsr are
+ deprecated for little endian and the code won't work properly. */
/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
so we can't compile this function without -maltivec on the command line
(or implied by some other switch). */
@@ -557,13 +659,8 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
beginning with all ones and shifting in zeros according to the
mis-alignment. The LVSR instruction pulls the exact shift we
want from the address. */
-#ifdef __BIG_ENDIAN__
mask = __builtin_vec_lvsr(0, s);
mask = __builtin_vec_perm(zero, ones, mask);
-#else
- mask = __builtin_vec_lvsl(0, s);
- mask = __builtin_vec_perm(ones, zero, mask);
-#endif
data &= mask;
/* While altivec loads mask addresses, we still need to align S so
@@ -627,11 +724,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
/* L now contains 0xff in bytes for which we matched one of the
relevant characters. We can find the byte index by finding
its bit index and dividing by 8. */
-#ifdef __BIG_ENDIAN__
l = __builtin_clzl(l) >> 3;
-#else
- l = __builtin_ctzl(l) >> 3;
-#endif
return s + l;
#undef N