re PR libfortran/78379 (Processor-specific versions for matmul)

2017-05-25 Thomas Koenig <tkoenig@gcc.gnu.org> PR libfortran/78379 * Makefile.am: Add generated/matmulavx128_*.c files. Handle them for compiling and setting the right flags. * acinclude.m4: Add tests for FMA3, FMA4 and AVX128. * configure.ac: Call them. * Makefile.in: Regenerated. * config.h.in: Regenerated. * configure: Regenerated. * m4/matmul.m4: Handle AMD chips by calling 128-bit AVX versions which use FMA3 or FMA4. * m4/matmulavx128.m4: New file. * generated/matmul_c10.c: Regenerated. * generated/matmul_c16.c: Regenerated. * generated/matmul_c4.c: Regenerated. * generated/matmul_c8.c: Regenerated. * generated/matmul_i1.c: Regenerated. * generated/matmul_i16.c: Regenerated. * generated/matmul_i2.c: Regenerated. * generated/matmul_i4.c: Regenerated. * generated/matmul_i8.c: Regenerated. * generated/matmul_r10.c: Regenerated. * generated/matmul_r16.c: Regenerated. * generated/matmul_r4.c: Regenerated. * generated/matmul_r8.c: Regenerated. * generated/matmulavx128_c10.c: New file. * generated/matmulavx128_c16.c: New file. * generated/matmulavx128_c4.c: New file. * generated/matmulavx128_c8.c: New file. * generated/matmulavx128_i1.c: New file. * generated/matmulavx128_i16.c: New file. * generated/matmulavx128_i2.c: New file. * generated/matmulavx128_i4.c: New file. * generated/matmulavx128_i8.c: New file. * generated/matmulavx128_r10.c: New file. * generated/matmulavx128_r16.c: New file. * generated/matmulavx128_r4.c: New file. * generated/matmulavx128_r8.c: New file. From-SVN: r248472
author: Thomas Koenig <tkoenig@gcc.gnu.org> 2017-05-25 21:51:27 +0000
committer: Thomas Koenig <tkoenig@gcc.gnu.org> 2017-05-25 21:51:27 +0000
commit: 1d5cf7fcf28c0b7d44fab10d26fc450e9d962f03 (patch)
tree: 3cd241e469094809d6ceb3aa1a14f6841627e681 /libgfortran/m4
parent: 87e1e6036ef93b18b1450357488ee907db880f37 (diff)
2 files changed, 107 insertions, 0 deletions
diff --git a/libgfortran/m4/matmul.m4 b/libgfortran/m4/matmul.m4
index 7976fda8bb4..c2f641542bf 100644
--- a/libgfortran/m4/matmul.m4
+++ b/libgfortran/m4/matmul.m4
@@ -106,6 +106,26 @@ static' include(matmul_internal.m4)dnl
 static' include(matmul_internal.m4)dnl
 `#endif  /* HAVE_AVX512F */
 
+/* AMD-specific functions with AVX128 and FMA3/FMA4.  */
+
+#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
+'define(`matmul_name',`matmul_'rtype_code`_avx128_fma3')dnl
+`void
+'matmul_name` ('rtype` * const restrict retarray, 
+	'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
+internal_proto('matmul_name`);
+#endif
+
+#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
+'define(`matmul_name',`matmul_'rtype_code`_avx128_fma4')dnl
+`void
+'matmul_name` ('rtype` * const restrict retarray, 
+	'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
+internal_proto('matmul_name`);
+#endif
+
 /* Function to fall back to if there is no special processor-specific version.  */
 'define(`matmul_name',`matmul_'rtype_code`_vanilla')dnl
 `static' include(matmul_internal.m4)dnl
@@ -161,6 +181,26 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray,
 	    }
 #endif  /* HAVE_AVX */
         }
+    else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
+      {
+#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
+        if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+	    && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+	  {
+            matmul_fn = matmul_'rtype_code`_avx128_fma3;
+	    goto store;
+	  }
+#endif
+#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
+        if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+	     && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+	  {
+            matmul_fn = matmul_'rtype_code`_avx128_fma4;
+	    goto store;
+	  }
+#endif
+
+      }
    store:
       __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
    }
diff --git a/libgfortran/m4/matmulavx128.m4 b/libgfortran/m4/matmulavx128.m4
new file mode 100644
index 00000000000..14172843579
--- /dev/null
+++ b/libgfortran/m4/matmulavx128.m4
@@ -0,0 +1,67 @@
+`/* Implementation of the MATMUL intrinsic
+   Copyright (C) 2002-2017 Free Software Foundation, Inc.
+   Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "libgfortran.h"
+#include <string.h>
+#include <assert.h>'
+
+include(iparm.m4)dnl
+
+/* These are the specific versions of matmul with -mprefer-avx128.  */
+
+`#if defined (HAVE_'rtype_name`)
+
+/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be
+   passed to us by the front-end, in which case we call it for large
+   matrices.  */
+
+typedef void (*blas_call)(const char *, const char *, const int *, const int *,
+                          const int *, const 'rtype_name` *, const 'rtype_name` *,
+                          const int *, const 'rtype_name` *, const int *,
+                          const 'rtype_name` *, 'rtype_name` *, const int *,
+                          int, int);
+
+#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
+'define(`matmul_name',`matmul_'rtype_code`_avx128_fma3')dnl
+`void
+'matmul_name` ('rtype` * const restrict retarray, 
+	'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
+internal_proto('matmul_name`);
+'include(matmul_internal.m4)dnl
+`#endif
+
+#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
+'define(`matmul_name',`matmul_'rtype_code`_avx128_fma4')dnl
+`void
+'matmul_name` ('rtype` * const restrict retarray, 
+	'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
+internal_proto('matmul_name`);
+'include(matmul_internal.m4)dnl
+`#endif
+
+#endif
+'
author	Thomas Koenig <tkoenig@gcc.gnu.org>	2017-05-25 21:51:27 +0000
committer	Thomas Koenig <tkoenig@gcc.gnu.org>	2017-05-25 21:51:27 +0000
commit	1d5cf7fcf28c0b7d44fab10d26fc450e9d962f03 (patch)
tree	3cd241e469094809d6ceb3aa1a14f6841627e681 /libgfortran/m4
parent	87e1e6036ef93b18b1450357488ee907db880f37 (diff)