summary | refs | log | tree | commit | diff
path: root/polly
diff options
context:
space:
mode:
author Michael Kruse <llvm@meinersbur.de> 2018-09-26 15:22:39 +0000
committer Michael Kruse <llvm@meinersbur.de> 2018-09-26 15:22:39 +0000
commit a0b013b9bf51745bd7b0c5fc2e851ef3610ab50a (patch)
tree 5bd99169062e193705320e6de9c65f40246a925c /polly
parent a2735c72a60b8cc6920574991051531c0874f58b (diff)
[doc] Fix HowToManuallyUseTheIndividualPiecesOfPolly
Also remove compiled binaries.
Diffstat (limited to 'polly')
-rw-r--r--polly/docs/HowToManuallyUseTheIndividualPiecesOfPolly.rst49
-rw-r--r--polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop44
-rw-r--r--polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop.interchanged39
-rw-r--r--polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop.interchanged+tiled39
-rw-r--r--polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop.interchanged+tiled+vector39
-rw-r--r--polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop73
-rw-r--r--polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged73
-rw-r--r--polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged+tiled73
-rw-r--r--polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged+tiled+vector73
-rw-r--r--polly/docs/experiments/matmul/matmul.ll (renamed from polly/docs/experiments/matmul/matmul.s)104
-rwxr-xr-xpolly/docs/experiments/matmul/matmul.normalopt.exebin8849 -> 0 bytes
-rw-r--r--polly/docs/experiments/matmul/matmul.normalopt.llbin2620 -> 10896 bytes
-rw-r--r--polly/docs/experiments/matmul/matmul.normalopt.s327
-rwxr-xr-xpolly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.exebin13413 -> 0 bytes
-rw-r--r--polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.llbin5980 -> 25968 bytes
-rw-r--r--polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s1290
-rwxr-xr-xpolly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.exebin8923 -> 0 bytes
-rw-r--r--polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.llbin3724 -> 22889 bytes
-rw-r--r--polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.s833
-rwxr-xr-xpolly/docs/experiments/matmul/matmul.polly.interchanged+tiled.exebin8916 -> 0 bytes
-rw-r--r--polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.llbin3612 -> 25589 bytes
-rw-r--r--polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.s675
-rwxr-xr-xpolly/docs/experiments/matmul/matmul.polly.interchanged.exebin8910 -> 0 bytes
-rw-r--r--polly/docs/experiments/matmul/matmul.polly.interchanged.llbin3128 -> 13634 bytes
-rw-r--r--polly/docs/experiments/matmul/matmul.polly.interchanged.s346
-rw-r--r--polly/docs/experiments/matmul/matmul.preopt.ll67
-rwxr-xr-xpolly/docs/experiments/matmul/runall.sh78
-rw-r--r--polly/docs/experiments/matmul/scops.init_array.dot44
-rw-r--r--polly/docs/experiments/matmul/scops.init_array.dot.pngbin154236 -> 126274 bytes
-rw-r--r--polly/docs/experiments/matmul/scops.main.dot60
-rw-r--r--polly/docs/experiments/matmul/scops.main.dot.pngbin190505 -> 179169 bytes
-rw-r--r--polly/docs/experiments/matmul/scops.print_array.dot60
-rw-r--r--polly/docs/experiments/matmul/scops.print_array.dot.pngbin200271 -> 210160 bytes
-rw-r--r--polly/docs/experiments/matmul/scopsonly.init_array.dot44
-rw-r--r--polly/docs/experiments/matmul/scopsonly.init_array.dot.pngbin28261 -> 26510 bytes
-rw-r--r--polly/docs/experiments/matmul/scopsonly.main.dot60
-rw-r--r--polly/docs/experiments/matmul/scopsonly.main.dot.pngbin43325 -> 34308 bytes
-rw-r--r--polly/docs/experiments/matmul/scopsonly.print_array.dot60
-rw-r--r--polly/docs/experiments/matmul/scopsonly.print_array.dot.pngbin52118 -> 77565 bytes
39 files changed, 2571 insertions, 1979 deletions
diff --git a/polly/docs/HowToManuallyUseTheIndividualPiecesOfPolly.rst b/polly/docs/HowToManuallyUseTheIndividualPiecesOfPolly.rst
index 1822923c288..958dc96fa9e 100644
--- a/polly/docs/HowToManuallyUseTheIndividualPiecesOfPolly.rst
+++ b/polly/docs/HowToManuallyUseTheIndividualPiecesOfPolly.rst
@@ -21,7 +21,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- clang -S -emit-llvm matmul.c -o matmul.s
+ clang -S -emit-llvm matmul.c -Xclang -disable-O0-optnone -o matmul.ll
2. **Prepare the LLVM-IR for Polly**
@@ -34,7 +34,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- opt -S -polly-canonicalize matmul.s > matmul.preopt.ll
+ opt -S -polly-canonicalize matmul.ll -o matmul.preopt.ll
3. **Show the SCoPs detected by Polly (optional)**
--------------------------------------------------
@@ -45,7 +45,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ opt -polly-ast -analyze -q matmul.preopt.ll -polly-process-unprofitable
+ $ opt -basicaa -polly-ast -analyze matmul.preopt.ll -polly-process-unprofitable -polly-use-llvm-names
.. code-block:: guess
@@ -84,8 +84,8 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ opt -view-scops -disable-output matmul.preopt.ll
- $ opt -view-scops-only -disable-output matmul.preopt.ll
+ $ opt -polly-use-llvm-names -basicaa -view-scops -disable-output matmul.preopt.ll
+ $ opt -polly-use-llvm-names -basicaa -view-scops-only -disable-output matmul.preopt.ll
The output for the different functions:
@@ -104,7 +104,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ opt -polly-scops -analyze matmul.preopt.ll -polly-process-unprofitable
+ $ opt -polly-use-llvm-names -basicaa -polly-scops -analyze matmul.preopt.ll -polly-process-unprofitable
.. code-block:: guess
@@ -194,7 +194,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ opt -polly-dependences -analyze matmul.preopt.ll -polly-process-unprofitable
+ $ opt -basicaa -polly-use-llvm-names -polly-dependences -analyze matmul.preopt.ll -polly-process-unprofitable
.. code-block:: guess
@@ -226,7 +226,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ opt -polly-export-jscop matmul.preopt.ll -polly-process-unprofitable
+ $ opt -basicaa -polly-use-llvm-names -polly-export-jscop matmul.preopt.ll -polly-process-unprofitable
.. code-block:: guess
@@ -254,7 +254,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ opt matmul.preopt.ll -polly-import-jscop -polly-ast -analyze -polly-process-unprofitable
+ $ opt -basicaa -polly-use-llvm-names matmul.preopt.ll -polly-import-jscop -polly-ast -analyze -polly-process-unprofitable
.. code-block:: c
@@ -282,7 +282,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged -polly-ast -analyze -polly-process-unprofitable
+ $ opt -basicaa -polly-use-llvm-names matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged -polly-ast -analyze -polly-process-unprofitable
.. code-block:: c
@@ -311,7 +311,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-ast -analyze -polly-process-unprofitable
+ $ opt -basicaa -polly-use-llvm-names matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-ast -analyze -polly-process-unprofitable
.. code-block:: c
@@ -346,7 +346,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-ast -analyze -polly-process-unprofitable
+ $ opt -basicaa -polly-use-llvm-names matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-ast -analyze -polly-process-unprofitable
.. code-block:: c
@@ -383,11 +383,11 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ opt matmul.preopt.ll | opt -O3 > matmul.normalopt.ll
+ $ opt -S matmul.preopt.ll | opt -S -O3 -o matmul.normalopt.ll
.. code-block:: console
- $ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged -polly-codegen -polly-process-unprofitable | opt -O3 > matmul.polly.interchanged.ll
+ $ opt -S matmul.preopt.ll -basicaa -polly-use-llvm-names -polly-import-jscop -polly-import-jscop-postfix=interchanged -polly-codegen -polly-process-unprofitable | opt -S -O3 -o matmul.polly.interchanged.ll
.. code-block:: guess
@@ -397,7 +397,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-codegen -polly-process-unprofitable | opt -O3 > matmul.polly.interchanged+tiled.ll
+ $ opt -S matmul.preopt.ll -basicaa -polly-use-llvm-names -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled -polly-codegen -polly-process-unprofitable | opt -S -O3 -o matmul.polly.interchanged+tiled.ll
.. code-block:: guess
@@ -407,7 +407,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen -polly-vectorizer=polly -polly-process-unprofitable | opt -O3 > matmul.polly.interchanged+tiled+vector.ll
+ $ opt -S matmul.preopt.ll -basicaa -polly-use-llvm-names -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen -polly-vectorizer=polly -polly-process-unprofitable | opt -S -O3 -o matmul.polly.interchanged+tiled+vector.ll
.. code-block:: guess
@@ -417,7 +417,7 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ opt matmul.preopt.ll -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen -polly-vectorizer=polly -polly-parallel -polly-process-unprofitable | opt -O3 > matmul.polly.interchanged+tiled+openmp.ll
+ $ opt -S matmul.preopt.ll -basicaa -polly-use-llvm-names -polly-import-jscop -polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen -polly-vectorizer=polly -polly-parallel -polly-process-unprofitable | opt -S -O3 -o matmul.polly.interchanged+tiled+vector+openmp.ll
.. code-block:: guess
@@ -431,11 +431,16 @@ performance improvement can be expected by an optimal automatic optimizer.
.. code-block:: console
- $ llc matmul.normalopt.ll -o matmul.normalopt.s && gcc matmul.normalopt.s -o matmul.normalopt.exe
- $ llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s && gcc matmul.polly.interchanged.s -o matmul.polly.interchanged.exe
- $ llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s && gcc matmul.polly.interchanged+tiled.s -o matmul.polly.interchanged+tiled.exe
- $ llc matmul.polly.interchanged+tiled+vector.ll -o matmul.polly.interchanged+tiled+vector.s && gcc matmul.polly.interchanged+tiled+vector.s -o matmul.polly.interchanged+tiled+vector.exe
- $ llc matmul.polly.interchanged+tiled+vector+openmp.ll -o matmul.polly.interchanged+tiled+vector+openmp.s && gcc -fopenmp matmul.polly.interchanged+tiled+vector+openmp.s -o matmul.polly.interchanged+tiled+vector+openmp.exe
+ $ llc matmul.normalopt.ll -o matmul.normalopt.s -relocation-model=pic
+ $ gcc matmul.normalopt.s -o matmul.normalopt.exe
+ $ llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s -relocation-model=pic
+ $ gcc matmul.polly.interchanged.s -o matmul.polly.interchanged.exe
+ $ llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s -relocation-model=pic
+ $ gcc matmul.polly.interchanged+tiled.s -o matmul.polly.interchanged+tiled.exe
+ $ llc matmul.polly.interchanged+tiled+vector.ll -o matmul.polly.interchanged+tiled+vector.s -relocation-model=pic
+ $ gcc matmul.polly.interchanged+tiled+vector.s -o matmul.polly.interchanged+tiled+vector.exe
+ $ llc matmul.polly.interchanged+tiled+vector+openmp.ll -o matmul.polly.interchanged+tiled+vector+openmp.s -relocation-model=pic
+ $ gcc matmul.polly.interchanged+tiled+vector+openmp.s -lgomp -o matmul.polly.interchanged+tiled+vector+openmp.exe
11. **Compare the runtime of the executables**
----------------------------------------------
diff --git a/polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop b/polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop
index 7f1db3e9e4b..2cc32b1cc01 100644
--- a/polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop
+++ b/polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop
@@ -1,33 +1,39 @@
{
- "arrays" : [
+ "arrays": [
{
- "name" : "MemRef_A",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_A",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
},
{
- "name" : "MemRef_B",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_B",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
}
],
- "context" : "{ : }",
- "name" : "%for.cond1.preheader---%for.end19",
- "statements" : [
+ "context": "{ : }",
+ "name": "%for.cond1.preheader---%for.end19",
+ "statements": [
{
- "accesses" : [
+ "accesses": [
{
- "kind" : "write",
- "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
+ "kind": "write",
+ "relation": "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
},
{
- "kind" : "write",
- "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
+ "kind": "write",
+ "relation": "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
}
],
- "domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
- "name" : "Stmt_for_body3",
- "schedule" : "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
+ "domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
+ "name": "Stmt_for_body3",
+ "schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
}
]
-}
+}
\ No newline at end of file
diff --git a/polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop.interchanged b/polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop.interchanged
new file mode 100644
index 00000000000..2cc32b1cc01
--- /dev/null
+++ b/polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop.interchanged
@@ -0,0 +1,39 @@
+{
+ "arrays": [
+ {
+ "name": "MemRef_A",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
+ },
+ {
+ "name": "MemRef_B",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
+ }
+ ],
+ "context": "{ : }",
+ "name": "%for.cond1.preheader---%for.end19",
+ "statements": [
+ {
+ "accesses": [
+ {
+ "kind": "write",
+ "relation": "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
+ },
+ {
+ "kind": "write",
+ "relation": "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
+ }
+ ],
+ "domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
+ "name": "Stmt_for_body3",
+ "schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop.interchanged+tiled b/polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop.interchanged+tiled
new file mode 100644
index 00000000000..2cc32b1cc01
--- /dev/null
+++ b/polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop.interchanged+tiled
@@ -0,0 +1,39 @@
+{
+ "arrays": [
+ {
+ "name": "MemRef_A",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
+ },
+ {
+ "name": "MemRef_B",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
+ }
+ ],
+ "context": "{ : }",
+ "name": "%for.cond1.preheader---%for.end19",
+ "statements": [
+ {
+ "accesses": [
+ {
+ "kind": "write",
+ "relation": "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
+ },
+ {
+ "kind": "write",
+ "relation": "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
+ }
+ ],
+ "domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
+ "name": "Stmt_for_body3",
+ "schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop.interchanged+tiled+vector b/polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop.interchanged+tiled+vector
new file mode 100644
index 00000000000..2cc32b1cc01
--- /dev/null
+++ b/polly/docs/experiments/matmul/init_array___%for.cond1.preheader---%for.end19.jscop.interchanged+tiled+vector
@@ -0,0 +1,39 @@
+{
+ "arrays": [
+ {
+ "name": "MemRef_A",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
+ },
+ {
+ "name": "MemRef_B",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
+ }
+ ],
+ "context": "{ : }",
+ "name": "%for.cond1.preheader---%for.end19",
+ "statements": [
+ {
+ "accesses": [
+ {
+ "kind": "write",
+ "relation": "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0, i1] }"
+ },
+ {
+ "kind": "write",
+ "relation": "{ Stmt_for_body3[i0, i1] -> MemRef_B[i0, i1] }"
+ }
+ ],
+ "domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
+ "name": "Stmt_for_body3",
+ "schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1] }"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop b/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop
index ac1a908db1f..2e4b597fb7c 100644
--- a/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop
+++ b/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop
@@ -1,57 +1,66 @@
{
- "arrays" : [
+ "arrays": [
{
- "name" : "MemRef_C",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_C",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
},
{
- "name" : "MemRef_A",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_A",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
},
{
- "name" : "MemRef_B",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_B",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
}
],
- "context" : "{ : }",
- "name" : "%for.cond1.preheader---%for.end30",
- "statements" : [
+ "context": "{ : }",
+ "name": "%for.cond1.preheader---%for.end30",
+ "statements": [
{
- "accesses" : [
+ "accesses": [
{
- "kind" : "write",
- "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
+ "kind": "write",
+ "relation": "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
}
],
- "domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
- "name" : "Stmt_for_body3",
- "schedule" : "{ Stmt_for_body3[i0, i1] -> [i0, i1, 0, 0] }"
+ "domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
+ "name": "Stmt_for_body3",
+ "schedule": "{ Stmt_for_body3[i0, i1] -> [i0, i1, 0, 0] }"
},
{
- "accesses" : [
+ "accesses": [
{
- "kind" : "read",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
+ "kind": "read",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
},
{
- "kind" : "read",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
+ "kind": "read",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
},
{
- "kind" : "read",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
+ "kind": "read",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
},
{
- "kind" : "write",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
+ "kind": "write",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
}
],
- "domain" : "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
- "name" : "Stmt_for_body8",
- "schedule" : "{ Stmt_for_body8[i0, i1, i2] -> [i0, i1, 1, i2] }"
+ "domain": "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
+ "name": "Stmt_for_body8",
+ "schedule": "{ Stmt_for_body8[i0, i1, i2] -> [i0, i1, 1, i2] }"
}
]
-}
+}
\ No newline at end of file
diff --git a/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged b/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged
index 7e3d212b4ef..fc45fa1dc47 100644
--- a/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged
+++ b/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged
@@ -1,57 +1,66 @@
{
- "arrays" : [
+ "arrays": [
{
- "name" : "MemRef_C",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_C",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
},
{
- "name" : "MemRef_A",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_A",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
},
{
- "name" : "MemRef_B",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_B",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
}
],
- "context" : "{ : }",
- "name" : "%for.cond1.preheader---%for.end30",
- "statements" : [
+ "context": "{ : }",
+ "name": "%for.cond1.preheader---%for.end30",
+ "statements": [
{
- "accesses" : [
+ "accesses": [
{
- "kind" : "write",
- "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
+ "kind": "write",
+ "relation": "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
}
],
- "domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
- "name" : "Stmt_for_body3",
- "schedule" : "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0] }"
+ "domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
+ "name": "Stmt_for_body3",
+ "schedule": "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0] }"
},
{
- "accesses" : [
+ "accesses": [
{
- "kind" : "read",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
+ "kind": "read",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
},
{
- "kind" : "read",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
+ "kind": "read",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
},
{
- "kind" : "read",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
+ "kind": "read",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
},
{
- "kind" : "write",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
+ "kind": "write",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
}
],
- "domain" : "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
- "name" : "Stmt_for_body8",
- "schedule" : "{ Stmt_for_body8[i0, i1, i2] -> [1, i0, i2, i1] }"
+ "domain": "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
+ "name": "Stmt_for_body8",
+ "schedule": "{ Stmt_for_body8[i0, i1, i2] -> [1, i0, i2, i1] }"
}
]
-}
+}
\ No newline at end of file
diff --git a/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged+tiled b/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged+tiled
index b7c33b9896c..32617959177 100644
--- a/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged+tiled
+++ b/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged+tiled
@@ -1,57 +1,66 @@
{
- "arrays" : [
+ "arrays": [
{
- "name" : "MemRef_C",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_C",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
},
{
- "name" : "MemRef_A",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_A",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
},
{
- "name" : "MemRef_B",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_B",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
}
],
- "context" : "{ : }",
- "name" : "%for.cond1.preheader---%for.end30",
- "statements" : [
+ "context": "{ : }",
+ "name": "%for.cond1.preheader---%for.end30",
+ "statements": [
{
- "accesses" : [
+ "accesses": [
{
- "kind" : "write",
- "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
+ "kind": "write",
+ "relation": "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
}
],
- "domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
- "name" : "Stmt_for_body3",
- "schedule" : "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0, 0, 0, 0 ] }"
+ "domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
+ "name": "Stmt_for_body3",
+ "schedule": "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0, 0, 0, 0 ] }"
},
{
- "accesses" : [
+ "accesses": [
{
- "kind" : "read",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
+ "kind": "read",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
},
{
- "kind" : "read",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
+ "kind": "read",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
},
{
- "kind" : "read",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
+ "kind": "read",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
},
{
- "kind" : "write",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
+ "kind": "write",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
}
],
- "domain" : "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
- "name" : "Stmt_for_body8",
- "schedule" : "{ Stmt_for_body8[i0, i1, i2] -> [1, o0, o1, o2, i0, i2, i1]: o0 <= i0 < o0 + 64 and o1 <= i1 < o1 + 64 and o2 <= i2 < o2 + 64 and o0 % 64 = 0 and o1 % 64 = 0 and o2 % 64 = 0 }"
+ "domain": "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
+ "name": "Stmt_for_body8",
+ "schedule": "{ Stmt_for_body8[i0, i1, i2] -> [1, o0, o1, o2, i0, i2, i1]: o0 <= i0 < o0 + 64 and o1 <= i1 < o1 + 64 and o2 <= i2 < o2 + 64 and o0 % 64 = 0 and o1 % 64 = 0 and o2 % 64 = 0 }"
}
]
-}
+}
\ No newline at end of file
diff --git a/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged+tiled+vector b/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged+tiled+vector
index 0f588aa8de8..d7a872a4e35 100644
--- a/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged+tiled+vector
+++ b/polly/docs/experiments/matmul/main___%for.cond1.preheader---%for.end30.jscop.interchanged+tiled+vector
@@ -1,57 +1,66 @@
{
- "arrays" : [
+ "arrays": [
{
- "name" : "MemRef_C",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_C",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
},
{
- "name" : "MemRef_A",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_A",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
},
{
- "name" : "MemRef_B",
- "sizes" : [ "1536" ],
- "type" : "float"
+ "name": "MemRef_B",
+ "sizes": [
+ "*",
+ "1536"
+ ],
+ "type": "float"
}
],
- "context" : "{ : }",
- "name" : "%for.cond1.preheader---%for.end30",
- "statements" : [
+ "context": "{ : }",
+ "name": "%for.cond1.preheader---%for.end30",
+ "statements": [
{
- "accesses" : [
+ "accesses": [
{
- "kind" : "write",
- "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
+ "kind": "write",
+ "relation": "{ Stmt_for_body3[i0, i1] -> MemRef_C[i0, i1] }"
}
],
- "domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
- "name" : "Stmt_for_body3",
- "schedule" : "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0, 0, 0, 0, 0 ] }"
+ "domain": "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 }",
+ "name": "Stmt_for_body3",
+ "schedule": "{ Stmt_for_body3[i0, i1] -> [0, i0, i1, 0, 0, 0, 0, 0 ] }"
},
{
- "accesses" : [
+ "accesses": [
{
- "kind" : "read",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
+ "kind": "read",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
},
{
- "kind" : "read",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
+ "kind": "read",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[i0, i2] }"
},
{
- "kind" : "read",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
+ "kind": "read",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i2, i1] }"
},
{
- "kind" : "write",
- "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
+ "kind": "write",
+ "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[i0, i1] }"
}
],
- "domain" : "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
- "name" : "Stmt_for_body8",
- "schedule" : "{ Stmt_for_body8[i0, i1, i2] -> [1, o0, o1, o2, i0, i2, oo1, i1]: o0 <= i0 < o0 + 64 and o1 <= oo1 < o1 + 64 and o2 <= i2 < o2 + 64 and oo1 <= i1 < oo1 + 4 and o0 % 64 = 0 and o1 % 64 = 0 and o2 % 64 = 0 and oo1 % 4 = 0 }"
+ "domain": "{ Stmt_for_body8[i0, i1, i2] : 0 <= i0 <= 1535 and 0 <= i1 <= 1535 and 0 <= i2 <= 1535 }",
+ "name": "Stmt_for_body8",
+ "schedule": "{ Stmt_for_body8[i0, i1, i2] -> [1, o0, o1, o2, i0, i2, oo1, i1]: o0 <= i0 < o0 + 64 and o1 <= oo1 < o1 + 64 and o2 <= i2 < o2 + 64 and oo1 <= i1 < oo1 + 4 and o0 % 64 = 0 and o1 % 64 = 0 and o2 % 64 = 0 and oo1 % 4 = 0 }"
}
]
-}
+}
\ No newline at end of file
diff --git a/polly/docs/experiments/matmul/matmul.s b/polly/docs/experiments/matmul/matmul.ll
index 17147be2447..f8918bd2ca6 100644
--- a/polly/docs/experiments/matmul/matmul.s
+++ b/polly/docs/experiments/matmul/matmul.ll
@@ -6,15 +6,15 @@ target triple = "x86_64-unknown-linux-gnu"
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
-@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
-@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
-@stdout = external global %struct._IO_FILE*, align 8
+@A = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
+@B = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
+@stdout = external dso_local global %struct._IO_FILE*, align 8
@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
-@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
+@C = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
@.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
-; Function Attrs: nounwind uwtable
-define void @init_array() #0 {
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init_array() #0 {
entry:
%i = alloca i32, align 4
%j = alloca i32, align 4
@@ -44,12 +44,12 @@ for.body3: ; preds = %for.cond1
%conv = sitofp i32 %add to double
%div = fdiv double %conv, 2.000000e+00
%conv4 = fptrunc double %div to float
- %4 = load i32, i32* %j, align 4
+ %4 = load i32, i32* %i, align 4
%idxprom = sext i32 %4 to i64
- %5 = load i32, i32* %i, align 4
+ %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom
+ %5 = load i32, i32* %j, align 4
%idxprom5 = sext i32 %5 to i64
- %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom5
- %arrayidx6 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
+ %arrayidx6 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom5
store float %conv4, float* %arrayidx6, align 4
%6 = load i32, i32* %i, align 4
%7 = load i32, i32* %j, align 4
@@ -59,12 +59,12 @@ for.body3: ; preds = %for.cond1
%conv10 = sitofp i32 %add9 to double
%div11 = fdiv double %conv10, 2.000000e+00
%conv12 = fptrunc double %div11 to float
- %8 = load i32, i32* %j, align 4
+ %8 = load i32, i32* %i, align 4
%idxprom13 = sext i32 %8 to i64
- %9 = load i32, i32* %i, align 4
- %idxprom14 = sext i32 %9 to i64
- %arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom14
- %arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13
+ %arrayidx14 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom13
+ %9 = load i32, i32* %j, align 4
+ %idxprom15 = sext i32 %9 to i64
+ %arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx14, i64 0, i64 %idxprom15
store float %conv12, float* %arrayidx16, align 4
br label %for.inc
@@ -87,8 +87,8 @@ for.end19: ; preds = %for.cond
ret void
}
-; Function Attrs: nounwind uwtable
-define void @print_array() #0 {
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @print_array() #0 {
entry:
%i = alloca i32, align 4
%j = alloca i32, align 4
@@ -111,12 +111,12 @@ for.cond1: ; preds = %for.inc, %for.body
for.body3: ; preds = %for.cond1
%2 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
- %3 = load i32, i32* %j, align 4
+ %3 = load i32, i32* %i, align 4
%idxprom = sext i32 %3 to i64
- %4 = load i32, i32* %i, align 4
+ %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom
+ %4 = load i32, i32* %j, align 4
%idxprom4 = sext i32 %4 to i64
- %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4
- %arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
+ %arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom4
%5 = load float, float* %arrayidx5, align 4
%conv = fpext float %5 to double
%call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), double %conv)
@@ -154,10 +154,10 @@ for.end12: ; preds = %for.cond
ret void
}
-declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
+declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
-; Function Attrs: nounwind uwtable
-define i32 @main() #0 {
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main() #0 {
entry:
%retval = alloca i32, align 4
%i = alloca i32, align 4
@@ -185,12 +185,12 @@ for.cond1: ; preds = %for.inc25, %for.bod
br i1 %cmp2, label %for.body3, label %for.end27
for.body3: ; preds = %for.cond1
- %2 = load i32, i32* %j, align 4
+ %2 = load i32, i32* %i, align 4
%idxprom = sext i32 %2 to i64
- %3 = load i32, i32* %i, align 4
+ %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom
+ %3 = load i32, i32* %j, align 4
%idxprom4 = sext i32 %3 to i64
- %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4
- %arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
+ %arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom4
store float 0.000000e+00, float* %arrayidx5, align 4
store i32 0, i32* %k, align 4
br label %for.cond6
@@ -201,35 +201,35 @@ for.cond6: ; preds = %for.inc, %for.body3
br i1 %cmp7, label %for.body8, label %for.end
for.body8: ; preds = %for.cond6
- %5 = load i32, i32* %j, align 4
+ %5 = load i32, i32* %i, align 4
%idxprom9 = sext i32 %5 to i64
- %6 = load i32, i32* %i, align 4
- %idxprom10 = sext i32 %6 to i64
- %arrayidx11 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom10
- %arrayidx12 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx11, i64 0, i64 %idxprom9
+ %arrayidx10 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom9
+ %6 = load i32, i32* %j, align 4
+ %idxprom11 = sext i32 %6 to i64
+ %arrayidx12 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx10, i64 0, i64 %idxprom11
%7 = load float, float* %arrayidx12, align 4
- %8 = load i32, i32* %k, align 4
+ %8 = load i32, i32* %i, align 4
%idxprom13 = sext i32 %8 to i64
- %9 = load i32, i32* %i, align 4
- %idxprom14 = sext i32 %9 to i64
- %arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom14
- %arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx15, i64 0, i64 %idxprom13
+ %arrayidx14 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %idxprom13
+ %9 = load i32, i32* %k, align 4
+ %idxprom15 = sext i32 %9 to i64
+ %arrayidx16 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx14, i64 0, i64 %idxprom15
%10 = load float, float* %arrayidx16, align 4
- %11 = load i32, i32* %j, align 4
+ %11 = load i32, i32* %k, align 4
%idxprom17 = sext i32 %11 to i64
- %12 = load i32, i32* %k, align 4
- %idxprom18 = sext i32 %12 to i64
- %arrayidx19 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom18
- %arrayidx20 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx19, i64 0, i64 %idxprom17
+ %arrayidx18 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %idxprom17
+ %12 = load i32, i32* %j, align 4
+ %idxprom19 = sext i32 %12 to i64
+ %arrayidx20 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx18, i64 0, i64 %idxprom19
%13 = load float, float* %arrayidx20, align 4
%mul = fmul float %10, %13
%add = fadd float %7, %mul
- %14 = load i32, i32* %j, align 4
+ %14 = load i32, i32* %i, align 4
%idxprom21 = sext i32 %14 to i64
- %15 = load i32, i32* %i, align 4
- %idxprom22 = sext i32 %15 to i64
- %arrayidx23 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom22
- %arrayidx24 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx23, i64 0, i64 %idxprom21
+ %arrayidx22 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom21
+ %15 = load i32, i32* %j, align 4
+ %idxprom23 = sext i32 %15 to i64
+ %arrayidx24 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx22, i64 0, i64 %idxprom23
store float %add, float* %arrayidx24, align 4
br label %for.inc
@@ -261,9 +261,11 @@ for.end30: ; preds = %for.cond
ret i32 0
}
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-!llvm.ident = !{!0}
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
-!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"}
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"}
diff --git a/polly/docs/experiments/matmul/matmul.normalopt.exe b/polly/docs/experiments/matmul/matmul.normalopt.exe
deleted file mode 100755
index cdb9e67af45..00000000000
--- a/polly/docs/experiments/matmul/matmul.normalopt.exe
+++ /dev/null
Binary files differ
diff --git a/polly/docs/experiments/matmul/matmul.normalopt.ll b/polly/docs/experiments/matmul/matmul.normalopt.ll
index ba792c29f70..8d8a4aa287a 100644
--- a/polly/docs/experiments/matmul/matmul.normalopt.ll
+++ b/polly/docs/experiments/matmul/matmul.normalopt.ll
Binary files differ
diff --git a/polly/docs/experiments/matmul/matmul.normalopt.s b/polly/docs/experiments/matmul/matmul.normalopt.s
index 079af702a14..ec4c7b256b9 100644
--- a/polly/docs/experiments/matmul/matmul.normalopt.s
+++ b/polly/docs/experiments/matmul/matmul.normalopt.s
@@ -1,263 +1,235 @@
- .file "matmul.normalopt.ll"
+ .text
+ .file "matmul.c"
.section .rodata.cst8,"aM",@progbits,8
- .align 8
+ .p2align 3 # -- Begin function init_array
.LCPI0_0:
.quad 4602678819172646912 # double 0.5
.text
.globl init_array
- .align 16, 0x90
+ .p2align 4, 0x90
.type init_array,@function
init_array: # @init_array
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp2:
.cfi_def_cfa_offset 16
-.Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp4:
.cfi_def_cfa_register %rbp
+ leaq B(%rip), %rax
+ leaq A(%rip), %rcx
xorl %r8d, %r8d
- vmovsd .LCPI0_0(%rip), %xmm0
- .align 16, 0x90
+ movsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero
+ xorl %r9d, %r9d
+ .p2align 4, 0x90
.LBB0_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
- xorl %ecx, %ecx
- .align 16, 0x90
+ movl $1, %edi
+ xorl %edx, %edx
+ .p2align 4, 0x90
.LBB0_2: # %for.body3
# Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
- movl %ecx, %edx
- imull %r8d, %edx
movl %edx, %esi
- sarl $31, %esi
- shrl $22, %esi
- addl %edx, %esi
- andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
- negl %esi
- movq %r8, %rax
- shlq $11, %rax
- leal 1(%rdx,%rsi), %edi
- leaq (%rax,%rax,2), %rsi
- leaq 1(%rcx), %rdx
- cmpq $1536, %rdx # imm = 0x600
- vcvtsi2sdl %edi, %xmm0, %xmm1
- vmulsd %xmm0, %xmm1, %xmm1
- vcvtsd2ss %xmm1, %xmm1, %xmm1
- vmovss %xmm1, A(%rsi,%rcx,4)
- vmovss %xmm1, B(%rsi,%rcx,4)
- movq %rdx, %rcx
+ andl $1022, %esi # imm = 0x3FE
+ orl $1, %esi
+ xorps %xmm1, %xmm1
+ cvtsi2sdl %esi, %xmm1
+ mulsd %xmm0, %xmm1
+ cvtsd2ss %xmm1, %xmm1
+ movss %xmm1, -4(%rcx,%rdi,4)
+ movss %xmm1, -4(%rax,%rdi,4)
+ leal (%r9,%rdx), %esi
+ andl $1023, %esi # imm = 0x3FF
+ addl $1, %esi
+ xorps %xmm1, %xmm1
+ cvtsi2sdl %esi, %xmm1
+ mulsd %xmm0, %xmm1
+ cvtsd2ss %xmm1, %xmm1
+ movss %xmm1, (%rcx,%rdi,4)
+ movss %xmm1, (%rax,%rdi,4)
+ addq $2, %rdi
+ addl %r8d, %edx
+ cmpq $1537, %rdi # imm = 0x601
jne .LBB0_2
-# BB#3: # %for.inc17
+# %bb.3: # %for.inc17
# in Loop: Header=BB0_1 Depth=1
- incq %r8
- cmpq $1536, %r8 # imm = 0x600
+ addq $1, %r9
+ addq $6144, %rax # imm = 0x1800
+ addq $6144, %rcx # imm = 0x1800
+ addl $2, %r8d
+ cmpq $1536, %r9 # imm = 0x600
jne .LBB0_1
-# BB#4: # %for.end19
+# %bb.4: # %for.end19
popq %rbp
- ret
-.Ltmp5:
- .size init_array, .Ltmp5-init_array
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end0:
+ .size init_array, .Lfunc_end0-init_array
.cfi_endproc
-
- .globl print_array
- .align 16, 0x90
+ # -- End function
+ .globl print_array # -- Begin function print_array
+ .p2align 4, 0x90
.type print_array,@function
print_array: # @print_array
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp9:
.cfi_def_cfa_offset 16
-.Ltmp10:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp11:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
+ pushq %r13
pushq %r12
pushq %rbx
-.Ltmp12:
- .cfi_offset %rbx, -48
-.Ltmp13:
- .cfi_offset %r12, -40
-.Ltmp14:
+ pushq %rax
+ .cfi_offset %rbx, -56
+ .cfi_offset %r12, -48
+ .cfi_offset %r13, -40
.cfi_offset %r14, -32
-.Ltmp15:
.cfi_offset %r15, -24
- xorl %r14d, %r14d
- movl $C, %r15d
- .align 16, 0x90
+ leaq C(%rip), %r13
+ xorl %eax, %eax
+ movl $3435973837, %r12d # imm = 0xCCCCCCCD
+ leaq .L.str(%rip), %r14
+ .p2align 4, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
- movq stdout(%rip), %rax
- movq %r15, %r12
+ movq %rax, -48(%rbp) # 8-byte Spill
+ movq stdout(%rip), %rsi
xorl %ebx, %ebx
- .align 16, 0x90
+ .p2align 4, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
- vmovss (%r12), %xmm0
- vcvtss2sd %xmm0, %xmm0, %xmm0
- movq %rax, %rdi
- movl $.L.str, %esi
+ movl %ebx, %eax
+ imulq %r12, %rax
+ shrq $38, %rax
+ leal (%rax,%rax,4), %r15d
+ shll $4, %r15d
+ addl $79, %r15d
+ movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
+ cvtss2sd %xmm0, %xmm0
movb $1, %al
+ movq %rsi, %rdi
+ movq %r14, %rsi
callq fprintf
- movslq %ebx, %rax
- imulq $1717986919, %rax, %rcx # imm = 0x66666667
- movq %rcx, %rdx
- shrq $63, %rdx
- sarq $37, %rcx
- addl %edx, %ecx
- imull $80, %ecx, %ecx
- subl %ecx, %eax
- cmpl $79, %eax
+ cmpl %ebx, %r15d
jne .LBB1_4
-# BB#3: # %if.then
+# %bb.3: # %if.then
# in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
- callq fputc
+ callq fputc@PLT
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
- addq $4, %r12
- incq %rbx
- movq stdout(%rip), %rax
+ addq $1, %rbx
+ movq stdout(%rip), %rsi
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
-# BB#5: # %for.end
+# %bb.5: # %for.end
# in Loop: Header=BB1_1 Depth=1
movl $10, %edi
- movq %rax, %rsi
- callq fputc
- addq $6144, %r15 # imm = 0x1800
- incq %r14
- cmpq $1536, %r14 # imm = 0x600
+ callq fputc@PLT
+ movq -48(%rbp), %rax # 8-byte Reload
+ addq $1, %rax
+ addq $6144, %r13 # imm = 0x1800
+ cmpq $1536, %rax # imm = 0x600
jne .LBB1_1
-# BB#6: # %for.end12
+# %bb.6: # %for.end12
+ addq $8, %rsp
popq %rbx
popq %r12
+ popq %r13
popq %r14
popq %r15
popq %rbp
- ret
-.Ltmp16:
- .size print_array, .Ltmp16-print_array
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end1:
+ .size print_array, .Lfunc_end1-print_array
.cfi_endproc
-
- .section .rodata.cst8,"aM",@progbits,8
- .align 8
-.LCPI2_0:
- .quad 4602678819172646912 # double 0.5
- .text
- .globl main
- .align 16, 0x90
+ # -- End function
+ .globl main # -- Begin function main
+ .p2align 4, 0x90
.type main,@function
main: # @main
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp19:
.cfi_def_cfa_offset 16
-.Ltmp20:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp21:
.cfi_def_cfa_register %rbp
- xorl %r8d, %r8d
- vmovsd .LCPI2_0(%rip), %xmm0
- .align 16, 0x90
-.LBB2_1: # %for.cond1.preheader.i
+ callq init_array
+ leaq A(%rip), %rax
+ xorl %r10d, %r10d
+ leaq B(%rip), %r8
+ leaq C(%rip), %r9
+ .p2align 4, 0x90
+.LBB2_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
- xorl %ecx, %ecx
- .align 16, 0x90
-.LBB2_2: # %for.body3.i
+ # Child Loop BB2_3 Depth 3
+ movq %r8, %rsi
+ xorl %edx, %edx
+ .p2align 4, 0x90
+.LBB2_2: # %for.body3
# Parent Loop BB2_1 Depth=1
- # => This Inner Loop Header: Depth=2
- movl %ecx, %edx
- imull %r8d, %edx
- movl %edx, %esi
- sarl $31, %esi
- shrl $22, %esi
- addl %edx, %esi
- andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
- negl %esi
- movq %r8, %rax
- shlq $11, %rax
- leal 1(%rdx,%rsi), %edi
- leaq (%rax,%rax,2), %rsi
- leaq 1(%rcx), %rdx
+ # => This Loop Header: Depth=2
+ # Child Loop BB2_3 Depth 3
+ leaq (%r10,%r10,2), %rcx
+ shlq $11, %rcx
+ addq %r9, %rcx
+ leaq (%rcx,%rdx,4), %r11
+ movl $0, (%rcx,%rdx,4)
+ xorps %xmm0, %xmm0
+ movl $2, %ecx
+ movq %rsi, %rdi
+ .p2align 4, 0x90
+.LBB2_3: # %for.body8
+ # Parent Loop BB2_1 Depth=1
+ # Parent Loop BB2_2 Depth=2
+ # => This Inner Loop Header: Depth=3
+ movss -8(%rax,%rcx,4), %xmm1 # xmm1 = mem[0],zero,zero,zero
+ mulss (%rdi), %xmm1
+ movss -4(%rax,%rcx,4), %xmm2 # xmm2 = mem[0],zero,zero,zero
+ addss %xmm0, %xmm1
+ mulss 6144(%rdi), %xmm2
+ addss %xmm1, %xmm2
+ movss (%rax,%rcx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
+ mulss 12288(%rdi), %xmm0
+ addss %xmm2, %xmm0
+ addq $3, %rcx
+ addq $18432, %rdi # imm = 0x4800
+ cmpq $1538, %rcx # imm = 0x602
+ jne .LBB2_3
+# %bb.4: # %for.inc25
+ # in Loop: Header=BB2_2 Depth=2
+ movss %xmm0, (%r11)
+ addq $1, %rdx
+ addq $4, %rsi
cmpq $1536, %rdx # imm = 0x600
- vcvtsi2sdl %edi, %xmm0, %xmm1
- vmulsd %xmm0, %xmm1, %xmm1
- vcvtsd2ss %xmm1, %xmm1, %xmm1
- vmovss %xmm1, A(%rsi,%rcx,4)
- vmovss %xmm1, B(%rsi,%rcx,4)
- movq %rdx, %rcx
jne .LBB2_2
-# BB#3: # %for.inc17.i
+# %bb.5: # %for.inc28
# in Loop: Header=BB2_1 Depth=1
- incq %r8
- cmpq $1536, %r8 # imm = 0x600
- jne .LBB2_1
-# BB#4:
- xorl %r8d, %r8d
- movl $A, %r9d
- .align 16, 0x90
-.LBB2_5: # %for.cond1.preheader
- # =>This Loop Header: Depth=1
- # Child Loop BB2_6 Depth 2
- # Child Loop BB2_7 Depth 3
- leaq (%r8,%r8,2), %rdx
- shlq $11, %rdx
- leaq C(%rdx), %rsi
- xorl %edi, %edi
- .align 16, 0x90
-.LBB2_6: # %for.body3
- # Parent Loop BB2_5 Depth=1
- # => This Loop Header: Depth=2
- # Child Loop BB2_7 Depth 3
- movl $0, (%rsi)
- vxorps %xmm0, %xmm0, %xmm0
- movq $-9437184, %rax # imm = 0xFFFFFFFFFF700000
- movq %r9, %rcx
- .align 16, 0x90
-.LBB2_7: # %for.body8
- # Parent Loop BB2_5 Depth=1
- # Parent Loop BB2_6 Depth=2
- # => This Inner Loop Header: Depth=3
- vmovss (%rcx), %xmm1
- vmulss B+9437184(%rax,%rdi,4), %xmm1, %xmm1
- vaddss %xmm1, %xmm0, %xmm0
- addq $4, %rcx
+ addq $1, %r10
addq $6144, %rax # imm = 0x1800
- jne .LBB2_7
-# BB#8: # %for.inc25
- # in Loop: Header=BB2_6 Depth=2
- vmovss %xmm0, (%rsi)
- leaq C+4(%rdx,%rdi,4), %rsi
- incq %rdi
- cmpq $1536, %rdi # imm = 0x600
- jne .LBB2_6
-# BB#9: # %for.inc28
- # in Loop: Header=BB2_5 Depth=1
- addq $6144, %r9 # imm = 0x1800
- incq %r8
- cmpq $1536, %r8 # imm = 0x600
- jne .LBB2_5
-# BB#10: # %for.end30
+ cmpq $1536, %r10 # imm = 0x600
+ jne .LBB2_1
+# %bb.6: # %for.end30
xorl %eax, %eax
popq %rbp
- ret
-.Ltmp22:
- .size main, .Ltmp22-main
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end2:
+ .size main, .Lfunc_end2-main
.cfi_endproc
-
+ # -- End function
.type A,@object # @A
.comm A,9437184,16
.type B,@object # @B
@@ -265,10 +237,11 @@ main: # @main
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
- .asciz "%lf "
+ .asciz "%lf "
.size .L.str, 5
.type C,@object # @C
.comm C,9437184,16
+ .ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
.section ".note.GNU-stack","",@progbits
diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.exe b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.exe
deleted file mode 100755
index feb24366d73..00000000000
--- a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.exe
+++ /dev/null
Binary files differ
diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll
index 593794ef380..169f9405bc7 100644
--- a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll
+++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll
Binary files differ
diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s
index ca87de11704..4e89fd6ea4d 100644
--- a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s
+++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s
@@ -1,743 +1,852 @@
- .file "matmul.polly.interchanged+tiled+vector+openmp.ll"
- .section .rodata.cst8,"aM",@progbits,8
- .align 8
-.LCPI0_0:
- .quad 4602678819172646912 # double 0.5
.text
- .globl init_array
- .align 16, 0x90
+ .file "matmul.c"
+ .globl init_array # -- Begin function init_array
+ .p2align 4, 0x90
.type init_array,@function
init_array: # @init_array
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp3:
.cfi_def_cfa_offset 16
-.Ltmp4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp5:
.cfi_def_cfa_register %rbp
- pushq %r15
- pushq %r14
pushq %rbx
- subq $24, %rsp
-.Ltmp6:
- .cfi_offset %rbx, -40
-.Ltmp7:
- .cfi_offset %r14, -32
-.Ltmp8:
- .cfi_offset %r15, -24
- leaq -32(%rbp), %rsi
- movl $init_array.omp_subfn, %edi
+ pushq %rax
+ .cfi_offset %rbx, -24
+ leaq init_array_polly_subfn(%rip), %rdi
+ leaq -16(%rbp), %rbx
xorl %edx, %edx
xorl %ecx, %ecx
movl $1536, %r8d # imm = 0x600
movl $1, %r9d
- callq GOMP_parallel_loop_runtime_start
- leaq -40(%rbp), %rdi
- leaq -48(%rbp), %rsi
- callq GOMP_loop_runtime_next
- testb %al, %al
- je .LBB0_4
-# BB#1:
- leaq -40(%rbp), %r14
- leaq -48(%rbp), %r15
- vmovsd .LCPI0_0(%rip), %xmm1
- .align 16, 0x90
-.LBB0_2: # %omp.loadIVBounds.i
- # =>This Loop Header: Depth=1
- # Child Loop BB0_8 Depth 2
- # Child Loop BB0_5 Depth 3
- movq -48(%rbp), %r8
- leaq -1(%r8), %rcx
- movq -40(%rbp), %rax
- cmpq %rcx, %rax
- jg .LBB0_3
-# BB#7: # %polly.loop_preheader4.preheader.i
- # in Loop: Header=BB0_2 Depth=1
- addq $-2, %r8
- .align 16, 0x90
-.LBB0_8: # %polly.loop_preheader4.i
- # Parent Loop BB0_2 Depth=1
- # => This Loop Header: Depth=2
- # Child Loop BB0_5 Depth 3
- xorl %edx, %edx
- .align 16, 0x90
-.LBB0_5: # %polly.loop_header3.i
- # Parent Loop BB0_2 Depth=1
- # Parent Loop BB0_8 Depth=2
- # => This Inner Loop Header: Depth=3
- movl %edx, %esi
- imull %eax, %esi
- movl %esi, %edi
- sarl $31, %edi
- shrl $22, %edi
- addl %esi, %edi
- andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
- negl %edi
- movq %rax, %rcx
- shlq $11, %rcx
- leal 1(%rsi,%rdi), %ebx
- leaq (%rcx,%rcx,2), %rdi
- leaq 1(%rdx), %rsi
- cmpq $1536, %rsi # imm = 0x600
- vcvtsi2sdl %ebx, %xmm0, %xmm0
- vmulsd %xmm1, %xmm0, %xmm0
- vcvtsd2ss %xmm0, %xmm0, %xmm0
- vmovss %xmm0, A(%rdi,%rdx,4)
- vmovss %xmm0, B(%rdi,%rdx,4)
- movq %rsi, %rdx
- jne .LBB0_5
-# BB#6: # %polly.loop_exit5.i
- # in Loop: Header=BB0_8 Depth=2
- cmpq %r8, %rax
- leaq 1(%rax), %rax
- jle .LBB0_8
-.LBB0_3: # %omp.checkNext.backedge.i
- # in Loop: Header=BB0_2 Depth=1
- movq %r14, %rdi
- movq %r15, %rsi
- callq GOMP_loop_runtime_next
- vmovsd .LCPI0_0(%rip), %xmm1
- testb %al, %al
- jne .LBB0_2
-.LBB0_4: # %init_array.omp_subfn.exit
- callq GOMP_loop_end_nowait
- callq GOMP_parallel_end
- addq $24, %rsp
+ movq %rbx, %rsi
+ callq GOMP_parallel_loop_runtime_start@PLT
+ movq %rbx, %rdi
+ callq init_array_polly_subfn
+ callq GOMP_parallel_end@PLT
+ addq $8, %rsp
popq %rbx
- popq %r14
- popq %r15
popq %rbp
- ret
-.Ltmp9:
- .size init_array, .Ltmp9-init_array
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end0:
+ .size init_array, .Lfunc_end0-init_array
.cfi_endproc
-
- .globl print_array
- .align 16, 0x90
+ # -- End function
+ .globl print_array # -- Begin function print_array
+ .p2align 4, 0x90
.type print_array,@function
print_array: # @print_array
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp13:
.cfi_def_cfa_offset 16
-.Ltmp14:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp15:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
+ pushq %r13
pushq %r12
pushq %rbx
-.Ltmp16:
- .cfi_offset %rbx, -48
-.Ltmp17:
- .cfi_offset %r12, -40
-.Ltmp18:
+ pushq %rax
+ .cfi_offset %rbx, -56
+ .cfi_offset %r12, -48
+ .cfi_offset %r13, -40
.cfi_offset %r14, -32
-.Ltmp19:
.cfi_offset %r15, -24
- xorl %r14d, %r14d
- movl $C, %r15d
- .align 16, 0x90
+ leaq C(%rip), %r13
+ xorl %eax, %eax
+ movl $3435973837, %r12d # imm = 0xCCCCCCCD
+ leaq .L.str(%rip), %r14
+ .p2align 4, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
- movq stdout(%rip), %rax
- movq %r15, %r12
+ movq %rax, -48(%rbp) # 8-byte Spill
+ movq stdout(%rip), %rsi
xorl %ebx, %ebx
- .align 16, 0x90
+ .p2align 4, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
- vmovss (%r12), %xmm0
- vcvtss2sd %xmm0, %xmm0, %xmm0
- movq %rax, %rdi
- movl $.L.str, %esi
+ movl %ebx, %eax
+ imulq %r12, %rax
+ shrq $38, %rax
+ leal (%rax,%rax,4), %r15d
+ shll $4, %r15d
+ addl $79, %r15d
+ movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
+ cvtss2sd %xmm0, %xmm0
movb $1, %al
+ movq %rsi, %rdi
+ movq %r14, %rsi
callq fprintf
- movslq %ebx, %rax
- imulq $1717986919, %rax, %rcx # imm = 0x66666667
- movq %rcx, %rdx
- shrq $63, %rdx
- sarq $37, %rcx
- addl %edx, %ecx
- imull $80, %ecx, %ecx
- subl %ecx, %eax
- cmpl $79, %eax
+ cmpl %ebx, %r15d
jne .LBB1_4
-# BB#3: # %if.then
+# %bb.3: # %if.then
# in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
- callq fputc
+ callq fputc@PLT
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
- addq $4, %r12
- incq %rbx
- movq stdout(%rip), %rax
+ addq $1, %rbx
+ movq stdout(%rip), %rsi
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
-# BB#5: # %for.end
+# %bb.5: # %for.end
# in Loop: Header=BB1_1 Depth=1
movl $10, %edi
- movq %rax, %rsi
- callq fputc
- addq $6144, %r15 # imm = 0x1800
- incq %r14
- cmpq $1536, %r14 # imm = 0x600
+ callq fputc@PLT
+ movq -48(%rbp), %rax # 8-byte Reload
+ addq $1, %rax
+ addq $6144, %r13 # imm = 0x1800
+ cmpq $1536, %rax # imm = 0x600
jne .LBB1_1
-# BB#6: # %for.end12
+# %bb.6: # %for.end12
+ addq $8, %rsp
popq %rbx
popq %r12
+ popq %r13
popq %r14
popq %r15
popq %rbp
- ret
-.Ltmp20:
- .size print_array, .Ltmp20-print_array
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end1:
+ .size print_array, .Lfunc_end1-print_array
.cfi_endproc
-
- .globl main
- .align 16, 0x90
+ # -- End function
+ .globl main # -- Begin function main
+ .p2align 4, 0x90
.type main,@function
main: # @main
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp24:
.cfi_def_cfa_offset 16
-.Ltmp25:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp26:
.cfi_def_cfa_register %rbp
- pushq %r15
- pushq %r14
- pushq %r13
- pushq %r12
pushq %rbx
- subq $24, %rsp
-.Ltmp27:
- .cfi_offset %rbx, -56
-.Ltmp28:
- .cfi_offset %r12, -48
-.Ltmp29:
- .cfi_offset %r13, -40
-.Ltmp30:
- .cfi_offset %r14, -32
-.Ltmp31:
- .cfi_offset %r15, -24
+ pushq %rax
+ .cfi_offset %rbx, -24
callq init_array
- leaq -48(%rbp), %rsi
- movl $main.omp_subfn, %edi
+ leaq main_polly_subfn(%rip), %rdi
+ leaq -16(%rbp), %rbx
xorl %edx, %edx
xorl %ecx, %ecx
movl $1536, %r8d # imm = 0x600
movl $1, %r9d
- callq GOMP_parallel_loop_runtime_start
- leaq -56(%rbp), %rdi
- leaq -64(%rbp), %rsi
- callq GOMP_loop_runtime_next
- testb %al, %al
- je .LBB2_4
-# BB#1:
- leaq -56(%rbp), %r14
- leaq -64(%rbp), %r15
- .align 16, 0x90
-.LBB2_2: # %omp.loadIVBounds.i
- # =>This Loop Header: Depth=1
- # Child Loop BB2_6 Depth 2
- movq -64(%rbp), %r12
- leaq -1(%r12), %rcx
- movq -56(%rbp), %rax
- cmpq %rcx, %rax
- jg .LBB2_3
-# BB#5: # %polly.loop_preheader4.preheader.i
- # in Loop: Header=BB2_2 Depth=1
- addq $-2, %r12
- leaq (%rax,%rax,2), %rcx
- leaq -1(%rax), %r13
- shlq $11, %rcx
- leaq C(%rcx), %rbx
- .align 16, 0x90
-.LBB2_6: # %polly.loop_preheader4.i
- # Parent Loop BB2_2 Depth=1
- # => This Inner Loop Header: Depth=2
- movq %rbx, %rdi
- xorl %esi, %esi
- movl $6144, %edx # imm = 0x1800
- callq memset
- addq $6144, %rbx # imm = 0x1800
- incq %r13
- cmpq %r12, %r13
- jle .LBB2_6
-.LBB2_3: # %omp.checkNext.backedge.i
- # in Loop: Header=BB2_2 Depth=1
- movq %r14, %rdi
- movq %r15, %rsi
- callq GOMP_loop_runtime_next
- testb %al, %al
- jne .LBB2_2
-.LBB2_4: # %main.omp_subfn.exit
- callq GOMP_loop_end_nowait
- callq GOMP_parallel_end
- leaq -48(%rbp), %rbx
- movl $main.omp_subfn1, %edi
movq %rbx, %rsi
+ callq GOMP_parallel_loop_runtime_start@PLT
+ movq %rbx, %rdi
+ callq main_polly_subfn
+ callq GOMP_parallel_end@PLT
+ leaq main_polly_subfn_1(%rip), %rdi
xorl %edx, %edx
xorl %ecx, %ecx
movl $1536, %r8d # imm = 0x600
movl $64, %r9d
- callq GOMP_parallel_loop_runtime_start
+ movq %rbx, %rsi
+ callq GOMP_parallel_loop_runtime_start@PLT
movq %rbx, %rdi
- callq main.omp_subfn1
- callq GOMP_parallel_end
+ callq main_polly_subfn_1
+ callq GOMP_parallel_end@PLT
xorl %eax, %eax
- addq $24, %rsp
+ addq $8, %rsp
popq %rbx
- popq %r12
- popq %r13
- popq %r14
- popq %r15
popq %rbp
- ret
-.Ltmp32:
- .size main, .Ltmp32-main
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end2:
+ .size main, .Lfunc_end2-main
.cfi_endproc
-
+ # -- End function
.section .rodata.cst8,"aM",@progbits,8
- .align 8
+ .p2align 3 # -- Begin function init_array_polly_subfn
.LCPI3_0:
.quad 4602678819172646912 # double 0.5
.text
- .align 16, 0x90
- .type init_array.omp_subfn,@function
-init_array.omp_subfn: # @init_array.omp_subfn
+ .p2align 4, 0x90
+ .type init_array_polly_subfn,@function
+init_array_polly_subfn: # @init_array_polly_subfn
.cfi_startproc
-# BB#0: # %omp.setup
- pushq %rbp
-.Ltmp36:
- .cfi_def_cfa_offset 16
-.Ltmp37:
- .cfi_offset %rbp, -16
- movq %rsp, %rbp
-.Ltmp38:
- .cfi_def_cfa_register %rbp
+# %bb.0: # %polly.par.setup
pushq %r15
+ .cfi_def_cfa_offset 16
pushq %r14
+ .cfi_def_cfa_offset 24
+ pushq %r13
+ .cfi_def_cfa_offset 32
+ pushq %r12
+ .cfi_def_cfa_offset 40
pushq %rbx
- subq $24, %rsp
-.Ltmp39:
- .cfi_offset %rbx, -40
-.Ltmp40:
- .cfi_offset %r14, -32
-.Ltmp41:
- .cfi_offset %r15, -24
- leaq -32(%rbp), %rdi
- leaq -40(%rbp), %rsi
- callq GOMP_loop_runtime_next
+ .cfi_def_cfa_offset 48
+ subq $16, %rsp
+ .cfi_def_cfa_offset 64
+ .cfi_offset %rbx, -48
+ .cfi_offset %r12, -40
+ .cfi_offset %r13, -32
+ .cfi_offset %r14, -24
+ .cfi_offset %r15, -16
+ leaq 8(%rsp), %rdi
+ movq %rsp, %rsi
+ callq GOMP_loop_runtime_next@PLT
testb %al, %al
- je .LBB3_4
-# BB#1:
- leaq -32(%rbp), %r14
- leaq -40(%rbp), %r15
- vmovsd .LCPI3_0(%rip), %xmm1
- .align 16, 0x90
-.LBB3_2: # %omp.loadIVBounds
+ je .LBB3_2
+# %bb.1:
+ leaq B(%rip), %r15
+ leaq A(%rip), %r12
+ movsd .LCPI3_0(%rip), %xmm1 # xmm1 = mem[0],zero
+ leaq 8(%rsp), %r14
+ movq %rsp, %r13
+ .p2align 4, 0x90
+.LBB3_4: # %polly.par.loadIVBounds
# =>This Loop Header: Depth=1
- # Child Loop BB3_8 Depth 2
- # Child Loop BB3_5 Depth 3
- movq -40(%rbp), %r8
- leaq -1(%r8), %rcx
- movq -32(%rbp), %rax
- cmpq %rcx, %rax
- jg .LBB3_3
-# BB#7: # %polly.loop_preheader4.preheader
- # in Loop: Header=BB3_2 Depth=1
- addq $-2, %r8
- .align 16, 0x90
-.LBB3_8: # %polly.loop_preheader4
- # Parent Loop BB3_2 Depth=1
+ # Child Loop BB3_5 Depth 2
+ # Child Loop BB3_6 Depth 3
+ movq 8(%rsp), %rax
+ movq (%rsp), %r8
+ decq %r8
+ movq %rax, %rdx
+ shlq $11, %rdx
+ leaq (%rdx,%rdx,2), %rdx
+ leaq (%r15,%rdx), %rsi
+ addq %r12, %rdx
+ .p2align 4, 0x90
+.LBB3_5: # %polly.loop_header
+ # Parent Loop BB3_4 Depth=1
# => This Loop Header: Depth=2
- # Child Loop BB3_5 Depth 3
- xorl %edx, %edx
- .align 16, 0x90
-.LBB3_5: # %polly.loop_header3
- # Parent Loop BB3_2 Depth=1
- # Parent Loop BB3_8 Depth=2
+ # Child Loop BB3_6 Depth 3
+ movq $-6144, %rdi # imm = 0xE800
+ xorl %ecx, %ecx
+ .p2align 4, 0x90
+.LBB3_6: # %polly.loop_header2
+ # Parent Loop BB3_4 Depth=1
+ # Parent Loop BB3_5 Depth=2
# => This Inner Loop Header: Depth=3
- movl %edx, %esi
- imull %eax, %esi
- movl %esi, %edi
- sarl $31, %edi
- shrl $22, %edi
- addl %esi, %edi
- andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
- negl %edi
- movq %rax, %rcx
- shlq $11, %rcx
- leal 1(%rsi,%rdi), %ebx
- leaq (%rcx,%rcx,2), %rdi
- leaq 1(%rdx), %rsi
- cmpq $1536, %rsi # imm = 0x600
- vcvtsi2sdl %ebx, %xmm0, %xmm0
- vmulsd %xmm1, %xmm0, %xmm0
- vcvtsd2ss %xmm0, %xmm0, %xmm0
- vmovss %xmm0, A(%rdi,%rdx,4)
- vmovss %xmm0, B(%rdi,%rdx,4)
- movq %rsi, %rdx
- jne .LBB3_5
-# BB#6: # %polly.loop_exit5
- # in Loop: Header=BB3_8 Depth=2
+ movl %ecx, %ebx
+ andl $1023, %ebx # imm = 0x3FF
+ incl %ebx
+ xorps %xmm0, %xmm0
+ cvtsi2sdl %ebx, %xmm0
+ mulsd %xmm1, %xmm0
+ cvtsd2ss %xmm0, %xmm0
+ movss %xmm0, 6144(%rdx,%rdi)
+ movss %xmm0, 6144(%rsi,%rdi)
+ addl %eax, %ecx
+ addq $4, %rdi
+ jne .LBB3_6
+# %bb.7: # %polly.loop_exit4
+ # in Loop: Header=BB3_5 Depth=2
+ addq $6144, %rsi # imm = 0x1800
+ addq $6144, %rdx # imm = 0x1800
cmpq %r8, %rax
leaq 1(%rax), %rax
- jle .LBB3_8
-.LBB3_3: # %omp.checkNext.backedge
- # in Loop: Header=BB3_2 Depth=1
+ jl .LBB3_5
+# %bb.3: # %polly.par.checkNext.loopexit
+ # in Loop: Header=BB3_4 Depth=1
movq %r14, %rdi
- movq %r15, %rsi
- callq GOMP_loop_runtime_next
- vmovsd .LCPI3_0(%rip), %xmm1
+ movq %r13, %rsi
+ callq GOMP_loop_runtime_next@PLT
+ movsd .LCPI3_0(%rip), %xmm1 # xmm1 = mem[0],zero
testb %al, %al
- jne .LBB3_2
-.LBB3_4: # %omp.exit
- callq GOMP_loop_end_nowait
- addq $24, %rsp
+ jne .LBB3_4
+.LBB3_2: # %polly.par.exit
+ callq GOMP_loop_end_nowait@PLT
+ addq $16, %rsp
+ .cfi_def_cfa_offset 48
popq %rbx
+ .cfi_def_cfa_offset 40
+ popq %r12
+ .cfi_def_cfa_offset 32
+ popq %r13
+ .cfi_def_cfa_offset 24
popq %r14
+ .cfi_def_cfa_offset 16
popq %r15
- popq %rbp
- ret
-.Ltmp42:
- .size init_array.omp_subfn, .Ltmp42-init_array.omp_subfn
+ .cfi_def_cfa_offset 8
+ retq
+.Lfunc_end3:
+ .size init_array_polly_subfn, .Lfunc_end3-init_array_polly_subfn
.cfi_endproc
-
- .align 16, 0x90
- .type main.omp_subfn,@function
-main.omp_subfn: # @main.omp_subfn
+ # -- End function
+ .p2align 4, 0x90 # -- Begin function main_polly_subfn
+ .type main_polly_subfn,@function
+main_polly_subfn: # @main_polly_subfn
.cfi_startproc
-# BB#0: # %omp.setup
- pushq %rbp
-.Ltmp46:
- .cfi_def_cfa_offset 16
-.Ltmp47:
- .cfi_offset %rbp, -16
- movq %rsp, %rbp
-.Ltmp48:
- .cfi_def_cfa_register %rbp
+# %bb.0: # %polly.par.setup
pushq %r15
+ .cfi_def_cfa_offset 16
pushq %r14
- pushq %r13
- pushq %r12
+ .cfi_def_cfa_offset 24
pushq %rbx
- subq $24, %rsp
-.Ltmp49:
- .cfi_offset %rbx, -56
-.Ltmp50:
- .cfi_offset %r12, -48
-.Ltmp51:
- .cfi_offset %r13, -40
-.Ltmp52:
- .cfi_offset %r14, -32
-.Ltmp53:
- .cfi_offset %r15, -24
- leaq -48(%rbp), %rdi
- leaq -56(%rbp), %rsi
- callq GOMP_loop_runtime_next
+ .cfi_def_cfa_offset 32
+ subq $16, %rsp
+ .cfi_def_cfa_offset 48
+ .cfi_offset %rbx, -32
+ .cfi_offset %r14, -24
+ .cfi_offset %r15, -16
+ leaq 8(%rsp), %rdi
+ movq %rsp, %rsi
+ callq GOMP_loop_runtime_next@PLT
testb %al, %al
- je .LBB4_4
-# BB#1:
- leaq -48(%rbp), %r14
- leaq -56(%rbp), %r15
- .align 16, 0x90
-.LBB4_2: # %omp.loadIVBounds
- # =>This Loop Header: Depth=1
- # Child Loop BB4_6 Depth 2
- movq -56(%rbp), %r12
- leaq -1(%r12), %rcx
- movq -48(%rbp), %rax
+ je .LBB4_3
+# %bb.1:
+ leaq C(%rip), %r15
+ leaq 8(%rsp), %r14
+ movq %rsp, %rbx
+ .p2align 4, 0x90
+.LBB4_2: # %polly.par.loadIVBounds
+ # =>This Inner Loop Header: Depth=1
+ movq 8(%rsp), %rax
+ movq (%rsp), %rcx
+ decq %rcx
+ leaq (%rax,%rax,2), %rdi
+ shlq $11, %rdi
+ addq %r15, %rdi
cmpq %rcx, %rax
- jg .LBB4_3
-# BB#5: # %polly.loop_preheader4.preheader
- # in Loop: Header=BB4_2 Depth=1
- addq $-2, %r12
- leaq (%rax,%rax,2), %rcx
- leaq -1(%rax), %r13
+ cmovgeq %rax, %rcx
+ incq %rcx
+ subq %rax, %rcx
shlq $11, %rcx
- leaq C(%rcx), %rbx
- .align 16, 0x90
-.LBB4_6: # %polly.loop_preheader4
- # Parent Loop BB4_2 Depth=1
- # => This Inner Loop Header: Depth=2
- movq %rbx, %rdi
+ leaq (%rcx,%rcx,2), %rdx
xorl %esi, %esi
- movl $6144, %edx # imm = 0x1800
- callq memset
- addq $6144, %rbx # imm = 0x1800
- incq %r13
- cmpq %r12, %r13
- jle .LBB4_6
-.LBB4_3: # %omp.checkNext.backedge
- # in Loop: Header=BB4_2 Depth=1
+ callq memset@PLT
movq %r14, %rdi
- movq %r15, %rsi
- callq GOMP_loop_runtime_next
+ movq %rbx, %rsi
+ callq GOMP_loop_runtime_next@PLT
testb %al, %al
jne .LBB4_2
-.LBB4_4: # %omp.exit
- callq GOMP_loop_end_nowait
- addq $24, %rsp
+.LBB4_3: # %polly.par.exit
+ callq GOMP_loop_end_nowait@PLT
+ addq $16, %rsp
+ .cfi_def_cfa_offset 32
popq %rbx
- popq %r12
- popq %r13
+ .cfi_def_cfa_offset 24
popq %r14
+ .cfi_def_cfa_offset 16
popq %r15
- popq %rbp
- ret
-.Ltmp54:
- .size main.omp_subfn, .Ltmp54-main.omp_subfn
+ .cfi_def_cfa_offset 8
+ retq
+.Lfunc_end4:
+ .size main_polly_subfn, .Lfunc_end4-main_polly_subfn
.cfi_endproc
-
- .align 16, 0x90
- .type main.omp_subfn1,@function
-main.omp_subfn1: # @main.omp_subfn1
+ # -- End function
+ .p2align 4, 0x90 # -- Begin function main_polly_subfn_1
+ .type main_polly_subfn_1,@function
+main_polly_subfn_1: # @main_polly_subfn_1
.cfi_startproc
-# BB#0: # %omp.setup
+# %bb.0: # %polly.par.setup
pushq %rbp
-.Ltmp58:
.cfi_def_cfa_offset 16
-.Ltmp59:
- .cfi_offset %rbp, -16
- movq %rsp, %rbp
-.Ltmp60:
- .cfi_def_cfa_register %rbp
pushq %r15
+ .cfi_def_cfa_offset 24
pushq %r14
+ .cfi_def_cfa_offset 32
pushq %r13
+ .cfi_def_cfa_offset 40
pushq %r12
+ .cfi_def_cfa_offset 48
pushq %rbx
- subq $72, %rsp
-.Ltmp61:
+ .cfi_def_cfa_offset 56
+ subq $296, %rsp # imm = 0x128
+ .cfi_def_cfa_offset 352
.cfi_offset %rbx, -56
-.Ltmp62:
.cfi_offset %r12, -48
-.Ltmp63:
.cfi_offset %r13, -40
-.Ltmp64:
.cfi_offset %r14, -32
-.Ltmp65:
.cfi_offset %r15, -24
+ .cfi_offset %rbp, -16
jmp .LBB5_1
- .align 16, 0x90
-.LBB5_2: # %omp.loadIVBounds
- # in Loop: Header=BB5_1 Depth=1
- movq -56(%rbp), %rax
- movq %rax, -112(%rbp) # 8-byte Spill
- leaq -1(%rax), %rax
- movq -48(%rbp), %rcx
- cmpq %rax, %rcx
- jg .LBB5_1
-# BB#3: # %polly.loop_preheader4.preheader
+ .p2align 4, 0x90
+.LBB5_2: # %polly.par.loadIVBounds
# in Loop: Header=BB5_1 Depth=1
- leaq -1(%rcx), %rax
- movq %rax, -88(%rbp) # 8-byte Spill
- addq $-65, -112(%rbp) # 8-byte Folded Spill
- movq %rcx, %rax
- shlq $9, %rax
- leaq (%rax,%rax,2), %rax
- leaq C+16(,%rax,4), %rax
- movq %rax, -104(%rbp) # 8-byte Spill
- .align 16, 0x90
-.LBB5_7: # %polly.loop_preheader4
+ movq 40(%rsp), %rdx
+ movq 32(%rsp), %rax
+ decq %rax
+ movq %rax, 136(%rsp) # 8-byte Spill
+ leaq (%rdx,%rdx,2), %rcx
+ shlq $11, %rcx
+ leaq A(%rip), %rax
+ addq %rax, %rcx
+ movq %rcx, 24(%rsp) # 8-byte Spill
+ .p2align 4, 0x90
+.LBB5_3: # %polly.loop_header
# Parent Loop BB5_1 Depth=1
# => This Loop Header: Depth=2
- # Child Loop BB5_8 Depth 3
- # Child Loop BB5_9 Depth 4
- # Child Loop BB5_12 Depth 5
- # Child Loop BB5_17 Depth 6
- # Child Loop BB5_18 Depth 7
- # Child Loop BB5_14 Depth 5
- movq %rcx, -72(%rbp) # 8-byte Spill
- leaq 62(%rcx), %rdi
- xorl %edx, %edx
- .align 16, 0x90
-.LBB5_8: # %polly.loop_preheader11
+ # Child Loop BB5_4 Depth 3
+ # Child Loop BB5_5 Depth 4
+ # Child Loop BB5_6 Depth 5
+ # Child Loop BB5_7 Depth 6
+ leaq 63(%rdx), %rsi
+ leaq B+192(%rip), %r14
+ xorl %ecx, %ecx
+ xorl %eax, %eax
+ movq %rdx, 168(%rsp) # 8-byte Spill
+ .p2align 4, 0x90
+.LBB5_4: # %polly.loop_header2
# Parent Loop BB5_1 Depth=1
- # Parent Loop BB5_7 Depth=2
+ # Parent Loop BB5_3 Depth=2
# => This Loop Header: Depth=3
- # Child Loop BB5_9 Depth 4
- # Child Loop BB5_12 Depth 5
- # Child Loop BB5_17 Depth 6
- # Child Loop BB5_18 Depth 7
- # Child Loop BB5_14 Depth 5
- movq %rdx, -96(%rbp) # 8-byte Spill
- leaq -4(%rdx), %rcx
- movq %rdx, %rax
- decq %rax
- cmovsq %rcx, %rax
- movq %rax, %r14
- sarq $63, %r14
- shrq $62, %r14
- addq %rax, %r14
- andq $-4, %r14
- movq %rdx, %rax
- orq $63, %rax
- leaq -4(%rax), %rdx
- movq -104(%rbp), %rcx # 8-byte Reload
- leaq (%rcx,%r14,4), %rcx
- movq %rcx, -80(%rbp) # 8-byte Spill
- leaq B+16(,%r14,4), %rbx
- leaq 4(%r14), %rcx
- movq %rcx, -64(%rbp) # 8-byte Spill
- xorl %r11d, %r11d
- .align 16, 0x90
-.LBB5_9: # %polly.loop_header10
+ # Child Loop BB5_5 Depth 4
+ # Child Loop BB5_6 Depth 5
+ # Child Loop BB5_7 Depth 6
+ movq %rax, 144(%rsp) # 8-byte Spill
+ movq %rcx, 152(%rsp) # 8-byte Spill
+ shlq $6, %rcx
+ leaq 16(%rcx), %rdi
+ leaq 32(%rcx), %rbp
+ leaq 48(%rcx), %r15
+ movq 24(%rsp), %r9 # 8-byte Reload
+ movq %r14, 160(%rsp) # 8-byte Spill
+ xorl %eax, %eax
+ .p2align 4, 0x90
+.LBB5_5: # %polly.loop_header8
# Parent Loop BB5_1 Depth=1
- # Parent Loop BB5_7 Depth=2
- # Parent Loop BB5_8 Depth=3
+ # Parent Loop BB5_3 Depth=2
+ # Parent Loop BB5_4 Depth=3
# => This Loop Header: Depth=4
- # Child Loop BB5_12 Depth 5
- # Child Loop BB5_17 Depth 6
- # Child Loop BB5_18 Depth 7
- # Child Loop BB5_14 Depth 5
- movabsq $9223372036854775744, %rcx # imm = 0x7FFFFFFFFFFFFFC0
- cmpq %rcx, -72(%rbp) # 8-byte Folded Reload
- jg .LBB5_15
-# BB#10: # %polly.loop_header17.preheader
- # in Loop: Header=BB5_9 Depth=4
- movq %r11, %r15
- orq $63, %r15
- cmpq %r15, %r11
- movq -88(%rbp), %rcx # 8-byte Reload
- jle .LBB5_11
- .align 16, 0x90
-.LBB5_14: # %polly.loop_exit28.us
- # Parent Loop BB5_1 Depth=1
- # Parent Loop BB5_7 Depth=2
- # Parent Loop BB5_8 Depth=3
- # Parent Loop BB5_9 Depth=4
- # => This Inner Loop Header: Depth=5
- incq %rcx
- cmpq %rdi, %rcx
- jle .LBB5_14
- jmp .LBB5_15
- .align 16, 0x90
-.LBB5_11: # in Loop: Header=BB5_9 Depth=4
- decq %r15
- movq -80(%rbp), %r13 # 8-byte Reload
- movq -72(%rbp), %rcx # 8-byte Reload
- .align 16, 0x90
-.LBB5_12: # %polly.loop_header26.preheader
+ # Child Loop BB5_6 Depth 5
+ # Child Loop BB5_7 Depth 6
+ movq %rax, 176(%rsp) # 8-byte Spill
+ movq %r9, 184(%rsp) # 8-byte Spill
+ movq %rdx, %rax
+ .p2align 4, 0x90
+.LBB5_6: # %polly.loop_header14
# Parent Loop BB5_1 Depth=1
- # Parent Loop BB5_7 Depth=2
- # Parent Loop BB5_8 Depth=3
- # Parent Loop BB5_9 Depth=4
+ # Parent Loop BB5_3 Depth=2
+ # Parent Loop BB5_4 Depth=3
+ # Parent Loop BB5_5 Depth=4
# => This Loop Header: Depth=5
- # Child Loop BB5_17 Depth 6
- # Child Loop BB5_18 Depth 7
- cmpq %rax, -64(%rbp) # 8-byte Folded Reload
- movq %rbx, %r12
- movq %r11, %r8
- jg .LBB5_13
- .align 16, 0x90
-.LBB5_17: # %polly.loop_header35.preheader
+ # Child Loop BB5_7 Depth 6
+ leaq (%rax,%rax,2), %rbx
+ shlq $11, %rbx
+ leaq C(%rip), %rdx
+ addq %rdx, %rbx
+ leaq (%rbx,%rcx,4), %r8
+ leaq (%rbx,%rdi,4), %rdx
+ leaq (%rbx,%rbp,4), %r13
+ leaq (%rbx,%r15,4), %r10
+ movups (%rbx,%rcx,4), %xmm8
+ movups 16(%rbx,%rcx,4), %xmm0
+ movaps %xmm0, 96(%rsp) # 16-byte Spill
+ movups 32(%rbx,%rcx,4), %xmm6
+ movups 48(%rbx,%rcx,4), %xmm1
+ movups (%rbx,%rdi,4), %xmm15
+ movups 16(%rbx,%rdi,4), %xmm0
+ movaps %xmm0, (%rsp) # 16-byte Spill
+ movups 32(%rbx,%rdi,4), %xmm0
+ movaps %xmm0, 48(%rsp) # 16-byte Spill
+ movups 48(%rbx,%rdi,4), %xmm0
+ movaps %xmm0, 64(%rsp) # 16-byte Spill
+ movups (%rbx,%rbp,4), %xmm11
+ movups 16(%rbx,%rbp,4), %xmm0
+ movaps %xmm0, 112(%rsp) # 16-byte Spill
+ movups 32(%rbx,%rbp,4), %xmm12
+ movups 48(%rbx,%rbp,4), %xmm0
+ movaps %xmm0, 80(%rsp) # 16-byte Spill
+ movups (%rbx,%r15,4), %xmm9
+ movups 16(%rbx,%r15,4), %xmm13
+ movups 32(%rbx,%r15,4), %xmm2
+ movups 48(%rbx,%r15,4), %xmm3
+ movq $-256, %r12
+ movq %r14, %r11
+ .p2align 4, 0x90
+.LBB5_7: # %vector.ph
# Parent Loop BB5_1 Depth=1
- # Parent Loop BB5_7 Depth=2
- # Parent Loop BB5_8 Depth=3
- # Parent Loop BB5_9 Depth=4
- # Parent Loop BB5_12 Depth=5
- # => This Loop Header: Depth=6
- # Child Loop BB5_18 Depth 7
- leaq (%rcx,%rcx,2), %rsi
- shlq $11, %rsi
- vbroadcastss A(%rsi,%r8,4), %xmm0
- movq %r13, %r9
- movq %r12, %r10
- movq %r14, %rsi
-.LBB5_18: # %polly.loop_header35
- # Parent Loop BB5_1 Depth=1
- # Parent Loop BB5_7 Depth=2
- # Parent Loop BB5_8 Depth=3
- # Parent Loop BB5_9 Depth=4
- # Parent Loop BB5_12 Depth=5
- # Parent Loop BB5_17 Depth=6
- # => This Inner Loop Header: Depth=7
- vmulps (%r10), %xmm0, %xmm1
- vaddps (%r9), %xmm1, %xmm1
- vmovaps %xmm1, (%r9)
- addq $16, %r9
- addq $16, %r10
- addq $4, %rsi
- cmpq %rdx, %rsi
- jle .LBB5_18
-# BB#16: # %polly.loop_exit37
- # in Loop: Header=BB5_17 Depth=6
- addq $6144, %r12 # imm = 0x1800
- cmpq %r15, %r8
- leaq 1(%r8), %r8
- jle .LBB5_17
- .align 16, 0x90
-.LBB5_13: # %polly.loop_exit28
- # in Loop: Header=BB5_12 Depth=5
- addq $6144, %r13 # imm = 0x1800
- cmpq %rdi, %rcx
- leaq 1(%rcx), %rcx
- jle .LBB5_12
- .align 16, 0x90
-.LBB5_15: # %polly.loop_exit19
- # in Loop: Header=BB5_9 Depth=4
- addq $393216, %rbx # imm = 0x60000
- cmpq $1472, %r11 # imm = 0x5C0
- leaq 64(%r11), %r11
- jl .LBB5_9
-# BB#5: # %polly.loop_exit12
- # in Loop: Header=BB5_8 Depth=3
- movq -96(%rbp), %rdx # 8-byte Reload
- cmpq $1472, %rdx # imm = 0x5C0
- leaq 64(%rdx), %rdx
- jl .LBB5_8
-# BB#6: # %polly.loop_exit5
- # in Loop: Header=BB5_7 Depth=2
- addq $64, -88(%rbp) # 8-byte Folded Spill
- addq $393216, -104(%rbp) # 8-byte Folded Spill
+ # Parent Loop BB5_3 Depth=2
+ # Parent Loop BB5_4 Depth=3
+ # Parent Loop BB5_5 Depth=4
+ # Parent Loop BB5_6 Depth=5
+ # => This Inner Loop Header: Depth=6
+ movaps %xmm12, 208(%rsp) # 16-byte Spill
+ movaps %xmm2, 224(%rsp) # 16-byte Spill
+ movaps %xmm3, 240(%rsp) # 16-byte Spill
+ movaps %xmm8, %xmm10
+ movaps 96(%rsp), %xmm7 # 16-byte Reload
+ unpcklps %xmm7, %xmm10 # xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
+ movaps %xmm1, %xmm4
+ shufps $0, %xmm6, %xmm4 # xmm4 = xmm4[0,0],xmm6[0,0]
+ shufps $36, %xmm4, %xmm10 # xmm10 = xmm10[0,1],xmm4[2,0]
+ movaps %xmm7, %xmm5
+ shufps $17, %xmm8, %xmm5 # xmm5 = xmm5[1,0],xmm8[1,0]
+ movaps %xmm6, %xmm4
+ unpcklps %xmm1, %xmm4 # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+ shufps $226, %xmm4, %xmm5 # xmm5 = xmm5[2,0],xmm4[2,3]
+ movaps %xmm8, %xmm12
+ unpckhps %xmm7, %xmm12 # xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
+ movaps %xmm1, %xmm4
+ shufps $34, %xmm6, %xmm4 # xmm4 = xmm4[2,0],xmm6[2,0]
+ shufps $36, %xmm4, %xmm12 # xmm12 = xmm12[0,1],xmm4[2,0]
+ shufps $51, %xmm8, %xmm7 # xmm7 = xmm7[3,0],xmm8[3,0]
+ unpckhps %xmm1, %xmm6 # xmm6 = xmm6[2],xmm1[2],xmm6[3],xmm1[3]
+ shufps $226, %xmm6, %xmm7 # xmm7 = xmm7[2,0],xmm6[2,3]
+ movaps -160(%r11), %xmm0
+ movaps -144(%r11), %xmm1
+ movaps %xmm1, %xmm6
+ shufps $0, %xmm0, %xmm6 # xmm6 = xmm6[0,0],xmm0[0,0]
+ movaps -192(%r11), %xmm3
+ movaps -176(%r11), %xmm4
+ movaps %xmm3, %xmm8
+ unpcklps %xmm4, %xmm8 # xmm8 = xmm8[0],xmm4[0],xmm8[1],xmm4[1]
+ shufps $36, %xmm6, %xmm8 # xmm8 = xmm8[0,1],xmm6[2,0]
+ movaps %xmm0, %xmm2
+ unpcklps %xmm1, %xmm2 # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+ movaps %xmm4, %xmm6
+ shufps $17, %xmm3, %xmm6 # xmm6 = xmm6[1,0],xmm3[1,0]
+ shufps $226, %xmm2, %xmm6 # xmm6 = xmm6[2,0],xmm2[2,3]
+ movaps %xmm1, %xmm2
+ shufps $34, %xmm0, %xmm2 # xmm2 = xmm2[2,0],xmm0[2,0]
+ movaps %xmm3, %xmm14
+ unpckhps %xmm4, %xmm14 # xmm14 = xmm14[2],xmm4[2],xmm14[3],xmm4[3]
+ shufps $36, %xmm2, %xmm14 # xmm14 = xmm14[0,1],xmm2[2,0]
+ unpckhps %xmm1, %xmm0 # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+ shufps $51, %xmm3, %xmm4 # xmm4 = xmm4[3,0],xmm3[3,0]
+ shufps $226, %xmm0, %xmm4 # xmm4 = xmm4[2,0],xmm0[2,3]
+ movss 256(%r9,%r12), %xmm0 # xmm0 = mem[0],zero,zero,zero
+ shufps $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
+ mulps %xmm0, %xmm8
+ addps %xmm10, %xmm8
+ mulps %xmm0, %xmm6
+ addps %xmm5, %xmm6
+ mulps %xmm0, %xmm14
+ addps %xmm12, %xmm14
+ mulps %xmm0, %xmm4
+ movaps %xmm0, %xmm5
+ addps %xmm7, %xmm4
+ movaps %xmm14, %xmm0
+ unpckhps %xmm4, %xmm0 # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+ movaps %xmm6, %xmm1
+ shufps $51, %xmm8, %xmm1 # xmm1 = xmm1[3,0],xmm8[3,0]
+ shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
+ movaps %xmm1, 272(%rsp) # 16-byte Spill
+ movaps %xmm4, %xmm0
+ shufps $34, %xmm14, %xmm0 # xmm0 = xmm0[2,0],xmm14[2,0]
+ movaps %xmm8, %xmm1
+ unpckhps %xmm6, %xmm1 # xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
+ shufps $36, %xmm0, %xmm1 # xmm1 = xmm1[0,1],xmm0[2,0]
+ movaps %xmm1, 256(%rsp) # 16-byte Spill
+ movaps %xmm14, %xmm0
+ unpcklps %xmm4, %xmm0 # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+ movaps %xmm6, %xmm1
+ shufps $17, %xmm8, %xmm1 # xmm1 = xmm1[1,0],xmm8[1,0]
+ shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
+ movaps %xmm1, 96(%rsp) # 16-byte Spill
+ shufps $0, %xmm14, %xmm4 # xmm4 = xmm4[0,0],xmm14[0,0]
+ unpcklps %xmm6, %xmm8 # xmm8 = xmm8[0],xmm6[0],xmm8[1],xmm6[1]
+ shufps $36, %xmm4, %xmm8 # xmm8 = xmm8[0,1],xmm4[2,0]
+ movaps %xmm15, %xmm14
+ movaps (%rsp), %xmm4 # 16-byte Reload
+ unpcklps %xmm4, %xmm14 # xmm14 = xmm14[0],xmm4[0],xmm14[1],xmm4[1]
+ movaps 64(%rsp), %xmm1 # 16-byte Reload
+ movaps %xmm1, %xmm0
+ movaps 48(%rsp), %xmm3 # 16-byte Reload
+ shufps $0, %xmm3, %xmm0 # xmm0 = xmm0[0,0],xmm3[0,0]
+ shufps $36, %xmm0, %xmm14 # xmm14 = xmm14[0,1],xmm0[2,0]
+ movaps %xmm4, %xmm12
+ shufps $17, %xmm15, %xmm12 # xmm12 = xmm12[1,0],xmm15[1,0]
+ movaps %xmm3, %xmm2
+ unpcklps %xmm1, %xmm2 # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+ shufps $226, %xmm2, %xmm12 # xmm12 = xmm12[2,0],xmm2[2,3]
+ movaps %xmm15, %xmm7
+ unpckhps %xmm4, %xmm7 # xmm7 = xmm7[2],xmm4[2],xmm7[3],xmm4[3]
+ movaps %xmm1, %xmm2
+ shufps $34, %xmm3, %xmm2 # xmm2 = xmm2[2,0],xmm3[2,0]
+ shufps $36, %xmm2, %xmm7 # xmm7 = xmm7[0,1],xmm2[2,0]
+ shufps $51, %xmm15, %xmm4 # xmm4 = xmm4[3,0],xmm15[3,0]
+ unpckhps %xmm1, %xmm3 # xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+ shufps $226, %xmm3, %xmm4 # xmm4 = xmm4[2,0],xmm3[2,3]
+ movaps %xmm4, (%rsp) # 16-byte Spill
+ movaps -96(%r11), %xmm2
+ movaps -80(%r11), %xmm1
+ movaps %xmm1, %xmm4
+ shufps $0, %xmm2, %xmm4 # xmm4 = xmm4[0,0],xmm2[0,0]
+ movaps -112(%r11), %xmm10
+ movaps -128(%r11), %xmm0
+ movaps %xmm0, %xmm15
+ unpcklps %xmm10, %xmm15 # xmm15 = xmm15[0],xmm10[0],xmm15[1],xmm10[1]
+ shufps $36, %xmm4, %xmm15 # xmm15 = xmm15[0,1],xmm4[2,0]
+ movaps %xmm2, %xmm4
+ unpcklps %xmm1, %xmm4 # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+ movaps %xmm10, %xmm6
+ shufps $17, %xmm0, %xmm6 # xmm6 = xmm6[1,0],xmm0[1,0]
+ shufps $226, %xmm4, %xmm6 # xmm6 = xmm6[2,0],xmm4[2,3]
+ movaps %xmm1, %xmm3
+ shufps $34, %xmm2, %xmm3 # xmm3 = xmm3[2,0],xmm2[2,0]
+ movaps %xmm0, %xmm4
+ unpckhps %xmm10, %xmm4 # xmm4 = xmm4[2],xmm10[2],xmm4[3],xmm10[3]
+ shufps $36, %xmm3, %xmm4 # xmm4 = xmm4[0,1],xmm3[2,0]
+ unpckhps %xmm1, %xmm2 # xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+ shufps $51, %xmm0, %xmm10 # xmm10 = xmm10[3,0],xmm0[3,0]
+ shufps $226, %xmm2, %xmm10 # xmm10 = xmm10[2,0],xmm2[2,3]
+ movaps %xmm5, 192(%rsp) # 16-byte Spill
+ mulps %xmm5, %xmm15
+ addps %xmm14, %xmm15
+ mulps %xmm5, %xmm6
+ addps %xmm12, %xmm6
+ mulps %xmm5, %xmm4
+ addps %xmm7, %xmm4
+ mulps %xmm5, %xmm10
+ addps (%rsp), %xmm10 # 16-byte Folded Reload
+ movaps %xmm4, %xmm0
+ unpckhps %xmm10, %xmm0 # xmm0 = xmm0[2],xmm10[2],xmm0[3],xmm10[3]
+ movaps %xmm6, %xmm1
+ shufps $51, %xmm15, %xmm1 # xmm1 = xmm1[3,0],xmm15[3,0]
+ shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
+ movaps %xmm1, 64(%rsp) # 16-byte Spill
+ movaps %xmm10, %xmm0
+ shufps $34, %xmm4, %xmm0 # xmm0 = xmm0[2,0],xmm4[2,0]
+ movaps %xmm15, %xmm1
+ unpckhps %xmm6, %xmm1 # xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
+ shufps $36, %xmm0, %xmm1 # xmm1 = xmm1[0,1],xmm0[2,0]
+ movaps %xmm1, 48(%rsp) # 16-byte Spill
+ movaps %xmm4, %xmm0
+ unpcklps %xmm10, %xmm0 # xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1]
+ movaps %xmm6, %xmm1
+ shufps $17, %xmm15, %xmm1 # xmm1 = xmm1[1,0],xmm15[1,0]
+ shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
+ movaps %xmm1, (%rsp) # 16-byte Spill
+ shufps $0, %xmm4, %xmm10 # xmm10 = xmm10[0,0],xmm4[0,0]
+ unpcklps %xmm6, %xmm15 # xmm15 = xmm15[0],xmm6[0],xmm15[1],xmm6[1]
+ shufps $36, %xmm10, %xmm15 # xmm15 = xmm15[0,1],xmm10[2,0]
+ movaps %xmm11, %xmm10
+ movaps 112(%rsp), %xmm14 # 16-byte Reload
+ unpcklps %xmm14, %xmm10 # xmm10 = xmm10[0],xmm14[0],xmm10[1],xmm14[1]
+ movaps 80(%rsp), %xmm2 # 16-byte Reload
+ movaps %xmm2, %xmm0
+ movaps 208(%rsp), %xmm3 # 16-byte Reload
+ shufps $0, %xmm3, %xmm0 # xmm0 = xmm0[0,0],xmm3[0,0]
+ shufps $36, %xmm0, %xmm10 # xmm10 = xmm10[0,1],xmm0[2,0]
+ movaps %xmm14, %xmm12
+ shufps $17, %xmm11, %xmm12 # xmm12 = xmm12[1,0],xmm11[1,0]
+ movaps %xmm3, %xmm0
+ unpcklps %xmm2, %xmm0 # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+ shufps $226, %xmm0, %xmm12 # xmm12 = xmm12[2,0],xmm0[2,3]
+ movaps %xmm11, %xmm0
+ unpckhps %xmm14, %xmm0 # xmm0 = xmm0[2],xmm14[2],xmm0[3],xmm14[3]
+ movaps %xmm2, %xmm1
+ shufps $34, %xmm3, %xmm1 # xmm1 = xmm1[2,0],xmm3[2,0]
+ shufps $36, %xmm1, %xmm0 # xmm0 = xmm0[0,1],xmm1[2,0]
+ shufps $51, %xmm11, %xmm14 # xmm14 = xmm14[3,0],xmm11[3,0]
+ unpckhps %xmm2, %xmm3 # xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+ shufps $226, %xmm3, %xmm14 # xmm14 = xmm14[2,0],xmm3[2,3]
+ movaps -32(%r11), %xmm1
+ movaps -16(%r11), %xmm2
+ movaps %xmm2, %xmm3
+ shufps $0, %xmm1, %xmm3 # xmm3 = xmm3[0,0],xmm1[0,0]
+ movaps -48(%r11), %xmm4
+ movaps -64(%r11), %xmm5
+ movaps %xmm5, %xmm11
+ unpcklps %xmm4, %xmm11 # xmm11 = xmm11[0],xmm4[0],xmm11[1],xmm4[1]
+ shufps $36, %xmm3, %xmm11 # xmm11 = xmm11[0,1],xmm3[2,0]
+ movaps %xmm1, %xmm3
+ unpcklps %xmm2, %xmm3 # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+ movaps %xmm4, %xmm7
+ shufps $17, %xmm5, %xmm7 # xmm7 = xmm7[1,0],xmm5[1,0]
+ shufps $226, %xmm3, %xmm7 # xmm7 = xmm7[2,0],xmm3[2,3]
+ movaps %xmm2, %xmm3
+ shufps $34, %xmm1, %xmm3 # xmm3 = xmm3[2,0],xmm1[2,0]
+ movaps %xmm5, %xmm6
+ unpckhps %xmm4, %xmm6 # xmm6 = xmm6[2],xmm4[2],xmm6[3],xmm4[3]
+ shufps $36, %xmm3, %xmm6 # xmm6 = xmm6[0,1],xmm3[2,0]
+ unpckhps %xmm2, %xmm1 # xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+ shufps $51, %xmm5, %xmm4 # xmm4 = xmm4[3,0],xmm5[3,0]
+ shufps $226, %xmm1, %xmm4 # xmm4 = xmm4[2,0],xmm1[2,3]
+ movaps 192(%rsp), %xmm1 # 16-byte Reload
+ mulps %xmm1, %xmm11
+ addps %xmm10, %xmm11
+ mulps %xmm1, %xmm7
+ addps %xmm12, %xmm7
+ mulps %xmm1, %xmm6
+ addps %xmm0, %xmm6
+ mulps %xmm1, %xmm4
+ addps %xmm14, %xmm4
+ movaps %xmm6, %xmm0
+ unpckhps %xmm4, %xmm0 # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+ movaps %xmm7, %xmm1
+ shufps $51, %xmm11, %xmm1 # xmm1 = xmm1[3,0],xmm11[3,0]
+ shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
+ movaps %xmm1, 80(%rsp) # 16-byte Spill
+ movaps %xmm4, %xmm0
+ shufps $34, %xmm6, %xmm0 # xmm0 = xmm0[2,0],xmm6[2,0]
+ movaps %xmm11, %xmm12
+ unpckhps %xmm7, %xmm12 # xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
+ shufps $36, %xmm0, %xmm12 # xmm12 = xmm12[0,1],xmm0[2,0]
+ movaps %xmm6, %xmm0
+ unpcklps %xmm4, %xmm0 # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+ movaps %xmm7, %xmm1
+ shufps $17, %xmm11, %xmm1 # xmm1 = xmm1[1,0],xmm11[1,0]
+ shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
+ movaps %xmm1, 112(%rsp) # 16-byte Spill
+ shufps $0, %xmm6, %xmm4 # xmm4 = xmm4[0,0],xmm6[0,0]
+ unpcklps %xmm7, %xmm11 # xmm11 = xmm11[0],xmm7[0],xmm11[1],xmm7[1]
+ shufps $36, %xmm4, %xmm11 # xmm11 = xmm11[0,1],xmm4[2,0]
+ movaps %xmm9, %xmm10
+ unpcklps %xmm13, %xmm10 # xmm10 = xmm10[0],xmm13[0],xmm10[1],xmm13[1]
+ movaps 240(%rsp), %xmm2 # 16-byte Reload
+ movaps %xmm2, %xmm0
+ movaps 224(%rsp), %xmm3 # 16-byte Reload
+ shufps $0, %xmm3, %xmm0 # xmm0 = xmm0[0,0],xmm3[0,0]
+ shufps $36, %xmm0, %xmm10 # xmm10 = xmm10[0,1],xmm0[2,0]
+ movaps %xmm13, %xmm14
+ shufps $17, %xmm9, %xmm14 # xmm14 = xmm14[1,0],xmm9[1,0]
+ movaps %xmm3, %xmm0
+ unpcklps %xmm2, %xmm0 # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+ shufps $226, %xmm0, %xmm14 # xmm14 = xmm14[2,0],xmm0[2,3]
+ movaps %xmm9, %xmm0
+ unpckhps %xmm13, %xmm0 # xmm0 = xmm0[2],xmm13[2],xmm0[3],xmm13[3]
+ movaps %xmm2, %xmm1
+ shufps $34, %xmm3, %xmm1 # xmm1 = xmm1[2,0],xmm3[2,0]
+ shufps $36, %xmm1, %xmm0 # xmm0 = xmm0[0,1],xmm1[2,0]
+ shufps $51, %xmm9, %xmm13 # xmm13 = xmm13[3,0],xmm9[3,0]
+ unpckhps %xmm2, %xmm3 # xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+ shufps $226, %xmm3, %xmm13 # xmm13 = xmm13[2,0],xmm3[2,3]
+ movaps 32(%r11), %xmm1
+ movaps 48(%r11), %xmm2
+ movaps %xmm2, %xmm3
+ shufps $0, %xmm1, %xmm3 # xmm3 = xmm3[0,0],xmm1[0,0]
+ movaps 16(%r11), %xmm4
+ movaps (%r11), %xmm5
+ movaps %xmm5, %xmm9
+ unpcklps %xmm4, %xmm9 # xmm9 = xmm9[0],xmm4[0],xmm9[1],xmm4[1]
+ shufps $36, %xmm3, %xmm9 # xmm9 = xmm9[0,1],xmm3[2,0]
+ movaps %xmm1, %xmm3
+ unpcklps %xmm2, %xmm3 # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+ movaps %xmm4, %xmm7
+ shufps $17, %xmm5, %xmm7 # xmm7 = xmm7[1,0],xmm5[1,0]
+ shufps $226, %xmm3, %xmm7 # xmm7 = xmm7[2,0],xmm3[2,3]
+ movaps %xmm2, %xmm3
+ shufps $34, %xmm1, %xmm3 # xmm3 = xmm3[2,0],xmm1[2,0]
+ movaps %xmm5, %xmm6
+ unpckhps %xmm4, %xmm6 # xmm6 = xmm6[2],xmm4[2],xmm6[3],xmm4[3]
+ shufps $36, %xmm3, %xmm6 # xmm6 = xmm6[0,1],xmm3[2,0]
+ unpckhps %xmm2, %xmm1 # xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+ shufps $51, %xmm5, %xmm4 # xmm4 = xmm4[3,0],xmm5[3,0]
+ shufps $226, %xmm1, %xmm4 # xmm4 = xmm4[2,0],xmm1[2,3]
+ movaps 192(%rsp), %xmm1 # 16-byte Reload
+ mulps %xmm1, %xmm9
+ addps %xmm10, %xmm9
+ mulps %xmm1, %xmm7
+ addps %xmm14, %xmm7
+ mulps %xmm1, %xmm6
+ addps %xmm0, %xmm6
+ mulps %xmm1, %xmm4
+ addps %xmm13, %xmm4
+ movaps %xmm6, %xmm0
+ unpckhps %xmm4, %xmm0 # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+ movaps %xmm7, %xmm3
+ shufps $51, %xmm9, %xmm3 # xmm3 = xmm3[3,0],xmm9[3,0]
+ shufps $226, %xmm0, %xmm3 # xmm3 = xmm3[2,0],xmm0[2,3]
+ movaps %xmm4, %xmm0
+ shufps $34, %xmm6, %xmm0 # xmm0 = xmm0[2,0],xmm6[2,0]
+ movaps %xmm9, %xmm2
+ unpckhps %xmm7, %xmm2 # xmm2 = xmm2[2],xmm7[2],xmm2[3],xmm7[3]
+ shufps $36, %xmm0, %xmm2 # xmm2 = xmm2[0,1],xmm0[2,0]
+ movaps %xmm6, %xmm0
+ unpcklps %xmm4, %xmm0 # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+ movaps %xmm7, %xmm13
+ shufps $17, %xmm9, %xmm13 # xmm13 = xmm13[1,0],xmm9[1,0]
+ shufps $226, %xmm0, %xmm13 # xmm13 = xmm13[2,0],xmm0[2,3]
+ shufps $0, %xmm6, %xmm4 # xmm4 = xmm4[0,0],xmm6[0,0]
+ movaps 256(%rsp), %xmm6 # 16-byte Reload
+ movaps 272(%rsp), %xmm1 # 16-byte Reload
+ unpcklps %xmm7, %xmm9 # xmm9 = xmm9[0],xmm7[0],xmm9[1],xmm7[1]
+ shufps $36, %xmm4, %xmm9 # xmm9 = xmm9[0,1],xmm4[2,0]
+ addq $6144, %r11 # imm = 0x1800
+ addq $4, %r12
+ jne .LBB5_7
+# %bb.8: # %polly.loop_exit22
+ # in Loop: Header=BB5_6 Depth=5
+ movups %xmm8, (%r8)
+ movaps 96(%rsp), %xmm0 # 16-byte Reload
+ movups %xmm0, 16(%r8)
+ movups %xmm6, 32(%r8)
+ movups %xmm1, 48(%r8)
+ movaps 64(%rsp), %xmm0 # 16-byte Reload
+ movups %xmm0, 48(%rdx)
+ movaps 48(%rsp), %xmm0 # 16-byte Reload
+ movups %xmm0, 32(%rdx)
+ movaps (%rsp), %xmm0 # 16-byte Reload
+ movups %xmm0, 16(%rdx)
+ movups %xmm15, (%rdx)
+ movaps 80(%rsp), %xmm0 # 16-byte Reload
+ movups %xmm0, 48(%r13)
+ movaps 112(%rsp), %xmm0 # 16-byte Reload
+ movups %xmm0, 16(%r13)
+ movups %xmm11, (%r13)
+ movups %xmm12, 32(%r13)
+ movups %xmm3, 48(%r10)
+ movups %xmm13, 16(%r10)
+ movups %xmm9, (%r10)
+ movups %xmm2, 32(%r10)
+ addq $6144, %r9 # imm = 0x1800
+ cmpq %rsi, %rax
+ leaq 1(%rax), %rax
+ jl .LBB5_6
+# %bb.9: # %polly.loop_exit16
+ # in Loop: Header=BB5_5 Depth=4
+ movq 176(%rsp), %rax # 8-byte Reload
+ addq $64, %rax
+ addq $393216, %r14 # imm = 0x60000
+ movq 184(%rsp), %r9 # 8-byte Reload
+ addq $256, %r9 # imm = 0x100
+ cmpq $1536, %rax # imm = 0x600
+ movq 168(%rsp), %rdx # 8-byte Reload
+ jb .LBB5_5
+# %bb.10: # %polly.loop_exit10
+ # in Loop: Header=BB5_4 Depth=3
+ movq 144(%rsp), %rax # 8-byte Reload
+ addq $64, %rax
+ movq 152(%rsp), %rcx # 8-byte Reload
+ incq %rcx
+ movq 160(%rsp), %r14 # 8-byte Reload
+ addq $256, %r14 # imm = 0x100
+ cmpq $1536, %rax # imm = 0x600
+ jb .LBB5_4
+# %bb.11: # %polly.loop_exit4
+ # in Loop: Header=BB5_3 Depth=2
+ addq $64, %rdx
+ addq $393216, 24(%rsp) # 8-byte Folded Spill
# imm = 0x60000
- movq -72(%rbp), %rcx # 8-byte Reload
- cmpq -112(%rbp), %rcx # 8-byte Folded Reload
- leaq 64(%rcx), %rcx
- jle .LBB5_7
-.LBB5_1: # %omp.setup
+ cmpq 136(%rsp), %rdx # 8-byte Folded Reload
+ jle .LBB5_3
+.LBB5_1: # %polly.par.setup
# =>This Loop Header: Depth=1
- # Child Loop BB5_7 Depth 2
- # Child Loop BB5_8 Depth 3
- # Child Loop BB5_9 Depth 4
- # Child Loop BB5_12 Depth 5
- # Child Loop BB5_17 Depth 6
- # Child Loop BB5_18 Depth 7
- # Child Loop BB5_14 Depth 5
- leaq -48(%rbp), %rdi
- leaq -56(%rbp), %rsi
- callq GOMP_loop_runtime_next
+ # Child Loop BB5_3 Depth 2
+ # Child Loop BB5_4 Depth 3
+ # Child Loop BB5_5 Depth 4
+ # Child Loop BB5_6 Depth 5
+ # Child Loop BB5_7 Depth 6
+ leaq 40(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ callq GOMP_loop_runtime_next@PLT
testb %al, %al
jne .LBB5_2
-# BB#4: # %omp.exit
- callq GOMP_loop_end_nowait
- addq $72, %rsp
+# %bb.12: # %polly.par.exit
+ callq GOMP_loop_end_nowait@PLT
+ addq $296, %rsp # imm = 0x128
+ .cfi_def_cfa_offset 56
popq %rbx
+ .cfi_def_cfa_offset 48
popq %r12
+ .cfi_def_cfa_offset 40
popq %r13
+ .cfi_def_cfa_offset 32
popq %r14
+ .cfi_def_cfa_offset 24
popq %r15
+ .cfi_def_cfa_offset 16
popq %rbp
- ret
-.Ltmp66:
- .size main.omp_subfn1, .Ltmp66-main.omp_subfn1
+ .cfi_def_cfa_offset 8
+ retq
+.Lfunc_end5:
+ .size main_polly_subfn_1, .Lfunc_end5-main_polly_subfn_1
.cfi_endproc
-
+ # -- End function
.type A,@object # @A
.comm A,9437184,16
.type B,@object # @B
@@ -745,10 +854,11 @@ main.omp_subfn1: # @main.omp_subfn1
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
- .asciz "%lf "
+ .asciz "%lf "
.size .L.str, 5
.type C,@object # @C
.comm C,9437184,16
+ .ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
.section ".note.GNU-stack","",@progbits
diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.exe b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.exe
deleted file mode 100755
index 36b788ea9ac..00000000000
--- a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.exe
+++ /dev/null
Binary files differ
diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.ll b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.ll
index 9d1f9ad098f..9294896bcb3 100644
--- a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.ll
+++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.ll
Binary files differ
diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.s b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.s
index 485d230bc39..194fdb144c9 100644
--- a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.s
+++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector.s
@@ -1,385 +1,645 @@
- .file "matmul.polly.interchanged+tiled+vector.ll"
+ .text
+ .file "matmul.c"
.section .rodata.cst8,"aM",@progbits,8
- .align 8
+ .p2align 3 # -- Begin function init_array
.LCPI0_0:
.quad 4602678819172646912 # double 0.5
.text
.globl init_array
- .align 16, 0x90
+ .p2align 4, 0x90
.type init_array,@function
init_array: # @init_array
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp2:
.cfi_def_cfa_offset 16
-.Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp4:
.cfi_def_cfa_register %rbp
+ leaq B(%rip), %rax
+ leaq A(%rip), %rcx
xorl %r8d, %r8d
- vmovsd .LCPI0_0(%rip), %xmm0
- .align 16, 0x90
-.LBB0_1: # %polly.loop_preheader3
+ movsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero
+ xorl %r9d, %r9d
+ .p2align 4, 0x90
+.LBB0_1: # %polly.loop_header
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
- xorl %ecx, %ecx
- .align 16, 0x90
-.LBB0_2: # %polly.loop_header2
+ movl $1, %edi
+ xorl %edx, %edx
+ .p2align 4, 0x90
+.LBB0_2: # %polly.loop_header1
# Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
- movl %ecx, %edx
- imull %r8d, %edx
movl %edx, %esi
- sarl $31, %esi
- shrl $22, %esi
- addl %edx, %esi
- andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
- negl %esi
- movq %r8, %rax
- shlq $11, %rax
- leal 1(%rdx,%rsi), %edi
- leaq (%rax,%rax,2), %rsi
- leaq 1(%rcx), %rdx
- cmpq $1536, %rdx # imm = 0x600
- vcvtsi2sdl %edi, %xmm0, %xmm1
- vmulsd %xmm0, %xmm1, %xmm1
- vcvtsd2ss %xmm1, %xmm1, %xmm1
- vmovss %xmm1, A(%rsi,%rcx,4)
- vmovss %xmm1, B(%rsi,%rcx,4)
- movq %rdx, %rcx
+ andl $1022, %esi # imm = 0x3FE
+ orl $1, %esi
+ xorps %xmm1, %xmm1
+ cvtsi2sdl %esi, %xmm1
+ mulsd %xmm0, %xmm1
+ cvtsd2ss %xmm1, %xmm1
+ movss %xmm1, -4(%rcx,%rdi,4)
+ movss %xmm1, -4(%rax,%rdi,4)
+ leal (%r9,%rdx), %esi
+ andl $1023, %esi # imm = 0x3FF
+ addl $1, %esi
+ xorps %xmm1, %xmm1
+ cvtsi2sdl %esi, %xmm1
+ mulsd %xmm0, %xmm1
+ cvtsd2ss %xmm1, %xmm1
+ movss %xmm1, (%rcx,%rdi,4)
+ movss %xmm1, (%rax,%rdi,4)
+ addq $2, %rdi
+ addl %r8d, %edx
+ cmpq $1537, %rdi # imm = 0x601
jne .LBB0_2
-# BB#3: # %polly.loop_exit4
+# %bb.3: # %polly.loop_exit3
# in Loop: Header=BB0_1 Depth=1
- incq %r8
- cmpq $1536, %r8 # imm = 0x600
+ addq $1, %r9
+ addq $6144, %rax # imm = 0x1800
+ addq $6144, %rcx # imm = 0x1800
+ addl $2, %r8d
+ cmpq $1536, %r9 # imm = 0x600
jne .LBB0_1
-# BB#4: # %polly.loop_exit
+# %bb.4: # %polly.exiting
popq %rbp
- ret
-.Ltmp5:
- .size init_array, .Ltmp5-init_array
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end0:
+ .size init_array, .Lfunc_end0-init_array
.cfi_endproc
-
- .globl print_array
- .align 16, 0x90
+ # -- End function
+ .globl print_array # -- Begin function print_array
+ .p2align 4, 0x90
.type print_array,@function
print_array: # @print_array
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp9:
.cfi_def_cfa_offset 16
-.Ltmp10:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp11:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
+ pushq %r13
pushq %r12
pushq %rbx
-.Ltmp12:
- .cfi_offset %rbx, -48
-.Ltmp13:
- .cfi_offset %r12, -40
-.Ltmp14:
+ pushq %rax
+ .cfi_offset %rbx, -56
+ .cfi_offset %r12, -48
+ .cfi_offset %r13, -40
.cfi_offset %r14, -32
-.Ltmp15:
.cfi_offset %r15, -24
- xorl %r14d, %r14d
- movl $C, %r15d
- .align 16, 0x90
+ leaq C(%rip), %r13
+ xorl %eax, %eax
+ movl $3435973837, %r12d # imm = 0xCCCCCCCD
+ leaq .L.str(%rip), %r14
+ .p2align 4, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
- movq stdout(%rip), %rax
- movq %r15, %r12
+ movq %rax, -48(%rbp) # 8-byte Spill
+ movq stdout(%rip), %rsi
xorl %ebx, %ebx
- .align 16, 0x90
+ .p2align 4, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
- vmovss (%r12), %xmm0
- vcvtss2sd %xmm0, %xmm0, %xmm0
- movq %rax, %rdi
- movl $.L.str, %esi
+ movl %ebx, %eax
+ imulq %r12, %rax
+ shrq $38, %rax
+ leal (%rax,%rax,4), %r15d
+ shll $4, %r15d
+ addl $79, %r15d
+ movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
+ cvtss2sd %xmm0, %xmm0
movb $1, %al
+ movq %rsi, %rdi
+ movq %r14, %rsi
callq fprintf
- movslq %ebx, %rax
- imulq $1717986919, %rax, %rcx # imm = 0x66666667
- movq %rcx, %rdx
- shrq $63, %rdx
- sarq $37, %rcx
- addl %edx, %ecx
- imull $80, %ecx, %ecx
- subl %ecx, %eax
- cmpl $79, %eax
+ cmpl %ebx, %r15d
jne .LBB1_4
-# BB#3: # %if.then
+# %bb.3: # %if.then
# in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
- callq fputc
+ callq fputc@PLT
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
- addq $4, %r12
- incq %rbx
- movq stdout(%rip), %rax
+ addq $1, %rbx
+ movq stdout(%rip), %rsi
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
-# BB#5: # %for.end
+# %bb.5: # %for.end
# in Loop: Header=BB1_1 Depth=1
movl $10, %edi
- movq %rax, %rsi
- callq fputc
- addq $6144, %r15 # imm = 0x1800
- incq %r14
- cmpq $1536, %r14 # imm = 0x600
+ callq fputc@PLT
+ movq -48(%rbp), %rax # 8-byte Reload
+ addq $1, %rax
+ addq $6144, %r13 # imm = 0x1800
+ cmpq $1536, %rax # imm = 0x600
jne .LBB1_1
-# BB#6: # %for.end12
+# %bb.6: # %for.end12
+ addq $8, %rsp
popq %rbx
popq %r12
+ popq %r13
popq %r14
popq %r15
popq %rbp
- ret
-.Ltmp16:
- .size print_array, .Ltmp16-print_array
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end1:
+ .size print_array, .Lfunc_end1-print_array
.cfi_endproc
-
- .section .rodata.cst8,"aM",@progbits,8
- .align 8
-.LCPI2_0:
- .quad 4602678819172646912 # double 0.5
- .text
- .globl main
- .align 16, 0x90
+ # -- End function
+ .globl main # -- Begin function main
+ .p2align 4, 0x90
.type main,@function
main: # @main
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp20:
.cfi_def_cfa_offset 16
-.Ltmp21:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp22:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
- subq $56, %rsp
-.Ltmp23:
+ subq $264, %rsp # imm = 0x108
.cfi_offset %rbx, -56
-.Ltmp24:
.cfi_offset %r12, -48
-.Ltmp25:
.cfi_offset %r13, -40
-.Ltmp26:
.cfi_offset %r14, -32
-.Ltmp27:
.cfi_offset %r15, -24
- xorl %ebx, %ebx
- vmovsd .LCPI2_0(%rip), %xmm0
- .align 16, 0x90
-.LBB2_1: # %polly.loop_preheader3.i
- # =>This Loop Header: Depth=1
- # Child Loop BB2_2 Depth 2
- xorl %ecx, %ecx
- .align 16, 0x90
-.LBB2_2: # %polly.loop_header2.i
- # Parent Loop BB2_1 Depth=1
- # => This Inner Loop Header: Depth=2
- movl %ecx, %edx
- imull %ebx, %edx
- movl %edx, %esi
- sarl $31, %esi
- shrl $22, %esi
- addl %edx, %esi
- andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
- negl %esi
- movq %rbx, %rax
- shlq $11, %rax
- leal 1(%rdx,%rsi), %edi
- leaq (%rax,%rax,2), %rsi
- leaq 1(%rcx), %rdx
- cmpq $1536, %rdx # imm = 0x600
- vcvtsi2sdl %edi, %xmm0, %xmm1
- vmulsd %xmm0, %xmm1, %xmm1
- vcvtsd2ss %xmm1, %xmm1, %xmm1
- vmovss %xmm1, A(%rsi,%rcx,4)
- vmovss %xmm1, B(%rsi,%rcx,4)
- movq %rdx, %rcx
- jne .LBB2_2
-# BB#3: # %polly.loop_exit4.i
- # in Loop: Header=BB2_1 Depth=1
- incq %rbx
- cmpq $1536, %rbx # imm = 0x600
- jne .LBB2_1
-# BB#4: # %polly.loop_preheader3.preheader
- movl $C, %edi
+ callq init_array
+ leaq C(%rip), %rdi
+ xorl %eax, %eax
+ movq %rax, -48(%rbp) # 8-byte Spill
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
- callq memset
- xorl %esi, %esi
- movl $C+16, %eax
- movq %rax, -88(%rbp) # 8-byte Spill
- .align 16, 0x90
-.LBB2_5: # %polly.loop_preheader17
- # =>This Loop Header: Depth=1
- # Child Loop BB2_15 Depth 2
- # Child Loop BB2_8 Depth 3
- # Child Loop BB2_11 Depth 4
- # Child Loop BB2_17 Depth 5
- # Child Loop BB2_18 Depth 6
- movq %rsi, -56(%rbp) # 8-byte Spill
- movq %rsi, %rax
- orq $63, %rax
+ callq memset@PLT
+ movl $64, %eax
+ movq %rax, -80(%rbp) # 8-byte Spill
+ leaq A(%rip), %rax
movq %rax, -72(%rbp) # 8-byte Spill
- leaq -1(%rax), %rax
- movq %rax, -48(%rbp) # 8-byte Spill
- xorl %edx, %edx
- .align 16, 0x90
-.LBB2_15: # %polly.loop_preheader24
- # Parent Loop BB2_5 Depth=1
+ .p2align 4, 0x90
+.LBB2_1: # %polly.loop_header8
+ # =>This Loop Header: Depth=1
+ # Child Loop BB2_2 Depth 2
+ # Child Loop BB2_3 Depth 3
+ # Child Loop BB2_4 Depth 4
+ # Child Loop BB2_5 Depth 5
+ leaq B+192(%rip), %r9
+ xorl %edi, %edi
+ xorl %eax, %eax
+ .p2align 4, 0x90
+.LBB2_2: # %polly.loop_header14
+ # Parent Loop BB2_1 Depth=1
# => This Loop Header: Depth=2
- # Child Loop BB2_8 Depth 3
- # Child Loop BB2_11 Depth 4
- # Child Loop BB2_17 Depth 5
- # Child Loop BB2_18 Depth 6
- movq %rdx, -80(%rbp) # 8-byte Spill
- leaq -4(%rdx), %rcx
- movq %rdx, %rax
- decq %rax
- cmovsq %rcx, %rax
- movq %rax, %r15
- sarq $63, %r15
- shrq $62, %r15
- addq %rax, %r15
- andq $-4, %r15
- movq %rdx, %r13
- orq $63, %r13
- leaq -4(%r13), %rdx
- xorl %r10d, %r10d
- movq -88(%rbp), %rax # 8-byte Reload
- leaq (%rax,%r15,4), %rax
- movq %rax, -64(%rbp) # 8-byte Spill
- leaq B+16(,%r15,4), %rbx
- leaq 4(%r15), %r12
- .align 16, 0x90
-.LBB2_8: # %polly.loop_header23
- # Parent Loop BB2_5 Depth=1
- # Parent Loop BB2_15 Depth=2
+ # Child Loop BB2_3 Depth 3
+ # Child Loop BB2_4 Depth 4
+ # Child Loop BB2_5 Depth 5
+ movq %rax, -168(%rbp) # 8-byte Spill
+ movq %rdi, -176(%rbp) # 8-byte Spill
+ shlq $6, %rdi
+ leaq 16(%rdi), %rdx
+ leaq 32(%rdi), %rsi
+ leaq 48(%rdi), %rcx
+ movq -72(%rbp), %r12 # 8-byte Reload
+ movq %r9, -184(%rbp) # 8-byte Spill
+ xorl %eax, %eax
+ .p2align 4, 0x90
+.LBB2_3: # %polly.loop_header20
+ # Parent Loop BB2_1 Depth=1
+ # Parent Loop BB2_2 Depth=2
# => This Loop Header: Depth=3
- # Child Loop BB2_11 Depth 4
- # Child Loop BB2_17 Depth 5
- # Child Loop BB2_18 Depth 6
- cmpq -72(%rbp), %rsi # 8-byte Folded Reload
- jg .LBB2_13
-# BB#9: # %polly.loop_header30.preheader
- # in Loop: Header=BB2_8 Depth=3
- movq %r10, %rax
- orq $63, %rax
- cmpq %rax, %r10
- jg .LBB2_13
-# BB#10: # in Loop: Header=BB2_8 Depth=3
- decq %rax
- movq -64(%rbp), %r14 # 8-byte Reload
- movq -56(%rbp), %r11 # 8-byte Reload
- .align 16, 0x90
-.LBB2_11: # %polly.loop_header37.preheader
- # Parent Loop BB2_5 Depth=1
- # Parent Loop BB2_15 Depth=2
- # Parent Loop BB2_8 Depth=3
+ # Child Loop BB2_4 Depth 4
+ # Child Loop BB2_5 Depth 5
+ movq %rax, -192(%rbp) # 8-byte Spill
+ movq %r12, -200(%rbp) # 8-byte Spill
+ movq -48(%rbp), %r14 # 8-byte Reload
+ .p2align 4, 0x90
+.LBB2_4: # %polly.loop_header26
+ # Parent Loop BB2_1 Depth=1
+ # Parent Loop BB2_2 Depth=2
+ # Parent Loop BB2_3 Depth=3
# => This Loop Header: Depth=4
- # Child Loop BB2_17 Depth 5
- # Child Loop BB2_18 Depth 6
- cmpq %r13, %r12
- movq %rbx, %r8
- movq %r10, %rsi
- jg .LBB2_12
- .align 16, 0x90
-.LBB2_17: # %polly.loop_header46.preheader
- # Parent Loop BB2_5 Depth=1
- # Parent Loop BB2_15 Depth=2
- # Parent Loop BB2_8 Depth=3
- # Parent Loop BB2_11 Depth=4
- # => This Loop Header: Depth=5
- # Child Loop BB2_18 Depth 6
- leaq (%r11,%r11,2), %rcx
- shlq $11, %rcx
- vbroadcastss A(%rcx,%rsi,4), %xmm0
- movq %r14, %rdi
- movq %r8, %r9
- movq %r15, %rcx
-.LBB2_18: # %polly.loop_header46
- # Parent Loop BB2_5 Depth=1
- # Parent Loop BB2_15 Depth=2
- # Parent Loop BB2_8 Depth=3
- # Parent Loop BB2_11 Depth=4
- # Parent Loop BB2_17 Depth=5
- # => This Inner Loop Header: Depth=6
- vmulps (%r9), %xmm0, %xmm1
- vaddps (%rdi), %xmm1, %xmm1
- vmovaps %xmm1, (%rdi)
- addq $16, %rdi
- addq $16, %r9
- addq $4, %rcx
- cmpq %rdx, %rcx
- jle .LBB2_18
-# BB#16: # %polly.loop_exit48
- # in Loop: Header=BB2_17 Depth=5
- addq $6144, %r8 # imm = 0x1800
- cmpq %rax, %rsi
- leaq 1(%rsi), %rsi
- jle .LBB2_17
- .align 16, 0x90
-.LBB2_12: # %polly.loop_exit39
- # in Loop: Header=BB2_11 Depth=4
- addq $6144, %r14 # imm = 0x1800
- cmpq -48(%rbp), %r11 # 8-byte Folded Reload
- leaq 1(%r11), %r11
- jle .LBB2_11
- .align 16, 0x90
-.LBB2_13: # %polly.loop_exit32
- # in Loop: Header=BB2_8 Depth=3
- addq $393216, %rbx # imm = 0x60000
- cmpq $1472, %r10 # imm = 0x5C0
- leaq 64(%r10), %r10
- movq -56(%rbp), %rsi # 8-byte Reload
- jl .LBB2_8
-# BB#14: # %polly.loop_exit25
- # in Loop: Header=BB2_15 Depth=2
- movq -80(%rbp), %rdx # 8-byte Reload
- cmpq $1472, %rdx # imm = 0x5C0
- leaq 64(%rdx), %rdx
- jl .LBB2_15
-# BB#6: # %polly.loop_exit18
- # in Loop: Header=BB2_5 Depth=1
- addq $393216, -88(%rbp) # 8-byte Folded Spill
+ # Child Loop BB2_5 Depth 5
+ leaq (%r14,%r14,2), %rbx
+ shlq $11, %rbx
+ leaq C(%rip), %rax
+ addq %rax, %rbx
+ leaq (%rbx,%rdi,4), %r8
+ leaq (%rbx,%rdx,4), %r15
+ leaq (%rbx,%rsi,4), %r10
+ leaq (%rbx,%rcx,4), %r11
+ movups (%rbx,%rdi,4), %xmm8
+ movups 16(%rbx,%rdi,4), %xmm0
+ movaps %xmm0, -144(%rbp) # 16-byte Spill
+ movups 32(%rbx,%rdi,4), %xmm6
+ movups 48(%rbx,%rdi,4), %xmm1
+ movups (%rbx,%rdx,4), %xmm15
+ movups 16(%rbx,%rdx,4), %xmm0
+ movaps %xmm0, -64(%rbp) # 16-byte Spill
+ movups 32(%rbx,%rdx,4), %xmm0
+ movaps %xmm0, -96(%rbp) # 16-byte Spill
+ movups 48(%rbx,%rdx,4), %xmm0
+ movaps %xmm0, -112(%rbp) # 16-byte Spill
+ movups (%rbx,%rsi,4), %xmm11
+ movups 16(%rbx,%rsi,4), %xmm0
+ movaps %xmm0, -160(%rbp) # 16-byte Spill
+ movups 32(%rbx,%rsi,4), %xmm12
+ movups 48(%rbx,%rsi,4), %xmm0
+ movaps %xmm0, -128(%rbp) # 16-byte Spill
+ movups (%rbx,%rcx,4), %xmm9
+ movups 16(%rbx,%rcx,4), %xmm13
+ movups 32(%rbx,%rcx,4), %xmm2
+ movups 48(%rbx,%rcx,4), %xmm3
+ movq %r9, %rbx
+ movl $0, %r13d
+ .p2align 4, 0x90
+.LBB2_5: # %vector.ph
+ # Parent Loop BB2_1 Depth=1
+ # Parent Loop BB2_2 Depth=2
+ # Parent Loop BB2_3 Depth=3
+ # Parent Loop BB2_4 Depth=4
+ # => This Inner Loop Header: Depth=5
+ movaps %xmm12, -240(%rbp) # 16-byte Spill
+ movaps %xmm2, -256(%rbp) # 16-byte Spill
+ movaps %xmm3, -272(%rbp) # 16-byte Spill
+ movaps %xmm8, %xmm10
+ movaps -144(%rbp), %xmm7 # 16-byte Reload
+ unpcklps %xmm7, %xmm10 # xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
+ movaps %xmm1, %xmm4
+ shufps $0, %xmm6, %xmm4 # xmm4 = xmm4[0,0],xmm6[0,0]
+ shufps $36, %xmm4, %xmm10 # xmm10 = xmm10[0,1],xmm4[2,0]
+ movaps %xmm7, %xmm5
+ shufps $17, %xmm8, %xmm5 # xmm5 = xmm5[1,0],xmm8[1,0]
+ movaps %xmm6, %xmm4
+ unpcklps %xmm1, %xmm4 # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+ shufps $226, %xmm4, %xmm5 # xmm5 = xmm5[2,0],xmm4[2,3]
+ movaps %xmm8, %xmm12
+ unpckhps %xmm7, %xmm12 # xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
+ movaps %xmm1, %xmm4
+ shufps $34, %xmm6, %xmm4 # xmm4 = xmm4[2,0],xmm6[2,0]
+ shufps $36, %xmm4, %xmm12 # xmm12 = xmm12[0,1],xmm4[2,0]
+ shufps $51, %xmm8, %xmm7 # xmm7 = xmm7[3,0],xmm8[3,0]
+ unpckhps %xmm1, %xmm6 # xmm6 = xmm6[2],xmm1[2],xmm6[3],xmm1[3]
+ shufps $226, %xmm6, %xmm7 # xmm7 = xmm7[2,0],xmm6[2,3]
+ movaps -160(%rbx), %xmm0
+ movaps -144(%rbx), %xmm1
+ movaps %xmm1, %xmm6
+ shufps $0, %xmm0, %xmm6 # xmm6 = xmm6[0,0],xmm0[0,0]
+ movaps -192(%rbx), %xmm3
+ movaps -176(%rbx), %xmm4
+ movaps %xmm3, %xmm8
+ unpcklps %xmm4, %xmm8 # xmm8 = xmm8[0],xmm4[0],xmm8[1],xmm4[1]
+ shufps $36, %xmm6, %xmm8 # xmm8 = xmm8[0,1],xmm6[2,0]
+ movaps %xmm0, %xmm2
+ unpcklps %xmm1, %xmm2 # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+ movaps %xmm4, %xmm6
+ shufps $17, %xmm3, %xmm6 # xmm6 = xmm6[1,0],xmm3[1,0]
+ shufps $226, %xmm2, %xmm6 # xmm6 = xmm6[2,0],xmm2[2,3]
+ movaps %xmm1, %xmm2
+ shufps $34, %xmm0, %xmm2 # xmm2 = xmm2[2,0],xmm0[2,0]
+ movaps %xmm3, %xmm14
+ unpckhps %xmm4, %xmm14 # xmm14 = xmm14[2],xmm4[2],xmm14[3],xmm4[3]
+ shufps $36, %xmm2, %xmm14 # xmm14 = xmm14[0,1],xmm2[2,0]
+ unpckhps %xmm1, %xmm0 # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+ shufps $51, %xmm3, %xmm4 # xmm4 = xmm4[3,0],xmm3[3,0]
+ shufps $226, %xmm0, %xmm4 # xmm4 = xmm4[2,0],xmm0[2,3]
+ movss (%r12,%r13,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
+ shufps $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
+ mulps %xmm0, %xmm8
+ addps %xmm10, %xmm8
+ mulps %xmm0, %xmm6
+ addps %xmm5, %xmm6
+ mulps %xmm0, %xmm14
+ addps %xmm12, %xmm14
+ mulps %xmm0, %xmm4
+ movaps %xmm0, %xmm5
+ addps %xmm7, %xmm4
+ movaps %xmm14, %xmm0
+ unpckhps %xmm4, %xmm0 # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+ movaps %xmm6, %xmm1
+ shufps $51, %xmm8, %xmm1 # xmm1 = xmm1[3,0],xmm8[3,0]
+ shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
+ movaps %xmm1, -304(%rbp) # 16-byte Spill
+ movaps %xmm4, %xmm0
+ shufps $34, %xmm14, %xmm0 # xmm0 = xmm0[2,0],xmm14[2,0]
+ movaps %xmm8, %xmm1
+ unpckhps %xmm6, %xmm1 # xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
+ shufps $36, %xmm0, %xmm1 # xmm1 = xmm1[0,1],xmm0[2,0]
+ movaps %xmm1, -288(%rbp) # 16-byte Spill
+ movaps %xmm14, %xmm0
+ unpcklps %xmm4, %xmm0 # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+ movaps %xmm6, %xmm1
+ shufps $17, %xmm8, %xmm1 # xmm1 = xmm1[1,0],xmm8[1,0]
+ shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
+ movaps %xmm1, -144(%rbp) # 16-byte Spill
+ shufps $0, %xmm14, %xmm4 # xmm4 = xmm4[0,0],xmm14[0,0]
+ unpcklps %xmm6, %xmm8 # xmm8 = xmm8[0],xmm6[0],xmm8[1],xmm6[1]
+ shufps $36, %xmm4, %xmm8 # xmm8 = xmm8[0,1],xmm4[2,0]
+ movaps %xmm15, %xmm14
+ movaps -64(%rbp), %xmm4 # 16-byte Reload
+ unpcklps %xmm4, %xmm14 # xmm14 = xmm14[0],xmm4[0],xmm14[1],xmm4[1]
+ movaps -112(%rbp), %xmm1 # 16-byte Reload
+ movaps %xmm1, %xmm0
+ movaps -96(%rbp), %xmm3 # 16-byte Reload
+ shufps $0, %xmm3, %xmm0 # xmm0 = xmm0[0,0],xmm3[0,0]
+ shufps $36, %xmm0, %xmm14 # xmm14 = xmm14[0,1],xmm0[2,0]
+ movaps %xmm4, %xmm12
+ shufps $17, %xmm15, %xmm12 # xmm12 = xmm12[1,0],xmm15[1,0]
+ movaps %xmm3, %xmm2
+ unpcklps %xmm1, %xmm2 # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+ shufps $226, %xmm2, %xmm12 # xmm12 = xmm12[2,0],xmm2[2,3]
+ movaps %xmm15, %xmm7
+ unpckhps %xmm4, %xmm7 # xmm7 = xmm7[2],xmm4[2],xmm7[3],xmm4[3]
+ movaps %xmm1, %xmm2
+ shufps $34, %xmm3, %xmm2 # xmm2 = xmm2[2,0],xmm3[2,0]
+ shufps $36, %xmm2, %xmm7 # xmm7 = xmm7[0,1],xmm2[2,0]
+ shufps $51, %xmm15, %xmm4 # xmm4 = xmm4[3,0],xmm15[3,0]
+ unpckhps %xmm1, %xmm3 # xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+ shufps $226, %xmm3, %xmm4 # xmm4 = xmm4[2,0],xmm3[2,3]
+ movaps %xmm4, -64(%rbp) # 16-byte Spill
+ movaps -96(%rbx), %xmm2
+ movaps -80(%rbx), %xmm1
+ movaps %xmm1, %xmm4
+ shufps $0, %xmm2, %xmm4 # xmm4 = xmm4[0,0],xmm2[0,0]
+ movaps -112(%rbx), %xmm10
+ movaps -128(%rbx), %xmm0
+ movaps %xmm0, %xmm15
+ unpcklps %xmm10, %xmm15 # xmm15 = xmm15[0],xmm10[0],xmm15[1],xmm10[1]
+ shufps $36, %xmm4, %xmm15 # xmm15 = xmm15[0,1],xmm4[2,0]
+ movaps %xmm2, %xmm4
+ unpcklps %xmm1, %xmm4 # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+ movaps %xmm10, %xmm6
+ shufps $17, %xmm0, %xmm6 # xmm6 = xmm6[1,0],xmm0[1,0]
+ shufps $226, %xmm4, %xmm6 # xmm6 = xmm6[2,0],xmm4[2,3]
+ movaps %xmm1, %xmm3
+ shufps $34, %xmm2, %xmm3 # xmm3 = xmm3[2,0],xmm2[2,0]
+ movaps %xmm0, %xmm4
+ unpckhps %xmm10, %xmm4 # xmm4 = xmm4[2],xmm10[2],xmm4[3],xmm10[3]
+ shufps $36, %xmm3, %xmm4 # xmm4 = xmm4[0,1],xmm3[2,0]
+ unpckhps %xmm1, %xmm2 # xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+ shufps $51, %xmm0, %xmm10 # xmm10 = xmm10[3,0],xmm0[3,0]
+ shufps $226, %xmm2, %xmm10 # xmm10 = xmm10[2,0],xmm2[2,3]
+ movaps %xmm5, -224(%rbp) # 16-byte Spill
+ mulps %xmm5, %xmm15
+ addps %xmm14, %xmm15
+ mulps %xmm5, %xmm6
+ addps %xmm12, %xmm6
+ mulps %xmm5, %xmm4
+ addps %xmm7, %xmm4
+ mulps %xmm5, %xmm10
+ addps -64(%rbp), %xmm10 # 16-byte Folded Reload
+ movaps %xmm4, %xmm0
+ unpckhps %xmm10, %xmm0 # xmm0 = xmm0[2],xmm10[2],xmm0[3],xmm10[3]
+ movaps %xmm6, %xmm1
+ shufps $51, %xmm15, %xmm1 # xmm1 = xmm1[3,0],xmm15[3,0]
+ shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
+ movaps %xmm1, -112(%rbp) # 16-byte Spill
+ movaps %xmm10, %xmm0
+ shufps $34, %xmm4, %xmm0 # xmm0 = xmm0[2,0],xmm4[2,0]
+ movaps %xmm15, %xmm1
+ unpckhps %xmm6, %xmm1 # xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
+ shufps $36, %xmm0, %xmm1 # xmm1 = xmm1[0,1],xmm0[2,0]
+ movaps %xmm1, -96(%rbp) # 16-byte Spill
+ movaps %xmm4, %xmm0
+ unpcklps %xmm10, %xmm0 # xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1]
+ movaps %xmm6, %xmm1
+ shufps $17, %xmm15, %xmm1 # xmm1 = xmm1[1,0],xmm15[1,0]
+ shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
+ movaps %xmm1, -64(%rbp) # 16-byte Spill
+ shufps $0, %xmm4, %xmm10 # xmm10 = xmm10[0,0],xmm4[0,0]
+ unpcklps %xmm6, %xmm15 # xmm15 = xmm15[0],xmm6[0],xmm15[1],xmm6[1]
+ shufps $36, %xmm10, %xmm15 # xmm15 = xmm15[0,1],xmm10[2,0]
+ movaps %xmm11, %xmm10
+ movaps -160(%rbp), %xmm14 # 16-byte Reload
+ unpcklps %xmm14, %xmm10 # xmm10 = xmm10[0],xmm14[0],xmm10[1],xmm14[1]
+ movaps -128(%rbp), %xmm2 # 16-byte Reload
+ movaps %xmm2, %xmm0
+ movaps -240(%rbp), %xmm3 # 16-byte Reload
+ shufps $0, %xmm3, %xmm0 # xmm0 = xmm0[0,0],xmm3[0,0]
+ shufps $36, %xmm0, %xmm10 # xmm10 = xmm10[0,1],xmm0[2,0]
+ movaps %xmm14, %xmm12
+ shufps $17, %xmm11, %xmm12 # xmm12 = xmm12[1,0],xmm11[1,0]
+ movaps %xmm3, %xmm0
+ unpcklps %xmm2, %xmm0 # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+ shufps $226, %xmm0, %xmm12 # xmm12 = xmm12[2,0],xmm0[2,3]
+ movaps %xmm11, %xmm0
+ unpckhps %xmm14, %xmm0 # xmm0 = xmm0[2],xmm14[2],xmm0[3],xmm14[3]
+ movaps %xmm2, %xmm1
+ shufps $34, %xmm3, %xmm1 # xmm1 = xmm1[2,0],xmm3[2,0]
+ shufps $36, %xmm1, %xmm0 # xmm0 = xmm0[0,1],xmm1[2,0]
+ shufps $51, %xmm11, %xmm14 # xmm14 = xmm14[3,0],xmm11[3,0]
+ unpckhps %xmm2, %xmm3 # xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+ shufps $226, %xmm3, %xmm14 # xmm14 = xmm14[2,0],xmm3[2,3]
+ movaps -32(%rbx), %xmm1
+ movaps -16(%rbx), %xmm2
+ movaps %xmm2, %xmm3
+ shufps $0, %xmm1, %xmm3 # xmm3 = xmm3[0,0],xmm1[0,0]
+ movaps -48(%rbx), %xmm4
+ movaps -64(%rbx), %xmm5
+ movaps %xmm5, %xmm11
+ unpcklps %xmm4, %xmm11 # xmm11 = xmm11[0],xmm4[0],xmm11[1],xmm4[1]
+ shufps $36, %xmm3, %xmm11 # xmm11 = xmm11[0,1],xmm3[2,0]
+ movaps %xmm1, %xmm3
+ unpcklps %xmm2, %xmm3 # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+ movaps %xmm4, %xmm7
+ shufps $17, %xmm5, %xmm7 # xmm7 = xmm7[1,0],xmm5[1,0]
+ shufps $226, %xmm3, %xmm7 # xmm7 = xmm7[2,0],xmm3[2,3]
+ movaps %xmm2, %xmm3
+ shufps $34, %xmm1, %xmm3 # xmm3 = xmm3[2,0],xmm1[2,0]
+ movaps %xmm5, %xmm6
+ unpckhps %xmm4, %xmm6 # xmm6 = xmm6[2],xmm4[2],xmm6[3],xmm4[3]
+ shufps $36, %xmm3, %xmm6 # xmm6 = xmm6[0,1],xmm3[2,0]
+ unpckhps %xmm2, %xmm1 # xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+ shufps $51, %xmm5, %xmm4 # xmm4 = xmm4[3,0],xmm5[3,0]
+ shufps $226, %xmm1, %xmm4 # xmm4 = xmm4[2,0],xmm1[2,3]
+ movaps -224(%rbp), %xmm1 # 16-byte Reload
+ mulps %xmm1, %xmm11
+ addps %xmm10, %xmm11
+ mulps %xmm1, %xmm7
+ addps %xmm12, %xmm7
+ mulps %xmm1, %xmm6
+ addps %xmm0, %xmm6
+ mulps %xmm1, %xmm4
+ addps %xmm14, %xmm4
+ movaps %xmm6, %xmm0
+ unpckhps %xmm4, %xmm0 # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+ movaps %xmm7, %xmm1
+ shufps $51, %xmm11, %xmm1 # xmm1 = xmm1[3,0],xmm11[3,0]
+ shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
+ movaps %xmm1, -128(%rbp) # 16-byte Spill
+ movaps %xmm4, %xmm0
+ shufps $34, %xmm6, %xmm0 # xmm0 = xmm0[2,0],xmm6[2,0]
+ movaps %xmm11, %xmm12
+ unpckhps %xmm7, %xmm12 # xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
+ shufps $36, %xmm0, %xmm12 # xmm12 = xmm12[0,1],xmm0[2,0]
+ movaps %xmm6, %xmm0
+ unpcklps %xmm4, %xmm0 # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+ movaps %xmm7, %xmm1
+ shufps $17, %xmm11, %xmm1 # xmm1 = xmm1[1,0],xmm11[1,0]
+ shufps $226, %xmm0, %xmm1 # xmm1 = xmm1[2,0],xmm0[2,3]
+ movaps %xmm1, -160(%rbp) # 16-byte Spill
+ shufps $0, %xmm6, %xmm4 # xmm4 = xmm4[0,0],xmm6[0,0]
+ unpcklps %xmm7, %xmm11 # xmm11 = xmm11[0],xmm7[0],xmm11[1],xmm7[1]
+ shufps $36, %xmm4, %xmm11 # xmm11 = xmm11[0,1],xmm4[2,0]
+ movaps %xmm9, %xmm10
+ unpcklps %xmm13, %xmm10 # xmm10 = xmm10[0],xmm13[0],xmm10[1],xmm13[1]
+ movaps -272(%rbp), %xmm2 # 16-byte Reload
+ movaps %xmm2, %xmm0
+ movaps -256(%rbp), %xmm3 # 16-byte Reload
+ shufps $0, %xmm3, %xmm0 # xmm0 = xmm0[0,0],xmm3[0,0]
+ shufps $36, %xmm0, %xmm10 # xmm10 = xmm10[0,1],xmm0[2,0]
+ movaps %xmm13, %xmm14
+ shufps $17, %xmm9, %xmm14 # xmm14 = xmm14[1,0],xmm9[1,0]
+ movaps %xmm3, %xmm0
+ unpcklps %xmm2, %xmm0 # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+ shufps $226, %xmm0, %xmm14 # xmm14 = xmm14[2,0],xmm0[2,3]
+ movaps %xmm9, %xmm0
+ unpckhps %xmm13, %xmm0 # xmm0 = xmm0[2],xmm13[2],xmm0[3],xmm13[3]
+ movaps %xmm2, %xmm1
+ shufps $34, %xmm3, %xmm1 # xmm1 = xmm1[2,0],xmm3[2,0]
+ shufps $36, %xmm1, %xmm0 # xmm0 = xmm0[0,1],xmm1[2,0]
+ shufps $51, %xmm9, %xmm13 # xmm13 = xmm13[3,0],xmm9[3,0]
+ unpckhps %xmm2, %xmm3 # xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+ shufps $226, %xmm3, %xmm13 # xmm13 = xmm13[2,0],xmm3[2,3]
+ movaps 32(%rbx), %xmm1
+ movaps 48(%rbx), %xmm2
+ movaps %xmm2, %xmm3
+ shufps $0, %xmm1, %xmm3 # xmm3 = xmm3[0,0],xmm1[0,0]
+ movaps 16(%rbx), %xmm4
+ movaps (%rbx), %xmm5
+ movaps %xmm5, %xmm9
+ unpcklps %xmm4, %xmm9 # xmm9 = xmm9[0],xmm4[0],xmm9[1],xmm4[1]
+ shufps $36, %xmm3, %xmm9 # xmm9 = xmm9[0,1],xmm3[2,0]
+ movaps %xmm1, %xmm3
+ unpcklps %xmm2, %xmm3 # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+ movaps %xmm4, %xmm7
+ shufps $17, %xmm5, %xmm7 # xmm7 = xmm7[1,0],xmm5[1,0]
+ shufps $226, %xmm3, %xmm7 # xmm7 = xmm7[2,0],xmm3[2,3]
+ movaps %xmm2, %xmm3
+ shufps $34, %xmm1, %xmm3 # xmm3 = xmm3[2,0],xmm1[2,0]
+ movaps %xmm5, %xmm6
+ unpckhps %xmm4, %xmm6 # xmm6 = xmm6[2],xmm4[2],xmm6[3],xmm4[3]
+ shufps $36, %xmm3, %xmm6 # xmm6 = xmm6[0,1],xmm3[2,0]
+ unpckhps %xmm2, %xmm1 # xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+ shufps $51, %xmm5, %xmm4 # xmm4 = xmm4[3,0],xmm5[3,0]
+ shufps $226, %xmm1, %xmm4 # xmm4 = xmm4[2,0],xmm1[2,3]
+ movaps -224(%rbp), %xmm1 # 16-byte Reload
+ mulps %xmm1, %xmm9
+ addps %xmm10, %xmm9
+ mulps %xmm1, %xmm7
+ addps %xmm14, %xmm7
+ mulps %xmm1, %xmm6
+ addps %xmm0, %xmm6
+ mulps %xmm1, %xmm4
+ addps %xmm13, %xmm4
+ movaps %xmm6, %xmm0
+ unpckhps %xmm4, %xmm0 # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+ movaps %xmm7, %xmm3
+ shufps $51, %xmm9, %xmm3 # xmm3 = xmm3[3,0],xmm9[3,0]
+ shufps $226, %xmm0, %xmm3 # xmm3 = xmm3[2,0],xmm0[2,3]
+ movaps %xmm4, %xmm0
+ shufps $34, %xmm6, %xmm0 # xmm0 = xmm0[2,0],xmm6[2,0]
+ movaps %xmm9, %xmm2
+ unpckhps %xmm7, %xmm2 # xmm2 = xmm2[2],xmm7[2],xmm2[3],xmm7[3]
+ shufps $36, %xmm0, %xmm2 # xmm2 = xmm2[0,1],xmm0[2,0]
+ movaps %xmm6, %xmm0
+ unpcklps %xmm4, %xmm0 # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+ movaps %xmm7, %xmm13
+ shufps $17, %xmm9, %xmm13 # xmm13 = xmm13[1,0],xmm9[1,0]
+ shufps $226, %xmm0, %xmm13 # xmm13 = xmm13[2,0],xmm0[2,3]
+ shufps $0, %xmm6, %xmm4 # xmm4 = xmm4[0,0],xmm6[0,0]
+ movaps -288(%rbp), %xmm6 # 16-byte Reload
+ movaps -304(%rbp), %xmm1 # 16-byte Reload
+ unpcklps %xmm7, %xmm9 # xmm9 = xmm9[0],xmm7[0],xmm9[1],xmm7[1]
+ shufps $36, %xmm4, %xmm9 # xmm9 = xmm9[0,1],xmm4[2,0]
+ addq $1, %r13
+ addq $6144, %rbx # imm = 0x1800
+ cmpq $64, %r13
+ jne .LBB2_5
+# %bb.6: # %polly.loop_exit34
+ # in Loop: Header=BB2_4 Depth=4
+ movups %xmm8, (%r8)
+ movaps -144(%rbp), %xmm0 # 16-byte Reload
+ movups %xmm0, 16(%r8)
+ movups %xmm6, 32(%r8)
+ movups %xmm1, 48(%r8)
+ movaps -112(%rbp), %xmm0 # 16-byte Reload
+ movups %xmm0, 48(%r15)
+ movaps -96(%rbp), %xmm0 # 16-byte Reload
+ movups %xmm0, 32(%r15)
+ movaps -64(%rbp), %xmm0 # 16-byte Reload
+ movups %xmm0, 16(%r15)
+ movups %xmm15, (%r15)
+ movaps -128(%rbp), %xmm0 # 16-byte Reload
+ movups %xmm0, 48(%r10)
+ movaps -160(%rbp), %xmm0 # 16-byte Reload
+ movups %xmm0, 16(%r10)
+ movups %xmm11, (%r10)
+ movups %xmm12, 32(%r10)
+ movups %xmm3, 48(%r11)
+ movups %xmm13, 16(%r11)
+ movups %xmm9, (%r11)
+ movups %xmm2, 32(%r11)
+ addq $1, %r14
+ addq $6144, %r12 # imm = 0x1800
+ cmpq -80(%rbp), %r14 # 8-byte Folded Reload
+ jne .LBB2_4
+# %bb.7: # %polly.loop_exit28
+ # in Loop: Header=BB2_3 Depth=3
+ movq -192(%rbp), %rax # 8-byte Reload
+ addq $64, %rax
+ addq $393216, %r9 # imm = 0x60000
+ movq -200(%rbp), %r12 # 8-byte Reload
+ addq $256, %r12 # imm = 0x100
+ cmpq $1536, %rax # imm = 0x600
+ jb .LBB2_3
+# %bb.8: # %polly.loop_exit22
+ # in Loop: Header=BB2_2 Depth=2
+ movq -168(%rbp), %rax # 8-byte Reload
+ addq $64, %rax
+ movq -176(%rbp), %rdi # 8-byte Reload
+ addq $1, %rdi
+ movq -184(%rbp), %r9 # 8-byte Reload
+ addq $256, %r9 # imm = 0x100
+ cmpq $1536, %rax # imm = 0x600
+ jb .LBB2_2
+# %bb.9: # %polly.loop_exit16
+ # in Loop: Header=BB2_1 Depth=1
+ movq -48(%rbp), %rax # 8-byte Reload
+ movq %rax, %rcx
+ addq $64, %rcx
+ addq $64, -80(%rbp) # 8-byte Folded Spill
+ addq $393216, -72(%rbp) # 8-byte Folded Spill
# imm = 0x60000
- cmpq $1472, %rsi # imm = 0x5C0
- leaq 64(%rsi), %rsi
- jl .LBB2_5
-# BB#7: # %polly.loop_exit11
+ movq %rcx, %rax
+ movq %rcx, -48(%rbp) # 8-byte Spill
+ cmpq $1536, %rcx # imm = 0x600
+ jb .LBB2_1
+# %bb.10: # %polly.exiting
xorl %eax, %eax
- addq $56, %rsp
+ addq $264, %rsp # imm = 0x108
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
- ret
-.Ltmp28:
- .size main, .Ltmp28-main
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end2:
+ .size main, .Lfunc_end2-main
.cfi_endproc
-
+ # -- End function
.type A,@object # @A
.comm A,9437184,16
.type B,@object # @B
@@ -387,10 +647,11 @@ main: # @main
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
- .asciz "%lf "
+ .asciz "%lf "
.size .L.str, 5
.type C,@object # @C
.comm C,9437184,16
+ .ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
.section ".note.GNU-stack","",@progbits
diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.exe b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.exe
deleted file mode 100755
index fbd8b128fd8..00000000000
--- a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.exe
+++ /dev/null
Binary files differ
diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.ll b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.ll
index acdd95f3bc4..10ea4c8b55f 100644
--- a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.ll
+++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.ll
Binary files differ
diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.s b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.s
index f7ab7fdd59c..bf25833eec1 100644
--- a/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.s
+++ b/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled.s
@@ -1,379 +1,495 @@
- .file "matmul.polly.interchanged+tiled.ll"
+ .text
+ .file "matmul.c"
.section .rodata.cst8,"aM",@progbits,8
- .align 8
+ .p2align 3 # -- Begin function init_array
.LCPI0_0:
.quad 4602678819172646912 # double 0.5
.text
.globl init_array
- .align 16, 0x90
+ .p2align 4, 0x90
.type init_array,@function
init_array: # @init_array
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp2:
.cfi_def_cfa_offset 16
-.Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp4:
.cfi_def_cfa_register %rbp
+ leaq B(%rip), %rax
+ leaq A(%rip), %rcx
xorl %r8d, %r8d
- vmovsd .LCPI0_0(%rip), %xmm0
- .align 16, 0x90
-.LBB0_1: # %polly.loop_preheader3
+ movsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero
+ xorl %r9d, %r9d
+ .p2align 4, 0x90
+.LBB0_1: # %polly.loop_header
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
- xorl %ecx, %ecx
- .align 16, 0x90
-.LBB0_2: # %polly.loop_header2
+ movl $1, %edi
+ xorl %edx, %edx
+ .p2align 4, 0x90
+.LBB0_2: # %polly.loop_header1
# Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
- movl %ecx, %edx
- imull %r8d, %edx
movl %edx, %esi
- sarl $31, %esi
- shrl $22, %esi
- addl %edx, %esi
- andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
- negl %esi
- movq %r8, %rax
- shlq $11, %rax
- leal 1(%rdx,%rsi), %edi
- leaq (%rax,%rax,2), %rsi
- leaq 1(%rcx), %rdx
- cmpq $1536, %rdx # imm = 0x600
- vcvtsi2sdl %edi, %xmm0, %xmm1
- vmulsd %xmm0, %xmm1, %xmm1
- vcvtsd2ss %xmm1, %xmm1, %xmm1
- vmovss %xmm1, A(%rsi,%rcx,4)
- vmovss %xmm1, B(%rsi,%rcx,4)
- movq %rdx, %rcx
+ andl $1022, %esi # imm = 0x3FE
+ orl $1, %esi
+ xorps %xmm1, %xmm1
+ cvtsi2sdl %esi, %xmm1
+ mulsd %xmm0, %xmm1
+ cvtsd2ss %xmm1, %xmm1
+ movss %xmm1, -4(%rcx,%rdi,4)
+ movss %xmm1, -4(%rax,%rdi,4)
+ leal (%r9,%rdx), %esi
+ andl $1023, %esi # imm = 0x3FF
+ addl $1, %esi
+ xorps %xmm1, %xmm1
+ cvtsi2sdl %esi, %xmm1
+ mulsd %xmm0, %xmm1
+ cvtsd2ss %xmm1, %xmm1
+ movss %xmm1, (%rcx,%rdi,4)
+ movss %xmm1, (%rax,%rdi,4)
+ addq $2, %rdi
+ addl %r8d, %edx
+ cmpq $1537, %rdi # imm = 0x601
jne .LBB0_2
-# BB#3: # %polly.loop_exit4
+# %bb.3: # %polly.loop_exit3
# in Loop: Header=BB0_1 Depth=1
- incq %r8
- cmpq $1536, %r8 # imm = 0x600
+ addq $1, %r9
+ addq $6144, %rax # imm = 0x1800
+ addq $6144, %rcx # imm = 0x1800
+ addl $2, %r8d
+ cmpq $1536, %r9 # imm = 0x600
jne .LBB0_1
-# BB#4: # %polly.loop_exit
+# %bb.4: # %polly.exiting
popq %rbp
- ret
-.Ltmp5:
- .size init_array, .Ltmp5-init_array
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end0:
+ .size init_array, .Lfunc_end0-init_array
.cfi_endproc
-
- .globl print_array
- .align 16, 0x90
+ # -- End function
+ .globl print_array # -- Begin function print_array
+ .p2align 4, 0x90
.type print_array,@function
print_array: # @print_array
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp9:
.cfi_def_cfa_offset 16
-.Ltmp10:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp11:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
+ pushq %r13
pushq %r12
pushq %rbx
-.Ltmp12:
- .cfi_offset %rbx, -48
-.Ltmp13:
- .cfi_offset %r12, -40
-.Ltmp14:
+ pushq %rax
+ .cfi_offset %rbx, -56
+ .cfi_offset %r12, -48
+ .cfi_offset %r13, -40
.cfi_offset %r14, -32
-.Ltmp15:
.cfi_offset %r15, -24
- xorl %r14d, %r14d
- movl $C, %r15d
- .align 16, 0x90
+ leaq C(%rip), %r13
+ xorl %eax, %eax
+ movl $3435973837, %r12d # imm = 0xCCCCCCCD
+ leaq .L.str(%rip), %r14
+ .p2align 4, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
- movq stdout(%rip), %rax
- movq %r15, %r12
+ movq %rax, -48(%rbp) # 8-byte Spill
+ movq stdout(%rip), %rsi
xorl %ebx, %ebx
- .align 16, 0x90
+ .p2align 4, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
- vmovss (%r12), %xmm0
- vcvtss2sd %xmm0, %xmm0, %xmm0
- movq %rax, %rdi
- movl $.L.str, %esi
+ movl %ebx, %eax
+ imulq %r12, %rax
+ shrq $38, %rax
+ leal (%rax,%rax,4), %r15d
+ shll $4, %r15d
+ addl $79, %r15d
+ movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
+ cvtss2sd %xmm0, %xmm0
movb $1, %al
+ movq %rsi, %rdi
+ movq %r14, %rsi
callq fprintf
- movslq %ebx, %rax
- imulq $1717986919, %rax, %rcx # imm = 0x66666667
- movq %rcx, %rdx
- shrq $63, %rdx
- sarq $37, %rcx
- addl %edx, %ecx
- imull $80, %ecx, %ecx
- subl %ecx, %eax
- cmpl $79, %eax
+ cmpl %ebx, %r15d
jne .LBB1_4
-# BB#3: # %if.then
+# %bb.3: # %if.then
# in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
- callq fputc
+ callq fputc@PLT
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
- addq $4, %r12
- incq %rbx
- movq stdout(%rip), %rax
+ addq $1, %rbx
+ movq stdout(%rip), %rsi
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
-# BB#5: # %for.end
+# %bb.5: # %for.end
# in Loop: Header=BB1_1 Depth=1
movl $10, %edi
- movq %rax, %rsi
- callq fputc
- addq $6144, %r15 # imm = 0x1800
- incq %r14
- cmpq $1536, %r14 # imm = 0x600
+ callq fputc@PLT
+ movq -48(%rbp), %rax # 8-byte Reload
+ addq $1, %rax
+ addq $6144, %r13 # imm = 0x1800
+ cmpq $1536, %rax # imm = 0x600
jne .LBB1_1
-# BB#6: # %for.end12
+# %bb.6: # %for.end12
+ addq $8, %rsp
popq %rbx
popq %r12
+ popq %r13
popq %r14
popq %r15
popq %rbp
- ret
-.Ltmp16:
- .size print_array, .Ltmp16-print_array
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end1:
+ .size print_array, .Lfunc_end1-print_array
.cfi_endproc
-
- .section .rodata.cst8,"aM",@progbits,8
- .align 8
-.LCPI2_0:
- .quad 4602678819172646912 # double 0.5
- .text
- .globl main
- .align 16, 0x90
+ # -- End function
+ .globl main # -- Begin function main
+ .p2align 4, 0x90
.type main,@function
main: # @main
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp20:
.cfi_def_cfa_offset 16
-.Ltmp21:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp22:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
- subq $56, %rsp
-.Ltmp23:
+ subq $344, %rsp # imm = 0x158
.cfi_offset %rbx, -56
-.Ltmp24:
.cfi_offset %r12, -48
-.Ltmp25:
.cfi_offset %r13, -40
-.Ltmp26:
.cfi_offset %r14, -32
-.Ltmp27:
.cfi_offset %r15, -24
- xorl %ebx, %ebx
- vmovsd .LCPI2_0(%rip), %xmm0
- .align 16, 0x90
-.LBB2_1: # %polly.loop_preheader3.i
- # =>This Loop Header: Depth=1
- # Child Loop BB2_2 Depth 2
- xorl %ecx, %ecx
- .align 16, 0x90
-.LBB2_2: # %polly.loop_header2.i
- # Parent Loop BB2_1 Depth=1
- # => This Inner Loop Header: Depth=2
- movl %ecx, %edx
- imull %ebx, %edx
- movl %edx, %esi
- sarl $31, %esi
- shrl $22, %esi
- addl %edx, %esi
- andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
- negl %esi
- movq %rbx, %rax
- shlq $11, %rax
- leal 1(%rdx,%rsi), %edi
- leaq (%rax,%rax,2), %rsi
- leaq 1(%rcx), %rdx
- cmpq $1536, %rdx # imm = 0x600
- vcvtsi2sdl %edi, %xmm0, %xmm1
- vmulsd %xmm0, %xmm1, %xmm1
- vcvtsd2ss %xmm1, %xmm1, %xmm1
- vmovss %xmm1, A(%rsi,%rcx,4)
- vmovss %xmm1, B(%rsi,%rcx,4)
- movq %rdx, %rcx
- jne .LBB2_2
-# BB#3: # %polly.loop_exit4.i
- # in Loop: Header=BB2_1 Depth=1
- incq %rbx
- cmpq $1536, %rbx # imm = 0x600
- jne .LBB2_1
-# BB#4: # %polly.loop_preheader3.preheader
- movl $C, %ebx
- movl $C, %edi
+ callq init_array
+ leaq C(%rip), %rdi
+ xorl %eax, %eax
+ movq %rax, -48(%rbp) # 8-byte Spill
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
- callq memset
- xorl %eax, %eax
- .align 16, 0x90
-.LBB2_5: # %polly.loop_preheader17
- # =>This Loop Header: Depth=1
- # Child Loop BB2_15 Depth 2
- # Child Loop BB2_8 Depth 3
- # Child Loop BB2_11 Depth 4
- # Child Loop BB2_17 Depth 5
- # Child Loop BB2_18 Depth 6
+ callq memset@PLT
+ movl $64, %eax
+ movq %rax, -64(%rbp) # 8-byte Spill
+ leaq A(%rip), %rax
movq %rax, -56(%rbp) # 8-byte Spill
- movq %rbx, -88(%rbp) # 8-byte Spill
- movq %rax, %rcx
- orq $63, %rcx
- movq %rcx, -72(%rbp) # 8-byte Spill
- leaq -1(%rcx), %rcx
- movq %rcx, -48(%rbp) # 8-byte Spill
- movq $-1, %r15
- movl $B, %ecx
- movq %rbx, -64(%rbp) # 8-byte Spill
- xorl %r12d, %r12d
- .align 16, 0x90
-.LBB2_15: # %polly.loop_preheader24
- # Parent Loop BB2_5 Depth=1
+ .p2align 4, 0x90
+.LBB2_1: # %polly.loop_header8
+ # =>This Loop Header: Depth=1
+ # Child Loop BB2_2 Depth 2
+ # Child Loop BB2_3 Depth 3
+ # Child Loop BB2_4 Depth 4
+ # Child Loop BB2_5 Depth 5
+ leaq B+240(%rip), %rax
+ xorl %edi, %edi
+ .p2align 4, 0x90
+.LBB2_2: # %polly.loop_header14
+ # Parent Loop BB2_1 Depth=1
# => This Loop Header: Depth=2
- # Child Loop BB2_8 Depth 3
- # Child Loop BB2_11 Depth 4
- # Child Loop BB2_17 Depth 5
- # Child Loop BB2_18 Depth 6
+ # Child Loop BB2_3 Depth 3
+ # Child Loop BB2_4 Depth 4
+ # Child Loop BB2_5 Depth 5
+ movq %rdi, %rcx
+ orq $4, %rcx
movq %rcx, -80(%rbp) # 8-byte Spill
- movq %r12, %r13
- orq $63, %r13
- leaq -1(%r13), %rbx
- xorl %r9d, %r9d
- movq %rcx, %rdx
- .align 16, 0x90
-.LBB2_8: # %polly.loop_header23
- # Parent Loop BB2_5 Depth=1
- # Parent Loop BB2_15 Depth=2
+ movq %rdi, %rcx
+ orq $8, %rcx
+ movq %rcx, -264(%rbp) # 8-byte Spill
+ movq %rdi, %rcx
+ orq $12, %rcx
+ movq %rcx, -256(%rbp) # 8-byte Spill
+ movq %rdi, %rcx
+ orq $16, %rcx
+ movq %rcx, -248(%rbp) # 8-byte Spill
+ movq %rdi, %rcx
+ orq $20, %rcx
+ movq %rcx, -240(%rbp) # 8-byte Spill
+ movq %rdi, %rcx
+ orq $24, %rcx
+ movq %rcx, -232(%rbp) # 8-byte Spill
+ movq %rdi, %rcx
+ orq $28, %rcx
+ movq %rcx, -224(%rbp) # 8-byte Spill
+ movq %rdi, %rcx
+ orq $32, %rcx
+ movq %rcx, -216(%rbp) # 8-byte Spill
+ movq %rdi, %rcx
+ orq $36, %rcx
+ movq %rcx, -208(%rbp) # 8-byte Spill
+ movq %rdi, %rcx
+ orq $40, %rcx
+ movq %rcx, -200(%rbp) # 8-byte Spill
+ movq %rdi, %rcx
+ orq $44, %rcx
+ movq %rcx, -192(%rbp) # 8-byte Spill
+ movq %rdi, %rcx
+ orq $48, %rcx
+ movq %rcx, -184(%rbp) # 8-byte Spill
+ movq %rdi, %rcx
+ orq $52, %rcx
+ movq %rcx, -176(%rbp) # 8-byte Spill
+ movq %rdi, %rcx
+ orq $56, %rcx
+ movq %rcx, -168(%rbp) # 8-byte Spill
+ movq %rdi, %rcx
+ orq $60, %rcx
+ movq %rcx, -160(%rbp) # 8-byte Spill
+ movq -56(%rbp), %rdx # 8-byte Reload
+ movq %rax, -136(%rbp) # 8-byte Spill
+ movq %rax, -72(%rbp) # 8-byte Spill
+ xorl %eax, %eax
+ movq %rdi, -272(%rbp) # 8-byte Spill
+ .p2align 4, 0x90
+.LBB2_3: # %polly.loop_header20
+ # Parent Loop BB2_1 Depth=1
+ # Parent Loop BB2_2 Depth=2
# => This Loop Header: Depth=3
- # Child Loop BB2_11 Depth 4
- # Child Loop BB2_17 Depth 5
- # Child Loop BB2_18 Depth 6
- cmpq -72(%rbp), %rax # 8-byte Folded Reload
- jg .LBB2_13
-# BB#9: # %polly.loop_header30.preheader
- # in Loop: Header=BB2_8 Depth=3
- movq %r9, %rax
- orq $63, %rax
- cmpq %rax, %r9
- jg .LBB2_13
-# BB#10: # in Loop: Header=BB2_8 Depth=3
- decq %rax
- movq -64(%rbp), %r10 # 8-byte Reload
- movq -56(%rbp), %r11 # 8-byte Reload
- .align 16, 0x90
-.LBB2_11: # %polly.loop_header37.preheader
- # Parent Loop BB2_5 Depth=1
- # Parent Loop BB2_15 Depth=2
- # Parent Loop BB2_8 Depth=3
+ # Child Loop BB2_4 Depth 4
+ # Child Loop BB2_5 Depth 5
+ movq %rax, -144(%rbp) # 8-byte Spill
+ movq %rdx, -152(%rbp) # 8-byte Spill
+ movq -48(%rbp), %rax # 8-byte Reload
+ .p2align 4, 0x90
+.LBB2_4: # %polly.loop_header26
+ # Parent Loop BB2_1 Depth=1
+ # Parent Loop BB2_2 Depth=2
+ # Parent Loop BB2_3 Depth=3
# => This Loop Header: Depth=4
- # Child Loop BB2_17 Depth 5
- # Child Loop BB2_18 Depth 6
- cmpq %r13, %r12
- movq %rdx, %r14
- movq %r9, %rcx
- jg .LBB2_12
- .align 16, 0x90
-.LBB2_17: # %polly.loop_header46.preheader
- # Parent Loop BB2_5 Depth=1
- # Parent Loop BB2_15 Depth=2
- # Parent Loop BB2_8 Depth=3
- # Parent Loop BB2_11 Depth=4
- # => This Loop Header: Depth=5
- # Child Loop BB2_18 Depth 6
- leaq (%r11,%r11,2), %rsi
- shlq $11, %rsi
- vmovss A(%rsi,%rcx,4), %xmm0
- movq %r10, %rdi
- movq %r14, %r8
- movq %r15, %rsi
-.LBB2_18: # %polly.loop_header46
- # Parent Loop BB2_5 Depth=1
- # Parent Loop BB2_15 Depth=2
- # Parent Loop BB2_8 Depth=3
- # Parent Loop BB2_11 Depth=4
- # Parent Loop BB2_17 Depth=5
- # => This Inner Loop Header: Depth=6
- vmulss (%r8), %xmm0, %xmm1
- vaddss (%rdi), %xmm1, %xmm1
- vmovss %xmm1, (%rdi)
- addq $4, %rdi
- addq $4, %r8
- incq %rsi
- cmpq %rbx, %rsi
- jle .LBB2_18
-# BB#16: # %polly.loop_exit48
- # in Loop: Header=BB2_17 Depth=5
- addq $6144, %r14 # imm = 0x1800
- cmpq %rax, %rcx
- leaq 1(%rcx), %rcx
- jle .LBB2_17
- .align 16, 0x90
-.LBB2_12: # %polly.loop_exit39
- # in Loop: Header=BB2_11 Depth=4
- addq $6144, %r10 # imm = 0x1800
- cmpq -48(%rbp), %r11 # 8-byte Folded Reload
- leaq 1(%r11), %r11
- jle .LBB2_11
- .align 16, 0x90
-.LBB2_13: # %polly.loop_exit32
- # in Loop: Header=BB2_8 Depth=3
- addq $393216, %rdx # imm = 0x60000
- cmpq $1472, %r9 # imm = 0x5C0
- leaq 64(%r9), %r9
- movq -56(%rbp), %rax # 8-byte Reload
- jl .LBB2_8
-# BB#14: # %polly.loop_exit25
- # in Loop: Header=BB2_15 Depth=2
- addq $256, -64(%rbp) # 8-byte Folded Spill
- # imm = 0x100
+ # Child Loop BB2_5 Depth 5
+ movq %rax, -376(%rbp) # 8-byte Spill
+ leaq (%rax,%rax,2), %rax
+ shlq $11, %rax
+ leaq C(%rip), %rsi
+ addq %rsi, %rax
+ leaq (%rax,%rdi,4), %rcx
+ movq %rcx, -368(%rbp) # 8-byte Spill
+ movq -80(%rbp), %rcx # 8-byte Reload
+ leaq (%rax,%rcx,4), %rcx
+ movq %rcx, -360(%rbp) # 8-byte Spill
+ movq -264(%rbp), %rbx # 8-byte Reload
+ leaq (%rax,%rbx,4), %rcx
+ movq %rcx, -352(%rbp) # 8-byte Spill
+ movq -256(%rbp), %r8 # 8-byte Reload
+ movq %rdi, %rsi
+ leaq (%rax,%r8,4), %rdi
+ movq %rdi, -344(%rbp) # 8-byte Spill
+ movq -248(%rbp), %rdi # 8-byte Reload
+ leaq (%rax,%rdi,4), %rcx
+ movq %rcx, -336(%rbp) # 8-byte Spill
+ movq -240(%rbp), %r9 # 8-byte Reload
+ leaq (%rax,%r9,4), %rcx
+ movq %rcx, -328(%rbp) # 8-byte Spill
+ movq -232(%rbp), %r10 # 8-byte Reload
+ leaq (%rax,%r10,4), %rcx
+ movq %rcx, -320(%rbp) # 8-byte Spill
+ movq -224(%rbp), %r14 # 8-byte Reload
+ leaq (%rax,%r14,4), %rcx
+ movq %rcx, -312(%rbp) # 8-byte Spill
+ movq -216(%rbp), %r15 # 8-byte Reload
+ leaq (%rax,%r15,4), %rcx
+ movq %rcx, -304(%rbp) # 8-byte Spill
+ movq -208(%rbp), %r12 # 8-byte Reload
+ leaq (%rax,%r12,4), %rcx
+ movq %rcx, -296(%rbp) # 8-byte Spill
+ movq -200(%rbp), %r13 # 8-byte Reload
+ leaq (%rax,%r13,4), %rcx
+ movq %rcx, -288(%rbp) # 8-byte Spill
+ movq -192(%rbp), %r11 # 8-byte Reload
+ leaq (%rax,%r11,4), %rcx
+ movq %rcx, -280(%rbp) # 8-byte Spill
+ movaps (%rax,%rsi,4), %xmm15
movq -80(%rbp), %rcx # 8-byte Reload
- addq $256, %rcx # imm = 0x100
- addq $64, %r15
- cmpq $1472, %r12 # imm = 0x5C0
- leaq 64(%r12), %r12
- jl .LBB2_15
-# BB#6: # %polly.loop_exit18
- # in Loop: Header=BB2_5 Depth=1
- movq -88(%rbp), %rbx # 8-byte Reload
- addq $393216, %rbx # imm = 0x60000
- cmpq $1472, %rax # imm = 0x5C0
- leaq 64(%rax), %rax
- jl .LBB2_5
-# BB#7: # %polly.loop_exit11
+ movaps (%rax,%rcx,4), %xmm14
+ movaps (%rax,%rbx,4), %xmm13
+ movaps (%rax,%r8,4), %xmm12
+ movaps (%rax,%rdi,4), %xmm11
+ movaps (%rax,%r9,4), %xmm10
+ movaps (%rax,%r10,4), %xmm9
+ movaps (%rax,%r14,4), %xmm8
+ movaps (%rax,%r15,4), %xmm7
+ movaps (%rax,%r12,4), %xmm6
+ movaps (%rax,%r13,4), %xmm5
+ movaps (%rax,%r11,4), %xmm4
+ movq -184(%rbp), %rcx # 8-byte Reload
+ movaps (%rax,%rcx,4), %xmm3
+ movq -176(%rbp), %rsi # 8-byte Reload
+ movaps (%rax,%rsi,4), %xmm0
+ movaps %xmm0, -96(%rbp) # 16-byte Spill
+ movq -168(%rbp), %rbx # 8-byte Reload
+ movaps (%rax,%rbx,4), %xmm0
+ movaps %xmm0, -112(%rbp) # 16-byte Spill
+ movq -160(%rbp), %rdi # 8-byte Reload
+ movaps (%rax,%rdi,4), %xmm0
+ movaps %xmm0, -128(%rbp) # 16-byte Spill
+ leaq (%rax,%rcx,4), %r8
+ leaq (%rax,%rsi,4), %rcx
+ leaq (%rax,%rbx,4), %rsi
+ leaq (%rax,%rdi,4), %rax
+ movq -72(%rbp), %r9 # 8-byte Reload
+ movl $0, %r10d
+ .p2align 4, 0x90
+.LBB2_5: # %vector.ph
+ # Parent Loop BB2_1 Depth=1
+ # Parent Loop BB2_2 Depth=2
+ # Parent Loop BB2_3 Depth=3
+ # Parent Loop BB2_4 Depth=4
+ # => This Inner Loop Header: Depth=5
+ movss (%rdx,%r10,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
+ shufps $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
+ movaps -240(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ addps %xmm1, %xmm15
+ movaps -224(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ addps %xmm1, %xmm14
+ movaps -208(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ addps %xmm1, %xmm13
+ movaps -192(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ addps %xmm1, %xmm12
+ movaps -176(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ addps %xmm1, %xmm11
+ movaps -160(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ addps %xmm1, %xmm10
+ movaps -144(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ addps %xmm1, %xmm9
+ movaps -128(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ addps %xmm1, %xmm8
+ movaps -112(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ addps %xmm1, %xmm7
+ movaps -96(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ addps %xmm1, %xmm6
+ movaps -80(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ addps %xmm1, %xmm5
+ movaps -64(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ addps %xmm1, %xmm4
+ movaps -48(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ addps %xmm1, %xmm3
+ movaps -32(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ movaps -96(%rbp), %xmm2 # 16-byte Reload
+ addps %xmm1, %xmm2
+ movaps %xmm2, -96(%rbp) # 16-byte Spill
+ movaps -16(%r9), %xmm1
+ mulps %xmm0, %xmm1
+ movaps -112(%rbp), %xmm2 # 16-byte Reload
+ addps %xmm1, %xmm2
+ movaps %xmm2, -112(%rbp) # 16-byte Spill
+ mulps (%r9), %xmm0
+ movaps -128(%rbp), %xmm1 # 16-byte Reload
+ addps %xmm0, %xmm1
+ movaps %xmm1, -128(%rbp) # 16-byte Spill
+ addq $1, %r10
+ addq $6144, %r9 # imm = 0x1800
+ cmpq $64, %r10
+ jne .LBB2_5
+# %bb.6: # %polly.loop_exit34
+ # in Loop: Header=BB2_4 Depth=4
+ movq -368(%rbp), %rdi # 8-byte Reload
+ movaps %xmm15, (%rdi)
+ movq -360(%rbp), %rdi # 8-byte Reload
+ movaps %xmm14, (%rdi)
+ movq -352(%rbp), %rdi # 8-byte Reload
+ movaps %xmm13, (%rdi)
+ movq -344(%rbp), %rdi # 8-byte Reload
+ movaps %xmm12, (%rdi)
+ movq -336(%rbp), %rdi # 8-byte Reload
+ movaps %xmm11, (%rdi)
+ movq -328(%rbp), %rdi # 8-byte Reload
+ movaps %xmm10, (%rdi)
+ movq -320(%rbp), %rdi # 8-byte Reload
+ movaps %xmm9, (%rdi)
+ movq -312(%rbp), %rdi # 8-byte Reload
+ movaps %xmm8, (%rdi)
+ movq -304(%rbp), %rdi # 8-byte Reload
+ movaps %xmm7, (%rdi)
+ movq -296(%rbp), %rdi # 8-byte Reload
+ movaps %xmm6, (%rdi)
+ movq -288(%rbp), %rdi # 8-byte Reload
+ movaps %xmm5, (%rdi)
+ movq -280(%rbp), %rdi # 8-byte Reload
+ movaps %xmm4, (%rdi)
+ movaps %xmm3, (%r8)
+ movaps -96(%rbp), %xmm0 # 16-byte Reload
+ movaps %xmm0, (%rcx)
+ movaps -112(%rbp), %xmm0 # 16-byte Reload
+ movaps %xmm0, (%rsi)
+ movaps -128(%rbp), %xmm0 # 16-byte Reload
+ movaps %xmm0, (%rax)
+ movq -376(%rbp), %rax # 8-byte Reload
+ addq $1, %rax
+ addq $6144, %rdx # imm = 0x1800
+ cmpq -64(%rbp), %rax # 8-byte Folded Reload
+ movq -272(%rbp), %rdi # 8-byte Reload
+ jne .LBB2_4
+# %bb.7: # %polly.loop_exit28
+ # in Loop: Header=BB2_3 Depth=3
+ movq -144(%rbp), %rax # 8-byte Reload
+ addq $64, %rax
+ addq $393216, -72(%rbp) # 8-byte Folded Spill
+ # imm = 0x60000
+ movq -152(%rbp), %rdx # 8-byte Reload
+ addq $256, %rdx # imm = 0x100
+ cmpq $1536, %rax # imm = 0x600
+ jb .LBB2_3
+# %bb.8: # %polly.loop_exit22
+ # in Loop: Header=BB2_2 Depth=2
+ addq $64, %rdi
+ movq -136(%rbp), %rax # 8-byte Reload
+ addq $256, %rax # imm = 0x100
+ cmpq $1536, %rdi # imm = 0x600
+ jb .LBB2_2
+# %bb.9: # %polly.loop_exit16
+ # in Loop: Header=BB2_1 Depth=1
+ movq -48(%rbp), %rax # 8-byte Reload
+ movq %rax, %rcx
+ addq $64, %rcx
+ addq $64, -64(%rbp) # 8-byte Folded Spill
+ addq $393216, -56(%rbp) # 8-byte Folded Spill
+ # imm = 0x60000
+ movq %rcx, %rax
+ movq %rcx, -48(%rbp) # 8-byte Spill
+ cmpq $1536, %rcx # imm = 0x600
+ jb .LBB2_1
+# %bb.10: # %polly.exiting
xorl %eax, %eax
- addq $56, %rsp
+ addq $344, %rsp # imm = 0x158
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
- ret
-.Ltmp28:
- .size main, .Ltmp28-main
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end2:
+ .size main, .Lfunc_end2-main
.cfi_endproc
-
+ # -- End function
.type A,@object # @A
.comm A,9437184,16
.type B,@object # @B
@@ -381,10 +497,11 @@ main: # @main
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
- .asciz "%lf "
+ .asciz "%lf "
.size .L.str, 5
.type C,@object # @C
.comm C,9437184,16
+ .ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
.section ".note.GNU-stack","",@progbits
diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged.exe b/polly/docs/experiments/matmul/matmul.polly.interchanged.exe
deleted file mode 100755
index 240c95a7f79..00000000000
--- a/polly/docs/experiments/matmul/matmul.polly.interchanged.exe
+++ /dev/null
Binary files differ
diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged.ll b/polly/docs/experiments/matmul/matmul.polly.interchanged.ll
index 52fbccc7ed5..f56171df7a0 100644
--- a/polly/docs/experiments/matmul/matmul.polly.interchanged.ll
+++ b/polly/docs/experiments/matmul/matmul.polly.interchanged.ll
Binary files differ
diff --git a/polly/docs/experiments/matmul/matmul.polly.interchanged.s b/polly/docs/experiments/matmul/matmul.polly.interchanged.s
index a764da0b3f2..21770b0a917 100644
--- a/polly/docs/experiments/matmul/matmul.polly.interchanged.s
+++ b/polly/docs/experiments/matmul/matmul.polly.interchanged.s
@@ -1,275 +1,248 @@
- .file "matmul.polly.interchanged.ll"
+ .text
+ .file "matmul.c"
.section .rodata.cst8,"aM",@progbits,8
- .align 8
+ .p2align 3 # -- Begin function init_array
.LCPI0_0:
.quad 4602678819172646912 # double 0.5
.text
.globl init_array
- .align 16, 0x90
+ .p2align 4, 0x90
.type init_array,@function
init_array: # @init_array
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp2:
.cfi_def_cfa_offset 16
-.Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp4:
.cfi_def_cfa_register %rbp
+ leaq B(%rip), %rax
+ leaq A(%rip), %rcx
xorl %r8d, %r8d
- vmovsd .LCPI0_0(%rip), %xmm0
- .align 16, 0x90
-.LBB0_1: # %polly.loop_preheader3
+ movsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero
+ xorl %r9d, %r9d
+ .p2align 4, 0x90
+.LBB0_1: # %polly.loop_header
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
- xorl %ecx, %ecx
- .align 16, 0x90
-.LBB0_2: # %polly.loop_header2
+ movl $1, %edi
+ xorl %edx, %edx
+ .p2align 4, 0x90
+.LBB0_2: # %polly.loop_header1
# Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
- movl %ecx, %edx
- imull %r8d, %edx
movl %edx, %esi
- sarl $31, %esi
- shrl $22, %esi
- addl %edx, %esi
- andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
- negl %esi
- movq %r8, %rax
- shlq $11, %rax
- leal 1(%rdx,%rsi), %edi
- leaq (%rax,%rax,2), %rsi
- leaq 1(%rcx), %rdx
- cmpq $1536, %rdx # imm = 0x600
- vcvtsi2sdl %edi, %xmm0, %xmm1
- vmulsd %xmm0, %xmm1, %xmm1
- vcvtsd2ss %xmm1, %xmm1, %xmm1
- vmovss %xmm1, A(%rsi,%rcx,4)
- vmovss %xmm1, B(%rsi,%rcx,4)
- movq %rdx, %rcx
+ andl $1022, %esi # imm = 0x3FE
+ orl $1, %esi
+ xorps %xmm1, %xmm1
+ cvtsi2sdl %esi, %xmm1
+ mulsd %xmm0, %xmm1
+ cvtsd2ss %xmm1, %xmm1
+ movss %xmm1, -4(%rcx,%rdi,4)
+ movss %xmm1, -4(%rax,%rdi,4)
+ leal (%r9,%rdx), %esi
+ andl $1023, %esi # imm = 0x3FF
+ addl $1, %esi
+ xorps %xmm1, %xmm1
+ cvtsi2sdl %esi, %xmm1
+ mulsd %xmm0, %xmm1
+ cvtsd2ss %xmm1, %xmm1
+ movss %xmm1, (%rcx,%rdi,4)
+ movss %xmm1, (%rax,%rdi,4)
+ addq $2, %rdi
+ addl %r8d, %edx
+ cmpq $1537, %rdi # imm = 0x601
jne .LBB0_2
-# BB#3: # %polly.loop_exit4
+# %bb.3: # %polly.loop_exit3
# in Loop: Header=BB0_1 Depth=1
- incq %r8
- cmpq $1536, %r8 # imm = 0x600
+ addq $1, %r9
+ addq $6144, %rax # imm = 0x1800
+ addq $6144, %rcx # imm = 0x1800
+ addl $2, %r8d
+ cmpq $1536, %r9 # imm = 0x600
jne .LBB0_1
-# BB#4: # %polly.loop_exit
+# %bb.4: # %polly.exiting
popq %rbp
- ret
-.Ltmp5:
- .size init_array, .Ltmp5-init_array
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end0:
+ .size init_array, .Lfunc_end0-init_array
.cfi_endproc
-
- .globl print_array
- .align 16, 0x90
+ # -- End function
+ .globl print_array # -- Begin function print_array
+ .p2align 4, 0x90
.type print_array,@function
print_array: # @print_array
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp9:
.cfi_def_cfa_offset 16
-.Ltmp10:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp11:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
+ pushq %r13
pushq %r12
pushq %rbx
-.Ltmp12:
- .cfi_offset %rbx, -48
-.Ltmp13:
- .cfi_offset %r12, -40
-.Ltmp14:
+ pushq %rax
+ .cfi_offset %rbx, -56
+ .cfi_offset %r12, -48
+ .cfi_offset %r13, -40
.cfi_offset %r14, -32
-.Ltmp15:
.cfi_offset %r15, -24
- xorl %r14d, %r14d
- movl $C, %r15d
- .align 16, 0x90
+ leaq C(%rip), %r13
+ xorl %eax, %eax
+ movl $3435973837, %r12d # imm = 0xCCCCCCCD
+ leaq .L.str(%rip), %r14
+ .p2align 4, 0x90
.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
- movq stdout(%rip), %rax
- movq %r15, %r12
+ movq %rax, -48(%rbp) # 8-byte Spill
+ movq stdout(%rip), %rsi
xorl %ebx, %ebx
- .align 16, 0x90
+ .p2align 4, 0x90
.LBB1_2: # %for.body3
# Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
- vmovss (%r12), %xmm0
- vcvtss2sd %xmm0, %xmm0, %xmm0
- movq %rax, %rdi
- movl $.L.str, %esi
+ movl %ebx, %eax
+ imulq %r12, %rax
+ shrq $38, %rax
+ leal (%rax,%rax,4), %r15d
+ shll $4, %r15d
+ addl $79, %r15d
+ movss (%r13,%rbx,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
+ cvtss2sd %xmm0, %xmm0
movb $1, %al
+ movq %rsi, %rdi
+ movq %r14, %rsi
callq fprintf
- movslq %ebx, %rax
- imulq $1717986919, %rax, %rcx # imm = 0x66666667
- movq %rcx, %rdx
- shrq $63, %rdx
- sarq $37, %rcx
- addl %edx, %ecx
- imull $80, %ecx, %ecx
- subl %ecx, %eax
- cmpl $79, %eax
+ cmpl %ebx, %r15d
jne .LBB1_4
-# BB#3: # %if.then
+# %bb.3: # %if.then
# in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
- callq fputc
+ callq fputc@PLT
.LBB1_4: # %for.inc
# in Loop: Header=BB1_2 Depth=2
- addq $4, %r12
- incq %rbx
- movq stdout(%rip), %rax
+ addq $1, %rbx
+ movq stdout(%rip), %rsi
cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
-# BB#5: # %for.end
+# %bb.5: # %for.end
# in Loop: Header=BB1_1 Depth=1
movl $10, %edi
- movq %rax, %rsi
- callq fputc
- addq $6144, %r15 # imm = 0x1800
- incq %r14
- cmpq $1536, %r14 # imm = 0x600
+ callq fputc@PLT
+ movq -48(%rbp), %rax # 8-byte Reload
+ addq $1, %rax
+ addq $6144, %r13 # imm = 0x1800
+ cmpq $1536, %rax # imm = 0x600
jne .LBB1_1
-# BB#6: # %for.end12
+# %bb.6: # %for.end12
+ addq $8, %rsp
popq %rbx
popq %r12
+ popq %r13
popq %r14
popq %r15
popq %rbp
- ret
-.Ltmp16:
- .size print_array, .Ltmp16-print_array
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end1:
+ .size print_array, .Lfunc_end1-print_array
.cfi_endproc
-
- .section .rodata.cst8,"aM",@progbits,8
- .align 8
-.LCPI2_0:
- .quad 4602678819172646912 # double 0.5
- .text
- .globl main
- .align 16, 0x90
+ # -- End function
+ .globl main # -- Begin function main
+ .p2align 4, 0x90
.type main,@function
main: # @main
.cfi_startproc
-# BB#0: # %entry
+# %bb.0: # %entry
pushq %rbp
-.Ltmp20:
.cfi_def_cfa_offset 16
-.Ltmp21:
.cfi_offset %rbp, -16
movq %rsp, %rbp
-.Ltmp22:
.cfi_def_cfa_register %rbp
pushq %r14
pushq %rbx
-.Ltmp23:
.cfi_offset %rbx, -32
-.Ltmp24:
.cfi_offset %r14, -24
- xorl %ebx, %ebx
- vmovsd .LCPI2_0(%rip), %xmm0
- .align 16, 0x90
-.LBB2_1: # %polly.loop_preheader3.i
- # =>This Loop Header: Depth=1
- # Child Loop BB2_2 Depth 2
- xorl %ecx, %ecx
- .align 16, 0x90
-.LBB2_2: # %polly.loop_header2.i
- # Parent Loop BB2_1 Depth=1
- # => This Inner Loop Header: Depth=2
- movl %ecx, %edx
- imull %ebx, %edx
- movl %edx, %esi
- sarl $31, %esi
- shrl $22, %esi
- addl %edx, %esi
- andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
- negl %esi
- movq %rbx, %rax
- shlq $11, %rax
- leal 1(%rdx,%rsi), %edi
- leaq (%rax,%rax,2), %rsi
- leaq 1(%rcx), %rdx
- cmpq $1536, %rdx # imm = 0x600
- vcvtsi2sdl %edi, %xmm0, %xmm1
- vmulsd %xmm0, %xmm1, %xmm1
- vcvtsd2ss %xmm1, %xmm1, %xmm1
- vmovss %xmm1, A(%rsi,%rcx,4)
- vmovss %xmm1, B(%rsi,%rcx,4)
- movq %rdx, %rcx
- jne .LBB2_2
-# BB#3: # %polly.loop_exit4.i
- # in Loop: Header=BB2_1 Depth=1
- incq %rbx
- cmpq $1536, %rbx # imm = 0x600
- jne .LBB2_1
-# BB#4: # %polly.loop_preheader3.preheader
- movl $C, %r14d
- movl $C, %edi
+ callq init_array
+ leaq C(%rip), %rbx
+ xorl %r14d, %r14d
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
- callq memset
- xorl %eax, %eax
- .align 16, 0x90
-.LBB2_5: # %polly.loop_preheader17
+ movq %rbx, %rdi
+ callq memset@PLT
+ leaq B(%rip), %rax
+ leaq A(%rip), %rcx
+ .p2align 4, 0x90
+.LBB2_1: # %polly.loop_header8
# =>This Loop Header: Depth=1
- # Child Loop BB2_10 Depth 2
- # Child Loop BB2_8 Depth 3
- movl $B, %ebx
- xorl %edx, %edx
- .align 16, 0x90
-.LBB2_10: # %polly.loop_preheader24
- # Parent Loop BB2_5 Depth=1
+ # Child Loop BB2_2 Depth 2
+ # Child Loop BB2_3 Depth 3
+ movq %rax, %rdx
+ xorl %esi, %esi
+ .p2align 4, 0x90
+.LBB2_2: # %polly.loop_header14
+ # Parent Loop BB2_1 Depth=1
# => This Loop Header: Depth=2
- # Child Loop BB2_8 Depth 3
- leaq (%rax,%rax,2), %rcx
- shlq $11, %rcx
- vmovss A(%rcx,%rdx,4), %xmm0
- movl $1536, %esi # imm = 0x600
- movq %r14, %rdi
- movq %rbx, %rcx
- .align 16, 0x90
-.LBB2_8: # %polly.loop_header23
- # Parent Loop BB2_5 Depth=1
- # Parent Loop BB2_10 Depth=2
+ # Child Loop BB2_3 Depth 3
+ leaq (%r14,%r14,2), %rdi
+ shlq $11, %rdi
+ addq %rcx, %rdi
+ movss (%rdi,%rsi,4), %xmm0 # xmm0 = mem[0],zero,zero,zero
+ shufps $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
+ movl $12, %edi
+ .p2align 4, 0x90
+.LBB2_3: # %vector.body
+ # Parent Loop BB2_1 Depth=1
+ # Parent Loop BB2_2 Depth=2
# => This Inner Loop Header: Depth=3
- vmulss (%rcx), %xmm0, %xmm1
- vaddss (%rdi), %xmm1, %xmm1
- vmovss %xmm1, (%rdi)
- addq $4, %rdi
- addq $4, %rcx
- decq %rsi
- jne .LBB2_8
-# BB#9: # %polly.loop_exit25
- # in Loop: Header=BB2_10 Depth=2
+ movaps -48(%rdx,%rdi,4), %xmm1
+ mulps %xmm0, %xmm1
+ movaps -32(%rdx,%rdi,4), %xmm2
+ mulps %xmm0, %xmm2
+ addps -48(%rbx,%rdi,4), %xmm1
+ addps -32(%rbx,%rdi,4), %xmm2
+ movaps %xmm1, -48(%rbx,%rdi,4)
+ movaps %xmm2, -32(%rbx,%rdi,4)
+ movaps -16(%rdx,%rdi,4), %xmm1
+ mulps %xmm0, %xmm1
+ movaps (%rdx,%rdi,4), %xmm2
+ mulps %xmm0, %xmm2
+ addps -16(%rbx,%rdi,4), %xmm1
+ addps (%rbx,%rdi,4), %xmm2
+ movaps %xmm1, -16(%rbx,%rdi,4)
+ movaps %xmm2, (%rbx,%rdi,4)
+ addq $16, %rdi
+ cmpq $1548, %rdi # imm = 0x60C
+ jne .LBB2_3
+# %bb.4: # %polly.loop_exit22
+ # in Loop: Header=BB2_2 Depth=2
+ addq $1, %rsi
+ addq $6144, %rdx # imm = 0x1800
+ cmpq $1536, %rsi # imm = 0x600
+ jne .LBB2_2
+# %bb.5: # %polly.loop_exit16
+ # in Loop: Header=BB2_1 Depth=1
+ addq $1, %r14
addq $6144, %rbx # imm = 0x1800
- incq %rdx
- cmpq $1536, %rdx # imm = 0x600
- jne .LBB2_10
-# BB#6: # %polly.loop_exit18
- # in Loop: Header=BB2_5 Depth=1
- addq $6144, %r14 # imm = 0x1800
- incq %rax
- cmpq $1536, %rax # imm = 0x600
- jne .LBB2_5
-# BB#7: # %polly.loop_exit11
+ cmpq $1536, %r14 # imm = 0x600
+ jne .LBB2_1
+# %bb.6: # %polly.exiting
xorl %eax, %eax
popq %rbx
popq %r14
popq %rbp
- ret
-.Ltmp25:
- .size main, .Ltmp25-main
+ .cfi_def_cfa %rsp, 8
+ retq
+.Lfunc_end2:
+ .size main, .Lfunc_end2-main
.cfi_endproc
-
+ # -- End function
.type A,@object # @A
.comm A,9437184,16
.type B,@object # @B
@@ -277,10 +250,11 @@ main: # @main
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
- .asciz "%lf "
+ .asciz "%lf "
.size .L.str, 5
.type C,@object # @C
.comm C,9437184,16
+ .ident "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
.section ".note.GNU-stack","",@progbits
diff --git a/polly/docs/experiments/matmul/matmul.preopt.ll b/polly/docs/experiments/matmul/matmul.preopt.ll
index db536642574..6fe4352b220 100644
--- a/polly/docs/experiments/matmul/matmul.preopt.ll
+++ b/polly/docs/experiments/matmul/matmul.preopt.ll
@@ -1,4 +1,4 @@
-; ModuleID = 'matmul.s'
+; ModuleID = 'matmul.ll'
source_filename = "matmul.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -6,15 +6,15 @@ target triple = "x86_64-unknown-linux-gnu"
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
-@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
-@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
-@stdout = external global %struct._IO_FILE*, align 8
+@A = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
+@B = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
+@stdout = external dso_local global %struct._IO_FILE*, align 8
@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
-@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
+@C = common dso_local global [1536 x [1536 x float]] zeroinitializer, align 16
@.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
-; Function Attrs: nounwind uwtable
-define void @init_array() #0 {
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @init_array() #0 {
entry:
br label %entry.split
@@ -22,44 +22,37 @@ entry.split: ; preds = %entry
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %entry.split, %for.inc17
- %indvars.iv5 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next6, %for.inc17 ]
+ %indvars.iv4 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next5, %for.inc17 ]
br label %for.body3
for.body3: ; preds = %for.cond1.preheader, %for.body3
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
- %0 = mul nuw nsw i64 %indvars.iv, %indvars.iv5
+ %0 = mul nuw nsw i64 %indvars.iv, %indvars.iv4
%1 = trunc i64 %0 to i32
- %rem = srem i32 %1, 1024
- %add = add nsw i32 %rem, 1
+ %rem = and i32 %1, 1023
+ %add = add nuw nsw i32 %rem, 1
%conv = sitofp i32 %add to double
%div = fmul double %conv, 5.000000e-01
%conv4 = fptrunc double %div to float
- %arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %indvars.iv5, i64 %indvars.iv
+ %arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %indvars.iv4, i64 %indvars.iv
store float %conv4, float* %arrayidx6, align 4
- %2 = mul nuw nsw i64 %indvars.iv, %indvars.iv5
- %3 = trunc i64 %2 to i32
- %rem8 = srem i32 %3, 1024
- %add9 = add nsw i32 %rem8, 1
- %conv10 = sitofp i32 %add9 to double
- %div11 = fmul double %conv10, 5.000000e-01
- %conv12 = fptrunc double %div11 to float
- %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %indvars.iv5, i64 %indvars.iv
- store float %conv12, float* %arrayidx16, align 4
+ %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %indvars.iv4, i64 %indvars.iv
+ store float %conv4, float* %arrayidx16, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 1536
br i1 %exitcond, label %for.body3, label %for.inc17
for.inc17: ; preds = %for.body3
- %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
- %exitcond7 = icmp ne i64 %indvars.iv.next6, 1536
- br i1 %exitcond7, label %for.cond1.preheader, label %for.end19
+ %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
+ %exitcond6 = icmp ne i64 %indvars.iv.next5, 1536
+ br i1 %exitcond6, label %for.cond1.preheader, label %for.end19
for.end19: ; preds = %for.inc17
ret void
}
-; Function Attrs: nounwind uwtable
-define void @print_array() #0 {
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @print_array() #0 {
entry:
br label %entry.split
@@ -79,7 +72,7 @@ for.body3: ; preds = %for.cond1.preheader
%conv = fpext float %2 to double
%call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2
%3 = trunc i64 %indvars.iv to i32
- %rem = srem i32 %3, 80
+ %rem = urem i32 %3, 80
%cmp6 = icmp eq i32 %rem, 79
br i1 %cmp6, label %if.then, label %for.inc
@@ -105,10 +98,10 @@ for.end12: ; preds = %for.end
ret void
}
-declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
+declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
-; Function Attrs: nounwind uwtable
-define i32 @main() #0 {
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main() #0 {
entry:
br label %entry.split
@@ -128,16 +121,14 @@ for.body3: ; preds = %for.cond1.preheader
for.body8: ; preds = %for.body3, %for.body8
%indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]
- %arrayidx12 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4
- %0 = load float, float* %arrayidx12, align 4
+ %0 = load float, float* %arrayidx5, align 4
%arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %indvars.iv7, i64 %indvars.iv
%1 = load float, float* %arrayidx16, align 4
%arrayidx20 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv4
%2 = load float, float* %arrayidx20, align 4
%mul = fmul float %1, %2
%add = fadd float %0, %mul
- %arrayidx24 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4
- store float %add, float* %arrayidx24, align 4
+ store float %add, float* %arrayidx5, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 1536
br i1 %exitcond, label %for.body8, label %for.inc25
@@ -162,10 +153,12 @@ declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #2
; Function Attrs: nounwind
declare i32 @fputc(i32, %struct._IO_FILE* nocapture) #2
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }
-!llvm.ident = !{!0}
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
-!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"}
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"}
diff --git a/polly/docs/experiments/matmul/runall.sh b/polly/docs/experiments/matmul/runall.sh
index 575b58f9824..011d66bf6b4 100755
--- a/polly/docs/experiments/matmul/runall.sh
+++ b/polly/docs/experiments/matmul/runall.sh
@@ -1,85 +1,83 @@
#!/bin/sh -a
echo "--> 1. Create LLVM-IR from C"
-clang -S -emit-llvm matmul.c -o matmul.s
+clang -S -emit-llvm matmul.c -Xclang -disable-O0-optnone -o matmul.ll
echo "--> 2. Prepare the LLVM-IR for Polly"
-opt -S -polly-canonicalize matmul.s > matmul.preopt.ll
+opt -S -polly-canonicalize matmul.ll -o matmul.preopt.ll
echo "--> 3. Show the SCoPs detected by Polly"
-opt -basicaa -polly-ast -analyze -q matmul.preopt.ll \
- -polly-process-unprofitable
+opt -basicaa -polly-ast -analyze matmul.preopt.ll \
+ -polly-process-unprofitable -polly-use-llvm-names
echo "--> 4.1 Highlight the detected SCoPs in the CFGs of the program"
# We only create .dot files, as -view-scops directly calls graphviz
# which would require user interaction to continue the script.
# opt -basicaa -view-scops -disable-output matmul.preopt.ll
-opt -basicaa -dot-scops -disable-output matmul.preopt.ll
+opt -basicaa -dot-scops -disable-output matmul.preopt.ll -polly-use-llvm-names
echo "--> 4.2 Highlight the detected SCoPs in the CFGs of the program (print \
no instructions)"
# We only create .dot files, as -view-scops-only directly calls
# graphviz which would require user interaction to continue the script.
# opt -basicaa -view-scops-only -disable-output matmul.preopt.ll
-opt -basicaa -dot-scops-only -disable-output matmul.preopt.ll
+opt -basicaa -dot-scops-only -disable-output matmul.preopt.ll -polly-use-llvm-names
echo "--> 4.3 Create .png files from the .dot files"
for i in `ls *.dot`; do dot -Tpng $i > $i.png; done
echo "--> 5. View the polyhedral representation of the SCoPs"
-opt -basicaa -polly-scops -analyze matmul.preopt.ll -polly-process-unprofitable
+opt -basicaa -polly-scops -analyze matmul.preopt.ll \
+ -polly-process-unprofitable -polly-use-llvm-names
echo "--> 6. Show the dependences for the SCoPs"
opt -basicaa -polly-dependences -analyze matmul.preopt.ll \
- -polly-process-unprofitable
+ -polly-process-unprofitable -polly-use-llvm-names
echo "--> 7. Export jscop files"
-opt -basicaa -polly-export-jscop matmul.preopt.ll -polly-process-unprofitable
+opt -basicaa -polly-export-jscop matmul.preopt.ll \
+ -polly-process-unprofitable -disable-output -polly-use-llvm-names
echo "--> 8. Import the updated jscop files and print the new SCoPs. (optional)"
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
- -polly-process-unprofitable
+ -polly-process-unprofitable -polly-use-llvm-names
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
- -polly-import-jscop-postfix=interchanged -polly-process-unprofitable
+ -polly-import-jscop-postfix=interchanged -polly-process-unprofitable -polly-use-llvm-names
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
- -polly-import-jscop-postfix=interchanged+tiled -polly-process-unprofitable
+ -polly-import-jscop-postfix=interchanged+tiled -polly-process-unprofitable -polly-use-llvm-names
opt -basicaa -polly-import-jscop -polly-ast -analyze matmul.preopt.ll \
-polly-import-jscop-postfix=interchanged+tiled+vector \
- -polly-process-unprofitable
+ -polly-process-unprofitable -polly-use-llvm-names
echo "--> 9. Codegenerate the SCoPs"
-opt -basicaa -polly-import-jscop -polly-import-jscop-postfix=interchanged \
- -polly-codegen -polly-process-unprofitable\
- matmul.preopt.ll | opt -O3 > matmul.polly.interchanged.ll
-opt -basicaa -polly-import-jscop \
+opt -S -basicaa -polly-import-jscop -polly-import-jscop-postfix=interchanged \
+ -polly-codegen -polly-process-unprofitable -polly-use-llvm-names \
+ matmul.preopt.ll | opt -O3 -S -o matmul.polly.interchanged.ll
+opt -S -basicaa -polly-import-jscop \
-polly-import-jscop-postfix=interchanged+tiled -polly-codegen \
- matmul.preopt.ll -polly-process-unprofitable \
- | opt -O3 > matmul.polly.interchanged+tiled.ll
-opt -basicaa -polly-import-jscop -polly-process-unprofitable\
+ matmul.preopt.ll -polly-process-unprofitable -polly-use-llvm-names \
+ | opt -O3 -S -o matmul.polly.interchanged+tiled.ll
+opt -S -basicaa -polly-import-jscop -polly-process-unprofitable\
-polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen \
- matmul.preopt.ll -polly-vectorizer=polly\
- | opt -O3 > matmul.polly.interchanged+tiled+vector.ll
-opt -basicaa -polly-import-jscop -polly-process-unprofitable\
+ matmul.preopt.ll -polly-vectorizer=polly -polly-use-llvm-names \
+ | opt -O3 -S -o matmul.polly.interchanged+tiled+vector.ll
+opt -S -basicaa -polly-import-jscop -polly-process-unprofitable\
-polly-import-jscop-postfix=interchanged+tiled+vector -polly-codegen \
- matmul.preopt.ll -polly-vectorizer=polly -polly-parallel\
- | opt -O3 > matmul.polly.interchanged+tiled+vector+openmp.ll
-opt matmul.preopt.ll | opt -O3 > matmul.normalopt.ll
+ matmul.preopt.ll -polly-vectorizer=polly -polly-parallel -polly-use-llvm-names \
+ | opt -O3 -S -o matmul.polly.interchanged+tiled+vector+openmp.ll
+opt -S matmul.preopt.ll | opt -O3 -S -o matmul.normalopt.ll
echo "--> 10. Create the executables"
-llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s && gcc matmul.polly.interchanged.s \
- -o matmul.polly.interchanged.exe
-llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s && gcc matmul.polly.interchanged+tiled.s \
- -o matmul.polly.interchanged+tiled.exe
-llc matmul.polly.interchanged+tiled+vector.ll \
- -o matmul.polly.interchanged+tiled+vector.s \
- && gcc matmul.polly.interchanged+tiled+vector.s \
- -o matmul.polly.interchanged+tiled+vector.exe
-llc matmul.polly.interchanged+tiled+vector+openmp.ll \
- -o matmul.polly.interchanged+tiled+vector+openmp.s \
- && gcc -lgomp matmul.polly.interchanged+tiled+vector+openmp.s \
- -o matmul.polly.interchanged+tiled+vector+openmp.exe
-llc matmul.normalopt.ll -o matmul.normalopt.s && gcc matmul.normalopt.s \
- -o matmul.normalopt.exe
+llc matmul.polly.interchanged.ll -o matmul.polly.interchanged.s -relocation-model=pic
+gcc matmul.polly.interchanged.s -o matmul.polly.interchanged.exe
+llc matmul.polly.interchanged+tiled.ll -o matmul.polly.interchanged+tiled.s -relocation-model=pic
+gcc matmul.polly.interchanged+tiled.s -o matmul.polly.interchanged+tiled.exe
+llc matmul.polly.interchanged+tiled+vector.ll -o matmul.polly.interchanged+tiled+vector.s -relocation-model=pic
+gcc matmul.polly.interchanged+tiled+vector.s -o matmul.polly.interchanged+tiled+vector.exe
+llc matmul.polly.interchanged+tiled+vector+openmp.ll -o matmul.polly.interchanged+tiled+vector+openmp.s -relocation-model=pic
+gcc matmul.polly.interchanged+tiled+vector+openmp.s -lgomp -o matmul.polly.interchanged+tiled+vector+openmp.exe
+llc matmul.normalopt.ll -o matmul.normalopt.s -relocation-model=pic
+gcc matmul.normalopt.s -lgomp -o matmul.normalopt.exe
echo "--> 11. Compare the runtime of the executables"
diff --git a/polly/docs/experiments/matmul/scops.init_array.dot b/polly/docs/experiments/matmul/scops.init_array.dot
index 3b9d6c9c586..39e2d7e42a8 100644
--- a/polly/docs/experiments/matmul/scops.init_array.dot
+++ b/polly/docs/experiments/matmul/scops.init_array.dot
@@ -1,39 +1,39 @@
digraph "Scop Graph for 'init_array' function" {
label="Scop Graph for 'init_array' function";
- Node0x5b5b5a0 [shape=record,label="{entry:\l br label %entry.split\l}"];
- Node0x5b5b5a0 -> Node0x5b5de30;
- Node0x5b5de30 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
- Node0x5b5de30 -> Node0x5b5de50;
- Node0x5b5de50 [shape=record,label="{for.cond1.preheader: \l %indvars.iv5 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next6, %for.inc17 ]\l br label %for.body3\l}"];
- Node0x5b5de50 -> Node0x5b5b570;
- Node0x5b5b570 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.body3 ]\l %0 = mul nuw nsw i64 %indvars.iv, %indvars.iv5\l %1 = trunc i64 %0 to i32\l %rem = srem i32 %1, 1024\l %add = add nsw i32 %rem, 1\l %conv = sitofp i32 %add to double\l %div = fmul double %conv, 5.000000e-01\l %conv4 = fptrunc double %div to float\l %arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @A, i64 0, i64 %indvars.iv5, i64 %indvars.iv\l store float %conv4, float* %arrayidx6, align 4\l %2 = mul nuw nsw i64 %indvars.iv, %indvars.iv5\l %3 = trunc i64 %2 to i32\l %rem8 = srem i32 %3, 1024\l %add9 = add nsw i32 %rem8, 1\l %conv10 = sitofp i32 %add9 to double\l %div11 = fmul double %conv10, 5.000000e-01\l %conv12 = fptrunc double %div11 to float\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv5, i64 %indvars.iv\l store float %conv12, float* %arrayidx16, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.inc17\l}"];
- Node0x5b5b570 -> Node0x5b5b570[constraint=false];
- Node0x5b5b570 -> Node0x5b5df30;
- Node0x5b5df30 [shape=record,label="{for.inc17: \l %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1\l %exitcond7 = icmp ne i64 %indvars.iv.next6, 1536\l br i1 %exitcond7, label %for.cond1.preheader, label %for.end19\l}"];
- Node0x5b5df30 -> Node0x5b5de50[constraint=false];
- Node0x5b5df30 -> Node0x5b5df90;
- Node0x5b5df90 [shape=record,label="{for.end19: \l ret void\l}"];
+ Node0x7fffc6c46ea0 [shape=record,label="{entry:\l br label %entry.split\l}"];
+ Node0x7fffc6c46ea0 -> Node0x7fffc6c46f20;
+ Node0x7fffc6c46f20 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
+ Node0x7fffc6c46f20 -> Node0x7fffc6c47000;
+ Node0x7fffc6c47000 [shape=record,label="{for.cond1.preheader: \l %indvars.iv4 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next5, %for.inc17 ]\l br label %for.body3\l}"];
+ Node0x7fffc6c47000 -> Node0x7fffc6c47290;
+ Node0x7fffc6c47290 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.body3 ]\l %0 = mul nuw nsw i64 %indvars.iv, %indvars.iv4\l %1 = trunc i64 %0 to i32\l %rem = and i32 %1, 1023\l %add = add nuw nsw i32 %rem, 1\l %conv = sitofp i32 %add to double\l %div = fmul double %conv, 5.000000e-01\l %conv4 = fptrunc double %div to float\l %arrayidx6 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @A, i64 0, i64 %indvars.iv4, i64 %indvars.iv\l store float %conv4, float* %arrayidx6, align 4\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv4, i64 %indvars.iv\l store float %conv4, float* %arrayidx16, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.inc17\l}"];
+ Node0x7fffc6c47290 -> Node0x7fffc6c47290[constraint=false];
+ Node0x7fffc6c47290 -> Node0x7fffc6c47b10;
+ Node0x7fffc6c47b10 [shape=record,label="{for.inc17: \l %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1\l %exitcond6 = icmp ne i64 %indvars.iv.next5, 1536\l br i1 %exitcond6, label %for.cond1.preheader, label %for.end19\l}"];
+ Node0x7fffc6c47b10 -> Node0x7fffc6c47000[constraint=false];
+ Node0x7fffc6c47b10 -> Node0x7fffc6c48b10;
+ Node0x7fffc6c48b10 [shape=record,label="{for.end19: \l ret void\l}"];
colorscheme = "paired12"
- subgraph cluster_0x5b4bdd0 {
+ subgraph cluster_0x7fffc6c32540 {
label = "";
style = solid;
color = 1
- subgraph cluster_0x5b4bf50 {
+ subgraph cluster_0x7fffc6c32f30 {
label = "Region can not profitably be optimized!";
style = solid;
color = 6
- subgraph cluster_0x5b4c0d0 {
+ subgraph cluster_0x7fffc6c32690 {
label = "";
style = solid;
color = 5
- Node0x5b5b570;
+ Node0x7fffc6c47290;
}
- Node0x5b5de50;
- Node0x5b5df30;
+ Node0x7fffc6c47000;
+ Node0x7fffc6c47b10;
}
- Node0x5b5b5a0;
- Node0x5b5de30;
- Node0x5b5df90;
+ Node0x7fffc6c46ea0;
+ Node0x7fffc6c46f20;
+ Node0x7fffc6c48b10;
}
}
diff --git a/polly/docs/experiments/matmul/scops.init_array.dot.png b/polly/docs/experiments/matmul/scops.init_array.dot.png
index 48a9f38946a..3cd5eb8a3c8 100644
--- a/polly/docs/experiments/matmul/scops.init_array.dot.png
+++ b/polly/docs/experiments/matmul/scops.init_array.dot.png
Binary files differ
diff --git a/polly/docs/experiments/matmul/scops.main.dot b/polly/docs/experiments/matmul/scops.main.dot
index e4abe8fbec8..7c20cbf220f 100644
--- a/polly/docs/experiments/matmul/scops.main.dot
+++ b/polly/docs/experiments/matmul/scops.main.dot
@@ -1,50 +1,50 @@
digraph "Scop Graph for 'main' function" {
label="Scop Graph for 'main' function";
- Node0x5b5c850 [shape=record,label="{entry:\l br label %entry.split\l}"];
- Node0x5b5c850 -> Node0x5b5a440;
- Node0x5b5a440 [shape=record,label="{entry.split: \l tail call void @init_array()\l br label %for.cond1.preheader\l}"];
- Node0x5b5a440 -> Node0x5b38cd0;
- Node0x5b38cd0 [shape=record,label="{for.cond1.preheader: \l %indvars.iv7 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next8, %for.inc28 ]\l br label %for.body3\l}"];
- Node0x5b38cd0 -> Node0x5b4bd30;
- Node0x5b4bd30 [shape=record,label="{for.body3: \l %indvars.iv4 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next5,\l... %for.inc25 ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l store float 0.000000e+00, float* %arrayidx5, align 4\l br label %for.body8\l}"];
- Node0x5b4bd30 -> Node0x5b38c50;
- Node0x5b38c50 [shape=record,label="{for.body8: \l %indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]\l %arrayidx12 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l %0 = load float, float* %arrayidx12, align 4\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @A, i64 0, i64 %indvars.iv7, i64 %indvars.iv\l %1 = load float, float* %arrayidx16, align 4\l %arrayidx20 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv4\l %2 = load float, float* %arrayidx20, align 4\l %mul = fmul float %1, %2\l %add = fadd float %0, %mul\l %arrayidx24 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l store float %add, float* %arrayidx24, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body8, label %for.inc25\l}"];
- Node0x5b38c50 -> Node0x5b38c50[constraint=false];
- Node0x5b38c50 -> Node0x5b5a290;
- Node0x5b5a290 [shape=record,label="{for.inc25: \l %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1\l %exitcond6 = icmp ne i64 %indvars.iv.next5, 1536\l br i1 %exitcond6, label %for.body3, label %for.inc28\l}"];
- Node0x5b5a290 -> Node0x5b4bd30[constraint=false];
- Node0x5b5a290 -> Node0x5b5a340;
- Node0x5b5a340 [shape=record,label="{for.inc28: \l %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1\l %exitcond9 = icmp ne i64 %indvars.iv.next8, 1536\l br i1 %exitcond9, label %for.cond1.preheader, label %for.end30\l}"];
- Node0x5b5a340 -> Node0x5b38cd0[constraint=false];
- Node0x5b5a340 -> Node0x5b5a3a0;
- Node0x5b5a3a0 [shape=record,label="{for.end30: \l ret i32 0\l}"];
+ Node0x7fffc6c4cb90 [shape=record,label="{entry:\l br label %entry.split\l}"];
+ Node0x7fffc6c4cb90 -> Node0x7fffc6c47b10;
+ Node0x7fffc6c47b10 [shape=record,label="{entry.split: \l tail call void @init_array()\l br label %for.cond1.preheader\l}"];
+ Node0x7fffc6c47b10 -> Node0x7fffc6c456e0;
+ Node0x7fffc6c456e0 [shape=record,label="{for.cond1.preheader: \l %indvars.iv7 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next8, %for.inc28 ]\l br label %for.body3\l}"];
+ Node0x7fffc6c456e0 -> Node0x7fffc6c3f080;
+ Node0x7fffc6c3f080 [shape=record,label="{for.body3: \l %indvars.iv4 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next5,\l... %for.inc25 ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv7, i64 %indvars.iv4\l store float 0.000000e+00, float* %arrayidx5, align 4\l br label %for.body8\l}"];
+ Node0x7fffc6c3f080 -> Node0x7fffc6c3f220;
+ Node0x7fffc6c3f220 [shape=record,label="{for.body8: \l %indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]\l %0 = load float, float* %arrayidx5, align 4\l %arrayidx16 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @A, i64 0, i64 %indvars.iv7, i64 %indvars.iv\l %1 = load float, float* %arrayidx16, align 4\l %arrayidx20 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536\l... x float]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv4\l %2 = load float, float* %arrayidx20, align 4\l %mul = fmul float %1, %2\l %add = fadd float %0, %mul\l store float %add, float* %arrayidx5, align 4\l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body8, label %for.inc25\l}"];
+ Node0x7fffc6c3f220 -> Node0x7fffc6c3f220[constraint=false];
+ Node0x7fffc6c3f220 -> Node0x7fffc6c40480;
+ Node0x7fffc6c40480 [shape=record,label="{for.inc25: \l %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1\l %exitcond6 = icmp ne i64 %indvars.iv.next5, 1536\l br i1 %exitcond6, label %for.body3, label %for.inc28\l}"];
+ Node0x7fffc6c40480 -> Node0x7fffc6c3f080[constraint=false];
+ Node0x7fffc6c40480 -> Node0x7fffc6c404e0;
+ Node0x7fffc6c404e0 [shape=record,label="{for.inc28: \l %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1\l %exitcond9 = icmp ne i64 %indvars.iv.next8, 1536\l br i1 %exitcond9, label %for.cond1.preheader, label %for.end30\l}"];
+ Node0x7fffc6c404e0 -> Node0x7fffc6c456e0[constraint=false];
+ Node0x7fffc6c404e0 -> Node0x7fffc6c40540;
+ Node0x7fffc6c40540 [shape=record,label="{for.end30: \l ret i32 0\l}"];
colorscheme = "paired12"
- subgraph cluster_0x5b5c970 {
+ subgraph cluster_0x7fffc6c32540 {
label = "";
style = solid;
color = 1
- subgraph cluster_0x5b5c5a0 {
+ subgraph cluster_0x7fffc6c32f30 {
label = "";
style = filled;
- color = 3 subgraph cluster_0x5b5c9f0 {
+ color = 3 subgraph cluster_0x7fffc6c32690 {
label = "";
style = solid;
color = 5
- subgraph cluster_0x5b5c110 {
+ subgraph cluster_0x7fffc6c32dc0 {
label = "";
style = solid;
color = 7
- Node0x5b38c50;
+ Node0x7fffc6c3f220;
}
- Node0x5b4bd30;
- Node0x5b5a290;
+ Node0x7fffc6c3f080;
+ Node0x7fffc6c40480;
}
- Node0x5b38cd0;
- Node0x5b5a340;
+ Node0x7fffc6c456e0;
+ Node0x7fffc6c404e0;
}
- Node0x5b5c850;
- Node0x5b5a440;
- Node0x5b5a3a0;
+ Node0x7fffc6c4cb90;
+ Node0x7fffc6c47b10;
+ Node0x7fffc6c40540;
}
}
diff --git a/polly/docs/experiments/matmul/scops.main.dot.png b/polly/docs/experiments/matmul/scops.main.dot.png
index 4e73701a08d..241ddaa39f2 100644
--- a/polly/docs/experiments/matmul/scops.main.dot.png
+++ b/polly/docs/experiments/matmul/scops.main.dot.png
Binary files differ
diff --git a/polly/docs/experiments/matmul/scops.print_array.dot b/polly/docs/experiments/matmul/scops.print_array.dot
index 748ccb170cd..5c5c0fca145 100644
--- a/polly/docs/experiments/matmul/scops.print_array.dot
+++ b/polly/docs/experiments/matmul/scops.print_array.dot
@@ -1,51 +1,51 @@
digraph "Scop Graph for 'print_array' function" {
label="Scop Graph for 'print_array' function";
- Node0x5b5ee00 [shape=record,label="{entry:\l br label %entry.split\l}"];
- Node0x5b5ee00 -> Node0x5b5ee50;
- Node0x5b5ee50 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
- Node0x5b5ee50 -> Node0x5b5ee70;
- Node0x5b5ee70 [shape=record,label="{for.cond1.preheader: \l %indvars.iv6 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next7, %for.end ]\l %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l br label %for.body3\l}"];
- Node0x5b5ee70 -> Node0x5b5ee20;
- Node0x5b5ee20 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.inc ]\l %1 = phi %struct._IO_FILE* [ %0, %for.cond1.preheader ], [ %5, %for.inc ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv6, i64 %indvars.iv\l %2 = load float, float* %arrayidx5, align 4\l %conv = fpext float %2 to double\l %call = tail call i32 (%struct._IO_FILE*, i8*, ...)\l... @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x\l... i8]* @.str, i64 0, i64 0), double %conv) #2\l %3 = trunc i64 %indvars.iv to i32\l %rem = srem i32 %3, 80\l %cmp6 = icmp eq i32 %rem, 79\l br i1 %cmp6, label %if.then, label %for.inc\l}"];
- Node0x5b5ee20 -> Node0x5b60d10;
- Node0x5b5ee20 -> Node0x5b60d70;
- Node0x5b60d10 [shape=record,label="{if.then: \l %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %fputc3 = tail call i32 @fputc(i32 10, %struct._IO_FILE* %4)\l br label %for.inc\l}"];
- Node0x5b60d10 -> Node0x5b60d70;
- Node0x5b60d70 [shape=record,label="{for.inc: \l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.end\l}"];
- Node0x5b60d70 -> Node0x5b5ee20[constraint=false];
- Node0x5b60d70 -> Node0x5b60e10;
- Node0x5b60e10 [shape=record,label="{for.end: \l %.lcssa = phi %struct._IO_FILE* [ %5, %for.inc ]\l %fputc = tail call i32 @fputc(i32 10, %struct._IO_FILE* %.lcssa)\l %indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1\l %exitcond8 = icmp ne i64 %indvars.iv.next7, 1536\l br i1 %exitcond8, label %for.cond1.preheader, label %for.end12\l}"];
- Node0x5b60e10 -> Node0x5b5ee70[constraint=false];
- Node0x5b60e10 -> Node0x5b60e70;
- Node0x5b60e70 [shape=record,label="{for.end12: \l ret void\l}"];
+ Node0x7fffc6c42bf0 [shape=record,label="{entry:\l br label %entry.split\l}"];
+ Node0x7fffc6c42bf0 -> Node0x7fffc6c42f10;
+ Node0x7fffc6c42f10 [shape=record,label="{entry.split: \l br label %for.cond1.preheader\l}"];
+ Node0x7fffc6c42f10 -> Node0x7fffc6c4abb0;
+ Node0x7fffc6c4abb0 [shape=record,label="{for.cond1.preheader: \l %indvars.iv6 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next7, %for.end ]\l %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l br label %for.body3\l}"];
+ Node0x7fffc6c4abb0 -> Node0x7fffc6c4ac10;
+ Node0x7fffc6c4ac10 [shape=record,label="{for.body3: \l %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next,\l... %for.inc ]\l %1 = phi %struct._IO_FILE* [ %0, %for.cond1.preheader ], [ %5, %for.inc ]\l %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x\l... float]]* @C, i64 0, i64 %indvars.iv6, i64 %indvars.iv\l %2 = load float, float* %arrayidx5, align 4\l %conv = fpext float %2 to double\l %call = tail call i32 (%struct._IO_FILE*, i8*, ...)\l... @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x\l... i8]* @.str, i64 0, i64 0), double %conv) #2\l %3 = trunc i64 %indvars.iv to i32\l %rem = urem i32 %3, 80\l %cmp6 = icmp eq i32 %rem, 79\l br i1 %cmp6, label %if.then, label %for.inc\l}"];
+ Node0x7fffc6c4ac10 -> Node0x7fffc6c4af80;
+ Node0x7fffc6c4ac10 -> Node0x7fffc6c4afe0;
+ Node0x7fffc6c4af80 [shape=record,label="{if.then: \l %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %fputc3 = tail call i32 @fputc(i32 10, %struct._IO_FILE* %4)\l br label %for.inc\l}"];
+ Node0x7fffc6c4af80 -> Node0x7fffc6c4afe0;
+ Node0x7fffc6c4afe0 [shape=record,label="{for.inc: \l %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\l %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8\l %exitcond = icmp ne i64 %indvars.iv.next, 1536\l br i1 %exitcond, label %for.body3, label %for.end\l}"];
+ Node0x7fffc6c4afe0 -> Node0x7fffc6c4ac10[constraint=false];
+ Node0x7fffc6c4afe0 -> Node0x7fffc6c4b3b0;
+ Node0x7fffc6c4b3b0 [shape=record,label="{for.end: \l %.lcssa = phi %struct._IO_FILE* [ %5, %for.inc ]\l %fputc = tail call i32 @fputc(i32 10, %struct._IO_FILE* %.lcssa)\l %indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1\l %exitcond8 = icmp ne i64 %indvars.iv.next7, 1536\l br i1 %exitcond8, label %for.cond1.preheader, label %for.end12\l}"];
+ Node0x7fffc6c4b3b0 -> Node0x7fffc6c4abb0[constraint=false];
+ Node0x7fffc6c4b3b0 -> Node0x7fffc6c4b580;
+ Node0x7fffc6c4b580 [shape=record,label="{for.end12: \l ret void\l}"];
colorscheme = "paired12"
- subgraph cluster_0x5b349a0 {
+ subgraph cluster_0x7fffc6c32540 {
label = "";
style = solid;
color = 1
- subgraph cluster_0x5b5c2c0 {
+ subgraph cluster_0x7fffc6c32dc0 {
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
style = solid;
color = 6
- subgraph cluster_0x5b5c240 {
+ subgraph cluster_0x7fffc6c32690 {
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
style = solid;
color = 5
- subgraph cluster_0x5b34a20 {
+ subgraph cluster_0x7fffc6c32f30 {
label = "Region can not profitably be optimized!";
style = solid;
color = 7
- Node0x5b5ee20;
- Node0x5b60d10;
+ Node0x7fffc6c4ac10;
+ Node0x7fffc6c4af80;
}
- Node0x5b60d70;
+ Node0x7fffc6c4afe0;
}
- Node0x5b5ee70;
- Node0x5b60e10;
+ Node0x7fffc6c4abb0;
+ Node0x7fffc6c4b3b0;
}
- Node0x5b5ee00;
- Node0x5b5ee50;
- Node0x5b60e70;
+ Node0x7fffc6c42bf0;
+ Node0x7fffc6c42f10;
+ Node0x7fffc6c4b580;
}
}
diff --git a/polly/docs/experiments/matmul/scops.print_array.dot.png b/polly/docs/experiments/matmul/scops.print_array.dot.png
index e3b973b131a..75c4db88de6 100644
--- a/polly/docs/experiments/matmul/scops.print_array.dot.png
+++ b/polly/docs/experiments/matmul/scops.print_array.dot.png
Binary files differ
diff --git a/polly/docs/experiments/matmul/scopsonly.init_array.dot b/polly/docs/experiments/matmul/scopsonly.init_array.dot
index 3d2092b21c9..a8907a9be82 100644
--- a/polly/docs/experiments/matmul/scopsonly.init_array.dot
+++ b/polly/docs/experiments/matmul/scopsonly.init_array.dot
@@ -1,39 +1,39 @@
digraph "Scop Graph for 'init_array' function" {
label="Scop Graph for 'init_array' function";
- Node0x5ae2570 [shape=record,label="{entry}"];
- Node0x5ae2570 -> Node0x5ae4e90;
- Node0x5ae4e90 [shape=record,label="{entry.split}"];
- Node0x5ae4e90 -> Node0x5ae4f50;
- Node0x5ae4f50 [shape=record,label="{for.cond1.preheader}"];
- Node0x5ae4f50 -> Node0x5ae50e0;
- Node0x5ae50e0 [shape=record,label="{for.body3}"];
- Node0x5ae50e0 -> Node0x5ae50e0[constraint=false];
- Node0x5ae50e0 -> Node0x5ae5100;
- Node0x5ae5100 [shape=record,label="{for.inc17}"];
- Node0x5ae5100 -> Node0x5ae4f50[constraint=false];
- Node0x5ae5100 -> Node0x5ae4ff0;
- Node0x5ae4ff0 [shape=record,label="{for.end19}"];
+ Node0x7fffdb5cceb0 [shape=record,label="{entry}"];
+ Node0x7fffdb5cceb0 -> Node0x7fffdb5ccf00;
+ Node0x7fffdb5ccf00 [shape=record,label="{entry.split}"];
+ Node0x7fffdb5ccf00 -> Node0x7fffdb5ccf80;
+ Node0x7fffdb5ccf80 [shape=record,label="{for.cond1.preheader}"];
+ Node0x7fffdb5ccf80 -> Node0x7fffdb5cd090;
+ Node0x7fffdb5cd090 [shape=record,label="{for.body3}"];
+ Node0x7fffdb5cd090 -> Node0x7fffdb5cd090[constraint=false];
+ Node0x7fffdb5cd090 -> Node0x7fffdb5cd0b0;
+ Node0x7fffdb5cd0b0 [shape=record,label="{for.inc17}"];
+ Node0x7fffdb5cd0b0 -> Node0x7fffdb5ccf80[constraint=false];
+ Node0x7fffdb5cd0b0 -> Node0x7fffdb5cd2a0;
+ Node0x7fffdb5cd2a0 [shape=record,label="{for.end19}"];
colorscheme = "paired12"
- subgraph cluster_0x5ad2dd0 {
+ subgraph cluster_0x7fffdb5b8530 {
label = "";
style = solid;
color = 1
- subgraph cluster_0x5ad2f50 {
+ subgraph cluster_0x7fffdb5b8f40 {
label = "Region can not profitably be optimized!";
style = solid;
color = 6
- subgraph cluster_0x5ad30d0 {
+ subgraph cluster_0x7fffdb5b86a0 {
label = "";
style = solid;
color = 5
- Node0x5ae50e0;
+ Node0x7fffdb5cd090;
}
- Node0x5ae4f50;
- Node0x5ae5100;
+ Node0x7fffdb5ccf80;
+ Node0x7fffdb5cd0b0;
}
- Node0x5ae2570;
- Node0x5ae4e90;
- Node0x5ae4ff0;
+ Node0x7fffdb5cceb0;
+ Node0x7fffdb5ccf00;
+ Node0x7fffdb5cd2a0;
}
}
diff --git a/polly/docs/experiments/matmul/scopsonly.init_array.dot.png b/polly/docs/experiments/matmul/scopsonly.init_array.dot.png
index f101d4d3081..bdfae23e0ff 100644
--- a/polly/docs/experiments/matmul/scopsonly.init_array.dot.png
+++ b/polly/docs/experiments/matmul/scopsonly.init_array.dot.png
Binary files differ
diff --git a/polly/docs/experiments/matmul/scopsonly.main.dot b/polly/docs/experiments/matmul/scopsonly.main.dot
index c2d60c7ded6..9793a24baef 100644
--- a/polly/docs/experiments/matmul/scopsonly.main.dot
+++ b/polly/docs/experiments/matmul/scopsonly.main.dot
@@ -1,50 +1,50 @@
digraph "Scop Graph for 'main' function" {
label="Scop Graph for 'main' function";
- Node0x5abfcf0 [shape=record,label="{entry}"];
- Node0x5abfcf0 -> Node0x5ade060;
- Node0x5ade060 [shape=record,label="{entry.split}"];
- Node0x5ade060 -> Node0x5ade0e0;
- Node0x5ade0e0 [shape=record,label="{for.cond1.preheader}"];
- Node0x5ade0e0 -> Node0x5ade100;
- Node0x5ade100 [shape=record,label="{for.body3}"];
- Node0x5ade100 -> Node0x5ae0020;
- Node0x5ae0020 [shape=record,label="{for.body8}"];
- Node0x5ae0020 -> Node0x5ae0020[constraint=false];
- Node0x5ae0020 -> Node0x5ae0080;
- Node0x5ae0080 [shape=record,label="{for.inc25}"];
- Node0x5ae0080 -> Node0x5ade100[constraint=false];
- Node0x5ae0080 -> Node0x5adfef0;
- Node0x5adfef0 [shape=record,label="{for.inc28}"];
- Node0x5adfef0 -> Node0x5ade0e0[constraint=false];
- Node0x5adfef0 -> Node0x5adff50;
- Node0x5adff50 [shape=record,label="{for.end30}"];
+ Node0x7fffdb5cbd10 [shape=record,label="{entry}"];
+ Node0x7fffdb5cbd10 -> Node0x7fffdb5c7140;
+ Node0x7fffdb5c7140 [shape=record,label="{entry.split}"];
+ Node0x7fffdb5c7140 -> Node0x7fffdb5c7200;
+ Node0x7fffdb5c7200 [shape=record,label="{for.cond1.preheader}"];
+ Node0x7fffdb5c7200 -> Node0x7fffdb5ccd60;
+ Node0x7fffdb5ccd60 [shape=record,label="{for.body3}"];
+ Node0x7fffdb5ccd60 -> Node0x7fffdb5ccd80;
+ Node0x7fffdb5ccd80 [shape=record,label="{for.body8}"];
+ Node0x7fffdb5ccd80 -> Node0x7fffdb5ccd80[constraint=false];
+ Node0x7fffdb5ccd80 -> Node0x7fffdb5cce20;
+ Node0x7fffdb5cce20 [shape=record,label="{for.inc25}"];
+ Node0x7fffdb5cce20 -> Node0x7fffdb5ccd60[constraint=false];
+ Node0x7fffdb5cce20 -> Node0x7fffdb5cce80;
+ Node0x7fffdb5cce80 [shape=record,label="{for.inc28}"];
+ Node0x7fffdb5cce80 -> Node0x7fffdb5c7200[constraint=false];
+ Node0x7fffdb5cce80 -> Node0x7fffdb5ccee0;
+ Node0x7fffdb5ccee0 [shape=record,label="{for.end30}"];
colorscheme = "paired12"
- subgraph cluster_0x5ad2c80 {
+ subgraph cluster_0x7fffdb5b8530 {
label = "";
style = solid;
color = 1
- subgraph cluster_0x5ad2e50 {
+ subgraph cluster_0x7fffdb5b8f40 {
label = "";
style = filled;
- color = 3 subgraph cluster_0x5ad2d00 {
+ color = 3 subgraph cluster_0x7fffdb5b86a0 {
label = "";
style = solid;
color = 5
- subgraph cluster_0x5ad2dd0 {
+ subgraph cluster_0x7fffdb5cc3c0 {
label = "";
style = solid;
color = 7
- Node0x5ae0020;
+ Node0x7fffdb5ccd80;
}
- Node0x5ade100;
- Node0x5ae0080;
+ Node0x7fffdb5ccd60;
+ Node0x7fffdb5cce20;
}
- Node0x5ade0e0;
- Node0x5adfef0;
+ Node0x7fffdb5c7200;
+ Node0x7fffdb5cce80;
}
- Node0x5abfcf0;
- Node0x5ade060;
- Node0x5adff50;
+ Node0x7fffdb5cbd10;
+ Node0x7fffdb5c7140;
+ Node0x7fffdb5ccee0;
}
}
diff --git a/polly/docs/experiments/matmul/scopsonly.main.dot.png b/polly/docs/experiments/matmul/scopsonly.main.dot.png
index 32634243888..92124ee467e 100644
--- a/polly/docs/experiments/matmul/scopsonly.main.dot.png
+++ b/polly/docs/experiments/matmul/scopsonly.main.dot.png
Binary files differ
diff --git a/polly/docs/experiments/matmul/scopsonly.print_array.dot b/polly/docs/experiments/matmul/scopsonly.print_array.dot
index 0f7de45e877..7d9a8aea882 100644
--- a/polly/docs/experiments/matmul/scopsonly.print_array.dot
+++ b/polly/docs/experiments/matmul/scopsonly.print_array.dot
@@ -1,51 +1,51 @@
digraph "Scop Graph for 'print_array' function" {
label="Scop Graph for 'print_array' function";
- Node0x5ae5e30 [shape=record,label="{entry}"];
- Node0x5ae5e30 -> Node0x5ae5f50;
- Node0x5ae5f50 [shape=record,label="{entry.split}"];
- Node0x5ae5f50 -> Node0x5ae7d90;
- Node0x5ae7d90 [shape=record,label="{for.cond1.preheader}"];
- Node0x5ae7d90 -> Node0x5ae7f20;
- Node0x5ae7f20 [shape=record,label="{for.body3}"];
- Node0x5ae7f20 -> Node0x5ae7f40;
- Node0x5ae7f20 -> Node0x5ae7f60;
- Node0x5ae7f40 [shape=record,label="{if.then}"];
- Node0x5ae7f40 -> Node0x5ae7f60;
- Node0x5ae7f60 [shape=record,label="{for.inc}"];
- Node0x5ae7f60 -> Node0x5ae7f20[constraint=false];
- Node0x5ae7f60 -> Node0x5ae7e30;
- Node0x5ae7e30 [shape=record,label="{for.end}"];
- Node0x5ae7e30 -> Node0x5ae7d90[constraint=false];
- Node0x5ae7e30 -> Node0x5ae8110;
- Node0x5ae8110 [shape=record,label="{for.end12}"];
+ Node0x7fffdb5c9180 [shape=record,label="{entry}"];
+ Node0x7fffdb5c9180 -> Node0x7fffdb5b7940;
+ Node0x7fffdb5b7940 [shape=record,label="{entry.split}"];
+ Node0x7fffdb5b7940 -> Node0x7fffdb5b7960;
+ Node0x7fffdb5b7960 [shape=record,label="{for.cond1.preheader}"];
+ Node0x7fffdb5b7960 -> Node0x7fffdb5b79c0;
+ Node0x7fffdb5b79c0 [shape=record,label="{for.body3}"];
+ Node0x7fffdb5b79c0 -> Node0x7fffdb5b79e0;
+ Node0x7fffdb5b79c0 -> Node0x7fffdb5b7a80;
+ Node0x7fffdb5b79e0 [shape=record,label="{if.then}"];
+ Node0x7fffdb5b79e0 -> Node0x7fffdb5b7a80;
+ Node0x7fffdb5b7a80 [shape=record,label="{for.inc}"];
+ Node0x7fffdb5b7a80 -> Node0x7fffdb5b79c0[constraint=false];
+ Node0x7fffdb5b7a80 -> Node0x7fffdb5b7ae0;
+ Node0x7fffdb5b7ae0 [shape=record,label="{for.end}"];
+ Node0x7fffdb5b7ae0 -> Node0x7fffdb5b7960[constraint=false];
+ Node0x7fffdb5b7ae0 -> Node0x7fffdb5b7b40;
+ Node0x7fffdb5b7b40 [shape=record,label="{for.end12}"];
colorscheme = "paired12"
- subgraph cluster_0x5abb9a0 {
+ subgraph cluster_0x7fffdb5b8530 {
label = "";
style = solid;
color = 1
- subgraph cluster_0x5ae32c0 {
+ subgraph cluster_0x7fffdb5cc3c0 {
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
style = solid;
color = 6
- subgraph cluster_0x5ae3240 {
+ subgraph cluster_0x7fffdb5b86a0 {
label = "Call instruction: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #2";
style = solid;
color = 5
- subgraph cluster_0x5abba20 {
+ subgraph cluster_0x7fffdb5b8f40 {
label = "Region can not profitably be optimized!";
style = solid;
color = 7
- Node0x5ae7f20;
- Node0x5ae7f40;
+ Node0x7fffdb5b79c0;
+ Node0x7fffdb5b79e0;
}
- Node0x5ae7f60;
+ Node0x7fffdb5b7a80;
}
- Node0x5ae7d90;
- Node0x5ae7e30;
+ Node0x7fffdb5b7960;
+ Node0x7fffdb5b7ae0;
}
- Node0x5ae5e30;
- Node0x5ae5f50;
- Node0x5ae8110;
+ Node0x7fffdb5c9180;
+ Node0x7fffdb5b7940;
+ Node0x7fffdb5b7b40;
}
}
diff --git a/polly/docs/experiments/matmul/scopsonly.print_array.dot.png b/polly/docs/experiments/matmul/scopsonly.print_array.dot.png
index b0d4b45aace..f5b8e2eddb0 100644
--- a/polly/docs/experiments/matmul/scopsonly.print_array.dot.png
+++ b/polly/docs/experiments/matmul/scopsonly.print_array.dot.png
Binary files differ