aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pierre Langlois <pierre.langlois@arm.com> 2020-11-05 15:12:11 +0000
committer: Pierre Langlois <pierre.langlois@arm.com> 2020-12-02 17:27:29 +0000
commit: 7c107ea3bed2cff74ea7dcc7b9e736bba6a592b8 (patch)
tree: c4ce8bedad3ac289e10f585846d80a05ce1a8dd5
parent: 2ed1d317d95faa67d949edf5e038b5be77e22763 (diff)
Skip top 64-bit write in movi if identical.
The MacroAssembler's Movi() can synthesise any 128-bit immediate by splitting it into two 64-bit writes:

    movi vd.2D, #low64
    mov x16, #high64
    ins vd.2D[1], x16

However, if the low and top 64 bits are identical, we can skip the second write, because the first write already replicates the low 64 bits into both lanes of the register.

Change-Id: I4e8883b20f229647e974e6a693287d17489f7f58
-rw-r--r-- src/aarch64/macro-assembler-aarch64.cc 12
-rw-r--r-- test/aarch64/test-disasm-neon-aarch64.cc 7
2 files changed, 15 insertions, 4 deletions
diff --git a/src/aarch64/macro-assembler-aarch64.cc b/src/aarch64/macro-assembler-aarch64.cc
index 8574ed7f..ecfdd4d1 100644
--- a/src/aarch64/macro-assembler-aarch64.cc
+++ b/src/aarch64/macro-assembler-aarch64.cc
@@ -1097,11 +1097,15 @@ void MacroAssembler::Movi(const VRegister& vd,
void MacroAssembler::Movi(const VRegister& vd, uint64_t hi, uint64_t lo) {
// TODO: Move 128-bit values in a more efficient way.
VIXL_ASSERT(vd.Is128Bits());
- UseScratchRegisterScope temps(this);
Movi(vd.V2D(), lo);
- Register temp = temps.AcquireX();
- Mov(temp, hi);
- Ins(vd.V2D(), 1, temp);
+ if (hi != lo) {
+ UseScratchRegisterScope temps(this);
+ // TODO: Figure out if using a temporary V register to materialise the
+ // immediate is better.
+ Register temp = temps.AcquireX();
+ Mov(temp, hi);
+ Ins(vd.V2D(), 1, temp);
+ }
}
diff --git a/test/aarch64/test-disasm-neon-aarch64.cc b/test/aarch64/test-disasm-neon-aarch64.cc
index a8e91e95..e09ebd95 100644
--- a/test/aarch64/test-disasm-neon-aarch64.cc
+++ b/test/aarch64/test-disasm-neon-aarch64.cc
@@ -3099,6 +3099,13 @@ TEST(neon_modimm) {
COMPARE_MACRO(Movi(v1.V2D(), 0xffff0000ffffff),
"movi v1.2d, #0xffff0000ffffff");
+ COMPARE_MACRO(Movi(v2.V2D(), 0xff00ff00ff00ff, 0xff00ff00ff00ff),
+ "movi v2.2d, #0xff00ff00ff00ff");
+ COMPARE_MACRO(Movi(v3.V2D(), 0xffff, 0xff00ff00ff00ff),
+ "movi v3.2d, #0xff00ff00ff00ff\n"
+ "mov x16, #0xffff\n"
+ "mov v3.d[1], x16");
+
COMPARE_MACRO(Fmov(v0.V2S(), 1.0f), "fmov v0.2s, #0x70 (1.0000)");
COMPARE_MACRO(Fmov(v31.V2S(), -13.0f), "fmov v31.2s, #0xaa (-13.0000)");
COMPARE_MACRO(Fmov(v0.V4S(), 1.0f), "fmov v0.4s, #0x70 (1.0000)");