author     Chris Jones <christopher.jones@arm.com>  2024-02-29 13:44:20 +0000
committer  Jacob Bramley <jacob.bramley@arm.com>    2024-04-30 10:55:11 +0100
commit     3134e2560de12dfebbd72ef4868b2a5c5d7b5da9 (patch)
tree       ac018f6c02f06c417e865b0883bb9f246e5224b2
parent     89dfbc009318786cffc22a902f2ea0930d937612 (diff)
Perform implicit checks on store instructions
Runtimes can perform implicit checks on store instructions as well as on
load instructions, so extend the simulator to support implicit checks on
stores too. Memory write operations now return true if they succeeded and
false if they failed but were handled by a signal handler.

Implicit checks on store instructions are simulated using a native memory
load, because introducing an additional memory store could change the
observable behaviour of multithreaded runtimes. This means it is not
currently possible to accurately simulate implicit checks on regions whose
access permissions differ for reads and writes. For example, if a page has
read but not write permission, an implicit check on a store will not fail,
because the underlying probing instruction performs a memory read.
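
For illustration, here is a minimal standalone sketch of that caveat
(hypothetical example code, not part of this patch; it assumes a POSIX
host and is independent of VIXL):

    #include <sys/mman.h>
    #include <unistd.h>

    #include <cassert>
    #include <cstdio>

    int main() {
      // Map one page with read permission but no write permission.
      size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
      void* p =
          mmap(NULL, page, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      assert(p != MAP_FAILED);

      // A read probe, which is what the simulated implicit check performs,
      // succeeds without raising a signal...
      volatile char c = *static_cast<volatile char*>(p);
      (void)c;
      std::printf("read probe succeeded\n");

      // ...even though a real store to the same address would raise
      // SIGSEGV:
      //   *static_cast<volatile char*>(p) = 0;  // would fault
      munmap(p, page);
      return 0;
    }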
-rw-r--r--  src/aarch64/logic-aarch64.cc           |  67
-rw-r--r--  src/aarch64/simulator-aarch64.cc       | 184
-rw-r--r--  src/aarch64/simulator-aarch64.h        |  86
-rw-r--r--  test/aarch64/test-simulator-aarch64.cc | 964
4 files changed, 1072 insertions(+), 229 deletions(-)
diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc
index afd107c7..dbab6662 100644
--- a/src/aarch64/logic-aarch64.cc
+++ b/src/aarch64/logic-aarch64.cc
@@ -410,49 +410,52 @@ bool Simulator::ld4r(VectorFormat vform,
}
-void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
+bool Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- StoreLane(src, vform, i, addr);
+ if (!StoreLane(src, vform, i, addr)) return false;
addr += LaneSizeInBytesFromFormat(vform);
}
+ return true;
}
-void Simulator::st1(VectorFormat vform,
+bool Simulator::st1(VectorFormat vform,
LogicVRegister src,
int index,
uint64_t addr) {
- StoreLane(src, vform, index, addr);
+ return StoreLane(src, vform, index, addr);
}
-void Simulator::st2(VectorFormat vform,
+bool Simulator::st2(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
uint64_t addr) {
int esize = LaneSizeInBytesFromFormat(vform);
uint64_t addr2 = addr + esize;
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- StoreLane(src, vform, i, addr);
- StoreLane(src2, vform, i, addr2);
+ if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2)) {
+ return false;
+ }
addr += 2 * esize;
addr2 += 2 * esize;
}
+ return true;
}
-void Simulator::st2(VectorFormat vform,
+bool Simulator::st2(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
int index,
uint64_t addr) {
int esize = LaneSizeInBytesFromFormat(vform);
- StoreLane(src, vform, index, addr);
- StoreLane(src2, vform, index, addr + 1 * esize);
+ return (StoreLane(src, vform, index, addr) &&
+ StoreLane(src2, vform, index, addr + 1 * esize));
}
-void Simulator::st3(VectorFormat vform,
+bool Simulator::st3(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
LogicVRegister src3,
@@ -461,30 +464,32 @@ void Simulator::st3(VectorFormat vform,
uint64_t addr2 = addr + esize;
uint64_t addr3 = addr2 + esize;
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- StoreLane(src, vform, i, addr);
- StoreLane(src2, vform, i, addr2);
- StoreLane(src3, vform, i, addr3);
+ if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
+ !StoreLane(src3, vform, i, addr3)) {
+ return false;
+ }
addr += 3 * esize;
addr2 += 3 * esize;
addr3 += 3 * esize;
}
+ return true;
}
-void Simulator::st3(VectorFormat vform,
+bool Simulator::st3(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
LogicVRegister src3,
int index,
uint64_t addr) {
int esize = LaneSizeInBytesFromFormat(vform);
- StoreLane(src, vform, index, addr);
- StoreLane(src2, vform, index, addr + 1 * esize);
- StoreLane(src3, vform, index, addr + 2 * esize);
+ return (StoreLane(src, vform, index, addr) &&
+ StoreLane(src2, vform, index, addr + 1 * esize) &&
+ StoreLane(src3, vform, index, addr + 2 * esize));
}
-void Simulator::st4(VectorFormat vform,
+bool Simulator::st4(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
LogicVRegister src3,
@@ -495,19 +500,21 @@ void Simulator::st4(VectorFormat vform,
uint64_t addr3 = addr2 + esize;
uint64_t addr4 = addr3 + esize;
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- StoreLane(src, vform, i, addr);
- StoreLane(src2, vform, i, addr2);
- StoreLane(src3, vform, i, addr3);
- StoreLane(src4, vform, i, addr4);
+ if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
+ !StoreLane(src3, vform, i, addr3) ||
+ !StoreLane(src4, vform, i, addr4)) {
+ return false;
+ }
addr += 4 * esize;
addr2 += 4 * esize;
addr3 += 4 * esize;
addr4 += 4 * esize;
}
+ return true;
}
-void Simulator::st4(VectorFormat vform,
+bool Simulator::st4(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
LogicVRegister src3,
@@ -515,10 +522,10 @@ void Simulator::st4(VectorFormat vform,
int index,
uint64_t addr) {
int esize = LaneSizeInBytesFromFormat(vform);
- StoreLane(src, vform, index, addr);
- StoreLane(src2, vform, index, addr + 1 * esize);
- StoreLane(src3, vform, index, addr + 2 * esize);
- StoreLane(src4, vform, index, addr + 3 * esize);
+ return (StoreLane(src, vform, index, addr) &&
+ StoreLane(src2, vform, index, addr + 1 * esize) &&
+ StoreLane(src3, vform, index, addr + 2 * esize) &&
+ StoreLane(src4, vform, index, addr + 3 * esize));
}
@@ -7282,7 +7289,9 @@ void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
for (int r = 0; r < reg_count; r++) {
uint64_t element_address = addr.GetElementAddress(i, r);
- StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address);
+ if (!StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address)) {
+ return;
+ }
}
}
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index 5d77548f..3624f257 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -42,22 +42,22 @@ using vixl::internal::SimFloat16;
const Instruction* Simulator::kEndOfSimAddress = NULL;
-MemoryReadResult TryMemoryRead(uintptr_t address, uintptr_t access_size) {
+MemoryAccessResult TryMemoryAccess(uintptr_t address, uintptr_t access_size) {
#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
for (uintptr_t i = 0; i < access_size; i++) {
- if (_vixl_internal_ReadMemory(address, i) == MemoryReadResult::Failure) {
- // The memory read failed.
- return MemoryReadResult::Failure;
+ if (_vixl_internal_ReadMemory(address, i) == MemoryAccessResult::Failure) {
+ // The memory access failed.
+ return MemoryAccessResult::Failure;
}
}
- // Either the memory read did not raise a signal or the signal handler did
- // not correctly return MemoryReadResult::Failure.
- return MemoryReadResult::Success;
+ // Either the memory access did not raise a signal or the signal handler did
+ // not correctly return MemoryAccessResult::Failure.
+ return MemoryAccessResult::Success;
#else
USE(address);
USE(access_size);
- return MemoryReadResult::Success;
+ return MemoryAccessResult::Success;
#endif // VIXL_ENABLE_IMPLICIT_CHECKS
}
@@ -512,9 +512,9 @@ const Simulator::FormToVisitorFnMap* Simulator::GetFormToVisitorFnMap() {
}
// Try to access the piece of memory given by the address passed in RDI and the
-// offset passed in %rsi, using testb. If a signal is raised then the signal
-// handler should set RIP to _vixl_internal_ReadMemory_continue and RAX to
-// MemoryReadResult::Failure. If no signal is raised then zero RAX before
+// offset passed in RSI, using testb. If a signal is raised then the signal
+// handler should set RIP to _vixl_internal_AccessMemory_continue and RAX to
+// MemoryAccessResult::Failure. If no signal is raised then zero RAX before
// returning.
#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
#ifdef __x86_64__
@@ -524,8 +524,8 @@ asm(R"(
testb (%rdi, %rsi), %al
xorq %rax, %rax
ret
- .globl _vixl_internal_ReadMemory_continue
- _vixl_internal_ReadMemory_continue:
+ .globl _vixl_internal_AccessMemory_continue
+ _vixl_internal_AccessMemory_continue:
ret
)");
#else
@@ -4226,7 +4226,7 @@ void Simulator::StoreReleaseUnscaledOffsetHelper(const Instruction* instr) {
// Approximate store-release by issuing a full barrier before the store.
__sync_synchronize();
- MemWrite<T>(address, ReadRegister<T>(rt));
+ if (!MemWrite<T>(address, ReadRegister<T>(rt))) return;
LogWrite(rt, GetPrintRegisterFormat(element_size), address);
}
@@ -4428,35 +4428,35 @@ void Simulator::LoadStoreHelper(const Instruction* instr,
}
case STRB_w:
- MemWrite<uint8_t>(address, ReadWRegister(srcdst));
+ if (!MemWrite<uint8_t>(address, ReadWRegister(srcdst))) return;
break;
case STRH_w:
- MemWrite<uint16_t>(address, ReadWRegister(srcdst));
+ if (!MemWrite<uint16_t>(address, ReadWRegister(srcdst))) return;
break;
case STR_w:
- MemWrite<uint32_t>(address, ReadWRegister(srcdst));
+ if (!MemWrite<uint32_t>(address, ReadWRegister(srcdst))) return;
break;
case STR_x:
- MemWrite<uint64_t>(address, ReadXRegister(srcdst));
+ if (!MemWrite<uint64_t>(address, ReadXRegister(srcdst))) return;
break;
case STR_b:
- MemWrite<uint8_t>(address, ReadBRegister(srcdst));
+ if (!MemWrite<uint8_t>(address, ReadBRegister(srcdst))) return;
rt_is_vreg = true;
break;
case STR_h:
- MemWrite<uint16_t>(address, ReadHRegisterBits(srcdst));
+ if (!MemWrite<uint16_t>(address, ReadHRegisterBits(srcdst))) return;
rt_is_vreg = true;
break;
case STR_s:
- MemWrite<float>(address, ReadSRegister(srcdst));
+ if (!MemWrite<float>(address, ReadSRegister(srcdst))) return;
rt_is_vreg = true;
break;
case STR_d:
- MemWrite<double>(address, ReadDRegister(srcdst));
+ if (!MemWrite<double>(address, ReadDRegister(srcdst))) return;
rt_is_vreg = true;
break;
case STR_q:
- MemWrite<qreg_t>(address, ReadQRegister(srcdst));
+ if (!MemWrite<qreg_t>(address, ReadQRegister(srcdst))) return;
rt_is_vreg = true;
break;
@@ -4583,30 +4583,30 @@ void Simulator::LoadStorePairHelper(const Instruction* instr,
break;
}
case STP_w: {
- MemWrite<uint32_t>(address, ReadWRegister(rt));
- MemWrite<uint32_t>(address2, ReadWRegister(rt2));
+ if (!MemWrite<uint32_t>(address, ReadWRegister(rt))) return;
+ if (!MemWrite<uint32_t>(address2, ReadWRegister(rt2))) return;
break;
}
case STP_s: {
- MemWrite<float>(address, ReadSRegister(rt));
- MemWrite<float>(address2, ReadSRegister(rt2));
+ if (!MemWrite<float>(address, ReadSRegister(rt))) return;
+ if (!MemWrite<float>(address2, ReadSRegister(rt2))) return;
rt_is_vreg = true;
break;
}
case STP_x: {
- MemWrite<uint64_t>(address, ReadXRegister(rt));
- MemWrite<uint64_t>(address2, ReadXRegister(rt2));
+ if (!MemWrite<uint64_t>(address, ReadXRegister(rt))) return;
+ if (!MemWrite<uint64_t>(address2, ReadXRegister(rt2))) return;
break;
}
case STP_d: {
- MemWrite<double>(address, ReadDRegister(rt));
- MemWrite<double>(address2, ReadDRegister(rt2));
+ if (!MemWrite<double>(address, ReadDRegister(rt))) return;
+ if (!MemWrite<double>(address2, ReadDRegister(rt2))) return;
rt_is_vreg = true;
break;
}
case STP_q: {
- MemWrite<qreg_t>(address, ReadQRegister(rt));
- MemWrite<qreg_t>(address2, ReadQRegister(rt2));
+ if (!MemWrite<qreg_t>(address, ReadQRegister(rt))) return;
+ if (!MemWrite<qreg_t>(address2, ReadQRegister(rt2))) return;
rt_is_vreg = true;
break;
}
@@ -4678,7 +4678,7 @@ void Simulator::CompareAndSwapHelper(const Instruction* instr) {
// Approximate store-release by issuing a full barrier before the store.
__sync_synchronize();
}
- MemWrite<T>(address, newvalue);
+ if (!MemWrite<T>(address, newvalue)) return;
LogWrite(rt, GetPrintRegisterFormatForSize(element_size), address);
}
WriteRegister<T>(rs, data, NoRegLog);
@@ -4730,8 +4730,8 @@ void Simulator::CompareAndSwapPairHelper(const Instruction* instr) {
__sync_synchronize();
}
- MemWrite<T>(address, newvalue_low);
- MemWrite<T>(address2, newvalue_high);
+ if (!MemWrite<T>(address, newvalue_low)) return;
+ if (!MemWrite<T>(address2, newvalue_high)) return;
}
WriteRegister<T>(rs + 1, data_high, NoRegLog);
@@ -4991,35 +4991,41 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
case STLXRB_w:
case STLRB_w:
case STLLRB:
- MemWrite<uint8_t>(address, ReadWRegister(rt));
+ if (!MemWrite<uint8_t>(address, ReadWRegister(rt))) return;
break;
case STXRH_w:
case STLXRH_w:
case STLRH_w:
case STLLRH:
- MemWrite<uint16_t>(address, ReadWRegister(rt));
+ if (!MemWrite<uint16_t>(address, ReadWRegister(rt))) return;
break;
case STXR_w:
case STLXR_w:
case STLR_w:
case STLLR_w:
- MemWrite<uint32_t>(address, ReadWRegister(rt));
+ if (!MemWrite<uint32_t>(address, ReadWRegister(rt))) return;
break;
case STXR_x:
case STLXR_x:
case STLR_x:
case STLLR_x:
- MemWrite<uint64_t>(address, ReadXRegister(rt));
+ if (!MemWrite<uint64_t>(address, ReadXRegister(rt))) return;
break;
case STXP_w:
case STLXP_w:
- MemWrite<uint32_t>(address, ReadWRegister(rt));
- MemWrite<uint32_t>(address + element_size, ReadWRegister(rt2));
+ if (!MemWrite<uint32_t>(address, ReadWRegister(rt))) return;
+ if (!MemWrite<uint32_t>(address + element_size,
+ ReadWRegister(rt2))) {
+ return;
+ }
break;
case STXP_x:
case STLXP_x:
- MemWrite<uint64_t>(address, ReadXRegister(rt));
- MemWrite<uint64_t>(address + element_size, ReadXRegister(rt2));
+ if (!MemWrite<uint64_t>(address, ReadXRegister(rt))) return;
+ if (!MemWrite<uint64_t>(address + element_size,
+ ReadXRegister(rt2))) {
+ return;
+ }
break;
default:
VIXL_UNREACHABLE();
@@ -5102,7 +5108,7 @@ void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) {
PrintRegisterFormat format = GetPrintRegisterFormatForSize(register_size);
LogExtendingRead(rt, format, element_size, address);
- MemWrite<T>(address, result);
+ if (!MemWrite<T>(address, result)) return;
format = GetPrintRegisterFormatForSize(element_size);
LogWrite(rs, format, address);
}
@@ -5132,7 +5138,7 @@ void Simulator::AtomicMemorySwapHelper(const Instruction* instr) {
// Approximate store-release by issuing a full barrier before the store.
__sync_synchronize();
}
- MemWrite<T>(address, ReadRegister<T>(rs));
+ if (!MemWrite<T>(address, ReadRegister<T>(rs))) return;
WriteRegister<T>(rt, data);
@@ -8410,22 +8416,22 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
break;
case NEON_ST1_4v:
case NEON_ST1_4v_post:
- st1(vf, ReadVRegister(reg[3]), addr[3]);
+ if (!st1(vf, ReadVRegister(reg[3]), addr[3])) return;
reg_count++;
VIXL_FALLTHROUGH();
case NEON_ST1_3v:
case NEON_ST1_3v_post:
- st1(vf, ReadVRegister(reg[2]), addr[2]);
+ if (!st1(vf, ReadVRegister(reg[2]), addr[2])) return;
reg_count++;
VIXL_FALLTHROUGH();
case NEON_ST1_2v:
case NEON_ST1_2v_post:
- st1(vf, ReadVRegister(reg[1]), addr[1]);
+ if (!st1(vf, ReadVRegister(reg[1]), addr[1])) return;
reg_count++;
VIXL_FALLTHROUGH();
case NEON_ST1_1v:
case NEON_ST1_1v_post:
- st1(vf, ReadVRegister(reg[0]), addr[0]);
+ if (!st1(vf, ReadVRegister(reg[0]), addr[0])) return;
log_read = false;
break;
case NEON_LD2_post:
@@ -8438,7 +8444,9 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
break;
case NEON_ST2:
case NEON_ST2_post:
- st2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]);
+ if (!st2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0])) {
+ return;
+ }
struct_parts = 2;
reg_count = 2;
log_read = false;
@@ -8457,23 +8465,27 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
break;
case NEON_ST3:
case NEON_ST3_post:
- st3(vf,
- ReadVRegister(reg[0]),
- ReadVRegister(reg[1]),
- ReadVRegister(reg[2]),
- addr[0]);
+ if (!st3(vf,
+ ReadVRegister(reg[0]),
+ ReadVRegister(reg[1]),
+ ReadVRegister(reg[2]),
+ addr[0])) {
+ return;
+ }
struct_parts = 3;
reg_count = 3;
log_read = false;
break;
case NEON_ST4:
case NEON_ST4_post:
- st4(vf,
- ReadVRegister(reg[0]),
- ReadVRegister(reg[1]),
- ReadVRegister(reg[2]),
- ReadVRegister(reg[3]),
- addr[0]);
+ if (!st4(vf,
+ ReadVRegister(reg[0]),
+ ReadVRegister(reg[1]),
+ ReadVRegister(reg[2]),
+ ReadVRegister(reg[3]),
+ addr[0])) {
+ return;
+ }
struct_parts = 4;
reg_count = 4;
log_read = false;
@@ -8670,7 +8682,7 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
return;
}
} else {
- st1(vf, ReadVRegister(rt), lane, addr);
+ if (!st1(vf, ReadVRegister(rt), lane, addr)) return;
}
break;
case NEONLoadStoreSingle2:
@@ -8685,7 +8697,7 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
return;
}
} else {
- st2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr);
+ if (!st2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr)) return;
}
break;
case NEONLoadStoreSingle3:
@@ -8709,12 +8721,14 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
return;
}
} else {
- st3(vf,
- ReadVRegister(rt),
- ReadVRegister(rt2),
- ReadVRegister(rt3),
- lane,
- addr);
+ if (!st3(vf,
+ ReadVRegister(rt),
+ ReadVRegister(rt2),
+ ReadVRegister(rt3),
+ lane,
+ addr)) {
+ return;
+ }
}
break;
case NEONLoadStoreSingle4:
@@ -8740,13 +8754,15 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
return;
}
} else {
- st4(vf,
- ReadVRegister(rt),
- ReadVRegister(rt2),
- ReadVRegister(rt3),
- ReadVRegister(rt4),
- lane,
- addr);
+ if (!st4(vf,
+ ReadVRegister(rt),
+ ReadVRegister(rt2),
+ ReadVRegister(rt3),
+ ReadVRegister(rt4),
+ lane,
+ addr)) {
+ return;
+ }
}
break;
default:
@@ -13200,7 +13216,7 @@ void Simulator::VisitSVEStorePredicateRegister(const Instruction* instr) {
uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
uint64_t address = base + multiplier * pl;
for (int i = 0; i < pl; i++) {
- MemWrite(address + i, pt.GetLane<uint8_t>(i));
+ if (!MemWrite(address + i, pt.GetLane<uint8_t>(i))) return;
}
LogPWrite(instr->GetPt(), address);
break;
@@ -13221,7 +13237,7 @@ void Simulator::VisitSVEStoreVectorRegister(const Instruction* instr) {
uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
uint64_t address = base + multiplier * vl;
for (int i = 0; i < vl; i++) {
- MemWrite(address + i, zt.GetLane<uint8_t>(i));
+ if (!MemWrite(address + i, zt.GetLane<uint8_t>(i))) return;
}
LogZWrite(instr->GetRt(), address);
break;
@@ -14353,8 +14369,8 @@ void Simulator::SimulateMTEStoreTagPair(const Instruction* instr) {
int tag = GetAllocationTagFromAddress(rn);
meta_data_.SetMTETag(address, tag);
- MemWrite<uint64_t>(address, rt);
- MemWrite<uint64_t>(address + kXRegSizeInBytes, rt2);
+ if (!MemWrite<uint64_t>(address, rt)) return;
+ if (!MemWrite<uint64_t>(address + kXRegSizeInBytes, rt2)) return;
}
void Simulator::SimulateMTEStoreTag(const Instruction* instr) {
@@ -14430,7 +14446,7 @@ void Simulator::SimulateMTEStoreTag(const Instruction* instr) {
size_t fill_offset = 0;
while (fill_offset < fill_size) {
- MemWrite<uint64_t>(address + fill_offset, 0);
+ if (!MemWrite<uint64_t>(address + fill_offset, 0)) return;
fill_offset += sizeof(uint64_t);
}
}
@@ -14515,7 +14531,7 @@ void Simulator::SimulateCpyM(const Instruction* instr) {
while (xn--) {
VIXL_DEFINE_OR_RETURN(temp, MemRead<uint8_t>(xs));
- MemWrite<uint8_t>(xd, temp);
+ if (!MemWrite<uint8_t>(xd, temp)) return;
LogMemTransfer(xd, xs, temp);
xs += step;
xd += step;
@@ -14554,7 +14570,7 @@ void Simulator::SimulateSetM(const Instruction* instr) {
while (xn--) {
LogWrite(instr->GetRs(), GetPrintRegPartial(kPrintRegLaneSizeB), xd);
- MemWrite<uint8_t>(xd++, xs);
+ if (!MemWrite<uint8_t>(xd++, xs)) return;
}
WriteXRegister(instr->GetRd(), xd);
WriteXRegister(instr->GetRn(), 0);
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 77b00a64..b4b7aa62 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -68,26 +68,26 @@ namespace aarch64 {
class Simulator;
struct RuntimeCallStructHelper;
-enum class MemoryReadResult { Success = 0, Failure = 1 };
+enum class MemoryAccessResult { Success = 0, Failure = 1 };
-// Try to read a piece of memory at the given address. Reading that memory
+// Try to access a piece of memory at the given address. Accessing that memory
// might raise a signal which, if handled by a custom signal handler, should
// setup the native and simulated context in order to continue. Return whether
// the memory access failed (i.e: raised a signal) or succeeded.
-MemoryReadResult TryMemoryRead(uintptr_t address, uintptr_t access_size);
+MemoryAccessResult TryMemoryAccess(uintptr_t address, uintptr_t access_size);
#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
// Access a byte of memory from the address at the given offset. If the memory
-// could be accessed then return MemoryReadResult::Success. If the memory could
-// not be accessed, and therefore raised a signal, setup the simulated context
-// and return MemoryReadResult::Failure.
+// could be accessed then return MemoryAccessResult::Success. If the memory
+// could not be accessed, and therefore raised a signal, setup the simulated
+// context and return MemoryAccessResult::Failure.
//
// If a signal is raised then it is expected that the signal handler will place
-// MemoryReadResult::Failure in the native return register and the address of
-// _vixl_internal_ReadMemory_continue into the native instruction pointer.
-extern "C" MemoryReadResult _vixl_internal_ReadMemory(uintptr_t address,
- uintptr_t offset);
-extern "C" uintptr_t _vixl_internal_ReadMemory_continue();
+// MemoryAccessResult::Failure in the native return register and the address of
+// _vixl_internal_AccessMemory_continue into the native instruction pointer.
+extern "C" MemoryAccessResult _vixl_internal_ReadMemory(uintptr_t address,
+ uintptr_t offset);
+extern "C" uintptr_t _vixl_internal_AccessMemory_continue();
#endif // VIXL_ENABLE_IMPLICIT_CHECKS
class SimStack {
@@ -400,8 +400,8 @@ class Memory {
if (!IsMTETagsMatched(address, pc)) {
VIXL_ABORT_WITH_MSG("Tag mismatch.");
}
- if (TryMemoryRead(reinterpret_cast<uintptr_t>(base), sizeof(value)) ==
- MemoryReadResult::Failure) {
+ if (TryMemoryAccess(reinterpret_cast<uintptr_t>(base), sizeof(value)) ==
+ MemoryAccessResult::Failure) {
return std::nullopt;
}
memcpy(&value, base, sizeof(value));
@@ -409,7 +409,7 @@ class Memory {
}
template <typename T, typename A>
- void Write(A address, T value, Instruction const* pc = nullptr) const {
+ bool Write(A address, T value, Instruction const* pc = nullptr) const {
VIXL_STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
(sizeof(value) == 4) || (sizeof(value) == 8) ||
(sizeof(value) == 16));
@@ -420,7 +420,12 @@ class Memory {
if (!IsMTETagsMatched(address, pc)) {
VIXL_ABORT_WITH_MSG("Tag mismatch.");
}
+ if (TryMemoryAccess(reinterpret_cast<uintptr_t>(base), sizeof(value)) ==
+ MemoryAccessResult::Failure) {
+ return false;
+ }
memcpy(base, &value, sizeof(value));
+ return true;
}
template <typename A>
@@ -456,7 +461,7 @@ class Memory {
}
template <typename A>
- void Write(int size_in_bytes, A address, uint64_t value) const {
+ bool Write(int size_in_bytes, A address, uint64_t value) const {
switch (size_in_bytes) {
case 1:
return Write(address, static_cast<uint8_t>(value));
@@ -468,6 +473,7 @@ class Memory {
return Write(address, value);
}
VIXL_UNREACHABLE();
+ return false;
}
void AppendMetaData(MetaDataDepot* metadata_depot) {
@@ -2058,7 +2064,7 @@ class Simulator : public DecoderVisitor {
}
template <typename T, typename A>
- void MemWrite(A address, T value) const {
+ bool MemWrite(A address, T value) const {
Instruction const* pc = ReadPc();
return memory_.Write(address, value, pc);
}
@@ -2074,7 +2080,7 @@ class Simulator : public DecoderVisitor {
}
template <typename A>
- void MemWrite(int size_in_bytes, A address, uint64_t value) const {
+ bool MemWrite(int size_in_bytes, A address, uint64_t value) const {
return memory_.Write(size_in_bytes, address, value);
}
@@ -2106,12 +2112,12 @@ class Simulator : public DecoderVisitor {
return true;
}
- void StoreLane(const LogicVRegister& src,
+ bool StoreLane(const LogicVRegister& src,
VectorFormat vform,
int index,
uint64_t addr) const {
unsigned msize_in_bytes = LaneSizeInBytesFromFormat(vform);
- MemWrite(msize_in_bytes, addr, src.Uint(vform, index));
+ return MemWrite(msize_in_bytes, addr, src.Uint(vform, index));
}
uint64_t ComputeMemOperandAddress(const MemOperand& mem_op) const;
@@ -2129,7 +2135,7 @@ class Simulator : public DecoderVisitor {
}
template <typename T>
- void WriteGenericOperand(GenericOperand operand,
+ bool WriteGenericOperand(GenericOperand operand,
T value,
RegLogMode log_mode = LogRegWrites) {
if (operand.IsCPURegister()) {
@@ -2145,8 +2151,9 @@ class Simulator : public DecoderVisitor {
WriteCPURegister(operand.GetCPURegister(), raw, log_mode);
} else {
VIXL_ASSERT(operand.IsMemOperand());
- MemWrite(ComputeMemOperandAddress(operand.GetMemOperand()), value);
+ return MemWrite(ComputeMemOperandAddress(operand.GetMemOperand()), value);
}
+ return true;
}
bool ReadN() const { return nzcv_.GetN() != 0; }
@@ -2995,7 +3002,10 @@ class Simulator : public DecoderVisitor {
R return_value = DoRuntimeCall(function,
argument_operands,
__local_index_sequence_for<P...>{});
- WriteGenericOperand(abi.GetReturnGenericOperand<R>(), return_value);
+ bool succeeded =
+ WriteGenericOperand(abi.GetReturnGenericOperand<R>(), return_value);
+ USE(succeeded);
+ VIXL_ASSERT(succeeded);
}
template <typename R, typename... P>
@@ -3181,24 +3191,26 @@ class Simulator : public DecoderVisitor {
// Returns true if the faulting instruction address (usually the program
// counter or instruction pointer) comes from an internal VIXL memory access.
// This can be used by signal handlers to check if a signal was raised from
- // the simulator (via TryMemoryRead) before the actual read/write occurs.
+ // the simulator (via TryMemoryAccess) before the actual
+ // access occurs.
bool IsSimulatedMemoryAccess(uintptr_t fault_pc) const {
- return fault_pc == reinterpret_cast<uintptr_t>(&_vixl_internal_ReadMemory);
+ return (fault_pc ==
+ reinterpret_cast<uintptr_t>(&_vixl_internal_ReadMemory));
}
// Get the instruction address of the internal VIXL memory access continuation
// label. Signal handlers can resume execution at this address to return to
- // TryMemoryRead which will continue simulation.
+ // TryMemoryAccess which will continue simulation.
uintptr_t GetSignalReturnAddress() const {
- return reinterpret_cast<uintptr_t>(&_vixl_internal_ReadMemory_continue);
+ return reinterpret_cast<uintptr_t>(&_vixl_internal_AccessMemory_continue);
}
// Replace the fault address reported by the kernel with the actual faulting
// address.
//
- // This is required because TryMemoryRead reads a section of memory 1 byte at
- // a time meaning the fault address reported may not be the base address of
- // memory being accessed.
+ // This is required because TryMemoryAccess reads a section of
+ // memory 1 byte at a time meaning the fault address reported may not be the
+ // base address of memory being accessed.
void ReplaceFaultAddress(siginfo_t* siginfo, void* context) {
#ifdef __x86_64__
// The base address being accessed is passed in as the first argument to
@@ -3436,35 +3448,35 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst3,
LogicVRegister dst4,
uint64_t addr);
- void st1(VectorFormat vform, LogicVRegister src, uint64_t addr);
- void st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr);
- void st2(VectorFormat vform,
+ bool st1(VectorFormat vform, LogicVRegister src, uint64_t addr);
+ bool st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr);
+ bool st2(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
uint64_t addr);
- void st2(VectorFormat vform,
+ bool st2(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
int index,
uint64_t addr);
- void st3(VectorFormat vform,
+ bool st3(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
LogicVRegister src3,
uint64_t addr);
- void st3(VectorFormat vform,
+ bool st3(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
LogicVRegister src3,
int index,
uint64_t addr);
- void st4(VectorFormat vform,
+ bool st4(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
LogicVRegister src3,
LogicVRegister src4,
uint64_t addr);
- void st4(VectorFormat vform,
+ bool st4(VectorFormat vform,
LogicVRegister src,
LogicVRegister src2,
LogicVRegister src3,
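
For reference, a minimal sketch of an embedder-side SIGSEGV handler built
on the hooks declared above, closely modelled on the test handler added
later in this patch. It assumes an x86_64 Linux host; the handler name and
the gSim global are illustrative, not part of the VIXL API:

    #include <signal.h>
    #include <ucontext.h>

    Simulator* gSim;  // Illustrative: set to the running simulator first.

    void HandleImplicitCheckFault(int sig, siginfo_t* info, void* context) {
      USE(sig);
      USE(info);
      ucontext_t* uc = reinterpret_cast<ucontext_t*>(context);

      // Only intercept faults raised by the simulator's probing
      // instruction.
      uintptr_t fault_pc = uc->uc_mcontext.gregs[REG_RIP];
      VIXL_CHECK(gSim->IsSimulatedMemoryAccess(fault_pc));

      // Resume native execution at the continuation point and report
      // failure, so that MemRead returns std::nullopt and MemWrite
      // returns false.
      uc->uc_mcontext.gregs[REG_RIP] = gSim->GetSignalReturnAddress();
      uc->uc_mcontext.gregs[REG_RAX] =
          static_cast<greg_t>(MemoryAccessResult::Failure);
    }

    void InstallHandler() {
      struct sigaction sa;
      sigemptyset(&sa.sa_mask);
      sa.sa_flags = SA_SIGINFO;  // Needed for the three-argument handler.
      sa.sa_sigaction = HandleImplicitCheckFault;
      sigaction(SIGSEGV, &sa, NULL);
    }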
diff --git a/test/aarch64/test-simulator-aarch64.cc b/test/aarch64/test-simulator-aarch64.cc
index 7ea41031..979b59f3 100644
--- a/test/aarch64/test-simulator-aarch64.cc
+++ b/test/aarch64/test-simulator-aarch64.cc
@@ -102,6 +102,95 @@ namespace aarch64 {
/* The simulator can run every test. */ \
*skipped = false
+#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
+// The signal handler needs access to the simulator.
+Simulator* gImplicitCheckSim;
+
+#ifdef __x86_64__
+#include <signal.h>
+#include <ucontext.h>
+void HandleSegFault(int sig, siginfo_t* info, void* context) {
+ USE(sig);
+ USE(info);
+ Simulator* sim = gImplicitCheckSim;
+
+ // Did the signal come from the simulator?
+ ucontext_t* uc = reinterpret_cast<ucontext_t*>(context);
+ uintptr_t fault_pc = uc->uc_mcontext.gregs[REG_RIP];
+ VIXL_CHECK(sim->IsSimulatedMemoryAccess(fault_pc));
+
+ // Increment the counter (x1) each time we handle a signal.
+ int64_t counter = sim->ReadXRegister(1);
+ sim->WriteXRegister(1, ++counter);
+
+ // After this handler returns, resume at the VIXL memory access
+ // continuation point (the instruction following the probe).
+ uc->uc_mcontext.gregs[REG_RIP] = sim->GetSignalReturnAddress();
+ // Return that the memory access failed.
+ uc->uc_mcontext.gregs[REG_RAX] =
+ static_cast<greg_t>(MemoryAccessResult::Failure);
+}
+#endif // __x86_64__
+
+// Start an implicit check test with a counter and start label so the number of
+// faults can be counted. Note: each instruction after the start will be
+// expected to fault.
+#define START_IMPLICIT_CHECK() \
+ gImplicitCheckSim = &simulator; \
+ /* Set up a signal handler to count the number of faulting instructions. */ \
+ struct sigaction sa; \
+ sigemptyset(&sa.sa_mask); \
+ sa.sa_flags = SA_SIGINFO; /* Required for sa_sigaction handlers. */ \
+ sa.sa_sigaction = HandleSegFault; \
+ sigaction(SIGSEGV, &sa, NULL); \
+ START(); \
+ /* Reset the counter. */ \
+ __ Mov(x1, 0); \
+ /* Use a consistent bad address. */ \
+ __ Mov(x15, xzr); \
+ __ Mov(ip0, xzr); \
+ /* Set the amount of data to access. */ \
+ __ Mov(ip1, 4096); \
+ [[maybe_unused]] MemOperand bad_memory = MemOperand(ip0); \
+ if (masm.GetCPUFeatures()->Has(CPUFeatures::kSVE)) { \
+ /* Turn on all lanes to ensure all loads/stores are tested. */ \
+ __ Ptrue(p0.VnB()); \
+ __ Ptrue(p1.VnB()); \
+ __ Ptrue(p2.VnB()); \
+ __ Ptrue(p3.VnB()); \
+ __ Ptrue(p4.VnB()); \
+ __ Ptrue(p5.VnB()); \
+ __ Ptrue(p6.VnB()); \
+ __ Ptrue(p7.VnB()); \
+ __ Ptrue(p8.VnB()); \
+ __ Ptrue(p9.VnB()); \
+ __ Ptrue(p10.VnB()); \
+ __ Ptrue(p11.VnB()); \
+ __ Ptrue(p12.VnB()); \
+ __ Ptrue(p13.VnB()); \
+ __ Ptrue(p14.VnB()); \
+ __ Ptrue(p15.VnB()); \
+ } \
+ Label l_start, l_end; \
+ __ Bind(&l_start);
+
+#define END_IMPLICIT_CHECK() \
+ __ Bind(&l_end); \
+ /* Return the counter. */ \
+ __ Mov(x0, x1); \
+ END();
+
+#define TRY_RUN_IMPLICIT_CHECK() \
+ bool skipped; \
+ TRY_RUN(&skipped); \
+ /* Implicit checks should only be used with the simulator. */ \
+ VIXL_ASSERT(!skipped); \
+ /* Check that each load/store instruction generated a segfault that was */ \
+ /* raised and dealt with. */ \
+ size_t result = simulator.ReadXRegister(0); \
+ size_t num_of_faulting_instr = masm.GetSizeOfCodeGeneratedSince(&l_start) - \
+ masm.GetSizeOfCodeGeneratedSince(&l_end); \
+ VIXL_CHECK((result * kInstructionSize) == num_of_faulting_instr);
+
+#endif // VIXL_ENABLE_IMPLICIT_CHECKS
#else // VIXL_INCLUDE_SIMULATOR_AARCH64
@@ -5012,6 +5101,802 @@ DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl2, Basic, Basic, Basic)
+#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
+TEST(ImplicitCheck) {
+ SETUP_WITH_FEATURES(CPUFeatures::kNEON);
+ START_IMPLICIT_CHECK();
+
+ EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
+ // Invalid memory reads.
+ __ ldar(w3, bad_memory);
+ __ ldar(x4, bad_memory);
+ __ ldarb(w5, bad_memory);
+ __ ldarb(x6, bad_memory);
+ __ ldarh(w7, bad_memory);
+ __ ldarh(x8, bad_memory);
+ __ ldaxp(w9, w10, bad_memory);
+ __ ldaxp(x11, x12, bad_memory);
+ __ ldaxr(w13, bad_memory);
+ __ ldaxr(x14, bad_memory);
+ __ ldaxrb(w15, bad_memory);
+ __ ldaxrb(x16, bad_memory);
+ __ ldaxrh(w17, bad_memory);
+ __ ldaxrh(x18, bad_memory);
+ __ ldnp(w19, w20, bad_memory);
+ __ ldnp(x21, x22, bad_memory);
+ __ ldp(w23, w24, bad_memory);
+ __ ldp(x25, x26, bad_memory);
+ __ ldpsw(x27, x28, bad_memory);
+ __ ldr(w29, bad_memory);
+ __ ldr(x2, bad_memory);
+ __ ldrb(w3, bad_memory);
+ __ ldrb(x4, bad_memory);
+ __ ldrh(w5, bad_memory);
+ __ ldrh(x6, bad_memory);
+ __ ldrsb(w7, bad_memory);
+ __ ldrsb(x8, bad_memory);
+ __ ldrsh(w9, bad_memory);
+ __ ldrsh(x10, bad_memory);
+ __ ldrsw(x11, bad_memory);
+ __ ldur(w12, bad_memory);
+ __ ldur(x13, bad_memory);
+ __ ldurb(w14, bad_memory);
+ __ ldurb(x15, bad_memory);
+ __ ldurh(w16, bad_memory);
+ __ ldurh(x17, bad_memory);
+ __ ldursb(w18, bad_memory);
+ __ ldursb(x19, bad_memory);
+ __ ldursh(w20, bad_memory);
+ __ ldursh(x21, bad_memory);
+ __ ldursw(x22, bad_memory);
+ __ ldxp(w23, w24, bad_memory);
+ __ ldxp(x25, x26, bad_memory);
+ __ ldxr(w27, bad_memory);
+ __ ldxr(x28, bad_memory);
+ __ ldxrb(w29, bad_memory);
+ __ ldxrb(x2, bad_memory);
+ __ ldxrh(w3, bad_memory);
+ __ ldxrh(x4, bad_memory);
+
+ // Invalid memory writes. Note: exclusive store instructions are not tested
+ // because they can fail due to the global monitor before trying to perform a
+ // memory store.
+ __ stlr(w18, bad_memory);
+ __ stlr(x19, bad_memory);
+ __ stlrb(w20, bad_memory);
+ __ stlrb(x21, bad_memory);
+ __ stlrh(w22, bad_memory);
+ __ stlrh(x23, bad_memory);
+ __ stnp(w14, w15, bad_memory);
+ __ stnp(x16, x17, bad_memory);
+ __ stp(w18, w19, bad_memory);
+ __ stp(x20, x21, bad_memory);
+ __ str(w22, bad_memory);
+ __ str(x23, bad_memory);
+ __ strb(w24, bad_memory);
+ __ strb(x25, bad_memory);
+ __ strh(w26, bad_memory);
+ __ strh(x27, bad_memory);
+ __ stur(w28, bad_memory);
+ __ stur(x29, bad_memory);
+ __ sturb(w2, bad_memory);
+ __ sturb(x3, bad_memory);
+ __ sturh(w4, bad_memory);
+ __ sturh(x5, bad_memory);
+
+ END_IMPLICIT_CHECK();
+ TRY_RUN_IMPLICIT_CHECK();
+}
+
+TEST(ImplicitCheckNeon) {
+ SETUP_WITH_FEATURES(CPUFeatures::kNEON);
+ START_IMPLICIT_CHECK();
+
+ EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
+ __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
+ __ ld1(v23.V16B(), v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
+ __ ld1(v5.V16B(), v6.V16B(), v7.V16B(), v8.V16B(), bad_memory);
+ __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), bad_memory);
+ __ ld1(v13.V16B(), v14.V16B(), v15.V16B(), bad_memory);
+ __ ld1(v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
+ __ ld1(v17.V16B(), v18.V16B(), bad_memory);
+ __ ld1(v20.V16B(), v21.V16B(), bad_memory);
+ __ ld1(v28.V16B(), v29.V16B(), bad_memory);
+ __ ld1(v29.V16B(), bad_memory);
+ __ ld1(v21.V16B(), bad_memory);
+ __ ld1(v4.V16B(), bad_memory);
+ __ ld1(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), bad_memory);
+ __ ld1(v17.V1D(), v18.V1D(), v19.V1D(), v20.V1D(), bad_memory);
+ __ ld1(v28.V1D(), v29.V1D(), v30.V1D(), v31.V1D(), bad_memory);
+ __ ld1(v20.V1D(), v21.V1D(), v22.V1D(), bad_memory);
+ __ ld1(v19.V1D(), v20.V1D(), v21.V1D(), bad_memory);
+ __ ld1(v12.V1D(), v13.V1D(), v14.V1D(), bad_memory);
+ __ ld1(v29.V1D(), v30.V1D(), bad_memory);
+ __ ld1(v31.V1D(), v0.V1D(), bad_memory);
+ __ ld1(v3.V1D(), v4.V1D(), bad_memory);
+ __ ld1(v28.V1D(), bad_memory);
+ __ ld1(v11.V1D(), bad_memory);
+ __ ld1(v29.V1D(), bad_memory);
+ __ ld1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
+ __ ld1(v8.V2D(), v9.V2D(), v10.V2D(), v11.V2D(), bad_memory);
+ __ ld1(v14.V2D(), v15.V2D(), v16.V2D(), v17.V2D(), bad_memory);
+ __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), bad_memory);
+ __ ld1(v5.V2D(), v6.V2D(), v7.V2D(), bad_memory);
+ __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), bad_memory);
+ __ ld1(v18.V2D(), v19.V2D(), bad_memory);
+ __ ld1(v21.V2D(), v22.V2D(), bad_memory);
+ __ ld1(v17.V2D(), v18.V2D(), bad_memory);
+ __ ld1(v5.V2D(), bad_memory);
+ __ ld1(v6.V2D(), bad_memory);
+ __ ld1(v15.V2D(), bad_memory);
+ __ ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), bad_memory);
+ __ ld1(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
+ __ ld1(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), bad_memory);
+ __ ld1(v11.V2S(), v12.V2S(), v13.V2S(), bad_memory);
+ __ ld1(v8.V2S(), v9.V2S(), v10.V2S(), bad_memory);
+ __ ld1(v31.V2S(), v0.V2S(), v1.V2S(), bad_memory);
+ __ ld1(v0.V2S(), v1.V2S(), bad_memory);
+ __ ld1(v13.V2S(), v14.V2S(), bad_memory);
+ __ ld1(v3.V2S(), v4.V2S(), bad_memory);
+ __ ld1(v26.V2S(), bad_memory);
+ __ ld1(v0.V2S(), bad_memory);
+ __ ld1(v11.V2S(), bad_memory);
+ __ ld1(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), bad_memory);
+ __ ld1(v24.V4H(), v25.V4H(), v26.V4H(), v27.V4H(), bad_memory);
+ __ ld1(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), bad_memory);
+ __ ld1(v30.V4H(), v31.V4H(), v0.V4H(), bad_memory);
+ __ ld1(v25.V4H(), v26.V4H(), v27.V4H(), bad_memory);
+ __ ld1(v3.V4H(), v4.V4H(), v5.V4H(), bad_memory);
+ __ ld1(v3.V4H(), v4.V4H(), bad_memory);
+ __ ld1(v3.V4H(), v4.V4H(), bad_memory);
+ __ ld1(v23.V4H(), v24.V4H(), bad_memory);
+ __ ld1(v26.V4H(), bad_memory);
+ __ ld1(v1.V4H(), bad_memory);
+ __ ld1(v14.V4H(), bad_memory);
+ __ ld1(v26.V4S(), v27.V4S(), v28.V4S(), v29.V4S(), bad_memory);
+ __ ld1(v28.V4S(), v29.V4S(), v30.V4S(), v31.V4S(), bad_memory);
+ __ ld1(v4.V4S(), v5.V4S(), v6.V4S(), v7.V4S(), bad_memory);
+ __ ld1(v2.V4S(), v3.V4S(), v4.V4S(), bad_memory);
+ __ ld1(v22.V4S(), v23.V4S(), v24.V4S(), bad_memory);
+ __ ld1(v15.V4S(), v16.V4S(), v17.V4S(), bad_memory);
+ __ ld1(v20.V4S(), v21.V4S(), bad_memory);
+ __ ld1(v30.V4S(), v31.V4S(), bad_memory);
+ __ ld1(v11.V4S(), v12.V4S(), bad_memory);
+ __ ld1(v15.V4S(), bad_memory);
+ __ ld1(v12.V4S(), bad_memory);
+ __ ld1(v0.V4S(), bad_memory);
+ __ ld1(v17.V8B(), v18.V8B(), v19.V8B(), v20.V8B(), bad_memory);
+ __ ld1(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), bad_memory);
+ __ ld1(v9.V8B(), v10.V8B(), v11.V8B(), v12.V8B(), bad_memory);
+ __ ld1(v4.V8B(), v5.V8B(), v6.V8B(), bad_memory);
+ __ ld1(v2.V8B(), v3.V8B(), v4.V8B(), bad_memory);
+ __ ld1(v12.V8B(), v13.V8B(), v14.V8B(), bad_memory);
+ __ ld1(v10.V8B(), v11.V8B(), bad_memory);
+ __ ld1(v11.V8B(), v12.V8B(), bad_memory);
+ __ ld1(v27.V8B(), v28.V8B(), bad_memory);
+ __ ld1(v31.V8B(), bad_memory);
+ __ ld1(v10.V8B(), bad_memory);
+ __ ld1(v28.V8B(), bad_memory);
+ __ ld1(v5.V8H(), v6.V8H(), v7.V8H(), v8.V8H(), bad_memory);
+ __ ld1(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
+ __ ld1(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), bad_memory);
+ __ ld1(v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
+ __ ld1(v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
+ __ ld1(v17.V8H(), v18.V8H(), v19.V8H(), bad_memory);
+ __ ld1(v4.V8H(), v5.V8H(), bad_memory);
+ __ ld1(v21.V8H(), v22.V8H(), bad_memory);
+ __ ld1(v4.V8H(), v5.V8H(), bad_memory);
+ __ ld1(v9.V8H(), bad_memory);
+ __ ld1(v27.V8H(), bad_memory);
+ __ ld1(v26.V8H(), bad_memory);
+ __ ld1(v19.B(), 1, bad_memory);
+ __ ld1(v12.B(), 3, bad_memory);
+ __ ld1(v27.B(), 12, bad_memory);
+ __ ld1(v10.D(), 1, bad_memory);
+ __ ld1(v26.D(), 1, bad_memory);
+ __ ld1(v7.D(), 1, bad_memory);
+ __ ld1(v19.H(), 5, bad_memory);
+ __ ld1(v10.H(), 1, bad_memory);
+ __ ld1(v5.H(), 4, bad_memory);
+ __ ld1(v21.S(), 2, bad_memory);
+ __ ld1(v13.S(), 2, bad_memory);
+ __ ld1(v1.S(), 2, bad_memory);
+ __ ld1r(v2.V16B(), bad_memory);
+ __ ld1r(v2.V16B(), bad_memory);
+ __ ld1r(v22.V16B(), bad_memory);
+ __ ld1r(v25.V1D(), bad_memory);
+ __ ld1r(v9.V1D(), bad_memory);
+ __ ld1r(v23.V1D(), bad_memory);
+ __ ld1r(v19.V2D(), bad_memory);
+ __ ld1r(v21.V2D(), bad_memory);
+ __ ld1r(v30.V2D(), bad_memory);
+ __ ld1r(v24.V2S(), bad_memory);
+ __ ld1r(v26.V2S(), bad_memory);
+ __ ld1r(v28.V2S(), bad_memory);
+ __ ld1r(v19.V4H(), bad_memory);
+ __ ld1r(v1.V4H(), bad_memory);
+ __ ld1r(v21.V4H(), bad_memory);
+ __ ld1r(v15.V4S(), bad_memory);
+ __ ld1r(v21.V4S(), bad_memory);
+ __ ld1r(v23.V4S(), bad_memory);
+ __ ld1r(v26.V8B(), bad_memory);
+ __ ld1r(v14.V8B(), bad_memory);
+ __ ld1r(v19.V8B(), bad_memory);
+ __ ld1r(v13.V8H(), bad_memory);
+ __ ld1r(v30.V8H(), bad_memory);
+ __ ld1r(v27.V8H(), bad_memory);
+ __ ld2(v21.V16B(), v22.V16B(), bad_memory);
+ __ ld2(v21.V16B(), v22.V16B(), bad_memory);
+ __ ld2(v12.V16B(), v13.V16B(), bad_memory);
+ __ ld2(v14.V2D(), v15.V2D(), bad_memory);
+ __ ld2(v0.V2D(), v1.V2D(), bad_memory);
+ __ ld2(v12.V2D(), v13.V2D(), bad_memory);
+ __ ld2(v27.V2S(), v28.V2S(), bad_memory);
+ __ ld2(v2.V2S(), v3.V2S(), bad_memory);
+ __ ld2(v12.V2S(), v13.V2S(), bad_memory);
+ __ ld2(v9.V4H(), v10.V4H(), bad_memory);
+ __ ld2(v23.V4H(), v24.V4H(), bad_memory);
+ __ ld2(v1.V4H(), v2.V4H(), bad_memory);
+ __ ld2(v20.V4S(), v21.V4S(), bad_memory);
+ __ ld2(v10.V4S(), v11.V4S(), bad_memory);
+ __ ld2(v24.V4S(), v25.V4S(), bad_memory);
+ __ ld2(v17.V8B(), v18.V8B(), bad_memory);
+ __ ld2(v13.V8B(), v14.V8B(), bad_memory);
+ __ ld2(v7.V8B(), v8.V8B(), bad_memory);
+ __ ld2(v30.V8H(), v31.V8H(), bad_memory);
+ __ ld2(v4.V8H(), v5.V8H(), bad_memory);
+ __ ld2(v13.V8H(), v14.V8H(), bad_memory);
+ __ ld2(v5.B(), v6.B(), 12, bad_memory);
+ __ ld2(v16.B(), v17.B(), 7, bad_memory);
+ __ ld2(v29.B(), v30.B(), 2, bad_memory);
+ __ ld2(v11.D(), v12.D(), 1, bad_memory);
+ __ ld2(v26.D(), v27.D(), 0, bad_memory);
+ __ ld2(v25.D(), v26.D(), 0, bad_memory);
+ __ ld2(v18.H(), v19.H(), 7, bad_memory);
+ __ ld2(v17.H(), v18.H(), 5, bad_memory);
+ __ ld2(v30.H(), v31.H(), 2, bad_memory);
+ __ ld2(v29.S(), v30.S(), 3, bad_memory);
+ __ ld2(v28.S(), v29.S(), 0, bad_memory);
+ __ ld2(v6.S(), v7.S(), 1, bad_memory);
+ __ ld2r(v26.V16B(), v27.V16B(), bad_memory);
+ __ ld2r(v21.V16B(), v22.V16B(), bad_memory);
+ __ ld2r(v5.V16B(), v6.V16B(), bad_memory);
+ __ ld2r(v26.V1D(), v27.V1D(), bad_memory);
+ __ ld2r(v14.V1D(), v15.V1D(), bad_memory);
+ __ ld2r(v23.V1D(), v24.V1D(), bad_memory);
+ __ ld2r(v11.V2D(), v12.V2D(), bad_memory);
+ __ ld2r(v29.V2D(), v30.V2D(), bad_memory);
+ __ ld2r(v15.V2D(), v16.V2D(), bad_memory);
+ __ ld2r(v26.V2S(), v27.V2S(), bad_memory);
+ __ ld2r(v22.V2S(), v23.V2S(), bad_memory);
+ __ ld2r(v2.V2S(), v3.V2S(), bad_memory);
+ __ ld2r(v2.V4H(), v3.V4H(), bad_memory);
+ __ ld2r(v9.V4H(), v10.V4H(), bad_memory);
+ __ ld2r(v6.V4H(), v7.V4H(), bad_memory);
+ __ ld2r(v7.V4S(), v8.V4S(), bad_memory);
+ __ ld2r(v19.V4S(), v20.V4S(), bad_memory);
+ __ ld2r(v21.V4S(), v22.V4S(), bad_memory);
+ __ ld2r(v26.V8B(), v27.V8B(), bad_memory);
+ __ ld2r(v20.V8B(), v21.V8B(), bad_memory);
+ __ ld2r(v11.V8B(), v12.V8B(), bad_memory);
+ __ ld2r(v12.V8H(), v13.V8H(), bad_memory);
+ __ ld2r(v6.V8H(), v7.V8H(), bad_memory);
+ __ ld2r(v25.V8H(), v26.V8H(), bad_memory);
+ __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), bad_memory);
+ __ ld3(v28.V16B(), v29.V16B(), v30.V16B(), bad_memory);
+ __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), bad_memory);
+ __ ld3(v21.V2D(), v22.V2D(), v23.V2D(), bad_memory);
+ __ ld3(v18.V2D(), v19.V2D(), v20.V2D(), bad_memory);
+ __ ld3(v27.V2D(), v28.V2D(), v29.V2D(), bad_memory);
+ __ ld3(v7.V2S(), v8.V2S(), v9.V2S(), bad_memory);
+ __ ld3(v20.V2S(), v21.V2S(), v22.V2S(), bad_memory);
+ __ ld3(v26.V2S(), v27.V2S(), v28.V2S(), bad_memory);
+ __ ld3(v27.V4H(), v28.V4H(), v29.V4H(), bad_memory);
+ __ ld3(v28.V4H(), v29.V4H(), v30.V4H(), bad_memory);
+ __ ld3(v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
+ __ ld3(v2.V4S(), v3.V4S(), v4.V4S(), bad_memory);
+ __ ld3(v24.V4S(), v25.V4S(), v26.V4S(), bad_memory);
+ __ ld3(v11.V4S(), v12.V4S(), v13.V4S(), bad_memory);
+ __ ld3(v29.V8B(), v30.V8B(), v31.V8B(), bad_memory);
+ __ ld3(v1.V8B(), v2.V8B(), v3.V8B(), bad_memory);
+ __ ld3(v12.V8B(), v13.V8B(), v14.V8B(), bad_memory);
+ __ ld3(v22.V8H(), v23.V8H(), v24.V8H(), bad_memory);
+ __ ld3(v13.V8H(), v14.V8H(), v15.V8H(), bad_memory);
+ __ ld3(v28.V8H(), v29.V8H(), v30.V8H(), bad_memory);
+ __ ld3(v21.B(), v22.B(), v23.B(), 11, bad_memory);
+ __ ld3(v5.B(), v6.B(), v7.B(), 9, bad_memory);
+ __ ld3(v23.B(), v24.B(), v25.B(), 0, bad_memory);
+ __ ld3(v16.D(), v17.D(), v18.D(), 0, bad_memory);
+ __ ld3(v30.D(), v31.D(), v0.D(), 0, bad_memory);
+ __ ld3(v28.D(), v29.D(), v30.D(), 1, bad_memory);
+ __ ld3(v13.H(), v14.H(), v15.H(), 2, bad_memory);
+ __ ld3(v22.H(), v23.H(), v24.H(), 7, bad_memory);
+ __ ld3(v14.H(), v15.H(), v16.H(), 3, bad_memory);
+ __ ld3(v22.S(), v23.S(), v24.S(), 3, bad_memory);
+ __ ld3(v30.S(), v31.S(), v0.S(), 2, bad_memory);
+ __ ld3(v12.S(), v13.S(), v14.S(), 1, bad_memory);
+ __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
+ __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
+ __ ld3r(v3.V16B(), v4.V16B(), v5.V16B(), bad_memory);
+ __ ld3r(v4.V1D(), v5.V1D(), v6.V1D(), bad_memory);
+ __ ld3r(v7.V1D(), v8.V1D(), v9.V1D(), bad_memory);
+ __ ld3r(v17.V1D(), v18.V1D(), v19.V1D(), bad_memory);
+ __ ld3r(v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
+ __ ld3r(v20.V2D(), v21.V2D(), v22.V2D(), bad_memory);
+ __ ld3r(v14.V2D(), v15.V2D(), v16.V2D(), bad_memory);
+ __ ld3r(v10.V2S(), v11.V2S(), v12.V2S(), bad_memory);
+ __ ld3r(v0.V2S(), v1.V2S(), v2.V2S(), bad_memory);
+ __ ld3r(v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
+ __ ld3r(v22.V4H(), v23.V4H(), v24.V4H(), bad_memory);
+ __ ld3r(v6.V4H(), v7.V4H(), v8.V4H(), bad_memory);
+ __ ld3r(v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
+ __ ld3r(v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
+ __ ld3r(v0.V4S(), v1.V4S(), v2.V4S(), bad_memory);
+ __ ld3r(v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
+ __ ld3r(v2.V8B(), v3.V8B(), v4.V8B(), bad_memory);
+ __ ld3r(v10.V8B(), v11.V8B(), v12.V8B(), bad_memory);
+ __ ld3r(v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
+ __ ld3r(v6.V8H(), v7.V8H(), v8.V8H(), bad_memory);
+ __ ld3r(v29.V8H(), v30.V8H(), v31.V8H(), bad_memory);
+ __ ld3r(v7.V8H(), v8.V8H(), v9.V8H(), bad_memory);
+ __ ld4(v3.V16B(), v4.V16B(), v5.V16B(), v6.V16B(), bad_memory);
+ __ ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), bad_memory);
+ __ ld4(v5.V16B(), v6.V16B(), v7.V16B(), v8.V16B(), bad_memory);
+ __ ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), bad_memory);
+ __ ld4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), bad_memory);
+ __ ld4(v29.V2D(), v30.V2D(), v31.V2D(), v0.V2D(), bad_memory);
+ __ ld4(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), bad_memory);
+ __ ld4(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
+ __ ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), bad_memory);
+ __ ld4(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), bad_memory);
+ __ ld4(v23.V4H(), v24.V4H(), v25.V4H(), v26.V4H(), bad_memory);
+ __ ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), bad_memory);
+ __ ld4(v7.V4S(), v8.V4S(), v9.V4S(), v10.V4S(), bad_memory);
+ __ ld4(v28.V4S(), v29.V4S(), v30.V4S(), v31.V4S(), bad_memory);
+ __ ld4(v29.V4S(), v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
+ __ ld4(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), bad_memory);
+ __ ld4(v27.V8B(), v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
+ __ ld4(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), bad_memory);
+ __ ld4(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
+ __ ld4(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
+ __ ld4(v20.V8H(), v21.V8H(), v22.V8H(), v23.V8H(), bad_memory);
+ __ ld4(v20.B(), v21.B(), v22.B(), v23.B(), 3, bad_memory);
+ __ ld4(v12.B(), v13.B(), v14.B(), v15.B(), 3, bad_memory);
+ __ ld4(v27.B(), v28.B(), v29.B(), v30.B(), 6, bad_memory);
+ __ ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, bad_memory);
+ __ ld4(v15.D(), v16.D(), v17.D(), v18.D(), 1, bad_memory);
+ __ ld4(v16.D(), v17.D(), v18.D(), v19.D(), 1, bad_memory);
+ __ ld4(v2.H(), v3.H(), v4.H(), v5.H(), 6, bad_memory);
+ __ ld4(v5.H(), v6.H(), v7.H(), v8.H(), 3, bad_memory);
+ __ ld4(v7.H(), v8.H(), v9.H(), v10.H(), 6, bad_memory);
+ __ ld4(v6.S(), v7.S(), v8.S(), v9.S(), 1, bad_memory);
+ __ ld4(v25.S(), v26.S(), v27.S(), v28.S(), 2, bad_memory);
+ __ ld4(v8.S(), v9.S(), v10.S(), v11.S(), 3, bad_memory);
+ __ ld4r(v14.V16B(), v15.V16B(), v16.V16B(), v17.V16B(), bad_memory);
+ __ ld4r(v13.V16B(), v14.V16B(), v15.V16B(), v16.V16B(), bad_memory);
+ __ ld4r(v9.V16B(), v10.V16B(), v11.V16B(), v12.V16B(), bad_memory);
+ __ ld4r(v8.V1D(), v9.V1D(), v10.V1D(), v11.V1D(), bad_memory);
+ __ ld4r(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), bad_memory);
+ __ ld4r(v26.V1D(), v27.V1D(), v28.V1D(), v29.V1D(), bad_memory);
+ __ ld4r(v19.V2D(), v20.V2D(), v21.V2D(), v22.V2D(), bad_memory);
+ __ ld4r(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
+ __ ld4r(v15.V2D(), v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
+ __ ld4r(v31.V2S(), v0.V2S(), v1.V2S(), v2.V2S(), bad_memory);
+ __ ld4r(v28.V2S(), v29.V2S(), v30.V2S(), v31.V2S(), bad_memory);
+ __ ld4r(v11.V2S(), v12.V2S(), v13.V2S(), v14.V2S(), bad_memory);
+ __ ld4r(v19.V4H(), v20.V4H(), v21.V4H(), v22.V4H(), bad_memory);
+ __ ld4r(v22.V4H(), v23.V4H(), v24.V4H(), v25.V4H(), bad_memory);
+ __ ld4r(v20.V4H(), v21.V4H(), v22.V4H(), v23.V4H(), bad_memory);
+ __ ld4r(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), bad_memory);
+ __ ld4r(v25.V4S(), v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
+ __ ld4r(v23.V4S(), v24.V4S(), v25.V4S(), v26.V4S(), bad_memory);
+ __ ld4r(v22.V8B(), v23.V8B(), v24.V8B(), v25.V8B(), bad_memory);
+ __ ld4r(v27.V8B(), v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
+ __ ld4r(v29.V8B(), v30.V8B(), v31.V8B(), v0.V8B(), bad_memory);
+ __ ld4r(v28.V8H(), v29.V8H(), v30.V8H(), v31.V8H(), bad_memory);
+ __ ld4r(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
+ __ ld4r(v22.V8H(), v23.V8H(), v24.V8H(), v25.V8H(), bad_memory);
+
+ __ st1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
+ __ st1(v10.V16B(), v11.V16B(), v12.V16B(), v13.V16B(), bad_memory);
+ __ st1(v27.V16B(), v28.V16B(), v29.V16B(), v30.V16B(), bad_memory);
+ __ st1(v16.V16B(), v17.V16B(), v18.V16B(), bad_memory);
+ __ st1(v21.V16B(), v22.V16B(), v23.V16B(), bad_memory);
+ __ st1(v9.V16B(), v10.V16B(), v11.V16B(), bad_memory);
+ __ st1(v7.V16B(), v8.V16B(), bad_memory);
+ __ st1(v26.V16B(), v27.V16B(), bad_memory);
+ __ st1(v22.V16B(), v23.V16B(), bad_memory);
+ __ st1(v23.V16B(), bad_memory);
+ __ st1(v28.V16B(), bad_memory);
+ __ st1(v2.V16B(), bad_memory);
+ __ st1(v29.V1D(), v30.V1D(), v31.V1D(), v0.V1D(), bad_memory);
+ __ st1(v12.V1D(), v13.V1D(), v14.V1D(), v15.V1D(), bad_memory);
+ __ st1(v30.V1D(), v31.V1D(), v0.V1D(), v1.V1D(), bad_memory);
+ __ st1(v16.V1D(), v17.V1D(), v18.V1D(), bad_memory);
+ __ st1(v3.V1D(), v4.V1D(), v5.V1D(), bad_memory);
+ __ st1(v14.V1D(), v15.V1D(), v16.V1D(), bad_memory);
+ __ st1(v18.V1D(), v19.V1D(), bad_memory);
+ __ st1(v5.V1D(), v6.V1D(), bad_memory);
+ __ st1(v2.V1D(), v3.V1D(), bad_memory);
+ __ st1(v4.V1D(), bad_memory);
+ __ st1(v27.V1D(), bad_memory);
+ __ st1(v23.V1D(), bad_memory);
+ __ st1(v2.V2D(), v3.V2D(), v4.V2D(), v5.V2D(), bad_memory);
+ __ st1(v22.V2D(), v23.V2D(), v24.V2D(), v25.V2D(), bad_memory);
+ __ st1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
+ __ st1(v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
+ __ st1(v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
+ __ st1(v22.V2D(), v23.V2D(), v24.V2D(), bad_memory);
+ __ st1(v21.V2D(), v22.V2D(), bad_memory);
+ __ st1(v6.V2D(), v7.V2D(), bad_memory);
+ __ st1(v27.V2D(), v28.V2D(), bad_memory);
+ __ st1(v21.V2D(), bad_memory);
+ __ st1(v29.V2D(), bad_memory);
+ __ st1(v20.V2D(), bad_memory);
+ __ st1(v22.V2S(), v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
+ __ st1(v8.V2S(), v9.V2S(), v10.V2S(), v11.V2S(), bad_memory);
+ __ st1(v15.V2S(), v16.V2S(), v17.V2S(), v18.V2S(), bad_memory);
+ __ st1(v2.V2S(), v3.V2S(), v4.V2S(), bad_memory);
+ __ st1(v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
+ __ st1(v7.V2S(), v8.V2S(), v9.V2S(), bad_memory);
+ __ st1(v28.V2S(), v29.V2S(), bad_memory);
+ __ st1(v29.V2S(), v30.V2S(), bad_memory);
+ __ st1(v23.V2S(), v24.V2S(), bad_memory);
+ __ st1(v6.V2S(), bad_memory);
+ __ st1(v11.V2S(), bad_memory);
+ __ st1(v17.V2S(), bad_memory);
+ __ st1(v6.V4H(), v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
+ __ st1(v9.V4H(), v10.V4H(), v11.V4H(), v12.V4H(), bad_memory);
+ __ st1(v25.V4H(), v26.V4H(), v27.V4H(), v28.V4H(), bad_memory);
+ __ st1(v11.V4H(), v12.V4H(), v13.V4H(), bad_memory);
+ __ st1(v10.V4H(), v11.V4H(), v12.V4H(), bad_memory);
+ __ st1(v12.V4H(), v13.V4H(), v14.V4H(), bad_memory);
+ __ st1(v13.V4H(), v14.V4H(), bad_memory);
+ __ st1(v15.V4H(), v16.V4H(), bad_memory);
+ __ st1(v21.V4H(), v22.V4H(), bad_memory);
+ __ st1(v16.V4H(), bad_memory);
+ __ st1(v8.V4H(), bad_memory);
+ __ st1(v30.V4H(), bad_memory);
+ __ st1(v3.V4S(), v4.V4S(), v5.V4S(), v6.V4S(), bad_memory);
+ __ st1(v25.V4S(), v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
+ __ st1(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), bad_memory);
+ __ st1(v31.V4S(), v0.V4S(), v1.V4S(), bad_memory);
+ __ st1(v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
+ __ st1(v6.V4S(), v7.V4S(), v8.V4S(), bad_memory);
+ __ st1(v17.V4S(), v18.V4S(), bad_memory);
+ __ st1(v31.V4S(), v0.V4S(), bad_memory);
+ __ st1(v1.V4S(), v2.V4S(), bad_memory);
+ __ st1(v26.V4S(), bad_memory);
+ __ st1(v15.V4S(), bad_memory);
+ __ st1(v13.V4S(), bad_memory);
+ __ st1(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
+ __ st1(v10.V8B(), v11.V8B(), v12.V8B(), v13.V8B(), bad_memory);
+ __ st1(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), bad_memory);
+ __ st1(v19.V8B(), v20.V8B(), v21.V8B(), bad_memory);
+ __ st1(v31.V8B(), v0.V8B(), v1.V8B(), bad_memory);
+ __ st1(v9.V8B(), v10.V8B(), v11.V8B(), bad_memory);
+ __ st1(v12.V8B(), v13.V8B(), bad_memory);
+ __ st1(v2.V8B(), v3.V8B(), bad_memory);
+ __ st1(v0.V8B(), v1.V8B(), bad_memory);
+ __ st1(v16.V8B(), bad_memory);
+ __ st1(v25.V8B(), bad_memory);
+ __ st1(v31.V8B(), bad_memory);
+ __ st1(v4.V8H(), v5.V8H(), v6.V8H(), v7.V8H(), bad_memory);
+ __ st1(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), bad_memory);
+ __ st1(v26.V8H(), v27.V8H(), v28.V8H(), v29.V8H(), bad_memory);
+ __ st1(v10.V8H(), v11.V8H(), v12.V8H(), bad_memory);
+ __ st1(v21.V8H(), v22.V8H(), v23.V8H(), bad_memory);
+ __ st1(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
+ __ st1(v26.V8H(), v27.V8H(), bad_memory);
+ __ st1(v24.V8H(), v25.V8H(), bad_memory);
+ __ st1(v17.V8H(), v18.V8H(), bad_memory);
+ __ st1(v29.V8H(), bad_memory);
+ __ st1(v19.V8H(), bad_memory);
+ __ st1(v23.V8H(), bad_memory);
+ __ st1(v19.B(), 15, bad_memory);
+ __ st1(v25.B(), 9, bad_memory);
+ __ st1(v4.B(), 8, bad_memory);
+ __ st1(v13.D(), 0, bad_memory);
+ __ st1(v30.D(), 0, bad_memory);
+ __ st1(v3.D(), 0, bad_memory);
+ __ st1(v22.H(), 0, bad_memory);
+ __ st1(v31.H(), 7, bad_memory);
+ __ st1(v23.H(), 3, bad_memory);
+ __ st1(v0.S(), 0, bad_memory);
+ __ st1(v11.S(), 3, bad_memory);
+ __ st1(v24.S(), 3, bad_memory);
+ __ st2(v7.V16B(), v8.V16B(), bad_memory);
+ __ st2(v5.V16B(), v6.V16B(), bad_memory);
+ __ st2(v18.V16B(), v19.V16B(), bad_memory);
+ __ st2(v14.V2D(), v15.V2D(), bad_memory);
+ __ st2(v7.V2D(), v8.V2D(), bad_memory);
+ __ st2(v24.V2D(), v25.V2D(), bad_memory);
+ __ st2(v22.V2S(), v23.V2S(), bad_memory);
+ __ st2(v4.V2S(), v5.V2S(), bad_memory);
+ __ st2(v2.V2S(), v3.V2S(), bad_memory);
+ __ st2(v23.V4H(), v24.V4H(), bad_memory);
+ __ st2(v8.V4H(), v9.V4H(), bad_memory);
+ __ st2(v7.V4H(), v8.V4H(), bad_memory);
+ __ st2(v17.V4S(), v18.V4S(), bad_memory);
+ __ st2(v6.V4S(), v7.V4S(), bad_memory);
+ __ st2(v26.V4S(), v27.V4S(), bad_memory);
+ __ st2(v31.V8B(), v0.V8B(), bad_memory);
+ __ st2(v0.V8B(), v1.V8B(), bad_memory);
+ __ st2(v21.V8B(), v22.V8B(), bad_memory);
+ __ st2(v7.V8H(), v8.V8H(), bad_memory);
+ __ st2(v22.V8H(), v23.V8H(), bad_memory);
+ __ st2(v4.V8H(), v5.V8H(), bad_memory);
+ __ st2(v8.B(), v9.B(), 15, bad_memory);
+ __ st2(v8.B(), v9.B(), 15, bad_memory);
+ __ st2(v7.B(), v8.B(), 4, bad_memory);
+ __ st2(v25.D(), v26.D(), 0, bad_memory);
+ __ st2(v17.D(), v18.D(), 1, bad_memory);
+ __ st2(v3.D(), v4.D(), 1, bad_memory);
+ __ st2(v4.H(), v5.H(), 3, bad_memory);
+ __ st2(v0.H(), v1.H(), 5, bad_memory);
+ __ st2(v22.H(), v23.H(), 2, bad_memory);
+ __ st2(v14.S(), v15.S(), 3, bad_memory);
+ __ st2(v23.S(), v24.S(), 3, bad_memory);
+ __ st2(v0.S(), v1.S(), 2, bad_memory);
+ __ st3(v26.V16B(), v27.V16B(), v28.V16B(), bad_memory);
+ __ st3(v21.V16B(), v22.V16B(), v23.V16B(), bad_memory);
+ __ st3(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
+ __ st3(v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
+ __ st3(v23.V2D(), v24.V2D(), v25.V2D(), bad_memory);
+ __ st3(v10.V2D(), v11.V2D(), v12.V2D(), bad_memory);
+ __ st3(v9.V2S(), v10.V2S(), v11.V2S(), bad_memory);
+ __ st3(v13.V2S(), v14.V2S(), v15.V2S(), bad_memory);
+ __ st3(v22.V2S(), v23.V2S(), v24.V2S(), bad_memory);
+ __ st3(v31.V4H(), v0.V4H(), v1.V4H(), bad_memory);
+ __ st3(v8.V4H(), v9.V4H(), v10.V4H(), bad_memory);
+ __ st3(v19.V4H(), v20.V4H(), v21.V4H(), bad_memory);
+ __ st3(v18.V4S(), v19.V4S(), v20.V4S(), bad_memory);
+ __ st3(v25.V4S(), v26.V4S(), v27.V4S(), bad_memory);
+ __ st3(v16.V4S(), v17.V4S(), v18.V4S(), bad_memory);
+ __ st3(v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
+ __ st3(v29.V8B(), v30.V8B(), v31.V8B(), bad_memory);
+ __ st3(v30.V8B(), v31.V8B(), v0.V8B(), bad_memory);
+ __ st3(v8.V8H(), v9.V8H(), v10.V8H(), bad_memory);
+ __ st3(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
+ __ st3(v31.B(), v0.B(), v1.B(), 10, bad_memory);
+ __ st3(v4.B(), v5.B(), v6.B(), 5, bad_memory);
+ __ st3(v5.B(), v6.B(), v7.B(), 1, bad_memory);
+ __ st3(v5.D(), v6.D(), v7.D(), 0, bad_memory);
+ __ st3(v6.D(), v7.D(), v8.D(), 0, bad_memory);
+ __ st3(v0.D(), v1.D(), v2.D(), 0, bad_memory);
+ __ st3(v31.H(), v0.H(), v1.H(), 2, bad_memory);
+ __ st3(v14.H(), v15.H(), v16.H(), 5, bad_memory);
+ __ st3(v21.H(), v22.H(), v23.H(), 6, bad_memory);
+ __ st3(v21.S(), v22.S(), v23.S(), 0, bad_memory);
+ __ st3(v11.S(), v12.S(), v13.S(), 1, bad_memory);
+ __ st3(v15.S(), v16.S(), v17.S(), 0, bad_memory);
+ __ st4(v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), bad_memory);
+ __ st4(v24.V16B(), v25.V16B(), v26.V16B(), v27.V16B(), bad_memory);
+ __ st4(v15.V16B(), v16.V16B(), v17.V16B(), v18.V16B(), bad_memory);
+ __ st4(v16.V2D(), v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
+ __ st4(v17.V2D(), v18.V2D(), v19.V2D(), v20.V2D(), bad_memory);
+ __ st4(v9.V2D(), v10.V2D(), v11.V2D(), v12.V2D(), bad_memory);
+ __ st4(v23.V2S(), v24.V2S(), v25.V2S(), v26.V2S(), bad_memory);
+ __ st4(v15.V2S(), v16.V2S(), v17.V2S(), v18.V2S(), bad_memory);
+ __ st4(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
+ __ st4(v14.V4H(), v15.V4H(), v16.V4H(), v17.V4H(), bad_memory);
+ __ st4(v18.V4H(), v19.V4H(), v20.V4H(), v21.V4H(), bad_memory);
+ __ st4(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), bad_memory);
+ __ st4(v13.V4S(), v14.V4S(), v15.V4S(), v16.V4S(), bad_memory);
+ __ st4(v6.V4S(), v7.V4S(), v8.V4S(), v9.V4S(), bad_memory);
+ __ st4(v15.V4S(), v16.V4S(), v17.V4S(), v18.V4S(), bad_memory);
+ __ st4(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
+ __ st4(v25.V8B(), v26.V8B(), v27.V8B(), v28.V8B(), bad_memory);
+ __ st4(v19.V8B(), v20.V8B(), v21.V8B(), v22.V8B(), bad_memory);
+ __ st4(v19.V8H(), v20.V8H(), v21.V8H(), v22.V8H(), bad_memory);
+ __ st4(v15.V8H(), v16.V8H(), v17.V8H(), v18.V8H(), bad_memory);
+ __ st4(v31.V8H(), v0.V8H(), v1.V8H(), v2.V8H(), bad_memory);
+ __ st4(v0.B(), v1.B(), v2.B(), v3.B(), 13, bad_memory);
+ __ st4(v4.B(), v5.B(), v6.B(), v7.B(), 10, bad_memory);
+ __ st4(v9.B(), v10.B(), v11.B(), v12.B(), 9, bad_memory);
+ __ st4(v2.D(), v3.D(), v4.D(), v5.D(), 1, bad_memory);
+ __ st4(v7.D(), v8.D(), v9.D(), v10.D(), 0, bad_memory);
+ __ st4(v31.D(), v0.D(), v1.D(), v2.D(), 1, bad_memory);
+ __ st4(v2.H(), v3.H(), v4.H(), v5.H(), 1, bad_memory);
+ __ st4(v27.H(), v28.H(), v29.H(), v30.H(), 3, bad_memory);
+ __ st4(v24.H(), v25.H(), v26.H(), v27.H(), 4, bad_memory);
+ __ st4(v18.S(), v19.S(), v20.S(), v21.S(), 2, bad_memory);
+ __ st4(v6.S(), v7.S(), v8.S(), v9.S(), 2, bad_memory);
+ __ st4(v25.S(), v26.S(), v27.S(), v28.S(), 1, bad_memory);
+
+ END_IMPLICIT_CHECK();
+ TRY_RUN_IMPLICIT_CHECK();
+}
+
+TEST(ImplicitCheckSve) {
+ SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON);
+ START_IMPLICIT_CHECK();
+
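+ // bad_sve_memory aliases ip0, which the implicit-check harness is assumed
+ // to leave pointing at an inaccessible address, so every access through it
+ // below should fault and be recovered by the signal handler.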
+ SVEMemOperand bad_sve_memory(ip0);
+
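+ // Claiming the remaining buffer space up front keeps the macro-assembler
+ // from emitting a literal pool inside the checked sequence, which would
+ // otherwise skew the expected faulting-instruction count.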
+ EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
+ // Simple, unpredicated loads and stores.
+ __ Str(p12.VnD(), bad_sve_memory);
+ __ Str(p13.VnS(), bad_sve_memory);
+ __ Str(p14.VnH(), bad_sve_memory);
+ __ Str(p15.VnB(), bad_sve_memory);
+ __ Ldr(p8.VnD(), bad_sve_memory);
+ __ Ldr(p9.VnS(), bad_sve_memory);
+ __ Ldr(p10.VnH(), bad_sve_memory);
+ __ Ldr(p11.VnB(), bad_sve_memory);
+
+ __ Str(z0.VnD(), bad_sve_memory);
+ __ Str(z1.VnS(), bad_sve_memory);
+ __ Str(z2.VnH(), bad_sve_memory);
+ __ Str(z3.VnB(), bad_sve_memory);
+ __ Ldr(z20.VnD(), bad_sve_memory);
+ __ Ldr(z21.VnS(), bad_sve_memory);
+ __ Ldr(z22.VnH(), bad_sve_memory);
+ __ Ldr(z23.VnB(), bad_sve_memory);
+
+ // Structured accesses.
+ __ St1b(z0.VnB(), p2, bad_sve_memory);
+ __ St1h(z1.VnH(), p1, bad_sve_memory);
+ __ St1w(z2.VnS(), p1, bad_sve_memory);
+ __ St1d(z3.VnD(), p2, bad_sve_memory);
+ __ Ld1b(z20.VnB(), p1.Zeroing(), bad_sve_memory);
+ __ Ld1h(z21.VnH(), p2.Zeroing(), bad_sve_memory);
+ __ Ld1w(z22.VnS(), p1.Zeroing(), bad_sve_memory);
+ __ Ld1d(z23.VnD(), p1.Zeroing(), bad_sve_memory);
+
+ // Structured, packed accesses.
+ __ St1b(z2.VnH(), p1, bad_sve_memory);
+ __ St1b(z3.VnS(), p2, bad_sve_memory);
+ __ St1b(z4.VnD(), p2, bad_sve_memory);
+ __ St1h(z0.VnS(), p1, bad_sve_memory);
+ __ St1h(z1.VnD(), p1, bad_sve_memory);
+ __ St1w(z2.VnD(), p1, bad_sve_memory);
+ __ Ld1b(z20.VnH(), p1.Zeroing(), bad_sve_memory);
+ __ Ld1b(z21.VnS(), p1.Zeroing(), bad_sve_memory);
+ __ Ld1b(z22.VnD(), p1.Zeroing(), bad_sve_memory);
+ __ Ld1h(z23.VnS(), p2.Zeroing(), bad_sve_memory);
+ __ Ld1h(z24.VnD(), p2.Zeroing(), bad_sve_memory);
+ __ Ld1w(z20.VnD(), p1.Zeroing(), bad_sve_memory);
+ __ Ld1sb(z21.VnH(), p1.Zeroing(), bad_sve_memory);
+ __ Ld1sb(z22.VnS(), p1.Zeroing(), bad_sve_memory);
+ __ Ld1sb(z23.VnD(), p2.Zeroing(), bad_sve_memory);
+ __ Ld1sh(z24.VnS(), p2.Zeroing(), bad_sve_memory);
+ __ Ld1sh(z20.VnD(), p1.Zeroing(), bad_sve_memory);
+ __ Ld1sw(z21.VnD(), p1.Zeroing(), bad_sve_memory);
+
+ // Structured, interleaved accesses.
+ __ St2b(z0.VnB(), z1.VnB(), p4, bad_sve_memory);
+ __ St2h(z1.VnH(), z2.VnH(), p4, bad_sve_memory);
+ __ St2w(z2.VnS(), z3.VnS(), p3, bad_sve_memory);
+ __ St2d(z3.VnD(), z4.VnD(), p4, bad_sve_memory);
+ __ Ld2b(z20.VnB(), z21.VnB(), p5.Zeroing(), bad_sve_memory);
+ __ Ld2h(z21.VnH(), z22.VnH(), p6.Zeroing(), bad_sve_memory);
+ __ Ld2w(z22.VnS(), z23.VnS(), p6.Zeroing(), bad_sve_memory);
+ __ Ld2d(z23.VnD(), z24.VnD(), p5.Zeroing(), bad_sve_memory);
+
+ __ St3b(z4.VnB(), z5.VnB(), z6.VnB(), p4, bad_sve_memory);
+ __ St3h(z5.VnH(), z6.VnH(), z7.VnH(), p4, bad_sve_memory);
+ __ St3w(z6.VnS(), z7.VnS(), z8.VnS(), p3, bad_sve_memory);
+ __ St3d(z7.VnD(), z8.VnD(), z9.VnD(), p4, bad_sve_memory);
+ __ Ld3b(z24.VnB(), z25.VnB(), z26.VnB(), p5.Zeroing(), bad_sve_memory);
+ __ Ld3h(z25.VnH(), z26.VnH(), z27.VnH(), p6.Zeroing(), bad_sve_memory);
+ __ Ld3w(z26.VnS(), z27.VnS(), z28.VnS(), p6.Zeroing(), bad_sve_memory);
+ __ Ld3d(z27.VnD(), z28.VnD(), z29.VnD(), p5.Zeroing(), bad_sve_memory);
+
+ __ St4b(z31.VnB(), z0.VnB(), z1.VnB(), z2.VnB(), p4, bad_sve_memory);
+ __ St4h(z0.VnH(), z1.VnH(), z2.VnH(), z3.VnH(), p4, bad_sve_memory);
+ __ St4w(z1.VnS(), z2.VnS(), z3.VnS(), z4.VnS(), p3, bad_sve_memory);
+ __ St4d(z2.VnD(), z3.VnD(), z4.VnD(), z5.VnD(), p4, bad_sve_memory);
+ __ Ld4b(z25.VnB(),
+ z26.VnB(),
+ z27.VnB(),
+ z28.VnB(),
+ p5.Zeroing(),
+ bad_sve_memory);
+ __ Ld4h(z26.VnH(),
+ z27.VnH(),
+ z28.VnH(),
+ z29.VnH(),
+ p6.Zeroing(),
+ bad_sve_memory);
+ __ Ld4w(z27.VnS(),
+ z28.VnS(),
+ z29.VnS(),
+ z30.VnS(),
+ p6.Zeroing(),
+ bad_sve_memory);
+ __ Ld4d(z28.VnD(),
+ z29.VnD(),
+ z30.VnD(),
+ z31.VnD(),
+ p5.Zeroing(),
+ bad_sve_memory);
+
+ END_IMPLICIT_CHECK();
+ TRY_RUN_IMPLICIT_CHECK();
+}
+
+TEST(ImplicitCheckAtomics) {
+ SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kAtomics);
+ START_IMPLICIT_CHECK();
+
+ EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
+#define INST_LIST(OP) \
+ __ Ld##OP##b(w0, w0, bad_memory); \
+ __ Ld##OP##ab(w0, w1, bad_memory); \
+ __ Ld##OP##lb(w0, w2, bad_memory); \
+ __ Ld##OP##alb(w0, w3, bad_memory); \
+ __ Ld##OP##h(w0, w0, bad_memory); \
+ __ Ld##OP##ah(w0, w1, bad_memory); \
+ __ Ld##OP##lh(w0, w2, bad_memory); \
+ __ Ld##OP##alh(w0, w3, bad_memory); \
+ __ Ld##OP(w0, w0, bad_memory); \
+ __ Ld##OP##a(w0, w1, bad_memory); \
+ __ Ld##OP##l(w0, w2, bad_memory); \
+ __ Ld##OP##al(w0, w3, bad_memory); \
+ __ Ld##OP(x0, x0, bad_memory); \
+ __ Ld##OP##a(x0, x1, bad_memory); \
+ __ Ld##OP##l(x0, x2, bad_memory); \
+ __ Ld##OP##al(x0, x3, bad_memory); \
+ __ St##OP##b(w0, bad_memory); \
+ __ St##OP##lb(w0, bad_memory); \
+ __ St##OP##h(w0, bad_memory); \
+ __ St##OP##lh(w0, bad_memory); \
+ __ St##OP(w0, bad_memory); \
+ __ St##OP##l(w0, bad_memory); \
+ __ St##OP(x0, bad_memory); \
+ __ St##OP##l(x0, bad_memory);
+
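+ // For example, INST_LIST(add) expands to every size and memory-ordering
+ // variant of the LSE LDADD/STADD family (Ldaddb, Ldaddab, Ldaddlb,
+ // Ldaddalb, Ldaddh, ..., Ldaddal, Staddb, ..., Staddl); each invocation
+ // below exercises one atomic operation the same way.
+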
+ INST_LIST(add);
+ INST_LIST(set);
+ INST_LIST(eor);
+ INST_LIST(smin);
+ INST_LIST(smax);
+ INST_LIST(umin);
+ INST_LIST(umax);
+ INST_LIST(clr);
+
+#undef INST_LIST
+
+ END_IMPLICIT_CHECK();
+ TRY_RUN_IMPLICIT_CHECK();
+}
+
+TEST(ImplicitCheckMops) {
+ SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kMOPS);
+ START_IMPLICIT_CHECK();
+
+ EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
+ __ Set(x15, ip1, ip0);
+ __ Setn(x15, ip1, ip0);
+ __ Setg(x15, ip1, ip0);
+ __ Setgn(x15, ip1, ip0);
+
+ __ Cpy(x15, ip0, ip1);
+ __ Cpyn(x15, ip0, ip1);
+ __ Cpyrn(x15, ip0, ip1);
+ __ Cpywn(x15, ip0, ip1);
+ __ Cpyf(x15, ip0, ip1);
+ __ Cpyfn(x15, ip0, ip1);
+ __ Cpyfrn(x15, ip0, ip1);
+ __ Cpyfwn(x15, ip0, ip1);
+
+ // The macro-assembler expands each MOPS instruction above into a prologue,
+ // main and epilogue instruction, of which only the main instruction faults,
+ // so x1 holds one count per macro at this point. Scale the counter by three
+ // to cover the full expansion, then add three more to account for the
+ // Mov/Mul/Add sequence itself.
+ __ Mov(x0, 3);
+ __ Mul(x1, x1, x0);  // One fault per macro -> three instructions each.
+ __ Add(x1, x1, x0);  // Plus these three adjustment instructions.
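+
+ // Worked example, assuming each of the twelve MOPS macros above faults
+ // exactly once: x1 is 12 after the run, so x1 = 12 * 3 + 3 = 39, matching
+ // the 36 expanded MOPS instructions plus these three fixup instructions.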
+
+ END_IMPLICIT_CHECK();
+ TRY_RUN_IMPLICIT_CHECK();
+}
+#endif // VIXL_ENABLE_IMPLICIT_CHECKS
+
#undef __
#define __ masm->
@@ -5141,85 +6026,6 @@ TEST(RunFrom) {
VIXL_CHECK(res_double == 6.0);
}
-#if defined(VIXL_ENABLE_IMPLICIT_CHECKS) && defined(__x86_64__)
-#include <signal.h>
-#include <ucontext.h>
-
-// Generate a function that creates a segfault by loading from an invalid
-// address.
-Instruction* GenerateSegFault(MacroAssembler* masm, Label* start, Label* end) {
- masm->Reset();
-
- // Reset the counter.
- __ Mov(x1, 0);
-
- // Perform a series of invalid memory reads.
- __ Bind(start);
- __ Ldrb(w0, MemOperand());
- __ Ldrh(w0, MemOperand());
- __ Ldr(w0, MemOperand());
- __ Ldr(x0, MemOperand());
- __ Ldr(q0, MemOperand());
- __ Ld1(v0.D(), MemOperand());
- __ Ld2(v0.D(), v1.D(), MemOperand());
- __ Ld3(v0.D(), v1.D(), v2.D(), MemOperand());
- __ Ld4(v0.D(), v1.D(), v2.D(), v3.D(), MemOperand());
- __ Ld1r(v0.D(), MemOperand());
- __ Ld2r(v0.D(), v1.D(), MemOperand());
- __ Ld3r(v0.D(), v1.D(), v2.D(), MemOperand());
- __ Ld4r(v0.D(), v1.D(), v2.D(), v3.D(), MemOperand());
- __ Bind(end);
-
- // Return the counter.
- __ Mov(x0, x1);
- __ Ret();
-
- masm->FinalizeCode();
- return masm->GetBuffer()->GetStartAddress<Instruction*>();
-}
-
-Simulator* gImplicitCheckSim;
-
-void HandleSegFault(int sig, siginfo_t* info, void* context) {
- USE(sig);
- USE(info);
- Simulator* sim = gImplicitCheckSim;
-
- // Did the signal come from the simulator?
- ucontext_t* uc = reinterpret_cast<ucontext_t*>(context);
- uintptr_t fault_pc = uc->uc_mcontext.gregs[REG_RIP];
- VIXL_CHECK(sim->IsSimulatedMemoryAccess(fault_pc));
-
- // Increment the counter (x1) each time we handle a signal.
- int64_t counter = reinterpret_cast<int64_t>(sim->ReadXRegister(1));
- sim->WriteXRegister(1, ++counter);
-
- // Return to the VIXL memory access continuation point, which is also the
- // next instruction, after this handler.
- uc->uc_mcontext.gregs[REG_RIP] = sim->GetSignalReturnAddress();
- // Return that the memory read failed.
- uc->uc_mcontext.gregs[REG_RAX] =
- static_cast<greg_t>(MemoryReadResult::Failure);
-}
-
-TEST(ImplicitCheck) {
- SETUP_WITH_FEATURES(CPUFeatures::kNEON);
-
- gImplicitCheckSim = &simulator;
- struct sigaction sa;
- sa.sa_sigaction = HandleSegFault;
- sigaction(SIGSEGV, &sa, NULL);
-
- // Check that each load/store instruction generated a segfault that was
- // raised and dealt with.
- Label start, end;
- size_t result =
- simulator.RunFrom<int64_t>(GenerateSegFault(&masm, &start, &end));
- size_t num_of_faulting_instr = masm.GetSizeOfCodeGeneratedSince(&start) -
- masm.GetSizeOfCodeGeneratedSince(&end);
- VIXL_CHECK((result * kInstructionSize) == num_of_faulting_instr);
-}
-#endif // __x86_64__
#endif