aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/x86/vm
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/x86/vm')
-rw-r--r--src/cpu/x86/vm/assembler_x86.cpp6092
-rw-r--r--src/cpu/x86/vm/assembler_x86.hpp1135
-rw-r--r--src/cpu/x86/vm/assembler_x86.inline.hpp48
-rw-r--r--src/cpu/x86/vm/c1_LIRAssembler_x86.cpp3
-rw-r--r--src/cpu/x86/vm/cppInterpreter_x86.cpp2
-rw-r--r--src/cpu/x86/vm/frame_x86.inline.hpp2
-rw-r--r--src/cpu/x86/vm/icBuffer_x86.cpp4
-rw-r--r--src/cpu/x86/vm/icache_x86.cpp2
-rw-r--r--src/cpu/x86/vm/interp_masm_x86_32.hpp4
-rw-r--r--src/cpu/x86/vm/interp_masm_x86_64.hpp4
-rw-r--r--src/cpu/x86/vm/interpreter_x86_32.cpp2
-rw-r--r--src/cpu/x86/vm/interpreter_x86_64.cpp2
-rw-r--r--src/cpu/x86/vm/jniFastGetField_x86_32.cpp2
-rw-r--r--src/cpu/x86/vm/jniFastGetField_x86_64.cpp2
-rw-r--r--src/cpu/x86/vm/macroAssembler_x86.cpp6099
-rw-r--r--src/cpu/x86/vm/macroAssembler_x86.hpp1172
-rw-r--r--src/cpu/x86/vm/metaspaceShared_x86_32.cpp3
-rw-r--r--src/cpu/x86/vm/metaspaceShared_x86_64.cpp3
-rw-r--r--src/cpu/x86/vm/methodHandles_x86.cpp1
-rw-r--r--src/cpu/x86/vm/nativeInst_x86.cpp2
-rw-r--r--src/cpu/x86/vm/relocInfo_x86.cpp3
-rw-r--r--src/cpu/x86/vm/runtime_x86_32.cpp5
-rw-r--r--src/cpu/x86/vm/runtime_x86_64.cpp5
-rw-r--r--src/cpu/x86/vm/sharedRuntime_x86_32.cpp4
-rw-r--r--src/cpu/x86/vm/sharedRuntime_x86_64.cpp4
-rw-r--r--src/cpu/x86/vm/stubGenerator_x86_32.cpp4
-rw-r--r--src/cpu/x86/vm/stubGenerator_x86_64.cpp4
-rw-r--r--src/cpu/x86/vm/templateInterpreter_x86_32.cpp2
-rw-r--r--src/cpu/x86/vm/templateInterpreter_x86_64.cpp2
-rw-r--r--src/cpu/x86/vm/templateTable_x86_32.cpp2
-rw-r--r--src/cpu/x86/vm/templateTable_x86_64.cpp1
-rw-r--r--src/cpu/x86/vm/vm_version_x86.cpp3
-rw-r--r--src/cpu/x86/vm/vtableStubs_x86_32.cpp3
-rw-r--r--src/cpu/x86/vm/vtableStubs_x86_64.cpp3
34 files changed, 7362 insertions, 7267 deletions
diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp
index 6b9677d30..0a51534ed 100644
--- a/src/cpu/x86/vm/assembler_x86.cpp
+++ b/src/cpu/x86/vm/assembler_x86.cpp
@@ -23,7 +23,8 @@
*/
#include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
@@ -1154,7 +1155,7 @@ void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
assert(entry != NULL, "call most probably wrong");
InstructionMark im(this);
emit_byte(0xE8);
- intptr_t disp = entry - (_code_pos + sizeof(int32_t));
+ intptr_t disp = entry - (pc() + sizeof(int32_t));
assert(is_simm32(disp), "must be 32bit offset (call2)");
// Technically, should use call32_operand, but this format is
// implied by the fact that we're emitting a call instruction.
@@ -1167,6 +1168,10 @@ void Assembler::cdql() {
emit_byte(0x99);
}
+void Assembler::cld() {
+ emit_byte(0xfc);
+}
+
void Assembler::cmovl(Condition cc, Register dst, Register src) {
NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
int encode = prefix_and_encode(dst->encoding(), src->encoding());
@@ -1260,6 +1265,11 @@ void Assembler::comiss(XMMRegister dst, XMMRegister src) {
emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}
+void Assembler::cpuid() {
+ emit_byte(0x0F);
+ emit_byte(0xA2);
+}
+
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
@@ -1417,7 +1427,7 @@ void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
const int short_size = 2;
const int long_size = 6;
- intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
+ intptr_t offs = (intptr_t)dst - (intptr_t)pc();
if (maybe_short && is8bit(offs - short_size)) {
// 0111 tttn #8-bit disp
emit_byte(0x70 | cc);
@@ -1447,14 +1457,14 @@ void Assembler::jccb(Condition cc, Label& L) {
const int short_size = 2;
address entry = target(L);
#ifdef ASSERT
- intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
+ intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
intptr_t delta = short_branch_delta();
if (delta != 0) {
dist += (dist < 0 ? (-delta) :delta);
}
assert(is8bit(dist), "Dispacement too large for a short jmp");
#endif
- intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
+ intptr_t offs = (intptr_t)entry - (intptr_t)pc();
// 0111 tttn #8-bit disp
emit_byte(0x70 | cc);
emit_byte((offs - short_size) & 0xFF);
@@ -1480,7 +1490,7 @@ void Assembler::jmp(Label& L, bool maybe_short) {
InstructionMark im(this);
const int short_size = 2;
const int long_size = 5;
- intptr_t offs = entry - _code_pos;
+ intptr_t offs = entry - pc();
if (maybe_short && is8bit(offs - short_size)) {
emit_byte(0xEB);
emit_byte((offs - short_size) & 0xFF);
@@ -1510,7 +1520,7 @@ void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
InstructionMark im(this);
emit_byte(0xE9);
assert(dest != NULL, "must have a target");
- intptr_t disp = dest - (_code_pos + sizeof(int32_t));
+ intptr_t disp = dest - (pc() + sizeof(int32_t));
assert(is_simm32(disp), "must be 32bit offset (jmp)");
emit_data(disp, rspec.reloc(), call32_operand);
}
@@ -1521,14 +1531,14 @@ void Assembler::jmpb(Label& L) {
address entry = target(L);
assert(entry != NULL, "jmp most probably wrong");
#ifdef ASSERT
- intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
+ intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
intptr_t delta = short_branch_delta();
if (delta != 0) {
dist += (dist < 0 ? (-delta) :delta);
}
assert(is8bit(dist), "Dispacement too large for a short jmp");
#endif
- intptr_t offs = entry - _code_pos;
+ intptr_t offs = entry - pc();
emit_byte(0xEB);
emit_byte((offs - short_size) & 0xFF);
} else {
@@ -1558,6 +1568,12 @@ void Assembler::leal(Register dst, Address src) {
emit_operand(dst, src);
}
+void Assembler::lfence() {
+ emit_byte(0x0F);
+ emit_byte(0xAE);
+ emit_byte(0xE8);
+}
+
void Assembler::lock() {
emit_byte(0xF0);
}
@@ -2671,6 +2687,10 @@ void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
}
+void Assembler::std() {
+ emit_byte(0xfd);
+}
+
void Assembler::sqrtss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
@@ -2816,6 +2836,12 @@ void Assembler::xchgl(Register dst, Register src) {
emit_byte(0xc0 | encode);
}
+void Assembler::xgetbv() {
+ emit_byte(0x0F);
+ emit_byte(0x01);
+ emit_byte(0xD0);
+}
+
void Assembler::xorl(Register dst, int32_t imm32) {
prefix(dst);
emit_arith(0x81, 0xF0, dst, imm32);
@@ -4361,7 +4387,7 @@ bool Assembler::reachable(AddressLiteral adr) {
disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
if (!is_simm32(disp)) return false;
- disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));
+ disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
// Because rip relative is a disp + address_of_next_instruction and we
// don't know the value of address_of_next_instruction we apply a fudge factor
@@ -4392,7 +4418,7 @@ void Assembler::emit_data64(jlong data,
relocInfo::relocType rtype,
int format) {
if (rtype == relocInfo::none) {
- emit_long64(data);
+ emit_int64(data);
} else {
emit_data64(data, Relocation::spec_simple(rtype), format);
}
@@ -4410,7 +4436,7 @@ void Assembler::emit_data64(jlong data,
#ifdef ASSERT
check_relocation(rspec, format);
#endif
- emit_long64(data);
+ emit_int64(data);
}
int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
@@ -4943,7 +4969,7 @@ void Assembler::mov64(Register dst, int64_t imm64) {
InstructionMark im(this);
int encode = prefixq_and_encode(dst->encoding());
emit_byte(0xB8 | encode);
- emit_long64(imm64);
+ emit_int64(imm64);
}
void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
@@ -5417,6043 +5443,3 @@ void Assembler::xorq(Register dst, Address src) {
}
#endif // !LP64
-
-static Assembler::Condition reverse[] = {
- Assembler::noOverflow /* overflow = 0x0 */ ,
- Assembler::overflow /* noOverflow = 0x1 */ ,
- Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
- Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
- Assembler::notZero /* zero = 0x4, equal = 0x4 */ ,
- Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ ,
- Assembler::above /* belowEqual = 0x6 */ ,
- Assembler::belowEqual /* above = 0x7 */ ,
- Assembler::positive /* negative = 0x8 */ ,
- Assembler::negative /* positive = 0x9 */ ,
- Assembler::noParity /* parity = 0xa */ ,
- Assembler::parity /* noParity = 0xb */ ,
- Assembler::greaterEqual /* less = 0xc */ ,
- Assembler::less /* greaterEqual = 0xd */ ,
- Assembler::greater /* lessEqual = 0xe */ ,
- Assembler::lessEqual /* greater = 0xf, */
-
-};
-
-
-// Implementation of MacroAssembler
-
-// First all the versions that have distinct versions depending on 32/64 bit
-// Unless the difference is trivial (1 line or so).
-
-#ifndef _LP64
-
-// 32bit versions
-
-Address MacroAssembler::as_Address(AddressLiteral adr) {
- return Address(adr.target(), adr.rspec());
-}
-
-Address MacroAssembler::as_Address(ArrayAddress adr) {
- return Address::make_array(adr);
-}
-
-int MacroAssembler::biased_locking_enter(Register lock_reg,
- Register obj_reg,
- Register swap_reg,
- Register tmp_reg,
- bool swap_reg_contains_mark,
- Label& done,
- Label* slow_case,
- BiasedLockingCounters* counters) {
- assert(UseBiasedLocking, "why call this otherwise?");
- assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
- assert_different_registers(lock_reg, obj_reg, swap_reg);
-
- if (PrintBiasedLockingStatistics && counters == NULL)
- counters = BiasedLocking::counters();
-
- bool need_tmp_reg = false;
- if (tmp_reg == noreg) {
- need_tmp_reg = true;
- tmp_reg = lock_reg;
- } else {
- assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
- }
- assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
- Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
- Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes());
- Address saved_mark_addr(lock_reg, 0);
-
- // Biased locking
- // See whether the lock is currently biased toward our thread and
- // whether the epoch is still valid
- // Note that the runtime guarantees sufficient alignment of JavaThread
- // pointers to allow age to be placed into low bits
- // First check to see whether biasing is even enabled for this object
- Label cas_label;
- int null_check_offset = -1;
- if (!swap_reg_contains_mark) {
- null_check_offset = offset();
- movl(swap_reg, mark_addr);
- }
- if (need_tmp_reg) {
- push(tmp_reg);
- }
- movl(tmp_reg, swap_reg);
- andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
- cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
- if (need_tmp_reg) {
- pop(tmp_reg);
- }
- jcc(Assembler::notEqual, cas_label);
- // The bias pattern is present in the object's header. Need to check
- // whether the bias owner and the epoch are both still current.
- // Note that because there is no current thread register on x86 we
- // need to store off the mark word we read out of the object to
- // avoid reloading it and needing to recheck invariants below. This
- // store is unfortunate but it makes the overall code shorter and
- // simpler.
- movl(saved_mark_addr, swap_reg);
- if (need_tmp_reg) {
- push(tmp_reg);
- }
- get_thread(tmp_reg);
- xorl(swap_reg, tmp_reg);
- if (swap_reg_contains_mark) {
- null_check_offset = offset();
- }
- movl(tmp_reg, klass_addr);
- xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
- andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
- if (need_tmp_reg) {
- pop(tmp_reg);
- }
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address)counters->biased_lock_entry_count_addr()));
- }
- jcc(Assembler::equal, done);
-
- Label try_revoke_bias;
- Label try_rebias;
-
- // At this point we know that the header has the bias pattern and
- // that we are not the bias owner in the current epoch. We need to
- // figure out more details about the state of the header in order to
- // know what operations can be legally performed on the object's
- // header.
-
- // If the low three bits in the xor result aren't clear, that means
- // the prototype header is no longer biased and we have to revoke
- // the bias on this object.
- testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
- jcc(Assembler::notZero, try_revoke_bias);
-
- // Biasing is still enabled for this data type. See whether the
- // epoch of the current bias is still valid, meaning that the epoch
- // bits of the mark word are equal to the epoch bits of the
- // prototype header. (Note that the prototype header's epoch bits
- // only change at a safepoint.) If not, attempt to rebias the object
- // toward the current thread. Note that we must be absolutely sure
- // that the current epoch is invalid in order to do this because
- // otherwise the manipulations it performs on the mark word are
- // illegal.
- testl(swap_reg, markOopDesc::epoch_mask_in_place);
- jcc(Assembler::notZero, try_rebias);
-
- // The epoch of the current bias is still valid but we know nothing
- // about the owner; it might be set or it might be clear. Try to
- // acquire the bias of the object using an atomic operation. If this
- // fails we will go in to the runtime to revoke the object's bias.
- // Note that we first construct the presumed unbiased header so we
- // don't accidentally blow away another thread's valid bias.
- movl(swap_reg, saved_mark_addr);
- andl(swap_reg,
- markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
- if (need_tmp_reg) {
- push(tmp_reg);
- }
- get_thread(tmp_reg);
- orl(tmp_reg, swap_reg);
- if (os::is_MP()) {
- lock();
- }
- cmpxchgptr(tmp_reg, Address(obj_reg, 0));
- if (need_tmp_reg) {
- pop(tmp_reg);
- }
- // If the biasing toward our thread failed, this means that
- // another thread succeeded in biasing it toward itself and we
- // need to revoke that bias. The revocation will occur in the
- // interpreter runtime in the slow case.
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
- }
- if (slow_case != NULL) {
- jcc(Assembler::notZero, *slow_case);
- }
- jmp(done);
-
- bind(try_rebias);
- // At this point we know the epoch has expired, meaning that the
- // current "bias owner", if any, is actually invalid. Under these
- // circumstances _only_, we are allowed to use the current header's
- // value as the comparison value when doing the cas to acquire the
- // bias in the current epoch. In other words, we allow transfer of
- // the bias from one thread to another directly in this situation.
- //
- // FIXME: due to a lack of registers we currently blow away the age
- // bits in this situation. Should attempt to preserve them.
- if (need_tmp_reg) {
- push(tmp_reg);
- }
- get_thread(tmp_reg);
- movl(swap_reg, klass_addr);
- orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
- movl(swap_reg, saved_mark_addr);
- if (os::is_MP()) {
- lock();
- }
- cmpxchgptr(tmp_reg, Address(obj_reg, 0));
- if (need_tmp_reg) {
- pop(tmp_reg);
- }
- // If the biasing toward our thread failed, then another thread
- // succeeded in biasing it toward itself and we need to revoke that
- // bias. The revocation will occur in the runtime in the slow case.
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
- }
- if (slow_case != NULL) {
- jcc(Assembler::notZero, *slow_case);
- }
- jmp(done);
-
- bind(try_revoke_bias);
- // The prototype mark in the klass doesn't have the bias bit set any
- // more, indicating that objects of this data type are not supposed
- // to be biased any more. We are going to try to reset the mark of
- // this object to the prototype value and fall through to the
- // CAS-based locking scheme. Note that if our CAS fails, it means
- // that another thread raced us for the privilege of revoking the
- // bias of this particular object, so it's okay to continue in the
- // normal locking code.
- //
- // FIXME: due to a lack of registers we currently blow away the age
- // bits in this situation. Should attempt to preserve them.
- movl(swap_reg, saved_mark_addr);
- if (need_tmp_reg) {
- push(tmp_reg);
- }
- movl(tmp_reg, klass_addr);
- movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
- if (os::is_MP()) {
- lock();
- }
- cmpxchgptr(tmp_reg, Address(obj_reg, 0));
- if (need_tmp_reg) {
- pop(tmp_reg);
- }
- // Fall through to the normal CAS-based lock, because no matter what
- // the result of the above CAS, some thread must have succeeded in
- // removing the bias bit from the object's header.
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
- }
-
- bind(cas_label);
-
- return null_check_offset;
-}
-void MacroAssembler::call_VM_leaf_base(address entry_point,
- int number_of_arguments) {
- call(RuntimeAddress(entry_point));
- increment(rsp, number_of_arguments * wordSize);
-}
-
-void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
- cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
- cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::cmpoop(Address src1, jobject obj) {
- cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::cmpoop(Register src1, jobject obj) {
- cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::extend_sign(Register hi, Register lo) {
- // According to Intel Doc. AP-526, "Integer Divide", p.18.
- if (VM_Version::is_P6() && hi == rdx && lo == rax) {
- cdql();
- } else {
- movl(hi, lo);
- sarl(hi, 31);
- }
-}
-
-void MacroAssembler::jC2(Register tmp, Label& L) {
- // set parity bit if FPU flag C2 is set (via rax)
- save_rax(tmp);
- fwait(); fnstsw_ax();
- sahf();
- restore_rax(tmp);
- // branch
- jcc(Assembler::parity, L);
-}
-
-void MacroAssembler::jnC2(Register tmp, Label& L) {
- // set parity bit if FPU flag C2 is set (via rax)
- save_rax(tmp);
- fwait(); fnstsw_ax();
- sahf();
- restore_rax(tmp);
- // branch
- jcc(Assembler::noParity, L);
-}
-
-// 32bit can do a case table jump in one instruction but we no longer allow the base
-// to be installed in the Address class
-void MacroAssembler::jump(ArrayAddress entry) {
- jmp(as_Address(entry));
-}
-
-// Note: y_lo will be destroyed
-void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
- // Long compare for Java (semantics as described in JVM spec.)
- Label high, low, done;
-
- cmpl(x_hi, y_hi);
- jcc(Assembler::less, low);
- jcc(Assembler::greater, high);
- // x_hi is the return register
- xorl(x_hi, x_hi);
- cmpl(x_lo, y_lo);
- jcc(Assembler::below, low);
- jcc(Assembler::equal, done);
-
- bind(high);
- xorl(x_hi, x_hi);
- increment(x_hi);
- jmp(done);
-
- bind(low);
- xorl(x_hi, x_hi);
- decrementl(x_hi);
-
- bind(done);
-}
-
-void MacroAssembler::lea(Register dst, AddressLiteral src) {
- mov_literal32(dst, (int32_t)src.target(), src.rspec());
-}
-
-void MacroAssembler::lea(Address dst, AddressLiteral adr) {
- // leal(dst, as_Address(adr));
- // see note in movl as to why we must use a move
- mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
-}
-
-void MacroAssembler::leave() {
- mov(rsp, rbp);
- pop(rbp);
-}
-
-void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
- // Multiplication of two Java long values stored on the stack
- // as illustrated below. Result is in rdx:rax.
- //
- // rsp ---> [ ?? ] \ \
- // .... | y_rsp_offset |
- // [ y_lo ] / (in bytes) | x_rsp_offset
- // [ y_hi ] | (in bytes)
- // .... |
- // [ x_lo ] /
- // [ x_hi ]
- // ....
- //
- // Basic idea: lo(result) = lo(x_lo * y_lo)
- // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
- Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
- Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
- Label quick;
- // load x_hi, y_hi and check if quick
- // multiplication is possible
- movl(rbx, x_hi);
- movl(rcx, y_hi);
- movl(rax, rbx);
- orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0
- jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply
- // do full multiplication
- // 1st step
- mull(y_lo); // x_hi * y_lo
- movl(rbx, rax); // save lo(x_hi * y_lo) in rbx,
- // 2nd step
- movl(rax, x_lo);
- mull(rcx); // x_lo * y_hi
- addl(rbx, rax); // add lo(x_lo * y_hi) to rbx,
- // 3rd step
- bind(quick); // note: rbx, = 0 if quick multiply!
- movl(rax, x_lo);
- mull(y_lo); // x_lo * y_lo
- addl(rdx, rbx); // correct hi(x_lo * y_lo)
-}
-
-void MacroAssembler::lneg(Register hi, Register lo) {
- negl(lo);
- adcl(hi, 0);
- negl(hi);
-}
-
-void MacroAssembler::lshl(Register hi, Register lo) {
- // Java shift left long support (semantics as described in JVM spec., p.305)
- // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
- // shift value is in rcx !
- assert(hi != rcx, "must not use rcx");
- assert(lo != rcx, "must not use rcx");
- const Register s = rcx; // shift count
- const int n = BitsPerWord;
- Label L;
- andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
- cmpl(s, n); // if (s < n)
- jcc(Assembler::less, L); // else (s >= n)
- movl(hi, lo); // x := x << n
- xorl(lo, lo);
- // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
- bind(L); // s (mod n) < n
- shldl(hi, lo); // x := x << s
- shll(lo);
-}
-
-
-void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
- // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
- // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
- assert(hi != rcx, "must not use rcx");
- assert(lo != rcx, "must not use rcx");
- const Register s = rcx; // shift count
- const int n = BitsPerWord;
- Label L;
- andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
- cmpl(s, n); // if (s < n)
- jcc(Assembler::less, L); // else (s >= n)
- movl(lo, hi); // x := x >> n
- if (sign_extension) sarl(hi, 31);
- else xorl(hi, hi);
- // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
- bind(L); // s (mod n) < n
- shrdl(lo, hi); // x := x >> s
- if (sign_extension) sarl(hi);
- else shrl(hi);
-}
-
-void MacroAssembler::movoop(Register dst, jobject obj) {
- mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::movoop(Address dst, jobject obj) {
- mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
- mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
- mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::movptr(Register dst, AddressLiteral src) {
- if (src.is_lval()) {
- mov_literal32(dst, (intptr_t)src.target(), src.rspec());
- } else {
- movl(dst, as_Address(src));
- }
-}
-
-void MacroAssembler::movptr(ArrayAddress dst, Register src) {
- movl(as_Address(dst), src);
-}
-
-void MacroAssembler::movptr(Register dst, ArrayAddress src) {
- movl(dst, as_Address(src));
-}
-
-// src should NEVER be a real pointer. Use AddressLiteral for true pointers
-void MacroAssembler::movptr(Address dst, intptr_t src) {
- movl(dst, src);
-}
-
-
-void MacroAssembler::pop_callee_saved_registers() {
- pop(rcx);
- pop(rdx);
- pop(rdi);
- pop(rsi);
-}
-
-void MacroAssembler::pop_fTOS() {
- fld_d(Address(rsp, 0));
- addl(rsp, 2 * wordSize);
-}
-
-void MacroAssembler::push_callee_saved_registers() {
- push(rsi);
- push(rdi);
- push(rdx);
- push(rcx);
-}
-
-void MacroAssembler::push_fTOS() {
- subl(rsp, 2 * wordSize);
- fstp_d(Address(rsp, 0));
-}
-
-
-void MacroAssembler::pushoop(jobject obj) {
- push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::pushklass(Metadata* obj) {
- push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::pushptr(AddressLiteral src) {
- if (src.is_lval()) {
- push_literal32((int32_t)src.target(), src.rspec());
- } else {
- pushl(as_Address(src));
- }
-}
-
-void MacroAssembler::set_word_if_not_zero(Register dst) {
- xorl(dst, dst);
- set_byte_if_not_zero(dst);
-}
-
-static void pass_arg0(MacroAssembler* masm, Register arg) {
- masm->push(arg);
-}
-
-static void pass_arg1(MacroAssembler* masm, Register arg) {
- masm->push(arg);
-}
-
-static void pass_arg2(MacroAssembler* masm, Register arg) {
- masm->push(arg);
-}
-
-static void pass_arg3(MacroAssembler* masm, Register arg) {
- masm->push(arg);
-}
-
-#ifndef PRODUCT
-extern "C" void findpc(intptr_t x);
-#endif
-
-void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
- // In order to get locks to work, we need to fake a in_VM state
- JavaThread* thread = JavaThread::current();
- JavaThreadState saved_state = thread->thread_state();
- thread->set_thread_state(_thread_in_vm);
- if (ShowMessageBoxOnError) {
- JavaThread* thread = JavaThread::current();
- JavaThreadState saved_state = thread->thread_state();
- thread->set_thread_state(_thread_in_vm);
- if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
- ttyLocker ttyl;
- BytecodeCounter::print();
- }
- // To see where a verify_oop failed, get $ebx+40/X for this frame.
- // This is the value of eip which points to where verify_oop will return.
- if (os::message_box(msg, "Execution stopped, print registers?")) {
- print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
- BREAKPOINT;
- }
- } else {
- ttyLocker ttyl;
- ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
- }
- // Don't assert holding the ttyLock
- assert(false, err_msg("DEBUG MESSAGE: %s", msg));
- ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
-}
-
-void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
- ttyLocker ttyl;
- FlagSetting fs(Debugging, true);
- tty->print_cr("eip = 0x%08x", eip);
-#ifndef PRODUCT
- if ((WizardMode || Verbose) && PrintMiscellaneous) {
- tty->cr();
- findpc(eip);
- tty->cr();
- }
-#endif
-#define PRINT_REG(rax) \
- { tty->print("%s = ", #rax); os::print_location(tty, rax); }
- PRINT_REG(rax);
- PRINT_REG(rbx);
- PRINT_REG(rcx);
- PRINT_REG(rdx);
- PRINT_REG(rdi);
- PRINT_REG(rsi);
- PRINT_REG(rbp);
- PRINT_REG(rsp);
-#undef PRINT_REG
- // Print some words near top of staack.
- int* dump_sp = (int*) rsp;
- for (int col1 = 0; col1 < 8; col1++) {
- tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
- os::print_location(tty, *dump_sp++);
- }
- for (int row = 0; row < 16; row++) {
- tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
- for (int col = 0; col < 8; col++) {
- tty->print(" 0x%08x", *dump_sp++);
- }
- tty->cr();
- }
- // Print some instructions around pc:
- Disassembler::decode((address)eip-64, (address)eip);
- tty->print_cr("--------");
- Disassembler::decode((address)eip, (address)eip+32);
-}
-
-void MacroAssembler::stop(const char* msg) {
- ExternalAddress message((address)msg);
- // push address of message
- pushptr(message.addr());
- { Label L; call(L, relocInfo::none); bind(L); } // push eip
- pusha(); // push registers
- call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
- hlt();
-}
-
-void MacroAssembler::warn(const char* msg) {
- push_CPU_state();
-
- ExternalAddress message((address) msg);
- // push address of message
- pushptr(message.addr());
-
- call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
- addl(rsp, wordSize); // discard argument
- pop_CPU_state();
-}
-
-void MacroAssembler::print_state() {
- { Label L; call(L, relocInfo::none); bind(L); } // push eip
- pusha(); // push registers
-
- push_CPU_state();
- call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)));
- pop_CPU_state();
-
- popa();
- addl(rsp, wordSize);
-}
-
-#else // _LP64
-
-// 64 bit versions
-
-Address MacroAssembler::as_Address(AddressLiteral adr) {
- // amd64 always does this as a pc-rel
- // we can be absolute or disp based on the instruction type
- // jmp/call are displacements others are absolute
- assert(!adr.is_lval(), "must be rval");
- assert(reachable(adr), "must be");
- return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
-
-}
-
-Address MacroAssembler::as_Address(ArrayAddress adr) {
- AddressLiteral base = adr.base();
- lea(rscratch1, base);
- Address index = adr.index();
- assert(index._disp == 0, "must not have disp"); // maybe it can?
- Address array(rscratch1, index._index, index._scale, index._disp);
- return array;
-}
-
-int MacroAssembler::biased_locking_enter(Register lock_reg,
- Register obj_reg,
- Register swap_reg,
- Register tmp_reg,
- bool swap_reg_contains_mark,
- Label& done,
- Label* slow_case,
- BiasedLockingCounters* counters) {
- assert(UseBiasedLocking, "why call this otherwise?");
- assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
- assert(tmp_reg != noreg, "tmp_reg must be supplied");
- assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
- assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
- Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
- Address saved_mark_addr(lock_reg, 0);
-
- if (PrintBiasedLockingStatistics && counters == NULL)
- counters = BiasedLocking::counters();
-
- // Biased locking
- // See whether the lock is currently biased toward our thread and
- // whether the epoch is still valid
- // Note that the runtime guarantees sufficient alignment of JavaThread
- // pointers to allow age to be placed into low bits
- // First check to see whether biasing is even enabled for this object
- Label cas_label;
- int null_check_offset = -1;
- if (!swap_reg_contains_mark) {
- null_check_offset = offset();
- movq(swap_reg, mark_addr);
- }
- movq(tmp_reg, swap_reg);
- andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
- cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
- jcc(Assembler::notEqual, cas_label);
- // The bias pattern is present in the object's header. Need to check
- // whether the bias owner and the epoch are both still current.
- load_prototype_header(tmp_reg, obj_reg);
- orq(tmp_reg, r15_thread);
- xorq(tmp_reg, swap_reg);
- andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
- }
- jcc(Assembler::equal, done);
-
- Label try_revoke_bias;
- Label try_rebias;
-
- // At this point we know that the header has the bias pattern and
- // that we are not the bias owner in the current epoch. We need to
- // figure out more details about the state of the header in order to
- // know what operations can be legally performed on the object's
- // header.
-
- // If the low three bits in the xor result aren't clear, that means
- // the prototype header is no longer biased and we have to revoke
- // the bias on this object.
- testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
- jcc(Assembler::notZero, try_revoke_bias);
-
- // Biasing is still enabled for this data type. See whether the
- // epoch of the current bias is still valid, meaning that the epoch
- // bits of the mark word are equal to the epoch bits of the
- // prototype header. (Note that the prototype header's epoch bits
- // only change at a safepoint.) If not, attempt to rebias the object
- // toward the current thread. Note that we must be absolutely sure
- // that the current epoch is invalid in order to do this because
- // otherwise the manipulations it performs on the mark word are
- // illegal.
- testq(tmp_reg, markOopDesc::epoch_mask_in_place);
- jcc(Assembler::notZero, try_rebias);
-
- // The epoch of the current bias is still valid but we know nothing
- // about the owner; it might be set or it might be clear. Try to
- // acquire the bias of the object using an atomic operation. If this
- // fails we will go in to the runtime to revoke the object's bias.
- // Note that we first construct the presumed unbiased header so we
- // don't accidentally blow away another thread's valid bias.
- andq(swap_reg,
- markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
- movq(tmp_reg, swap_reg);
- orq(tmp_reg, r15_thread);
- if (os::is_MP()) {
- lock();
- }
- cmpxchgq(tmp_reg, Address(obj_reg, 0));
- // If the biasing toward our thread failed, this means that
- // another thread succeeded in biasing it toward itself and we
- // need to revoke that bias. The revocation will occur in the
- // interpreter runtime in the slow case.
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
- }
- if (slow_case != NULL) {
- jcc(Assembler::notZero, *slow_case);
- }
- jmp(done);
-
- bind(try_rebias);
- // At this point we know the epoch has expired, meaning that the
- // current "bias owner", if any, is actually invalid. Under these
- // circumstances _only_, we are allowed to use the current header's
- // value as the comparison value when doing the cas to acquire the
- // bias in the current epoch. In other words, we allow transfer of
- // the bias from one thread to another directly in this situation.
- //
- // FIXME: due to a lack of registers we currently blow away the age
- // bits in this situation. Should attempt to preserve them.
- load_prototype_header(tmp_reg, obj_reg);
- orq(tmp_reg, r15_thread);
- if (os::is_MP()) {
- lock();
- }
- cmpxchgq(tmp_reg, Address(obj_reg, 0));
- // If the biasing toward our thread failed, then another thread
- // succeeded in biasing it toward itself and we need to revoke that
- // bias. The revocation will occur in the runtime in the slow case.
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
- }
- if (slow_case != NULL) {
- jcc(Assembler::notZero, *slow_case);
- }
- jmp(done);
-
- bind(try_revoke_bias);
- // The prototype mark in the klass doesn't have the bias bit set any
- // more, indicating that objects of this data type are not supposed
- // to be biased any more. We are going to try to reset the mark of
- // this object to the prototype value and fall through to the
- // CAS-based locking scheme. Note that if our CAS fails, it means
- // that another thread raced us for the privilege of revoking the
- // bias of this particular object, so it's okay to continue in the
- // normal locking code.
- //
- // FIXME: due to a lack of registers we currently blow away the age
- // bits in this situation. Should attempt to preserve them.
- load_prototype_header(tmp_reg, obj_reg);
- if (os::is_MP()) {
- lock();
- }
- cmpxchgq(tmp_reg, Address(obj_reg, 0));
- // Fall through to the normal CAS-based lock, because no matter what
- // the result of the above CAS, some thread must have succeeded in
- // removing the bias bit from the object's header.
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
- }
-
- bind(cas_label);
-
- return null_check_offset;
-}
-
-void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
- Label L, E;
-
-#ifdef _WIN64
- // Windows always allocates space for it's register args
- assert(num_args <= 4, "only register arguments supported");
- subq(rsp, frame::arg_reg_save_area_bytes);
-#endif
-
- // Align stack if necessary
- testl(rsp, 15);
- jcc(Assembler::zero, L);
-
- subq(rsp, 8);
- {
- call(RuntimeAddress(entry_point));
- }
- addq(rsp, 8);
- jmp(E);
-
- bind(L);
- {
- call(RuntimeAddress(entry_point));
- }
-
- bind(E);
-
-#ifdef _WIN64
- // restore stack pointer
- addq(rsp, frame::arg_reg_save_area_bytes);
-#endif
-
-}
-
-void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
- assert(!src2.is_lval(), "should use cmpptr");
-
- if (reachable(src2)) {
- cmpq(src1, as_Address(src2));
- } else {
- lea(rscratch1, src2);
- Assembler::cmpq(src1, Address(rscratch1, 0));
- }
-}
-
-int MacroAssembler::corrected_idivq(Register reg) {
- // Full implementation of Java ldiv and lrem; checks for special
- // case as described in JVM spec., p.243 & p.271. The function
- // returns the (pc) offset of the idivl instruction - may be needed
- // for implicit exceptions.
- //
- // normal case special case
- //
- // input : rax: dividend min_long
- // reg: divisor (may not be eax/edx) -1
- //
- // output: rax: quotient (= rax idiv reg) min_long
- // rdx: remainder (= rax irem reg) 0
- assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
- static const int64_t min_long = 0x8000000000000000;
- Label normal_case, special_case;
-
- // check for special case
- cmp64(rax, ExternalAddress((address) &min_long));
- jcc(Assembler::notEqual, normal_case);
- xorl(rdx, rdx); // prepare rdx for possible special case (where
- // remainder = 0)
- cmpq(reg, -1);
- jcc(Assembler::equal, special_case);
-
- // handle normal case
- bind(normal_case);
- cdqq();
- int idivq_offset = offset();
- idivq(reg);
-
- // normal and special case exit
- bind(special_case);
-
- return idivq_offset;
-}
-
-void MacroAssembler::decrementq(Register reg, int value) {
- if (value == min_jint) { subq(reg, value); return; }
- if (value < 0) { incrementq(reg, -value); return; }
- if (value == 0) { ; return; }
- if (value == 1 && UseIncDec) { decq(reg) ; return; }
- /* else */ { subq(reg, value) ; return; }
-}
-
-void MacroAssembler::decrementq(Address dst, int value) {
- if (value == min_jint) { subq(dst, value); return; }
- if (value < 0) { incrementq(dst, -value); return; }
- if (value == 0) { ; return; }
- if (value == 1 && UseIncDec) { decq(dst) ; return; }
- /* else */ { subq(dst, value) ; return; }
-}
-
-void MacroAssembler::incrementq(Register reg, int value) {
- if (value == min_jint) { addq(reg, value); return; }
- if (value < 0) { decrementq(reg, -value); return; }
- if (value == 0) { ; return; }
- if (value == 1 && UseIncDec) { incq(reg) ; return; }
- /* else */ { addq(reg, value) ; return; }
-}
-
-void MacroAssembler::incrementq(Address dst, int value) {
- if (value == min_jint) { addq(dst, value); return; }
- if (value < 0) { decrementq(dst, -value); return; }
- if (value == 0) { ; return; }
- if (value == 1 && UseIncDec) { incq(dst) ; return; }
- /* else */ { addq(dst, value) ; return; }
-}
-
-// 32bit can do a case table jump in one instruction but we no longer allow the base
-// to be installed in the Address class
-void MacroAssembler::jump(ArrayAddress entry) {
- lea(rscratch1, entry.base());
- Address dispatch = entry.index();
- assert(dispatch._base == noreg, "must be");
- dispatch._base = rscratch1;
- jmp(dispatch);
-}
-
-void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
- ShouldNotReachHere(); // 64bit doesn't use two regs
- cmpq(x_lo, y_lo);
-}
-
-void MacroAssembler::lea(Register dst, AddressLiteral src) {
- mov_literal64(dst, (intptr_t)src.target(), src.rspec());
-}
-
-void MacroAssembler::lea(Address dst, AddressLiteral adr) {
- mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
- movptr(dst, rscratch1);
-}
-
-void MacroAssembler::leave() {
- // %%% is this really better? Why not on 32bit too?
- emit_byte(0xC9); // LEAVE
-}
-
-void MacroAssembler::lneg(Register hi, Register lo) {
- ShouldNotReachHere(); // 64bit doesn't use two regs
- negq(lo);
-}
-
-void MacroAssembler::movoop(Register dst, jobject obj) {
- mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::movoop(Address dst, jobject obj) {
- mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
- movq(dst, rscratch1);
-}
-
-void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
- mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
-}
-
-void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
- mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
- movq(dst, rscratch1);
-}
-
-void MacroAssembler::movptr(Register dst, AddressLiteral src) {
- if (src.is_lval()) {
- mov_literal64(dst, (intptr_t)src.target(), src.rspec());
- } else {
- if (reachable(src)) {
- movq(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- movq(dst, Address(rscratch1,0));
- }
- }
-}
-
-void MacroAssembler::movptr(ArrayAddress dst, Register src) {
- movq(as_Address(dst), src);
-}
-
-void MacroAssembler::movptr(Register dst, ArrayAddress src) {
- movq(dst, as_Address(src));
-}
-
-// src should NEVER be a real pointer. Use AddressLiteral for true pointers
-void MacroAssembler::movptr(Address dst, intptr_t src) {
- mov64(rscratch1, src);
- movq(dst, rscratch1);
-}
-
-// These are mostly for initializing NULL
-void MacroAssembler::movptr(Address dst, int32_t src) {
- movslq(dst, src);
-}
-
-void MacroAssembler::movptr(Register dst, int32_t src) {
- mov64(dst, (intptr_t)src);
-}
-
-void MacroAssembler::pushoop(jobject obj) {
- movoop(rscratch1, obj);
- push(rscratch1);
-}
-
-void MacroAssembler::pushklass(Metadata* obj) {
- mov_metadata(rscratch1, obj);
- push(rscratch1);
-}
-
-void MacroAssembler::pushptr(AddressLiteral src) {
- lea(rscratch1, src);
- if (src.is_lval()) {
- push(rscratch1);
- } else {
- pushq(Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::reset_last_Java_frame(bool clear_fp,
- bool clear_pc) {
- // we must set sp to zero to clear frame
- movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
- // must clear fp, so that compiled frames are not confused; it is
- // possible that we need it only for debugging
- if (clear_fp) {
- movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
- }
-
- if (clear_pc) {
- movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
- }
-}
-
-void MacroAssembler::set_last_Java_frame(Register last_java_sp,
- Register last_java_fp,
- address last_java_pc) {
- // determine last_java_sp register
- if (!last_java_sp->is_valid()) {
- last_java_sp = rsp;
- }
-
- // last_java_fp is optional
- if (last_java_fp->is_valid()) {
- movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
- last_java_fp);
- }
-
- // last_java_pc is optional
- if (last_java_pc != NULL) {
- Address java_pc(r15_thread,
- JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
- lea(rscratch1, InternalAddress(last_java_pc));
- movptr(java_pc, rscratch1);
- }
-
- movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
-}
-
-static void pass_arg0(MacroAssembler* masm, Register arg) {
- if (c_rarg0 != arg ) {
- masm->mov(c_rarg0, arg);
- }
-}
-
-static void pass_arg1(MacroAssembler* masm, Register arg) {
- if (c_rarg1 != arg ) {
- masm->mov(c_rarg1, arg);
- }
-}
-
-static void pass_arg2(MacroAssembler* masm, Register arg) {
- if (c_rarg2 != arg ) {
- masm->mov(c_rarg2, arg);
- }
-}
-
-static void pass_arg3(MacroAssembler* masm, Register arg) {
- if (c_rarg3 != arg ) {
- masm->mov(c_rarg3, arg);
- }
-}
-
-void MacroAssembler::stop(const char* msg) {
- address rip = pc();
- pusha(); // get regs on stack
- lea(c_rarg0, ExternalAddress((address) msg));
- lea(c_rarg1, InternalAddress(rip));
- movq(c_rarg2, rsp); // pass pointer to regs array
- andq(rsp, -16); // align stack as required by ABI
- call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
- hlt();
-}
-
-void MacroAssembler::warn(const char* msg) {
- push(rbp);
- movq(rbp, rsp);
- andq(rsp, -16); // align stack as required by push_CPU_state and call
- push_CPU_state(); // keeps alignment at 16 bytes
- lea(c_rarg0, ExternalAddress((address) msg));
- call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
- pop_CPU_state();
- mov(rsp, rbp);
- pop(rbp);
-}
-
-void MacroAssembler::print_state() {
- address rip = pc();
- pusha(); // get regs on stack
- push(rbp);
- movq(rbp, rsp);
- andq(rsp, -16); // align stack as required by push_CPU_state and call
- push_CPU_state(); // keeps alignment at 16 bytes
-
- lea(c_rarg0, InternalAddress(rip));
- lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
- call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);
-
- pop_CPU_state();
- mov(rsp, rbp);
- pop(rbp);
- popa();
-}
-
-#ifndef PRODUCT
-extern "C" void findpc(intptr_t x);
-#endif
-
-void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
- // In order to get locks to work, we need to fake a in_VM state
- if (ShowMessageBoxOnError) {
- JavaThread* thread = JavaThread::current();
- JavaThreadState saved_state = thread->thread_state();
- thread->set_thread_state(_thread_in_vm);
-#ifndef PRODUCT
- if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
- ttyLocker ttyl;
- BytecodeCounter::print();
- }
-#endif
- // To see where a verify_oop failed, get $ebx+40/X for this frame.
- // XXX correct this offset for amd64
- // This is the value of eip which points to where verify_oop will return.
- if (os::message_box(msg, "Execution stopped, print registers?")) {
- print_state64(pc, regs);
- BREAKPOINT;
- assert(false, "start up GDB");
- }
- ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
- } else {
- ttyLocker ttyl;
- ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
- msg);
- assert(false, err_msg("DEBUG MESSAGE: %s", msg));
- }
-}
-
-void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
- ttyLocker ttyl;
- FlagSetting fs(Debugging, true);
- tty->print_cr("rip = 0x%016lx", pc);
-#ifndef PRODUCT
- tty->cr();
- findpc(pc);
- tty->cr();
-#endif
-#define PRINT_REG(rax, value) \
- { tty->print("%s = ", #rax); os::print_location(tty, value); }
- PRINT_REG(rax, regs[15]);
- PRINT_REG(rbx, regs[12]);
- PRINT_REG(rcx, regs[14]);
- PRINT_REG(rdx, regs[13]);
- PRINT_REG(rdi, regs[8]);
- PRINT_REG(rsi, regs[9]);
- PRINT_REG(rbp, regs[10]);
- PRINT_REG(rsp, regs[11]);
- PRINT_REG(r8 , regs[7]);
- PRINT_REG(r9 , regs[6]);
- PRINT_REG(r10, regs[5]);
- PRINT_REG(r11, regs[4]);
- PRINT_REG(r12, regs[3]);
- PRINT_REG(r13, regs[2]);
- PRINT_REG(r14, regs[1]);
- PRINT_REG(r15, regs[0]);
-#undef PRINT_REG
- // Print some words near top of staack.
- int64_t* rsp = (int64_t*) regs[11];
- int64_t* dump_sp = rsp;
- for (int col1 = 0; col1 < 8; col1++) {
- tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
- os::print_location(tty, *dump_sp++);
- }
- for (int row = 0; row < 25; row++) {
- tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
- for (int col = 0; col < 4; col++) {
- tty->print(" 0x%016lx", *dump_sp++);
- }
- tty->cr();
- }
- // Print some instructions around pc:
- Disassembler::decode((address)pc-64, (address)pc);
- tty->print_cr("--------");
- Disassembler::decode((address)pc, (address)pc+32);
-}
-
-#endif // _LP64
-
-// Now versions that are common to 32/64 bit
-
-void MacroAssembler::addptr(Register dst, int32_t imm32) {
- LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
-}
-
-void MacroAssembler::addptr(Register dst, Register src) {
- LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
-}
-
-void MacroAssembler::addptr(Address dst, Register src) {
- LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
-}
-
-void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::addsd(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- Assembler::addsd(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- addss(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- addss(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::align(int modulus) {
- if (offset() % modulus != 0) {
- nop(modulus - (offset() % modulus));
- }
-}
-
-void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
- // Used in sign-masking with aligned address.
- assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
- if (reachable(src)) {
- Assembler::andpd(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- Assembler::andpd(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
- // Used in sign-masking with aligned address.
- assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
- if (reachable(src)) {
- Assembler::andps(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- Assembler::andps(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::andptr(Register dst, int32_t imm32) {
- LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
-}
-
-void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
- pushf();
- if (os::is_MP())
- lock();
- incrementl(counter_addr);
- popf();
-}
-
-// Writes to stack successive pages until offset reached to check for
-// stack overflow + shadow pages. This clobbers tmp.
-void MacroAssembler::bang_stack_size(Register size, Register tmp) {
- movptr(tmp, rsp);
- // Bang stack for total size given plus shadow page size.
- // Bang one page at a time because large size can bang beyond yellow and
- // red zones.
- Label loop;
- bind(loop);
- movl(Address(tmp, (-os::vm_page_size())), size );
- subptr(tmp, os::vm_page_size());
- subl(size, os::vm_page_size());
- jcc(Assembler::greater, loop);
-
- // Bang down shadow pages too.
- // The -1 because we already subtracted 1 page.
- for (int i = 0; i< StackShadowPages-1; i++) {
- // this could be any sized move but this is can be a debugging crumb
- // so the bigger the better.
- movptr(Address(tmp, (-i*os::vm_page_size())), size );
- }
-}
-
-void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
- assert(UseBiasedLocking, "why call this otherwise?");
-
- // Check for biased locking unlock case, which is a no-op
- // Note: we do not have to check the thread ID for two reasons.
- // First, the interpreter checks for IllegalMonitorStateException at
- // a higher level. Second, if the bias was revoked while we held the
- // lock, the object could not be rebiased toward another thread, so
- // the bias bit would be clear.
- movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
- andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
- cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
- jcc(Assembler::equal, done);
-}
-
-void MacroAssembler::c2bool(Register x) {
- // implements x == 0 ? 0 : 1
- // note: must only look at least-significant byte of x
- // since C-style booleans are stored in one byte
- // only! (was bug)
- andl(x, 0xFF);
- setb(Assembler::notZero, x);
-}
-
-// Wouldn't need if AddressLiteral version had new name
-void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
- Assembler::call(L, rtype);
-}
-
-void MacroAssembler::call(Register entry) {
- Assembler::call(entry);
-}
-
-void MacroAssembler::call(AddressLiteral entry) {
- if (reachable(entry)) {
- Assembler::call_literal(entry.target(), entry.rspec());
- } else {
- lea(rscratch1, entry);
- Assembler::call(rscratch1);
- }
-}
-
-void MacroAssembler::ic_call(address entry) {
- RelocationHolder rh = virtual_call_Relocation::spec(pc());
- movptr(rax, (intptr_t)Universe::non_oop_word());
- call(AddressLiteral(entry, rh));
-}
-
-// Implementation of call_VM versions
-
-void MacroAssembler::call_VM(Register oop_result,
- address entry_point,
- bool check_exceptions) {
- Label C, E;
- call(C, relocInfo::none);
- jmp(E);
-
- bind(C);
- call_VM_helper(oop_result, entry_point, 0, check_exceptions);
- ret(0);
-
- bind(E);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
- address entry_point,
- Register arg_1,
- bool check_exceptions) {
- Label C, E;
- call(C, relocInfo::none);
- jmp(E);
-
- bind(C);
- pass_arg1(this, arg_1);
- call_VM_helper(oop_result, entry_point, 1, check_exceptions);
- ret(0);
-
- bind(E);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
- address entry_point,
- Register arg_1,
- Register arg_2,
- bool check_exceptions) {
- Label C, E;
- call(C, relocInfo::none);
- jmp(E);
-
- bind(C);
-
- LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
-
- pass_arg2(this, arg_2);
- pass_arg1(this, arg_1);
- call_VM_helper(oop_result, entry_point, 2, check_exceptions);
- ret(0);
-
- bind(E);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
- address entry_point,
- Register arg_1,
- Register arg_2,
- Register arg_3,
- bool check_exceptions) {
- Label C, E;
- call(C, relocInfo::none);
- jmp(E);
-
- bind(C);
-
- LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
- LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
- pass_arg3(this, arg_3);
-
- LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
- pass_arg2(this, arg_2);
-
- pass_arg1(this, arg_1);
- call_VM_helper(oop_result, entry_point, 3, check_exceptions);
- ret(0);
-
- bind(E);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
- Register last_java_sp,
- address entry_point,
- int number_of_arguments,
- bool check_exceptions) {
- Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
- call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
- Register last_java_sp,
- address entry_point,
- Register arg_1,
- bool check_exceptions) {
- pass_arg1(this, arg_1);
- call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
- Register last_java_sp,
- address entry_point,
- Register arg_1,
- Register arg_2,
- bool check_exceptions) {
-
- LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
- pass_arg2(this, arg_2);
- pass_arg1(this, arg_1);
- call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
-}
-
-void MacroAssembler::call_VM(Register oop_result,
- Register last_java_sp,
- address entry_point,
- Register arg_1,
- Register arg_2,
- Register arg_3,
- bool check_exceptions) {
- LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
- LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
- pass_arg3(this, arg_3);
- LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
- pass_arg2(this, arg_2);
- pass_arg1(this, arg_1);
- call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
-}
-
-void MacroAssembler::super_call_VM(Register oop_result,
- Register last_java_sp,
- address entry_point,
- int number_of_arguments,
- bool check_exceptions) {
- Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
- MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
-}
-
-void MacroAssembler::super_call_VM(Register oop_result,
- Register last_java_sp,
- address entry_point,
- Register arg_1,
- bool check_exceptions) {
- pass_arg1(this, arg_1);
- super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
-}
-
-void MacroAssembler::super_call_VM(Register oop_result,
- Register last_java_sp,
- address entry_point,
- Register arg_1,
- Register arg_2,
- bool check_exceptions) {
-
- LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
- pass_arg2(this, arg_2);
- pass_arg1(this, arg_1);
- super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
-}
-
-void MacroAssembler::super_call_VM(Register oop_result,
- Register last_java_sp,
- address entry_point,
- Register arg_1,
- Register arg_2,
- Register arg_3,
- bool check_exceptions) {
- LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
- LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
- pass_arg3(this, arg_3);
- LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
- pass_arg2(this, arg_2);
- pass_arg1(this, arg_1);
- super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
-}
-
-void MacroAssembler::call_VM_base(Register oop_result,
- Register java_thread,
- Register last_java_sp,
- address entry_point,
- int number_of_arguments,
- bool check_exceptions) {
- // determine java_thread register
- if (!java_thread->is_valid()) {
-#ifdef _LP64
- java_thread = r15_thread;
-#else
- java_thread = rdi;
- get_thread(java_thread);
-#endif // LP64
- }
- // determine last_java_sp register
- if (!last_java_sp->is_valid()) {
- last_java_sp = rsp;
- }
- // debugging support
- assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
- LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
-#ifdef ASSERT
- // TraceBytecodes does not use r12 but saves it over the call, so don't verify
- // r12 is the heapbase.
- LP64_ONLY(if ((UseCompressedOops || UseCompressedKlassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)
-#endif // ASSERT
-
- assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
- assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
-
- // push java thread (becomes first argument of C function)
-
- NOT_LP64(push(java_thread); number_of_arguments++);
- LP64_ONLY(mov(c_rarg0, r15_thread));
-
- // set last Java frame before call
- assert(last_java_sp != rbp, "can't use ebp/rbp");
-
- // Only interpreter should have to set fp
- set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);
-
- // do the call, remove parameters
- MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
-
- // restore the thread (cannot use the pushed argument since arguments
- // may be overwritten by C code generated by an optimizing compiler);
- // however can use the register value directly if it is callee saved.
- if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
- // rdi & rsi (also r15) are callee saved -> nothing to do
-#ifdef ASSERT
- guarantee(java_thread != rax, "change this code");
- push(rax);
- { Label L;
- get_thread(rax);
- cmpptr(java_thread, rax);
- jcc(Assembler::equal, L);
- STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
- bind(L);
- }
- pop(rax);
-#endif
- } else {
- get_thread(java_thread);
- }
- // reset last Java frame
- // Only interpreter should have to clear fp
- reset_last_Java_frame(java_thread, true, false);
-
-#ifndef CC_INTERP
- // C++ interp handles this in the interpreter
- check_and_handle_popframe(java_thread);
- check_and_handle_earlyret(java_thread);
-#endif /* CC_INTERP */
-
- if (check_exceptions) {
- // check for pending exceptions (java_thread is set upon return)
- cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
-#ifndef _LP64
- jump_cc(Assembler::notEqual,
- RuntimeAddress(StubRoutines::forward_exception_entry()));
-#else
- // This used to conditionally jump to forward_exception however it is
- // possible if we relocate that the branch will not reach. So we must jump
- // around so we can always reach
-
- Label ok;
- jcc(Assembler::equal, ok);
- jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
- bind(ok);
-#endif // LP64
- }
-
- // get oop result if there is one and reset the value in the thread
- if (oop_result->is_valid()) {
- get_vm_result(oop_result, java_thread);
- }
-}
-
-void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
-
- // Calculate the value for last_Java_sp
- // somewhat subtle. call_VM does an intermediate call
- // which places a return address on the stack just under the
- // stack pointer as the user finsihed with it. This allows
- // use to retrieve last_Java_pc from last_Java_sp[-1].
- // On 32bit we then have to push additional args on the stack to accomplish
- // the actual requested call. On 64bit call_VM only can use register args
- // so the only extra space is the return address that call_VM created.
- // This hopefully explains the calculations here.
-
-#ifdef _LP64
- // We've pushed one address, correct last_Java_sp
- lea(rax, Address(rsp, wordSize));
-#else
- lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
-#endif // LP64
-
- call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);
-
-}
-
-void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
- call_VM_leaf_base(entry_point, number_of_arguments);
-}
-
-void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
- pass_arg0(this, arg_0);
- call_VM_leaf(entry_point, 1);
-}
-
-void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
-
- LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
- pass_arg1(this, arg_1);
- pass_arg0(this, arg_0);
- call_VM_leaf(entry_point, 2);
-}
-
-void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
- LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
- LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
- pass_arg2(this, arg_2);
- LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
- pass_arg1(this, arg_1);
- pass_arg0(this, arg_0);
- call_VM_leaf(entry_point, 3);
-}
-
-void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
- pass_arg0(this, arg_0);
- MacroAssembler::call_VM_leaf_base(entry_point, 1);
-}
-
-void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
-
- LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
- pass_arg1(this, arg_1);
- pass_arg0(this, arg_0);
- MacroAssembler::call_VM_leaf_base(entry_point, 2);
-}
-
-void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
- LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
- LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
- pass_arg2(this, arg_2);
- LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
- pass_arg1(this, arg_1);
- pass_arg0(this, arg_0);
- MacroAssembler::call_VM_leaf_base(entry_point, 3);
-}
-
-void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
- LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
- LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
- LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
- pass_arg3(this, arg_3);
- LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
- LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
- pass_arg2(this, arg_2);
- LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
- pass_arg1(this, arg_1);
- pass_arg0(this, arg_0);
- MacroAssembler::call_VM_leaf_base(entry_point, 4);
-}
-
-void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
- movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
- movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
- verify_oop(oop_result, "broken oop in call_VM_base");
-}
-
-void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
- movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
- movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD);
-}
-
-void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
-}
-
-void MacroAssembler::check_and_handle_popframe(Register java_thread) {
-}
-
-void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
- if (reachable(src1)) {
- cmpl(as_Address(src1), imm);
- } else {
- lea(rscratch1, src1);
- cmpl(Address(rscratch1, 0), imm);
- }
-}
-
-void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
- assert(!src2.is_lval(), "use cmpptr");
- if (reachable(src2)) {
- cmpl(src1, as_Address(src2));
- } else {
- lea(rscratch1, src2);
- cmpl(src1, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::cmp32(Register src1, int32_t imm) {
- Assembler::cmpl(src1, imm);
-}
-
-void MacroAssembler::cmp32(Register src1, Address src2) {
- Assembler::cmpl(src1, src2);
-}
-
-void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
- ucomisd(opr1, opr2);
-
- Label L;
- if (unordered_is_less) {
- movl(dst, -1);
- jcc(Assembler::parity, L);
- jcc(Assembler::below , L);
- movl(dst, 0);
- jcc(Assembler::equal , L);
- increment(dst);
- } else { // unordered is greater
- movl(dst, 1);
- jcc(Assembler::parity, L);
- jcc(Assembler::above , L);
- movl(dst, 0);
- jcc(Assembler::equal , L);
- decrementl(dst);
- }
- bind(L);
-}
-
-void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
- ucomiss(opr1, opr2);
-
- Label L;
- if (unordered_is_less) {
- movl(dst, -1);
- jcc(Assembler::parity, L);
- jcc(Assembler::below , L);
- movl(dst, 0);
- jcc(Assembler::equal , L);
- increment(dst);
- } else { // unordered is greater
- movl(dst, 1);
- jcc(Assembler::parity, L);
- jcc(Assembler::above , L);
- movl(dst, 0);
- jcc(Assembler::equal , L);
- decrementl(dst);
- }
- bind(L);
-}
-
-
-void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
- if (reachable(src1)) {
- cmpb(as_Address(src1), imm);
- } else {
- lea(rscratch1, src1);
- cmpb(Address(rscratch1, 0), imm);
- }
-}
-
-void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
-#ifdef _LP64
- if (src2.is_lval()) {
- movptr(rscratch1, src2);
- Assembler::cmpq(src1, rscratch1);
- } else if (reachable(src2)) {
- cmpq(src1, as_Address(src2));
- } else {
- lea(rscratch1, src2);
- Assembler::cmpq(src1, Address(rscratch1, 0));
- }
-#else
- if (src2.is_lval()) {
- cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
- } else {
- cmpl(src1, as_Address(src2));
- }
-#endif // _LP64
-}
-
-void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
- assert(src2.is_lval(), "not a mem-mem compare");
-#ifdef _LP64
- // moves src2's literal address
- movptr(rscratch1, src2);
- Assembler::cmpq(src1, rscratch1);
-#else
- cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
-#endif // _LP64
-}
-
-void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
- if (reachable(adr)) {
- if (os::is_MP())
- lock();
- cmpxchgptr(reg, as_Address(adr));
- } else {
- lea(rscratch1, adr);
- if (os::is_MP())
- lock();
- cmpxchgptr(reg, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
- LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
-}
-
-void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::comisd(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- Assembler::comisd(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::comiss(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- Assembler::comiss(dst, Address(rscratch1, 0));
- }
-}
-
-
-void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
- Condition negated_cond = negate_condition(cond);
- Label L;
- jcc(negated_cond, L);
- atomic_incl(counter_addr);
- bind(L);
-}
-
-int MacroAssembler::corrected_idivl(Register reg) {
- // Full implementation of Java idiv and irem; checks for
- // special case as described in JVM spec., p.243 & p.271.
- // The function returns the (pc) offset of the idivl
- // instruction - may be needed for implicit exceptions.
- //
- // normal case special case
- //
- // input : rax,: dividend min_int
- // reg: divisor (may not be rax,/rdx) -1
- //
- // output: rax,: quotient (= rax, idiv reg) min_int
- // rdx: remainder (= rax, irem reg) 0
- assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
- const int min_int = 0x80000000;
- Label normal_case, special_case;
-
- // check for special case
- cmpl(rax, min_int);
- jcc(Assembler::notEqual, normal_case);
- xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
- cmpl(reg, -1);
- jcc(Assembler::equal, special_case);
-
- // handle normal case
- bind(normal_case);
- cdql();
- int idivl_offset = offset();
- idivl(reg);
-
- // normal and special case exit
- bind(special_case);
-
- return idivl_offset;
-}
-
-
-
-void MacroAssembler::decrementl(Register reg, int value) {
- if (value == min_jint) {subl(reg, value) ; return; }
- if (value < 0) { incrementl(reg, -value); return; }
- if (value == 0) { ; return; }
- if (value == 1 && UseIncDec) { decl(reg) ; return; }
- /* else */ { subl(reg, value) ; return; }
-}
-
-void MacroAssembler::decrementl(Address dst, int value) {
- if (value == min_jint) {subl(dst, value) ; return; }
- if (value < 0) { incrementl(dst, -value); return; }
- if (value == 0) { ; return; }
- if (value == 1 && UseIncDec) { decl(dst) ; return; }
- /* else */ { subl(dst, value) ; return; }
-}
-
-void MacroAssembler::division_with_shift (Register reg, int shift_value) {
- assert (shift_value > 0, "illegal shift value");
- Label _is_positive;
- testl (reg, reg);
- jcc (Assembler::positive, _is_positive);
- int offset = (1 << shift_value) - 1 ;
-
- if (offset == 1) {
- incrementl(reg);
- } else {
- addl(reg, offset);
- }
-
- bind (_is_positive);
- sarl(reg, shift_value);
-}
-
-void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::divsd(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- Assembler::divsd(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::divss(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- Assembler::divss(dst, Address(rscratch1, 0));
- }
-}
-
-// !defined(COMPILER2) is because of stupid core builds
-#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
-void MacroAssembler::empty_FPU_stack() {
- if (VM_Version::supports_mmx()) {
- emms();
- } else {
- for (int i = 8; i-- > 0; ) ffree(i);
- }
-}
-#endif // !LP64 || C1 || !C2
-
-
-// Defines obj, preserves var_size_in_bytes
-void MacroAssembler::eden_allocate(Register obj,
- Register var_size_in_bytes,
- int con_size_in_bytes,
- Register t1,
- Label& slow_case) {
- assert(obj == rax, "obj must be in rax, for cmpxchg");
- assert_different_registers(obj, var_size_in_bytes, t1);
- if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
- jmp(slow_case);
- } else {
- Register end = t1;
- Label retry;
- bind(retry);
- ExternalAddress heap_top((address) Universe::heap()->top_addr());
- movptr(obj, heap_top);
- if (var_size_in_bytes == noreg) {
- lea(end, Address(obj, con_size_in_bytes));
- } else {
- lea(end, Address(obj, var_size_in_bytes, Address::times_1));
- }
- // if end < obj then we wrapped around => object too long => slow case
- cmpptr(end, obj);
- jcc(Assembler::below, slow_case);
- cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
- jcc(Assembler::above, slow_case);
- // Compare obj with the top addr, and if still equal, store the new top addr in
- // end at the address of the top addr pointer. Sets ZF if was equal, and clears
- // it otherwise. Use lock prefix for atomicity on MPs.
- locked_cmpxchgptr(end, heap_top);
- jcc(Assembler::notEqual, retry);
- }
-}
-
-void MacroAssembler::enter() {
- push(rbp);
- mov(rbp, rsp);
-}
-
-// A 5 byte nop that is safe for patching (see patch_verified_entry)
-void MacroAssembler::fat_nop() {
- if (UseAddressNop) {
- addr_nop_5();
- } else {
- emit_byte(0x26); // es:
- emit_byte(0x2e); // cs:
- emit_byte(0x64); // fs:
- emit_byte(0x65); // gs:
- emit_byte(0x90);
- }
-}
-
-void MacroAssembler::fcmp(Register tmp) {
- fcmp(tmp, 1, true, true);
-}
-
-void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
- assert(!pop_right || pop_left, "usage error");
- if (VM_Version::supports_cmov()) {
- assert(tmp == noreg, "unneeded temp");
- if (pop_left) {
- fucomip(index);
- } else {
- fucomi(index);
- }
- if (pop_right) {
- fpop();
- }
- } else {
- assert(tmp != noreg, "need temp");
- if (pop_left) {
- if (pop_right) {
- fcompp();
- } else {
- fcomp(index);
- }
- } else {
- fcom(index);
- }
- // convert FPU condition into eflags condition via rax,
- save_rax(tmp);
- fwait(); fnstsw_ax();
- sahf();
- restore_rax(tmp);
- }
- // condition codes set as follows:
- //
- // CF (corresponds to C0) if x < y
- // PF (corresponds to C2) if unordered
- // ZF (corresponds to C3) if x = y
-}
-
-void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
- fcmp2int(dst, unordered_is_less, 1, true, true);
-}
-
-void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
- fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
- Label L;
- if (unordered_is_less) {
- movl(dst, -1);
- jcc(Assembler::parity, L);
- jcc(Assembler::below , L);
- movl(dst, 0);
- jcc(Assembler::equal , L);
- increment(dst);
- } else { // unordered is greater
- movl(dst, 1);
- jcc(Assembler::parity, L);
- jcc(Assembler::above , L);
- movl(dst, 0);
- jcc(Assembler::equal , L);
- decrementl(dst);
- }
- bind(L);
-}
-
-void MacroAssembler::fld_d(AddressLiteral src) {
- fld_d(as_Address(src));
-}
-
-void MacroAssembler::fld_s(AddressLiteral src) {
- fld_s(as_Address(src));
-}
-
-void MacroAssembler::fld_x(AddressLiteral src) {
- Assembler::fld_x(as_Address(src));
-}
-
-void MacroAssembler::fldcw(AddressLiteral src) {
- Assembler::fldcw(as_Address(src));
-}
-
-void MacroAssembler::pow_exp_core_encoding() {
- // kills rax, rcx, rdx
- subptr(rsp,sizeof(jdouble));
- // computes 2^X. Stack: X ...
- // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
- // keep it on the thread's stack to compute 2^int(X) later
- // then compute 2^(X-int(X)) as (2^(X-int(X)-1+1)
- // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
- fld_s(0); // Stack: X X ...
- frndint(); // Stack: int(X) X ...
- fsuba(1); // Stack: int(X) X-int(X) ...
- fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ...
- f2xm1(); // Stack: 2^(X-int(X))-1 ...
- fld1(); // Stack: 1 2^(X-int(X))-1 ...
- faddp(1); // Stack: 2^(X-int(X))
- // computes 2^(int(X)): add exponent bias (1023) to int(X), then
- // shift int(X)+1023 to exponent position.
- // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11
- // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
- // values so detect them and set result to NaN.
- movl(rax,Address(rsp,0));
- movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
- addl(rax, 1023);
- movl(rdx,rax);
- shll(rax,20);
- // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
- addl(rdx,1);
- // Check that 1 < int(X)+1023+1 < 2048
- // in 3 steps:
- // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
- // 2- (int(X)+1023+1)&-2048 != 0
- // 3- (int(X)+1023+1)&-2048 != 1
- // Do 2- first because addl just updated the flags.
- cmov32(Assembler::equal,rax,rcx);
- cmpl(rdx,1);
- cmov32(Assembler::equal,rax,rcx);
- testl(rdx,rcx);
- cmov32(Assembler::notEqual,rax,rcx);
- movl(Address(rsp,4),rax);
- movl(Address(rsp,0),0);
- fmul_d(Address(rsp,0)); // Stack: 2^X ...
- addptr(rsp,sizeof(jdouble));
-}
-
-void MacroAssembler::increase_precision() {
- subptr(rsp, BytesPerWord);
- fnstcw(Address(rsp, 0));
- movl(rax, Address(rsp, 0));
- orl(rax, 0x300);
- push(rax);
- fldcw(Address(rsp, 0));
- pop(rax);
-}
-
-void MacroAssembler::restore_precision() {
- fldcw(Address(rsp, 0));
- addptr(rsp, BytesPerWord);
-}
-
-void MacroAssembler::fast_pow() {
- // computes X^Y = 2^(Y * log2(X))
- // if fast computation is not possible, result is NaN. Requires
- // fallback from user of this macro.
- // increase precision for intermediate steps of the computation
- increase_precision();
- fyl2x(); // Stack: (Y*log2(X)) ...
- pow_exp_core_encoding(); // Stack: exp(X) ...
- restore_precision();
-}
-
-void MacroAssembler::fast_exp() {
- // computes exp(X) = 2^(X * log2(e))
- // if fast computation is not possible, result is NaN. Requires
- // fallback from user of this macro.
- // increase precision for intermediate steps of the computation
- increase_precision();
- fldl2e(); // Stack: log2(e) X ...
- fmulp(1); // Stack: (X*log2(e)) ...
- pow_exp_core_encoding(); // Stack: exp(X) ...
- restore_precision();
-}
-
-void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
- // kills rax, rcx, rdx
- // pow and exp needs 2 extra registers on the fpu stack.
- Label slow_case, done;
- Register tmp = noreg;
- if (!VM_Version::supports_cmov()) {
- // fcmp needs a temporary so preserve rdx,
- tmp = rdx;
- }
- Register tmp2 = rax;
- Register tmp3 = rcx;
-
- if (is_exp) {
- // Stack: X
- fld_s(0); // duplicate argument for runtime call. Stack: X X
- fast_exp(); // Stack: exp(X) X
- fcmp(tmp, 0, false, false); // Stack: exp(X) X
- // exp(X) not equal to itself: exp(X) is NaN go to slow case.
- jcc(Assembler::parity, slow_case);
- // get rid of duplicate argument. Stack: exp(X)
- if (num_fpu_regs_in_use > 0) {
- fxch();
- fpop();
- } else {
- ffree(1);
- }
- jmp(done);
- } else {
- // Stack: X Y
- Label x_negative, y_odd;
-
- fldz(); // Stack: 0 X Y
- fcmp(tmp, 1, true, false); // Stack: X Y
- jcc(Assembler::above, x_negative);
-
- // X >= 0
-
- fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y
- fld_s(1); // Stack: X Y X Y
- fast_pow(); // Stack: X^Y X Y
- fcmp(tmp, 0, false, false); // Stack: X^Y X Y
- // X^Y not equal to itself: X^Y is NaN go to slow case.
- jcc(Assembler::parity, slow_case);
- // get rid of duplicate arguments. Stack: X^Y
- if (num_fpu_regs_in_use > 0) {
- fxch(); fpop();
- fxch(); fpop();
- } else {
- ffree(2);
- ffree(1);
- }
- jmp(done);
-
- // X <= 0
- bind(x_negative);
-
- fld_s(1); // Stack: Y X Y
- frndint(); // Stack: int(Y) X Y
- fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
- jcc(Assembler::notEqual, slow_case);
-
- subptr(rsp, 8);
-
- // For X^Y, when X < 0, Y has to be an integer and the final
- // result depends on whether it's odd or even. We just checked
- // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
- // integer to test its parity. If int(Y) is huge and doesn't fit
- // in the 64 bit integer range, the integer indefinite value will
- // end up in the gp registers. Huge numbers are all even, the
- // integer indefinite number is even so it's fine.
-
-#ifdef ASSERT
- // Let's check we don't end up with an integer indefinite number
- // when not expected. First test for huge numbers: check whether
- // int(Y)+1 == int(Y) which is true for very large numbers and
- // those are all even. A 64 bit integer is guaranteed to not
- // overflow for numbers where y+1 != y (when precision is set to
- // double precision).
- Label y_not_huge;
-
- fld1(); // Stack: 1 int(Y) X Y
- fadd(1); // Stack: 1+int(Y) int(Y) X Y
-
-#ifdef _LP64
- // trip to memory to force the precision down from double extended
- // precision
- fstp_d(Address(rsp, 0));
- fld_d(Address(rsp, 0));
-#endif
-
- fcmp(tmp, 1, true, false); // Stack: int(Y) X Y
-#endif
-
- // move int(Y) as 64 bit integer to thread's stack
- fistp_d(Address(rsp,0)); // Stack: X Y
-
-#ifdef ASSERT
- jcc(Assembler::notEqual, y_not_huge);
-
- // Y is huge so we know it's even. It may not fit in a 64 bit
- // integer and we don't want the debug code below to see the
- // integer indefinite value so overwrite int(Y) on the thread's
- // stack with 0.
- movl(Address(rsp, 0), 0);
- movl(Address(rsp, 4), 0);
-
- bind(y_not_huge);
-#endif
-
- fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y
- fld_s(1); // Stack: X Y X Y
- fabs(); // Stack: abs(X) Y X Y
- fast_pow(); // Stack: abs(X)^Y X Y
- fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
- // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.
-
- pop(tmp2);
- NOT_LP64(pop(tmp3));
- jcc(Assembler::parity, slow_case);
-
-#ifdef ASSERT
- // Check that int(Y) is not integer indefinite value (int
- // overflow). Shouldn't happen because for values that would
- // overflow, 1+int(Y)==Y which was tested earlier.
-#ifndef _LP64
- {
- Label integer;
- testl(tmp2, tmp2);
- jcc(Assembler::notZero, integer);
- cmpl(tmp3, 0x80000000);
- jcc(Assembler::notZero, integer);
- STOP("integer indefinite value shouldn't be seen here");
- bind(integer);
- }
-#else
- {
- Label integer;
- mov(tmp3, tmp2); // preserve tmp2 for parity check below
- shlq(tmp3, 1);
- jcc(Assembler::carryClear, integer);
- jcc(Assembler::notZero, integer);
- STOP("integer indefinite value shouldn't be seen here");
- bind(integer);
- }
-#endif
-#endif
-
- // get rid of duplicate arguments. Stack: X^Y
- if (num_fpu_regs_in_use > 0) {
- fxch(); fpop();
- fxch(); fpop();
- } else {
- ffree(2);
- ffree(1);
- }
-
- testl(tmp2, 1);
- jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
- // X <= 0, Y even: X^Y = -abs(X)^Y
-
- fchs(); // Stack: -abs(X)^Y Y
- jmp(done);
- }
-
- // slow case: runtime call
- bind(slow_case);
-
- fpop(); // pop incorrect result or int(Y)
-
- fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
- is_exp ? 1 : 2, num_fpu_regs_in_use);
-
- // Come here with result in F-TOS
- bind(done);
-}
-
-void MacroAssembler::fpop() {
- ffree();
- fincstp();
-}
-
-void MacroAssembler::fremr(Register tmp) {
- save_rax(tmp);
- { Label L;
- bind(L);
- fprem();
- fwait(); fnstsw_ax();
-#ifdef _LP64
- testl(rax, 0x400);
- jcc(Assembler::notEqual, L);
-#else
- sahf();
- jcc(Assembler::parity, L);
-#endif // _LP64
- }
- restore_rax(tmp);
- // Result is in ST0.
- // Note: fxch & fpop to get rid of ST1
- // (otherwise FPU stack could overflow eventually)
- fxch(1);
- fpop();
-}
-
-
-void MacroAssembler::incrementl(AddressLiteral dst) {
- if (reachable(dst)) {
- incrementl(as_Address(dst));
- } else {
- lea(rscratch1, dst);
- incrementl(Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::incrementl(ArrayAddress dst) {
- incrementl(as_Address(dst));
-}
-
-void MacroAssembler::incrementl(Register reg, int value) {
- if (value == min_jint) {addl(reg, value) ; return; }
- if (value < 0) { decrementl(reg, -value); return; }
- if (value == 0) { ; return; }
- if (value == 1 && UseIncDec) { incl(reg) ; return; }
- /* else */ { addl(reg, value) ; return; }
-}
-
-void MacroAssembler::incrementl(Address dst, int value) {
- if (value == min_jint) {addl(dst, value) ; return; }
- if (value < 0) { decrementl(dst, -value); return; }
- if (value == 0) { ; return; }
- if (value == 1 && UseIncDec) { incl(dst) ; return; }
- /* else */ { addl(dst, value) ; return; }
-}
-
-void MacroAssembler::jump(AddressLiteral dst) {
- if (reachable(dst)) {
- jmp_literal(dst.target(), dst.rspec());
- } else {
- lea(rscratch1, dst);
- jmp(rscratch1);
- }
-}
-
-void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
- if (reachable(dst)) {
- InstructionMark im(this);
- relocate(dst.reloc());
- const int short_size = 2;
- const int long_size = 6;
- int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
- if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
- // 0111 tttn #8-bit disp
- emit_byte(0x70 | cc);
- emit_byte((offs - short_size) & 0xFF);
- } else {
- // 0000 1111 1000 tttn #32-bit disp
- emit_byte(0x0F);
- emit_byte(0x80 | cc);
- emit_long(offs - long_size);
- }
- } else {
-#ifdef ASSERT
- warning("reversing conditional branch");
-#endif /* ASSERT */
- Label skip;
- jccb(reverse[cc], skip);
- lea(rscratch1, dst);
- Assembler::jmp(rscratch1);
- bind(skip);
- }
-}
-
-void MacroAssembler::ldmxcsr(AddressLiteral src) {
- if (reachable(src)) {
- Assembler::ldmxcsr(as_Address(src));
- } else {
- lea(rscratch1, src);
- Assembler::ldmxcsr(Address(rscratch1, 0));
- }
-}
-
-int MacroAssembler::load_signed_byte(Register dst, Address src) {
- int off;
- if (LP64_ONLY(true ||) VM_Version::is_P6()) {
- off = offset();
- movsbl(dst, src); // movsxb
- } else {
- off = load_unsigned_byte(dst, src);
- shll(dst, 24);
- sarl(dst, 24);
- }
- return off;
-}
-
-// Note: load_signed_short used to be called load_signed_word.
-// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
-// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
-// The term "word" in HotSpot means a 32- or 64-bit machine word.
-int MacroAssembler::load_signed_short(Register dst, Address src) {
- int off;
- if (LP64_ONLY(true ||) VM_Version::is_P6()) {
- // This is dubious to me since it seems safe to do a signed 16 => 64 bit
- // version but this is what 64bit has always done. This seems to imply
- // that users are only using 32bits worth.
- off = offset();
- movswl(dst, src); // movsxw
- } else {
- off = load_unsigned_short(dst, src);
- shll(dst, 16);
- sarl(dst, 16);
- }
- return off;
-}
-
-int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
- // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
- // and "3.9 Partial Register Penalties", p. 22).
- int off;
- if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
- off = offset();
- movzbl(dst, src); // movzxb
- } else {
- xorl(dst, dst);
- off = offset();
- movb(dst, src);
- }
- return off;
-}
-
-// Note: load_unsigned_short used to be called load_unsigned_word.
-int MacroAssembler::load_unsigned_short(Register dst, Address src) {
- // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
- // and "3.9 Partial Register Penalties", p. 22).
- int off;
- if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
- off = offset();
- movzwl(dst, src); // movzxw
- } else {
- xorl(dst, dst);
- off = offset();
- movw(dst, src);
- }
- return off;
-}
-
-void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
- switch (size_in_bytes) {
-#ifndef _LP64
- case 8:
- assert(dst2 != noreg, "second dest register required");
- movl(dst, src);
- movl(dst2, src.plus_disp(BytesPerInt));
- break;
-#else
- case 8: movq(dst, src); break;
-#endif
- case 4: movl(dst, src); break;
- case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
- case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
- default: ShouldNotReachHere();
- }
-}
-
-void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
- switch (size_in_bytes) {
-#ifndef _LP64
- case 8:
- assert(src2 != noreg, "second source register required");
- movl(dst, src);
- movl(dst.plus_disp(BytesPerInt), src2);
- break;
-#else
- case 8: movq(dst, src); break;
-#endif
- case 4: movl(dst, src); break;
- case 2: movw(dst, src); break;
- case 1: movb(dst, src); break;
- default: ShouldNotReachHere();
- }
-}
-
-void MacroAssembler::mov32(AddressLiteral dst, Register src) {
- if (reachable(dst)) {
- movl(as_Address(dst), src);
- } else {
- lea(rscratch1, dst);
- movl(Address(rscratch1, 0), src);
- }
-}
-
-void MacroAssembler::mov32(Register dst, AddressLiteral src) {
- if (reachable(src)) {
- movl(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- movl(dst, Address(rscratch1, 0));
- }
-}
-
-// C++ bool manipulation
-
-void MacroAssembler::movbool(Register dst, Address src) {
- if(sizeof(bool) == 1)
- movb(dst, src);
- else if(sizeof(bool) == 2)
- movw(dst, src);
- else if(sizeof(bool) == 4)
- movl(dst, src);
- else
- // unsupported
- ShouldNotReachHere();
-}
-
-void MacroAssembler::movbool(Address dst, bool boolconst) {
- if(sizeof(bool) == 1)
- movb(dst, (int) boolconst);
- else if(sizeof(bool) == 2)
- movw(dst, (int) boolconst);
- else if(sizeof(bool) == 4)
- movl(dst, (int) boolconst);
- else
- // unsupported
- ShouldNotReachHere();
-}
-
-void MacroAssembler::movbool(Address dst, Register src) {
- if(sizeof(bool) == 1)
- movb(dst, src);
- else if(sizeof(bool) == 2)
- movw(dst, src);
- else if(sizeof(bool) == 4)
- movl(dst, src);
- else
- // unsupported
- ShouldNotReachHere();
-}
-
-void MacroAssembler::movbyte(ArrayAddress dst, int src) {
- movb(as_Address(dst), src);
-}
-
-void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- movdl(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- movdl(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- movq(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- movq(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- if (UseXmmLoadAndClearUpper) {
- movsd (dst, as_Address(src));
- } else {
- movlpd(dst, as_Address(src));
- }
- } else {
- lea(rscratch1, src);
- if (UseXmmLoadAndClearUpper) {
- movsd (dst, Address(rscratch1, 0));
- } else {
- movlpd(dst, Address(rscratch1, 0));
- }
- }
-}
-
-void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- movss(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- movss(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::movptr(Register dst, Register src) {
- LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
-}
-
-void MacroAssembler::movptr(Register dst, Address src) {
- LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
-}
-
-// src should NEVER be a real pointer. Use AddressLiteral for true pointers
-void MacroAssembler::movptr(Register dst, intptr_t src) {
- LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
-}
-
-void MacroAssembler::movptr(Address dst, Register src) {
- LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
-}
-
-void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::movdqu(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- Assembler::movdqu(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::movsd(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- Assembler::movsd(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::movss(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- Assembler::movss(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::mulsd(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- Assembler::mulsd(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::mulss(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- Assembler::mulss(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::null_check(Register reg, int offset) {
- if (needs_explicit_null_check(offset)) {
- // provoke OS NULL exception if reg = NULL by
- // accessing M[reg] w/o changing any (non-CC) registers
- // NOTE: cmpl is plenty here to provoke a segv
- cmpptr(rax, Address(reg, 0));
- // Note: should probably use testl(rax, Address(reg, 0));
- // may be shorter code (however, this version of
- // testl needs to be implemented first)
- } else {
- // nothing to do, (later) access of M[reg + offset]
- // will provoke OS NULL exception if reg = NULL
- }
-}
-
-void MacroAssembler::os_breakpoint() {
- // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
- // (e.g., MSVC can't call ps() otherwise)
- call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-}
-
-void MacroAssembler::pop_CPU_state() {
- pop_FPU_state();
- pop_IU_state();
-}
-
-void MacroAssembler::pop_FPU_state() {
- NOT_LP64(frstor(Address(rsp, 0));)
- LP64_ONLY(fxrstor(Address(rsp, 0));)
- addptr(rsp, FPUStateSizeInWords * wordSize);
-}
-
-void MacroAssembler::pop_IU_state() {
- popa();
- LP64_ONLY(addq(rsp, 8));
- popf();
-}
-
-// Save Integer and Float state
-// Warning: Stack must be 16 byte aligned (64bit)
-void MacroAssembler::push_CPU_state() {
- push_IU_state();
- push_FPU_state();
-}
-
-void MacroAssembler::push_FPU_state() {
- subptr(rsp, FPUStateSizeInWords * wordSize);
-#ifndef _LP64
- fnsave(Address(rsp, 0));
- fwait();
-#else
- fxsave(Address(rsp, 0));
-#endif // LP64
-}
-
-void MacroAssembler::push_IU_state() {
- // Push flags first because pusha kills them
- pushf();
- // Make sure rsp stays 16-byte aligned
- LP64_ONLY(subq(rsp, 8));
- pusha();
-}
-
-void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
- // determine java_thread register
- if (!java_thread->is_valid()) {
- java_thread = rdi;
- get_thread(java_thread);
- }
- // we must set sp to zero to clear frame
- movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
- if (clear_fp) {
- movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
- }
-
- if (clear_pc)
- movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
-
-}
-
-void MacroAssembler::restore_rax(Register tmp) {
- if (tmp == noreg) pop(rax);
- else if (tmp != rax) mov(rax, tmp);
-}
-
-void MacroAssembler::round_to(Register reg, int modulus) {
- addptr(reg, modulus - 1);
- andptr(reg, -modulus);
-}
-
-void MacroAssembler::save_rax(Register tmp) {
- if (tmp == noreg) push(rax);
- else if (tmp != rax) mov(tmp, rax);
-}
-
-// Write serialization page so VM thread can do a pseudo remote membar.
-// We use the current thread pointer to calculate a thread specific
-// offset to write to within the page. This minimizes bus traffic
-// due to cache line collision.
-void MacroAssembler::serialize_memory(Register thread, Register tmp) {
- movl(tmp, thread);
- shrl(tmp, os::get_serialize_page_shift_count());
- andl(tmp, (os::vm_page_size() - sizeof(int)));
-
- Address index(noreg, tmp, Address::times_1);
- ExternalAddress page(os::get_memory_serialize_page());
-
- // Size of store must match masking code above
- movl(as_Address(ArrayAddress(page, index)), tmp);
-}
-
-// Calls to C land
-//
-// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
-// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
-// has to be reset to 0. This is required to allow proper stack traversal.
-void MacroAssembler::set_last_Java_frame(Register java_thread,
- Register last_java_sp,
- Register last_java_fp,
- address last_java_pc) {
- // determine java_thread register
- if (!java_thread->is_valid()) {
- java_thread = rdi;
- get_thread(java_thread);
- }
- // determine last_java_sp register
- if (!last_java_sp->is_valid()) {
- last_java_sp = rsp;
- }
-
- // last_java_fp is optional
-
- if (last_java_fp->is_valid()) {
- movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
- }
-
- // last_java_pc is optional
-
- if (last_java_pc != NULL) {
- lea(Address(java_thread,
- JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
- InternalAddress(last_java_pc));
-
- }
- movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
-}
-
-void MacroAssembler::shlptr(Register dst, int imm8) {
- LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
-}
-
-void MacroAssembler::shrptr(Register dst, int imm8) {
- LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
-}
-
-void MacroAssembler::sign_extend_byte(Register reg) {
- if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
- movsbl(reg, reg); // movsxb
- } else {
- shll(reg, 24);
- sarl(reg, 24);
- }
-}
-
-void MacroAssembler::sign_extend_short(Register reg) {
- if (LP64_ONLY(true ||) VM_Version::is_P6()) {
- movswl(reg, reg); // movsxw
- } else {
- shll(reg, 16);
- sarl(reg, 16);
- }
-}
-// testl of dst against a literal address; src must be reachable (asserted), there is no scratch-register fallback.
-void MacroAssembler::testl(Register dst, AddressLiteral src) {
- assert(reachable(src), "Address should be reachable");
- testl(dst, as_Address(src));
-}
-// sqrtsd with a literal memory operand: addressed directly when reachable, otherwise through rscratch1.
-void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::sqrtsd(dst, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- Assembler::sqrtsd(dst, Address(rscratch1, 0));
- }
-}
-// sqrtss with a literal memory operand: addressed directly when reachable, otherwise through rscratch1.
-void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::sqrtss(dst, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- Assembler::sqrtss(dst, Address(rscratch1, 0));
- }
-}
-// subsd with a literal memory operand: addressed directly when reachable, otherwise through rscratch1.
-void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::subsd(dst, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- Assembler::subsd(dst, Address(rscratch1, 0));
- }
-}
-// subss with a literal memory operand: addressed directly when reachable, otherwise through rscratch1.
-void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::subss(dst, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- Assembler::subss(dst, Address(rscratch1, 0));
- }
-}
-// ucomisd with a literal memory operand: addressed directly when reachable, otherwise through rscratch1.
-void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::ucomisd(dst, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- Assembler::ucomisd(dst, Address(rscratch1, 0));
- }
-}
-// ucomiss with a literal memory operand: addressed directly when reachable, otherwise through rscratch1.
-void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- Assembler::ucomiss(dst, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- Assembler::ucomiss(dst, Address(rscratch1, 0));
- }
-}
-// xorpd with a literal memory operand: addressed directly when reachable, otherwise through rscratch1.
-void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
- // Used in sign-bit flipping with aligned address.
- assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
- if (reachable(src)) {
- Assembler::xorpd(dst, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- Assembler::xorpd(dst, Address(rscratch1, 0));
- }
-}
-// xorps with a literal memory operand: addressed directly when reachable, otherwise through rscratch1.
-void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
- // Used in sign-bit flipping with aligned address.
- assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
- if (reachable(src)) {
- Assembler::xorps(dst, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- Assembler::xorps(dst, Address(rscratch1, 0));
- }
-}
-// pshufb with a literal memory operand: addressed directly when reachable, otherwise through rscratch1.
-void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
- // Unless AVX is in use, the literal must be 16-byte aligned (checked below).
- assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
- if (reachable(src)) {
- Assembler::pshufb(dst, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- Assembler::pshufb(dst, Address(rscratch1, 0));
- }
-}
-
-// AVX 3-operands instructions
-// vaddsd with a literal memory operand: forwards to the Address form, going through rscratch1 when src is not reachable.
-void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
- if (reachable(src)) {
- vaddsd(dst, nds, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- vaddsd(dst, nds, Address(rscratch1, 0));
- }
-}
-// vaddss with a literal memory operand: forwards to the Address form, going through rscratch1 when src is not reachable.
-void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
- if (reachable(src)) {
- vaddss(dst, nds, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- vaddss(dst, nds, Address(rscratch1, 0));
- }
-}
-// vandpd with a literal memory operand: forwards to the Address form (vector256 passed through), using rscratch1 when src is not reachable.
-void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
- if (reachable(src)) {
- vandpd(dst, nds, as_Address(src), vector256);
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- vandpd(dst, nds, Address(rscratch1, 0), vector256);
- }
-}
-// vandps with a literal memory operand: forwards to the Address form (vector256 passed through), using rscratch1 when src is not reachable.
-void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
- if (reachable(src)) {
- vandps(dst, nds, as_Address(src), vector256);
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- vandps(dst, nds, Address(rscratch1, 0), vector256);
- }
-}
-// vdivsd with a literal memory operand: forwards to the Address form, going through rscratch1 when src is not reachable.
-void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
- if (reachable(src)) {
- vdivsd(dst, nds, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- vdivsd(dst, nds, Address(rscratch1, 0));
- }
-}
-// vdivss with a literal memory operand: forwards to the Address form, going through rscratch1 when src is not reachable.
-void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
- if (reachable(src)) {
- vdivss(dst, nds, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- vdivss(dst, nds, Address(rscratch1, 0));
- }
-}
-// vmulsd with a literal memory operand: forwards to the Address form, going through rscratch1 when src is not reachable.
-void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
- if (reachable(src)) {
- vmulsd(dst, nds, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- vmulsd(dst, nds, Address(rscratch1, 0));
- }
-}
-// vmulss with a literal memory operand: forwards to the Address form, going through rscratch1 when src is not reachable.
-void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
- if (reachable(src)) {
- vmulss(dst, nds, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- vmulss(dst, nds, Address(rscratch1, 0));
- }
-}
-// vsubsd with a literal memory operand: forwards to the Address form, going through rscratch1 when src is not reachable.
-void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
- if (reachable(src)) {
- vsubsd(dst, nds, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- vsubsd(dst, nds, Address(rscratch1, 0));
- }
-}
-// vsubss with a literal memory operand: forwards to the Address form, going through rscratch1 when src is not reachable.
-void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
- if (reachable(src)) {
- vsubss(dst, nds, as_Address(src));
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- vsubss(dst, nds, Address(rscratch1, 0));
- }
-}
-// vxorpd with a literal memory operand: forwards to the Address form (vector256 passed through), using rscratch1 when src is not reachable.
-void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
- if (reachable(src)) {
- vxorpd(dst, nds, as_Address(src), vector256);
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- vxorpd(dst, nds, Address(rscratch1, 0), vector256);
- }
-}
-// vxorps with a literal memory operand: forwards to the Address form (vector256 passed through), using rscratch1 when src is not reachable.
-void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
- if (reachable(src)) {
- vxorps(dst, nds, as_Address(src), vector256);
- } else {
- lea(rscratch1, src); // overwrites rscratch1 with the literal's address
- vxorps(dst, nds, Address(rscratch1, 0), vector256);
- }
-}
-
-
-//////////////////////////////////////////////////////////////////////////////////
-#ifndef SERIALGC
-// G1 pre-barrier: while marking is active, logs the non-null previous value in the thread's SATB queue, or calls SharedRuntime::g1_wb_pre when the queue index is 0.
-void MacroAssembler::g1_write_barrier_pre(Register obj,
- Register pre_val,
- Register thread,
- Register tmp,
- bool tosca_live,
- bool expand_call) {
- // If obj is a register, pre_val is loaded from Address(obj, 0); with obj == noreg the caller supplies pre_val.
- // If expand_call is true then we expand the call_VM_leaf macro
- // directly to skip generating the check by
- // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
-
-#ifdef _LP64
- assert(thread == r15_thread, "must be");
-#endif // _LP64
-
- Label done;
- Label runtime;
-
- assert(pre_val != noreg, "check this code");
-
- if (obj != noreg) {
- assert_different_registers(obj, pre_val, tmp);
- assert(pre_val != rax, "check this code");
- }
- // Fields of the thread's SATB mark queue: active flag, index and buffer.
- Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
- PtrQueue::byte_offset_of_active()));
- Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
- PtrQueue::byte_offset_of_index()));
- Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
- PtrQueue::byte_offset_of_buf()));
-
-
- // Is marking active?
- if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
- cmpl(in_progress, 0);
- } else {
- assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
- cmpb(in_progress, 0);
- }
- jcc(Assembler::equal, done);
-
- // Do we need to load the previous value?
- if (obj != noreg) {
- load_heap_oop(pre_val, Address(obj, 0));
- }
-
- // Is the previous value null?
- cmpptr(pre_val, (int32_t) NULL_WORD);
- jcc(Assembler::equal, done);
-
- // Can we store original value in the thread's buffer?
- // Is index == 0?
- // (The index field is typed as size_t.)
-
- movptr(tmp, index); // tmp := *index_adr
- cmpptr(tmp, 0); // tmp == 0?
- jcc(Assembler::equal, runtime); // If yes, goto runtime
-
- subptr(tmp, wordSize); // tmp := tmp - wordSize
- movptr(index, tmp); // *index_adr := tmp
- addptr(tmp, buffer); // tmp := tmp + *buffer_adr
-
- // Record the previous value
- movptr(Address(tmp, 0), pre_val);
- jmp(done);
-
- bind(runtime);
- // save the live input values
- if(tosca_live) push(rax);
-
- if (obj != noreg && obj != rax)
- push(obj);
-
- if (pre_val != rax)
- push(pre_val);
-
- // Calling the runtime using the regular call_VM_leaf mechanism generates
- // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
- // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
- //
- // If we are generating the pre-barrier without a frame (e.g. in the
- // intrinsified Reference.get() routine) then ebp might be pointing to
- // the caller frame and so this check will most likely fail at runtime.
- //
- // Expanding the call directly bypasses the generation of the check.
- // So when we do not have a full interpreter frame on the stack
- // expand_call should be passed true.
-
- NOT_LP64( push(thread); )
-
- if (expand_call) {
- LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
- pass_arg1(this, thread);
- pass_arg0(this, pre_val);
- MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
- } else {
- call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
- }
-
- NOT_LP64( pop(thread); )
-
- // restore the live input values (reverse order of the pushes above)
- if (pre_val != rax)
- pop(pre_val);
-
- if (obj != noreg && obj != rax)
- pop(obj);
-
- if(tosca_live) pop(rax);
-
- bind(done);
-}
-// G1 post-barrier: for a non-NULL store crossing heap regions, dirties the card of store_addr and logs it in the thread's dirty card queue (runtime call when the queue index is 0).
-void MacroAssembler::g1_write_barrier_post(Register store_addr,
- Register new_val,
- Register thread,
- Register tmp,
- Register tmp2) {
-#ifdef _LP64
- assert(thread == r15_thread, "must be");
-#endif // _LP64
- // Index and buffer fields of the thread's dirty card queue.
- Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
- PtrQueue::byte_offset_of_index()));
- Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
- PtrQueue::byte_offset_of_buf()));
-
- BarrierSet* bs = Universe::heap()->barrier_set();
- CardTableModRefBS* ct = (CardTableModRefBS*)bs;
- Label done;
- Label runtime;
-
- // Does store cross heap regions?
-
- movptr(tmp, store_addr);
- xorptr(tmp, new_val);
- shrptr(tmp, HeapRegion::LogOfHRGrainBytes); // zero when both addresses agree in every bit above the region grain
- jcc(Assembler::equal, done);
-
- // crosses regions, storing NULL?
-
- cmpptr(new_val, (int32_t) NULL_WORD);
- jcc(Assembler::equal, done);
-
- // storing region crossing non-NULL, is card already dirty?
-
- ExternalAddress cardtable((address) ct->byte_map_base);
- assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
-#ifdef _LP64
- const Register card_addr = tmp;
-
- movq(card_addr, store_addr);
- shrq(card_addr, CardTableModRefBS::card_shift);
-
- lea(tmp2, cardtable);
-
- // get the address of the card
- addq(card_addr, tmp2);
-#else
- const Register card_index = tmp;
-
- movl(card_index, store_addr);
- shrl(card_index, CardTableModRefBS::card_shift);
-
- Address index(noreg, card_index, Address::times_1);
- const Register card_addr = tmp; // same register as card_index
- lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
-#endif
- cmpb(Address(card_addr, 0), 0);
- jcc(Assembler::equal, done);
-
- // storing a region crossing, non-NULL oop, card is clean.
- // dirty card and log.
-
- movb(Address(card_addr, 0), 0);
-
- cmpl(queue_index, 0);
- jcc(Assembler::equal, runtime);
- subl(queue_index, wordSize);
- movptr(tmp2, buffer);
-#ifdef _LP64
- movslq(rscratch1, queue_index);
- addq(tmp2, rscratch1);
- movq(Address(tmp2, 0), card_addr);
-#else
- addl(tmp2, queue_index);
- movl(Address(tmp2, 0), card_index);
-#endif
- jmp(done);
-
- bind(runtime);
- // save the live input values
- push(store_addr);
- push(new_val);
-#ifdef _LP64
- call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
-#else
- push(thread);
- call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
- pop(thread);
-#endif
- pop(new_val);
- pop(store_addr);
-
- bind(done);
-}
-
-#endif // SERIALGC
-//////////////////////////////////////////////////////////////////////////////////
-
-// Card-marking store check, emitted as part 1 followed immediately by part 2.
-void MacroAssembler::store_check(Register obj) {
- // Does a store check for the oop in register obj. The content of
- // register obj is destroyed afterwards.
- store_check_part_1(obj);
- store_check_part_2(obj);
-}
-// Overload taking the destination address; dst is not used and the call forwards to store_check(obj).
-void MacroAssembler::store_check(Register obj, Address dst) {
- store_check(obj);
-}
-
-// Part 1 of the store check: shifts obj right by card_shift in place.
-// split the store check operation so that other instructions can be scheduled in between
-void MacroAssembler::store_check_part_1(Register obj) {
- BarrierSet* bs = Universe::heap()->barrier_set();
- assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
- shrptr(obj, CardTableModRefBS::card_shift);
-}
-// Part 2 of the store check: stores 0 into the card table byte at byte_map_base + obj (obj as left by store_check_part_1).
-void MacroAssembler::store_check_part_2(Register obj) {
- BarrierSet* bs = Universe::heap()->barrier_set();
- assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
- CardTableModRefBS* ct = (CardTableModRefBS*)bs;
- assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
-
- // The calculation for byte_map_base is as follows:
- // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
- // So this essentially converts an address to a displacement and
- // it will never need to be relocated. On 64bit however the value may be too
- // large for a 32bit displacement
-
- intptr_t disp = (intptr_t) ct->byte_map_base;
- if (is_simm32(disp)) {
- Address cardtable(noreg, obj, Address::times_1, disp);
- movb(cardtable, 0);
- } else {
- // By doing it as an ExternalAddress disp could be converted to a rip-relative
- // displacement and done in a single instruction given favorable mapping and
- // a smarter version of as_Address. Worst case it is two instructions which
- // is no worse off than loading disp into a register and doing as a simple
- // Address() as above.
- // We can't do as ExternalAddress as the only style since if disp == 0 we'll
- // assert since NULL isn't acceptable in a reloc (see 6644928). In any case
- // in some cases we'll get a single instruction version.
-
- ExternalAddress cardtable((address)disp);
- Address index(noreg, obj, Address::times_1);
- movb(as_Address(ArrayAddress(cardtable, index)), 0);
- }
-}
-// Pointer-sized subtract of an immediate from dst: subq on LP64, subl otherwise.
-void MacroAssembler::subptr(Register dst, int32_t imm32) {
- LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
-}
-
-// Force generation of a 4 byte immediate value even if it fits into 8 bits (subq_imm32 on LP64, subl_imm32 otherwise)
-void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
- LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
-}
-// Pointer-sized register subtract, dst := dst - src: subq on LP64, subl otherwise.
-void MacroAssembler::subptr(Register dst, Register src) {
- LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
-}
-
-// C++ bool manipulation: tests dst as a C++ bool, picking the instruction from sizeof(bool)
-void MacroAssembler::testbool(Register dst) {
- if(sizeof(bool) == 1)
- testb(dst, 0xff);
- else if(sizeof(bool) == 2) {
- // testw implementation needed for two byte bools
- ShouldNotReachHere();
- } else if(sizeof(bool) == 4)
- testl(dst, dst);
- else
- // unsupported
- ShouldNotReachHere();
-}
-// Pointer-sized test of dst against src: testq on LP64, testl otherwise.
-void MacroAssembler::testptr(Register dst, Register src) {
- LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
-}
-
-// Bump-allocates from the thread's TLAB. Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
-void MacroAssembler::tlab_allocate(Register obj,
- Register var_size_in_bytes,
- int con_size_in_bytes,
- Register t1,
- Register t2,
- Label& slow_case) {
- assert_different_registers(obj, t1, t2);
- assert_different_registers(obj, var_size_in_bytes, t1);
- Register end = t2;
- Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);
-
- verify_tlab();
-
- NOT_LP64(get_thread(thread));
- // obj := current tlab top; end := obj + size (con_size_in_bytes is used when var_size_in_bytes is noreg)
- movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
- if (var_size_in_bytes == noreg) {
- lea(end, Address(obj, con_size_in_bytes));
- } else {
- lea(end, Address(obj, var_size_in_bytes, Address::times_1));
- }
- cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
- jcc(Assembler::above, slow_case); // new top would pass tlab end
-
- // update the tlab top pointer
- movptr(Address(thread, JavaThread::tlab_top_offset()), end);
-
- // recover var_size_in_bytes if necessary
- if (var_size_in_bytes == end) {
- subptr(var_size_in_bytes, obj);
- }
- verify_tlab();
-}
-// Retires the current TLAB and refills it from eden (then jumps to retry), or keeps it and jumps to try_eden when its free space exceeds the waste limit.
-// Preserves rbx and rdx. Returns the register used for the thread.
-Register MacroAssembler::tlab_refill(Label& retry,
- Label& try_eden,
- Label& slow_case) {
- Register top = rax;
- Register t1 = rcx;
- Register t2 = rsi;
- Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
- assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
- Label do_refill, discard_tlab;
-
- if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
- // No allocation in the shared eden.
- jmp(slow_case);
- }
-
- NOT_LP64(get_thread(thread_reg));
-
- movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
- movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
-
- // calculate amount of free space
- subptr(t1, top);
- shrptr(t1, LogHeapWordSize);
-
- // Retain tlab and allocate object in shared space if
- // the amount free in the tlab is too large to discard.
- cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
- jcc(Assembler::lessEqual, discard_tlab);
-
- // Retain
- // %%% yuck as movptr...
- movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
- addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
- if (TLABStats) {
- // increment number of slow_allocations
- addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
- }
- jmp(try_eden);
-
- bind(discard_tlab);
- if (TLABStats) {
- // increment number of refills
- addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
- // accumulate wastage -- t1 is amount free in tlab
- addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
- }
-
- // if tlab is currently allocated (top or end != null) then
- // fill [top, end + alignment_reserve) with array object
- testptr(top, top);
- jcc(Assembler::zero, do_refill);
-
- // set up the mark word
- movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
- // set the length to the remaining space
- subptr(t1, typeArrayOopDesc::header_size(T_INT));
- addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
- shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
- movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
- // set klass to intArrayKlass
- // dubious reloc why not an oop reloc?
- movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
- // store klass last. concurrent gcs assumes klass length is valid if
- // klass field is not null.
- store_klass(top, t1);
- // account for the bytes used in the retired tlab (top - start)
- movptr(t1, top);
- subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
- incr_allocated_bytes(thread_reg, t1, 0);
-
- // refill the tlab with an eden allocation
- bind(do_refill);
- movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
- shlptr(t1, LogHeapWordSize);
- // allocate new tlab, address returned in top
- eden_allocate(top, t1, 0, t2, slow_case);
-
- // Check that t1 was preserved in eden_allocate.
-#ifdef ASSERT
- if (UseTLAB) {
- Label ok;
- Register tsize = rsi;
- assert_different_registers(tsize, thread_reg, t1);
- push(tsize);
- movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
- shlptr(tsize, LogHeapWordSize);
- cmpptr(t1, tsize);
- jcc(Assembler::equal, ok);
- STOP("assert(t1 != tlab size)");
- should_not_reach_here();
-
- bind(ok);
- pop(tsize);
- }
-#endif
- movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
- movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
- addptr(top, t1);
- subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
- movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
- verify_tlab();
- jmp(retry);
-
- return thread_reg; // for use by caller
-}
-// Adds var_size_in_bytes (when valid), else con_size_in_bytes, to the thread's allocated-bytes counter.
-void MacroAssembler::incr_allocated_bytes(Register thread,
- Register var_size_in_bytes,
- int con_size_in_bytes,
- Register t1) {
- if (!thread->is_valid()) {
-#ifdef _LP64
- thread = r15_thread;
-#else
- assert(t1->is_valid(), "need temp reg");
- thread = t1;
- get_thread(thread);
-#endif
- }
- // LP64 uses a single addq; 32-bit adds to the low word and propagates the carry into the word at +4
-#ifdef _LP64
- if (var_size_in_bytes->is_valid()) {
- addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
- } else {
- addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
- }
-#else
- if (var_size_in_bytes->is_valid()) {
- addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
- } else {
- addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
- }
- adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
-#endif
-}
-// Calls runtime_entry with nb_args doubles popped from the x87 stack, saving and restoring general registers, XMM/YMM registers and other live FPU registers around the call.
-void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
- pusha();
-
- // if we are coming from c1, xmm registers may be live
- int off = 0;
- if (UseSSE == 1) {
- subptr(rsp, sizeof(jdouble)*8);
- movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
- movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
- movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
- movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
- movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
- movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
- movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
- movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
- } else if (UseSSE >= 2) {
-#ifdef COMPILER2
- if (MaxVectorSize > 16) {
- assert(UseAVX > 0, "256bit vectors are supported only with AVX");
- // Save upper half of YMM registers
- subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
- vextractf128h(Address(rsp, 0),xmm0);
- vextractf128h(Address(rsp, 16),xmm1);
- vextractf128h(Address(rsp, 32),xmm2);
- vextractf128h(Address(rsp, 48),xmm3);
- vextractf128h(Address(rsp, 64),xmm4);
- vextractf128h(Address(rsp, 80),xmm5);
- vextractf128h(Address(rsp, 96),xmm6);
- vextractf128h(Address(rsp,112),xmm7);
-#ifdef _LP64
- vextractf128h(Address(rsp,128),xmm8);
- vextractf128h(Address(rsp,144),xmm9);
- vextractf128h(Address(rsp,160),xmm10);
- vextractf128h(Address(rsp,176),xmm11);
- vextractf128h(Address(rsp,192),xmm12);
- vextractf128h(Address(rsp,208),xmm13);
- vextractf128h(Address(rsp,224),xmm14);
- vextractf128h(Address(rsp,240),xmm15);
-#endif
- }
-#endif
- // Save whole 128bit (16 bytes) XMM registers
- subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
- movdqu(Address(rsp,off++*16),xmm0);
- movdqu(Address(rsp,off++*16),xmm1);
- movdqu(Address(rsp,off++*16),xmm2);
- movdqu(Address(rsp,off++*16),xmm3);
- movdqu(Address(rsp,off++*16),xmm4);
- movdqu(Address(rsp,off++*16),xmm5);
- movdqu(Address(rsp,off++*16),xmm6);
- movdqu(Address(rsp,off++*16),xmm7);
-#ifdef _LP64
- movdqu(Address(rsp,off++*16),xmm8);
- movdqu(Address(rsp,off++*16),xmm9);
- movdqu(Address(rsp,off++*16),xmm10);
- movdqu(Address(rsp,off++*16),xmm11);
- movdqu(Address(rsp,off++*16),xmm12);
- movdqu(Address(rsp,off++*16),xmm13);
- movdqu(Address(rsp,off++*16),xmm14);
- movdqu(Address(rsp,off++*16),xmm15);
-#endif
- }
-
- // Preserve registers across runtime call
- int incoming_argument_and_return_value_offset = -1;
- if (num_fpu_regs_in_use > 1) {
- // Must preserve all other FPU regs (could alternatively convert
- // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
- // FPU state, but can not trust C compiler)
- NEEDS_CLEANUP;
- // NOTE that in this case we also push the incoming argument(s) to
- // the stack and restore it later; we also use this stack slot to
- // hold the return value from dsin, dcos etc.
- for (int i = 0; i < num_fpu_regs_in_use; i++) {
- subptr(rsp, sizeof(jdouble));
- fstp_d(Address(rsp, 0));
- }
- incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
- for (int i = nb_args-1; i >= 0; i--) {
- fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
- }
- }
- // Pop the nb_args arguments from the FPU stack into a fresh stack area
- subptr(rsp, nb_args*sizeof(jdouble));
- for (int i = 0; i < nb_args; i++) {
- fstp_d(Address(rsp, i*sizeof(jdouble)));
- }
-
-#ifdef _LP64
- if (nb_args > 0) {
- movdbl(xmm0, Address(rsp, 0));
- }
- if (nb_args > 1) {
- movdbl(xmm1, Address(rsp, sizeof(jdouble)));
- }
- assert(nb_args <= 2, "unsupported number of args");
-#endif // _LP64
-
- // NOTE: we must not use call_VM_leaf here because that requires a
- // complete interpreter frame in debug mode -- same bug as 4387334
- // MacroAssembler::call_VM_leaf_base is perfectly safe and will
- // do proper 64bit abi
-
- NEEDS_CLEANUP;
- // Need to add stack banging before this runtime call if it needs to
- // be taken; however, there is no generic stack banging routine at
- // the MacroAssembler level
-
- MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
-
-#ifdef _LP64
- movsd(Address(rsp, 0), xmm0); // move the xmm0 result through memory ...
- fld_d(Address(rsp, 0)); // ... onto the FPU stack
-#endif // _LP64
- addptr(rsp, sizeof(jdouble) * nb_args);
- if (num_fpu_regs_in_use > 1) {
- // Must save return value to stack and then restore entire FPU
- // stack except incoming arguments
- fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
- for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
- fld_d(Address(rsp, 0));
- addptr(rsp, sizeof(jdouble));
- }
- fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
- addptr(rsp, sizeof(jdouble) * nb_args);
- }
- // Restore the XMM/YMM state in the reverse order of the saves above
- off = 0;
- if (UseSSE == 1) {
- movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
- movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
- movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
- movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
- movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
- movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
- movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
- movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
- addptr(rsp, sizeof(jdouble)*8);
- } else if (UseSSE >= 2) {
- // Restore whole 128bit (16 bytes) XMM registers
- movdqu(xmm0, Address(rsp,off++*16));
- movdqu(xmm1, Address(rsp,off++*16));
- movdqu(xmm2, Address(rsp,off++*16));
- movdqu(xmm3, Address(rsp,off++*16));
- movdqu(xmm4, Address(rsp,off++*16));
- movdqu(xmm5, Address(rsp,off++*16));
- movdqu(xmm6, Address(rsp,off++*16));
- movdqu(xmm7, Address(rsp,off++*16));
-#ifdef _LP64
- movdqu(xmm8, Address(rsp,off++*16));
- movdqu(xmm9, Address(rsp,off++*16));
- movdqu(xmm10, Address(rsp,off++*16));
- movdqu(xmm11, Address(rsp,off++*16));
- movdqu(xmm12, Address(rsp,off++*16));
- movdqu(xmm13, Address(rsp,off++*16));
- movdqu(xmm14, Address(rsp,off++*16));
- movdqu(xmm15, Address(rsp,off++*16));
-#endif
- addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
-#ifdef COMPILER2
- if (MaxVectorSize > 16) {
- // Restore upper half of YMM registers.
- vinsertf128h(xmm0, Address(rsp, 0));
- vinsertf128h(xmm1, Address(rsp, 16));
- vinsertf128h(xmm2, Address(rsp, 32));
- vinsertf128h(xmm3, Address(rsp, 48));
- vinsertf128h(xmm4, Address(rsp, 64));
- vinsertf128h(xmm5, Address(rsp, 80));
- vinsertf128h(xmm6, Address(rsp, 96));
- vinsertf128h(xmm7, Address(rsp,112));
-#ifdef _LP64
- vinsertf128h(xmm8, Address(rsp,128));
- vinsertf128h(xmm9, Address(rsp,144));
- vinsertf128h(xmm10, Address(rsp,160));
- vinsertf128h(xmm11, Address(rsp,176));
- vinsertf128h(xmm12, Address(rsp,192));
- vinsertf128h(xmm13, Address(rsp,208));
- vinsertf128h(xmm14, Address(rsp,224));
- vinsertf128h(xmm15, Address(rsp,240));
-#endif
- addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
- }
-#endif
- }
- popa();
-}
-// pi/4 as a double; trigfunc compares |x| against it to choose between the FPU instruction and the runtime call.
-static const double pi_4 = 0.7853981633974483;
-// Emits sin ('s'), cos ('c') or tan ('t') of the FPU top-of-stack: fsin/fcos/ftan when |x| <= pi/4, otherwise a runtime call.
-void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
- // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
- // was attempted in this code; unfortunately it appears that the
- // switch to 80-bit precision and back causes this to be
- // unprofitable compared with simply performing a runtime call if
- // the argument is out of the (-pi/4, pi/4) range.
-
- Register tmp = noreg;
- if (!VM_Version::supports_cmov()) {
- // fcmp needs a temporary so preserve rbx,
- tmp = rbx;
- push(tmp);
- }
-
- Label slow_case, done;
-
- ExternalAddress pi4_adr = (address)&pi_4;
- if (reachable(pi4_adr)) { // otherwise only the runtime-call path below is emitted
- // x ?<= pi/4
- fld_d(pi4_adr);
- fld_s(1); // Stack: X PI/4 X
- fabs(); // Stack: |X| PI/4 X
- fcmp(tmp);
- jcc(Assembler::above, slow_case);
-
- // fastest case: -pi/4 <= x <= pi/4
- switch(trig) {
- case 's':
- fsin();
- break;
- case 'c':
- fcos();
- break;
- case 't':
- ftan();
- break;
- default:
- assert(false, "bad intrinsic");
- break;
- }
- jmp(done);
- }
-
- // slow case: runtime call
- bind(slow_case);
-
- switch(trig) {
- case 's':
- {
- fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
- }
- break;
- case 'c':
- {
- fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
- }
- break;
- case 't':
- {
- fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
- }
- break;
- default:
- assert(false, "bad intrinsic");
- break;
- }
-
- // Come here with result in F-TOS
- bind(done);
-
- if (tmp != noreg) {
- pop(tmp); // restore the rbx saved above
- }
-}
-
-
-// Look up the method for a megamorphic invokeinterface call.
-// The target method is determined by <intf_klass, itable_index>.
-// The receiver klass is in recv_klass.
-// On success, the result will be in method_result, and execution falls through.
-// On failure, execution transfers to the given label.
-void MacroAssembler::lookup_interface_method(Register recv_klass,
- Register intf_klass,
- RegisterOrConstant itable_index,
- Register method_result,
- Register scan_temp,
- Label& L_no_such_interface) {
- assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
- assert(itable_index.is_constant() || itable_index.as_register() == method_result,
- "caller must use same register for non-constant itable index as for method");
-
- // Compute start of first itableOffsetEntry (which is at the end of the vtable)
- int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
- int itentry_off = itableMethodEntry::method_offset_in_bytes();
- int scan_step = itableOffsetEntry::size() * wordSize;
- int vte_size = vtableEntry::size() * wordSize;
- Address::ScaleFactor times_vte_scale = Address::times_ptr;
- assert(vte_size == wordSize, "else adjust times_vte_scale");
-
- movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
-
- // %%% Could store the aligned, prescaled offset in the klassoop.
- lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
- if (HeapWordsPerLong > 1) {
- // Round up to align_object_offset boundary
- // see code for InstanceKlass::start_of_itable!
- round_to(scan_temp, BytesPerLong);
- }
-
- // Adjust recv_klass by scaled itable_index, so we can free itable_index.
- assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
- lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
-
- // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
- // if (scan->interface() == intf) {
- // result = (klass + scan->offset() + itable_index);
- // }
- // }
- Label search, found_method;
- // The compare is emitted twice: once peeled ahead of the loop (peel == 1), once as the loop test (peel == 0).
- for (int peel = 1; peel >= 0; peel--) {
- movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
- cmpptr(intf_klass, method_result);
-
- if (peel) {
- jccb(Assembler::equal, found_method);
- } else {
- jccb(Assembler::notEqual, search);
- // (invert the test to fall through to found_method...)
- }
-
- if (!peel) break;
-
- bind(search);
-
- // Check that the previous entry is non-null. A null entry means that
- // the receiver class doesn't implement the interface, and wasn't the
- // same as when the caller was compiled.
- testptr(method_result, method_result);
- jcc(Assembler::zero, L_no_such_interface);
- addptr(scan_temp, scan_step);
- }
-
- bind(found_method);
-
- // Got a hit.
- movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
- movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
-}
-
-
-// virtual method calling: loads into method_result the method slot of recv_klass's vtable entry at vtable_index
-void MacroAssembler::lookup_virtual_method(Register recv_klass,
- RegisterOrConstant vtable_index,
- Register method_result) {
- const int base = InstanceKlass::vtable_start_offset() * wordSize;
- assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
- Address vtable_entry_addr(recv_klass,
- vtable_index, Address::times_ptr,
- base + vtableEntry::method_offset_in_bytes());
- movptr(method_result, vtable_entry_addr);
-}
-
-
-// Emit a complete subtype check: control reaches L_success when the check
-// succeeds and falls out of the emitted sequence when it fails.
-void MacroAssembler::check_klass_subtype(Register sub_klass,
-                                         Register super_klass,
-                                         Register temp_reg,
-                                         Label& L_success) {
-  Label L_miss;
-  // Fast path first; with a NULL slow-path label it falls through into the
-  // slow path emitted right behind it.
-  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_miss, NULL);
-  // Slow path; with a NULL failure label a miss simply falls through.
-  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
-  bind(L_miss);
-}
-
-
-// Emit the fast part of a subtype check of sub_klass against super_klass.
-//
-// L_success / L_failure / L_slow_path are the three possible outcomes.  At
-// most one of them may be NULL; a NULL label is replaced by a local
-// fall-through label that is bound at the end of the emitted sequence.
-//
-// super_check_offset may be a register or a constant.  The constant -1 means
-// "not known": the offset is then loaded from super_klass into temp_reg,
-// which therefore must not be noreg in that case.
-void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
-                                                   Register super_klass,
-                                                   Register temp_reg,
-                                                   Label* L_success,
-                                                   Label* L_failure,
-                                                   Label* L_slow_path,
-                                        RegisterOrConstant super_check_offset) {
-  assert_different_registers(sub_klass, super_klass, temp_reg);
-  // A constant offset of -1 is the "must load it" marker.
-  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
-  if (super_check_offset.is_register()) {
-    assert_different_registers(sub_klass, super_klass,
-                               super_check_offset.as_register());
-  } else if (must_load_sco) {
-    assert(temp_reg != noreg, "supply either a temp or a register offset");
-  }
-
-  // Map every NULL outcome label onto the local fall-through label.
-  Label L_fallthrough;
-  int label_nulls = 0;
-  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
-  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
-  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
-  assert(label_nulls <= 1, "at most one NULL in the batch");
-
-  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
-  int sco_offset = in_bytes(Klass::super_check_offset_offset());
-  Address super_check_offset_addr(super_klass, sco_offset);
-
-  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
-  // range of a jccb.  If this routine grows larger, reconsider at
-  // least some of these.
-#define local_jcc(assembler_cond, label)                                \
-  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
-  else                             jcc( assembler_cond, label) /*omit semi*/
-
-  // Hacked jmp, which may only be used just before L_fallthrough.
-#define final_jmp(label)                                                \
-  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
-  else                            jmp(label)                /*omit semi*/
-
-  // If the pointers are equal, we are done (e.g., String[] elements).
-  // This self-check enables sharing of secondary supertype arrays among
-  // non-primary types such as array-of-interface.  Otherwise, each such
-  // type would need its own customized SSA.
-  // We move this check to the front of the fast path because many
-  // type checks are in fact trivially successful in this manner,
-  // so we get a nicely predicted branch right at the start of the check.
-  cmpptr(sub_klass, super_klass);
-  local_jcc(Assembler::equal, *L_success);
-
-  // Check the supertype display:
-  if (must_load_sco) {
-    // Positive movl does right thing on LP64.
-    movl(temp_reg, super_check_offset_addr);
-    // From here on the offset lives in temp_reg.
-    super_check_offset = RegisterOrConstant(temp_reg);
-  }
-  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
-  cmpptr(super_klass, super_check_addr); // load displayed supertype
-
-  // This check has worked decisively for primary supers.
-  // Secondary supers are sought in the super_cache ('super_cache_addr').
-  // (Secondary supers are interfaces and very deeply nested subtypes.)
-  // This works in the same check above because of a tricky aliasing
-  // between the super_cache and the primary super display elements.
-  // (The 'super_check_addr' can address either, as the case requires.)
-  // Note that the cache is updated below if it does not help us find
-  // what we need immediately.
-  // So if it was a primary super, we can just fail immediately.
-  // Otherwise, it's the slow path for us (no success at this point).
-
-  if (super_check_offset.is_register()) {
-    // Offset only known at run time: a mismatch is a definite failure unless
-    // the offset equals sc_offset, in which case the slow path must decide.
-    local_jcc(Assembler::equal, *L_success);
-    cmpl(super_check_offset.as_register(), sc_offset);
-    if (L_failure == &L_fallthrough) {
-      local_jcc(Assembler::equal, *L_slow_path);
-    } else {
-      local_jcc(Assembler::notEqual, *L_failure);
-      final_jmp(*L_slow_path);
-    }
-  } else if (super_check_offset.as_constant() == sc_offset) {
-    // Need a slow path; fast failure is impossible.
-    if (L_slow_path == &L_fallthrough) {
-      local_jcc(Assembler::equal, *L_success);
-    } else {
-      local_jcc(Assembler::notEqual, *L_slow_path);
-      final_jmp(*L_success);
-    }
-  } else {
-    // No slow path; it's a fast decision.
-    if (L_failure == &L_fallthrough) {
-      local_jcc(Assembler::equal, *L_success);
-    } else {
-      local_jcc(Assembler::notEqual, *L_failure);
-      final_jmp(*L_success);
-    }
-  }
-
-  bind(L_fallthrough);
-
-#undef local_jcc
-#undef final_jmp
-}
-
-
-// Emit the slow part of a subtype check: scan the secondary supers array of
-// sub_klass for super_klass and, on a hit, store super_klass into the
-// secondary super cache of sub_klass.
-//
-// L_success / L_failure: at most one may be NULL; a NULL label is replaced by
-// a local fall-through label bound at the end of the emitted sequence.
-// The scan uses rax, rcx and rdi; each of them is pushed and popped around
-// the scan unless it was handed in as temp_reg or temp2_reg.
-// set_cond_codes: only checked by an assert here (rdi must not have been
-// spilled, i.e. rdi must be one of the temps).
-void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
-                                                   Register super_klass,
-                                                   Register temp_reg,
-                                                   Register temp2_reg,
-                                                   Label* L_success,
-                                                   Label* L_failure,
-                                                   bool set_cond_codes) {
-  assert_different_registers(sub_klass, super_klass, temp_reg);
-  if (temp2_reg != noreg)
-    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
-#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
-
-  // Map every NULL outcome label onto the local fall-through label.
-  Label L_fallthrough;
-  int label_nulls = 0;
-  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
-  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
-  assert(label_nulls <= 1, "at most one NULL in the batch");
-
-  // a couple of useful fields in sub_klass:
-  int ss_offset = in_bytes(Klass::secondary_supers_offset());
-  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
-  Address secondary_supers_addr(sub_klass, ss_offset);
-  Address super_cache_addr(     sub_klass, sc_offset);
-
-  // Do a linear scan of the secondary super-klass chain.
-  // This code is rarely used, so simplicity is a virtue here.
-  // The repne_scan instruction uses fixed registers, which we must spill.
-  // Don't worry too much about pre-existing connections with the input regs.
-
-  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
-  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
-
-  // Get super_klass value into rax (even if it was in rdi or rcx).
-  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
-  if (super_klass != rax || UseCompressedOops) {
-    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
-    mov(rax, super_klass);
-  }
-  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
-  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
-
-#ifndef PRODUCT
-  // Non-product builds: bump SharedRuntime::_partial_subtype_ctr.
-  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
-  ExternalAddress pst_counter_addr((address) pst_counter);
-  NOT_LP64(  incrementl(pst_counter_addr) );
-  LP64_ONLY( lea(rcx, pst_counter_addr) );
-  LP64_ONLY( incrementl(Address(rcx, 0)) );
-#endif //PRODUCT
-
-  // We will consult the secondary-super array.
-  movptr(rdi, secondary_supers_addr);
-  // Load the array length.  (Positive movl does right thing on LP64.)
-  movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes()));
-  // Skip to start of data.
-  addptr(rdi, Array<Klass*>::base_offset_in_bytes());
-
-  // Scan RCX words at [RDI] for an occurrence of RAX.
-  // Set NZ/Z based on last compare.
-  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
-  // not change flags (only scas instruction which is repeated sets flags).
-  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
-
-  testptr(rax,rax); // Set Z = 0
-  repne_scan();
-
-  // Unspill the temp. registers:
-  // (reverse order of the pushes above)
-  if (pushed_rdi)  pop(rdi);
-  if (pushed_rcx)  pop(rcx);
-  if (pushed_rax)  pop(rax);
-
-  if (set_cond_codes) {
-    // Special hack for the AD files:  rdi is guaranteed non-zero.
-    assert(!pushed_rdi, "rdi must be left non-NULL");
-    // Also, the condition codes are properly set Z/NZ on succeed/failure.
-  }
-
-  // Short branch only when the target is the local fall-through label.
-  if (L_failure == &L_fallthrough)
-        jccb(Assembler::notEqual, *L_failure);
-  else  jcc(Assembler::notEqual, *L_failure);
-
-  // Success.  Cache the super we found and proceed in triumph.
-  movptr(super_cache_addr, super_klass);
-
-  if (L_success != &L_fallthrough) {
-    jmp(*L_success);
-  }
-
-#undef IS_A_TEMP
-
-  bind(L_fallthrough);
-}
-
-
-// 32-bit conditional move from memory: dst = src when 'cc' holds.
-void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
-  if (!VM_Version::supports_cmov()) {
-    // No CMOV available: branch around a plain load on the negated condition.
-    Label L_skip;
-    jccb(negate_condition(cc), L_skip);
-    movl(dst, src);
-    bind(L_skip);
-    return;
-  }
-  cmovl(cc, dst, src);
-}
-
-// 32-bit conditional move between registers: dst = src when 'cc' holds.
-void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
-  if (!VM_Version::supports_cmov()) {
-    // No CMOV available: branch around a plain move on the negated condition.
-    Label L_skip;
-    jccb(negate_condition(cc), L_skip);
-    movl(dst, src);
-    bind(L_skip);
-    return;
-  }
-  cmovl(cc, dst, src);
-}
-
-// When VerifyOops is on, emit a call to the oop verification stub for the
-// value held in 'reg'; 's' is included in the diagnostic message.
-void MacroAssembler::verify_oop(Register reg, const char* s) {
-  if (!VerifyOops) return;
-
-  // The message is built at code-generation time.  The buffer is not freed
-  // here; its address is embedded in the emitted code.
-  char* msg = new char[strlen(s) + 50];
-  sprintf(msg, "verify_oop: %s: %s", reg->name(), s);
-  BLOCK_COMMENT("verify_oop {");
-#ifdef _LP64
-  push(rscratch1);                    // r10 gets trashed by movptr() below
-#endif
-  push(rax);                          // keep rax intact for our caller
-  push(reg);                          // argument: the value to verify
-  ExternalAddress msg_addr((address) msg);
-  // pushptr would modify scratch registers, and the contract here is to
-  // modify nothing, so route the message address through rax.
-  movptr(rax, msg_addr.addr());
-  push(rax);                          // argument: the message
-  // Call through the entry-address slot to solve the generation ordering problem.
-  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
-  call(rax);
-  // No cleanup is emitted here: popping the arguments (oop, message) and
-  // restoring rax, r10 is left to the called routine.
-  BLOCK_COMMENT("} verify_oop");
-}
-
-
-// Produce the value stored at 'delayed_value_addr' plus 'offset'.
-//
-// If the stored value is already non-zero at code-generation time it is
-// folded into a constant.  Otherwise code is emitted that loads the value
-// into 'tmp' at run time (adding 'offset' if it is non-zero) and 'tmp' is
-// returned.  ASSERT builds additionally emit a check that the loaded value is
-// non-zero.
-RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
-                                                      Register tmp,
-                                                      int offset) {
-  intptr_t value = *delayed_value_addr;
-  if (value != 0)
-    return RegisterOrConstant(value + offset);
-
-  // load indirectly to solve generation ordering problem
-  movptr(tmp, ExternalAddress((address) delayed_value_addr));
-
-#ifdef ASSERT
-  { Label L;
-    testptr(tmp, tmp);
-    if (WizardMode) {
-      jcc(Assembler::notZero, L);
-      char* buf = new char[40];
-      // The space before INTPTR_FORMAT is required: C++11 and later lex a
-      // string literal that is immediately followed by an identifier as a
-      // user-defined literal, so the macro would not be expanded.
-      sprintf(buf, "DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]);
-      STOP(buf);
-    } else {
-      jccb(Assembler::notZero, L);
-      hlt();
-    }
-    bind(L);
-  }
-#endif
-
-  if (offset != 0)
-    addptr(tmp, offset);
-
-  return RegisterOrConstant(tmp);
-}
-
-
-// Build the rsp-relative address of interpreter argument slot 'arg_slot'
-// (register or constant), displaced by 'extra_slot_offset' further slots.
-Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
-                                         int extra_slot_offset) {
-  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
-  const int slot_bytes = Interpreter::stackElementSize;
-  int disp = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
-#ifdef ASSERT
-  // Consecutive expression slots must be exactly one stack element apart.
-  int next_disp = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
-  assert(next_disp - disp == slot_bytes, "correct arithmetic");
-#endif
-  if (arg_slot.is_constant()) {
-    // Constant slot number: fold it into the displacement, no index register.
-    disp += arg_slot.as_constant() * slot_bytes;
-    disp += wordSize;           // return PC is on stack
-    return Address(rsp, noreg, Address::no_scale, disp);
-  }
-  // Slot number in a register: let the addressing mode do the scaling.
-  disp += wordSize;             // return PC is on stack
-  return Address(rsp, arg_slot.as_register(), Address::times(slot_bytes), disp);
-}
-
-
-// When VerifyOops is on, emit a call to the oop verification stub for the
-// value stored at 'addr'; 's' is included in the diagnostic message.
-void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
-  if (!VerifyOops) return;
-
-  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
-  // The message is built at code-generation time; the buffer is not freed
-  // here and its address is embedded in the emitted code.
-  char* msg = new char[strlen(s) + 50];
-  sprintf(msg, "verify_oop_addr: %s", s);
-
-#ifdef _LP64
-  push(rscratch1);                    // r10 gets trashed by movptr() below
-#endif
-  push(rax);                          // keep rax intact for our caller
-  // addr may contain rsp so we will have to adjust it based on the push
-  // we just did (and on 64 bit we do two pushes)
-  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
-  //       stores rax into addr which is backwards of what was intended.
-  if (!addr.uses(rsp)) {
-    pushptr(addr);
-  } else {
-    lea(rax, addr);
-    pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
-  }
-
-  ExternalAddress msg_addr((address) msg);
-  // Message argument.  pushptr would modify scratch registers, and the
-  // contract here is to modify nothing, so route the address through rax.
-  movptr(rax, msg_addr.addr());
-  push(rax);
-
-  // Call through the entry-address slot to solve the generation ordering problem.
-  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
-  call(rax);
-  // No cleanup is emitted here: popping the arguments (addr, message) and
-  // restoring rax, r10 is left to the called routine.
-}
-
-// ASSERT builds with UseTLAB and VerifyOops: emit checks that the current
-// thread's TLAB pointers satisfy start <= top <= end.  Emits nothing otherwise.
-void MacroAssembler::verify_tlab() {
-#ifdef ASSERT
-  if (!(UseTLAB && VerifyOops)) return;
-
-  Label L_top_ok, L_done;
-  Register scratch = rsi;
-  Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
-
-  push(scratch);
-  // 32-bit has no dedicated thread register: borrow one and load the thread.
-  NOT_LP64(push(thread_reg));
-  NOT_LP64(get_thread(thread_reg));
-
-  // top >= start ?
-  movptr(scratch, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
-  cmpptr(scratch, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
-  jcc(Assembler::aboveEqual, L_top_ok);
-  STOP("assert(top >= start)");
-  should_not_reach_here();
-
-  // end >= top ?
-  bind(L_top_ok);
-  movptr(scratch, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
-  cmpptr(scratch, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
-  jcc(Assembler::aboveEqual, L_done);
-  STOP("assert(top <= end)");
-  should_not_reach_here();
-
-  bind(L_done);
-  NOT_LP64(pop(thread_reg));
-  pop(scratch);
-#endif
-}
-
-// Decoded view of an x87 FPU control word held in the low 16 bits of _value.
-class ControlWord {
- public:
-  int32_t  _value;
-
-  // Two-bit control fields.
-  int  rounding_control() const        { return  (_value & 0x0C00) >> 10; }
-  int  precision_control() const       { return  (_value & 0x0300) >>  8; }
-  // Single-bit fields, bits 5 down to 0.
-  bool precision() const               { return  (_value & 0x20) != 0; }
-  bool underflow() const               { return  (_value & 0x10) != 0; }
-  bool overflow() const                { return  (_value & 0x08) != 0; }
-  bool zero_divide() const             { return  (_value & 0x04) != 0; }
-  bool denormalized() const            { return  (_value & 0x02) != 0; }
-  bool invalid() const                 { return  (_value & 0x01) != 0; }
-
-  // Print the raw word followed by the flag letters (upper case when the
-  // bit is set) and the rounding / precision control descriptions.
-  void print() const {
-    // Both selectors are two-bit values, so they index these tables directly.
-    const char* const rounding_names[4]  = { "round near", "round down", "round up ", "chop " };
-    const char* const precision_names[4] = { "24 bits ", "reserved", "53 bits ", "64 bits " };
-    const char* rc = rounding_names[rounding_control()];
-    const char* pc = precision_names[precision_control()];
-    // flags: two leading blanks, then one letter per bit 5..0
-    const char set_letters[]   = "PUOZDI";
-    const char clear_letters[] = "puozdi";
-    char f[9];
-    f[0] = ' ';
-    f[1] = ' ';
-    for (int k = 0; k < 6; k++) {
-      const bool is_set = ((_value >> (5 - k)) & 1) != 0;
-      f[2 + k] = is_set ? set_letters[k] : clear_letters[k];
-    }
-    f[8] = '\x0';
-    // output
-    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
-  }
-
-};
-
-// Decoded view of an x87 FPU status word held in the low 16 bits of _value.
-class StatusWord {
- public:
-  int32_t  _value;
-
-  bool busy() const                    { return  (_value & 0x8000) != 0; }
-  // Condition code bits.
-  bool C3() const                      { return  (_value & 0x4000) != 0; }
-  bool C2() const                      { return  (_value & 0x0400) != 0; }
-  bool C1() const                      { return  (_value & 0x0200) != 0; }
-  bool C0() const                      { return  (_value & 0x0100) != 0; }
-  // Three-bit top-of-stack field.
-  int  top() const                     { return  (_value & 0x3800) >> 11; }
-  // Single-bit fields, bits 7 down to 0.
-  bool error_status() const            { return  (_value & 0x80) != 0; }
-  bool stack_fault() const             { return  (_value & 0x40) != 0; }
-  bool precision() const               { return  (_value & 0x20) != 0; }
-  bool underflow() const               { return  (_value & 0x10) != 0; }
-  bool overflow() const                { return  (_value & 0x08) != 0; }
-  bool zero_divide() const             { return  (_value & 0x04) != 0; }
-  bool denormalized() const            { return  (_value & 0x02) != 0; }
-  bool invalid() const                 { return  (_value & 0x01) != 0; }
-
-  // Print the raw word, the flag letters ('-' when clear), the condition
-  // codes and the top-of-stack index.
-  void print() const {
-    // condition codes
-    char c[5];
-    c[0] = C3() ? '3' : '-';
-    c[1] = C2() ? '2' : '-';
-    c[2] = C1() ? '1' : '-';
-    c[3] = C0() ? '0' : '-';
-    c[4] = '\x0';
-    // flags: one letter per bit 7..0
-    const char letters[] = "ESPUOZDI";
-    char f[9];
-    for (int k = 0; k < 8; k++) {
-      const bool is_set = ((_value >> (7 - k)) & 1) != 0;
-      f[k] = is_set ? letters[k] : '-';
-    }
-    f[8] = '\x0';
-    // output
-    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
-  }
-
-};
-
-// x87 FPU tag word held in the low 16 bits of _value: two bits per register.
-class TagWord {
- public:
-  int32_t  _value;
-
-  // Two-bit tag stored for register number i.
-  int tag_at(int i) const {
-    const int shift = 2 * i;
-    return (_value >> shift) & 3;
-  }
-
-  // Print the low 16 bits as four hex digits.
-  void print() const {
-    printf("%04x", _value & 0xFFFF);
-  }
-
-};
-
-// One saved FPU register: two 32-bit mantissa words plus a 16-bit
-// sign/exponent word.
-class FPU_Register {
- public:
-  int32_t _m0;
-  int32_t _m1;
-  int16_t _ex;
-
-  // True when all three words match the pattern tested below.
-  bool is_indefinite() const {
-    if (_ex != -1)                  return false;
-    if (_m1 != (int32_t)0xC0000000) return false;
-    return _m0 == 0;
-  }
-
-  // Print sign, exponent word, both mantissa words, and a "NaN" marker when
-  // the exponent word is 0x7FFF or all ones.
-  void print() const {
-    char sign;
-    if (_ex < 0) {
-      sign = '-';
-    } else {
-      sign = '+';
-    }
-    const bool all_ones_exponent = (_ex == 0x7FFF || _ex == (int16_t)-1);
-    const char* kind = all_ones_exponent ? "NaN" : " ";
-    printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind);
-  }
-
-};
-
-class FPU_State {
- public:
- enum {
- register_size = 10,
- number_of_registers = 8,
- register_mask = 7
- };
-
- ControlWord _control_word;
- StatusWord _status_word;
- TagWord _tag_word;
- int32_t _error_offset;
- int32_t _error_selector;
- int32_t _data_offset;
- int32_t _data_selector;
- int8_t _register[register_size * number_of_registers];
-
- int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
- FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }
-
- const char* tag_as_string(int tag) const {
- switch (tag) {
- case 0: return "valid";
- case 1: return "zero";
- case 2: return "special";
- case 3: return "empty";
- }
- ShouldNotReachHere();
- return NULL;
- }
-
- void print() const {
- // print computation registers
- { int t = _status_word.top();
- for (int i = 0; i < number_of_registers; i++) {
- int j = (i - t) & register_mask;
- printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
- st(j)->print();
- printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
- }
- }
- printf("\n");
- // print control registers
- printf("ctrl = "); _control_word.print(); printf("\n");
- printf("stat = "); _status_word .print(); printf("\n");
- printf("tags = "); _tag_word .print(); printf("\n");
- }
-
-};
-
-// Decoded view of a saved flags register.
-class Flag_Register {
- public:
-  int32_t _value;
-
-  bool overflow() const                { return (_value & 0x800) != 0; }
-  bool direction() const               { return (_value & 0x400) != 0; }
-  bool sign() const                    { return (_value & 0x080) != 0; }
-  bool zero() const                    { return (_value & 0x040) != 0; }
-  bool auxiliary_carry() const         { return (_value & 0x010) != 0; }
-  bool parity() const                  { return (_value & 0x004) != 0; }
-  bool carry() const                   { return (_value & 0x001) != 0; }
-
-  // Print the raw value followed by one letter per flag ('-' when clear).
-  void print() const {
-    // flags, in the order O D S Z A P C
-    const char letters[]       = "ODSZAPC";
-    const int  bit_position[7] = { 11, 10, 7, 6, 4, 2, 0 };
-    char f[8];
-    for (int k = 0; k < 7; k++) {
-      const bool is_set = ((_value >> bit_position[k]) & 1) != 0;
-      f[k] = is_set ? letters[k] : '-';
-    }
-    f[7] = '\x0';
-    // output
-    printf("%08x flags = %s", _value, f);
-  }
-
-};
-
-// One saved 32-bit integer register.
-class IU_Register {
- public:
-  int32_t _value;
-
-  // Print the value in hex and then in decimal.
-  void print() const {
-    const int32_t v = _value;
-    printf("%08x %11d", v, v);
-  }
-
-};
-
-// Saved integer-unit state: the flags word followed by eight general
-// purpose registers, in the field order declared below.
-class IU_State {
- public:
-  Flag_Register _eflags;
-  IU_Register   _rdi;
-  IU_Register   _rsi;
-  IU_Register   _rbp;
-  IU_Register   _rsp;
-  IU_Register   _rbx;
-  IU_Register   _rdx;
-  IU_Register   _rcx;
-  IU_Register   _rax;
-
-  // Print every register on its own line, then the flags.
-  // The stray commas that three labels used to carry ("rax, = ", "rbx, = ",
-  // "rbp, = ") are gone, so all eight labels now share one format.
-  void print() const {
-    // computation registers
-    printf("rax = "); _rax.print(); printf("\n");
-    printf("rbx = "); _rbx.print(); printf("\n");
-    printf("rcx = "); _rcx.print(); printf("\n");
-    printf("rdx = "); _rdx.print(); printf("\n");
-    printf("rdi = "); _rdi.print(); printf("\n");
-    printf("rsi = "); _rsi.print(); printf("\n");
-    printf("rbp = "); _rbp.print(); printf("\n");
-    printf("rsp = "); _rsp.print(); printf("\n");
-    printf("\n");
-    // control registers
-    printf("flgs = "); _eflags.print(); printf("\n");
-  }
-};
-
-
-// Complete saved CPU state: the FPU state followed by the integer-unit state.
-class CPU_State {
- public:
-  FPU_State _fpu_state;
-  IU_State  _iu_state;
-
-  // Print the integer state, a blank line, then the FPU state, framed by
-  // two ruler lines.
-  void print() const {
-    const char* const ruler = "--------------------------------------------------\n";
-    printf("%s", ruler);
-    _iu_state.print();
-    printf("\n");
-    _fpu_state.print();
-    printf("%s", ruler);
-  }
-
-};
-
-
-// Plain-function entry point for generated code (see
-// MacroAssembler::print_CPU_state): prints the CPU state that 'state'
-// points to.
-static void _print_CPU_state(CPU_State* state) {
-  CPU_State& saved = *state;
-  saved.print();
-}
-
-
-// Emit code that saves the CPU state, calls _print_CPU_state() on the saved
-// image, and restores the state again.
-void MacroAssembler::print_CPU_state() {
-  push_CPU_state();
-  // The saved image starts at the current rsp; pass that address as the
-  // single stack argument.
-  push(rsp);
-  RuntimeAddress printer(CAST_FROM_FN_PTR(address, _print_CPU_state));
-  call(printer);
-  // Drop the one argument word again.
-  addptr(rsp, wordSize);
-  pop_CPU_state();
-}
-
-
-// Check the saved FPU state against an expected FPU stack depth.
-//
-//   stack_depth < 0  : leaf-call mode; only requires that the tag of ST7 is
-//                      "empty" (3).
-//   stack_depth >= 0 : the control word must equal the standard control
-//                      word, the non-empty registers must form one
-//                      contiguous run starting at ST0, and that run must be
-//                      exactly stack_depth long.
-//
-// 's' prefixes the diagnostics.  Returns true when the state is acceptable;
-// otherwise prints the state, asserts, and returns false.
-static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
-  static int counter = 0;
-  FPU_State* fs = &state->_fpu_state;
-  counter++;
-  // For leaf calls, only verify that the top few elements remain empty.
-  // We only need 1 empty at the top for C2 code.
-  if( stack_depth < 0 ) {
-    if( fs->tag_for_st(7) != 3 ) {
-      printf("FPR7 not empty\n");
-      state->print();
-      assert(false, "error");
-      return false;
-    }
-    return true;                // All other stack states do not matter
-  }
-
-  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
-         "bad FPU control word");
-
-  // compute stack depth
-  int i = 0;
-  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
-  int d = i;
-  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
-  // verify findings
-  if (i != FPU_State::number_of_registers) {
-    // stack not contiguous
-    printf("%s: stack not contiguous at ST%d\n", s, i);
-    state->print();
-    assert(false, "error");
-    return false;
-  }
-  // check if computed stack depth corresponds to expected stack depth
-  // (stack_depth is known to be >= 0 here: every negative depth returned in
-  // the leaf-call check above, so the former "<= -stack_depth" branch was
-  // unreachable and has been dropped)
-  if (d != stack_depth) {
-    // wrong stack depth
-    printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
-    state->print();
-    assert(false, "error");
-    return false;
-  }
-  // everything is cool
-  return true;
-}
-
-
-// When VerifyFPU is on, emit a call to _verify_FPU(stack_depth, s, <saved
-// CPU state>) followed by an int3 that is reached when the call returns zero.
-void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
-  if (!VerifyFPU) return;
-  push_CPU_state();
-  // Arguments are pushed last-to-first: state pointer, message, depth.
-  push(rsp);                          // the saved state starts at rsp
-  ExternalAddress message((address) s);
-  pushptr(message.addr());            // message string s
-  push(stack_depth);                  // expected stack depth
-  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
-  addptr(rsp, 3 * wordSize);          // drop the three arguments
-  // A zero result means the check failed: stop at a breakpoint.
-  Label L_passed;
-  testl(rax, rax);
-  jcc(Assembler::notZero, L_passed);
-  int3();
-  bind(L_passed);
-  pop_CPU_state();
-}
-
-// Load the klass field of the object in 'src' into 'dst', decoding it when
-// compressed klass pointers are in use.
-void MacroAssembler::load_klass(Register dst, Register src) {
-#ifdef _LP64
-  if (UseCompressedKlassPointers) {
-    // Narrow klass: 32-bit load, then decode in place.
-    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-    decode_klass_not_null(dst);
-    return;
-  }
-#endif
-  // Full-width klass pointer.
-  movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-}
-
-// Load into 'dst' the prototype header of the klass of the object in 'src'.
-void MacroAssembler::load_prototype_header(Register dst, Register src) {
-#ifdef _LP64
-  if (UseCompressedKlassPointers) {
-    assert (Universe::heap() != NULL, "java heap should be initialized");
-    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-    if (Universe::narrow_klass_shift() == 0) {
-      // Unshifted narrow klass: usable as a base register directly.
-      movq(dst, Address(dst, Klass::prototype_header_offset()));
-    } else {
-      // Shifted narrow klass: fold the decode (base + narrow * 8) into the
-      // addressing mode of the load.
-      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
-      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
-    }
-    return;
-  }
-#endif
-  // Full-width klass pointer: two dependent loads.
-  movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-  movptr(dst, Address(dst, Klass::prototype_header_offset()));
-}
-
-// Store the klass in 'src' into the klass field of the object in 'dst'.
-// With compressed klass pointers 'src' is encoded in place first, so it no
-// longer holds the full pointer afterwards.
-void MacroAssembler::store_klass(Register dst, Register src) {
-#ifdef _LP64
-  if (UseCompressedKlassPointers) {
-    encode_klass_not_null(src);
-    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
-    return;
-  }
-#endif
-  movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
-}
-
-// Load the heap oop at 'src' into 'dst', decoding it when compressed oops
-// are in use.
-void MacroAssembler::load_heap_oop(Register dst, Address src) {
-#ifdef _LP64
-  // FIXME: Must change all places where we try to load the klass.
-  if (UseCompressedOops) {
-    // Narrow oop: 32-bit load, then decode in place.
-    movl(dst, src);
-    decode_heap_oop(dst);
-    return;
-  }
-#endif
-  movptr(dst, src);
-}
-
-// Doesn't do verification, generates fixed size code
-void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
-  // Same as load_heap_oop, except that the not-null decoder is used.
-#ifdef _LP64
-  if (UseCompressedOops) {
-    // Narrow oop: 32-bit load, then decode in place.
-    movl(dst, src);
-    decode_heap_oop_not_null(dst);
-    return;
-  }
-#endif
-  movptr(dst, src);
-}
-
-// Store the oop in 'src' to the heap location 'dst'.  With compressed oops
-// 'src' is encoded in place first, so 'dst' must not be addressed through it.
-void MacroAssembler::store_heap_oop(Address dst, Register src) {
-#ifdef _LP64
-  if (UseCompressedOops) {
-    assert(!dst.uses(src), "not enough registers");
-    encode_heap_oop(src);
-    movl(dst, src);
-    return;
-  }
-#endif
-  movptr(dst, src);
-}
-
-// Compare the oop in 'src1' with the heap oop stored at 'src2'.  With
-// compressed oops the stored oop is first loaded and decoded into 'tmp';
-// when 'tmp' is noreg, rax is borrowed and saved/restored around the compare.
-void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
-  assert_different_registers(src1, tmp);
-#ifdef _LP64
-  if (UseCompressedOops) {
-    const bool borrow_rax = (tmp == noreg);
-    if (borrow_rax) {
-      tmp = rax;
-      push(tmp);
-      // The push moved rsp, so an rsp-relative src2 would now be off.
-      assert(!src2.uses(rsp), "can't push");
-    }
-    load_heap_oop(tmp, src2);
-    cmpptr(src1, tmp);
-    if (borrow_rax) pop(tmp);
-    return;
-  }
-#endif
-  cmpptr(src1, src2);
-}
-
-// Used for storing NULLs.
-void MacroAssembler::store_heap_oop_null(Address dst) {
-  // Only 64-bit builds without compressed oops need the sign-extending
-  // 64-bit store; every other configuration uses a 32-bit store.
-#ifdef _LP64
-  if (!UseCompressedOops) {
-    movslq(dst, (int32_t)NULL_WORD);
-    return;
-  }
-#endif
-  movl(dst, (int32_t)NULL_WORD);
-}
-
-#ifdef _LP64
-// With compressed klass pointers, store the 32-bit value in 'src' into the
-// klass gap of the object in 'dst'.  Emits nothing otherwise.
-void MacroAssembler::store_klass_gap(Register dst, Register src) {
-  if (!UseCompressedKlassPointers) return;
-  const Address klass_gap(dst, oopDesc::klass_gap_offset_in_bytes());
-  movl(klass_gap, src);
-}
-
-#ifdef ASSERT
-// When CheckCompressedOops is on, emit a check that r12_heapbase still
-// equals the value stored at Universe::narrow_ptrs_base_addr(); the emitted
-// code stops with 'msg' on a mismatch.
-void MacroAssembler::verify_heapbase(const char* msg) {
-  assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  if (!CheckCompressedOops) return;
-
-  Label L_base_ok;
-  push(rscratch1); // cmpptr trashes rscratch1
-  cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
-  jcc(Assembler::equal, L_base_ok);
-  STOP(msg);
-  bind(L_base_ok);
-  pop(rscratch1);
-}
-#endif
-
-// Algorithm must match oop.inline.hpp encode_heap_oop.
-void MacroAssembler::encode_heap_oop(Register r) {
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
-#endif
-  verify_oop(r, "broken oop in encode_heap_oop");
-  if (Universe::narrow_oop_base() != NULL) {
-    // Non-NULL base.  A NULL oop is first replaced by the heap base, so the
-    // subtraction below turns it into zero.
-    testq(r, r);
-    cmovq(Assembler::equal, r, r12_heapbase);
-    subq(r, r12_heapbase);
-    shrq(r, LogMinObjAlignmentInBytes);
-    return;
-  }
-  // NULL base: at most a shift is needed.
-  if (Universe::narrow_oop_shift() != 0) {
-    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-    shrq(r, LogMinObjAlignmentInBytes);
-  }
-}
-
-// Encode, in place, an oop that the caller guarantees to be non-NULL.
-void MacroAssembler::encode_heap_oop_not_null(Register r) {
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
-  if (CheckCompressedOops) {
-    // Emit a run-time check of the non-NULL guarantee.
-    Label L_not_null;
-    testq(r, r);
-    jcc(Assembler::notEqual, L_not_null);
-    STOP("null oop passed to encode_heap_oop_not_null");
-    bind(L_not_null);
-  }
-#endif
-  verify_oop(r, "broken oop in encode_heap_oop_not_null");
-  const bool has_base  = (Universe::narrow_oop_base() != NULL);
-  const bool has_shift = (Universe::narrow_oop_shift() != 0);
-  if (has_base) {
-    subq(r, r12_heapbase);
-  }
-  if (has_shift) {
-    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-    shrq(r, LogMinObjAlignmentInBytes);
-  }
-}
-
-// Encode the oop in 'src', which the caller guarantees to be non-NULL, into
-// 'dst'.  'src' is left untouched unless it is the same register as 'dst'.
-void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
-  if (CheckCompressedOops) {
-    // Emit a run-time check of the non-NULL guarantee.
-    Label L_not_null;
-    testq(src, src);
-    jcc(Assembler::notEqual, L_not_null);
-    STOP("null oop passed to encode_heap_oop_not_null2");
-    bind(L_not_null);
-  }
-#endif
-  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
-  const bool has_base  = (Universe::narrow_oop_base() != NULL);
-  const bool has_shift = (Universe::narrow_oop_shift() != 0);
-  if (dst != src) {
-    movq(dst, src);
-  }
-  if (has_base) {
-    subq(dst, r12_heapbase);
-  }
-  if (has_shift) {
-    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-    shrq(dst, LogMinObjAlignmentInBytes);
-  }
-}
-
-// Decode, in place, a narrow oop that may be NULL (zero).
-void MacroAssembler::decode_heap_oop(Register r) {
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
-#endif
-  if (Universe::narrow_oop_base() != NULL) {
-    // Non-NULL base: the branch skips the add when the shifted value is
-    // zero, so a zero narrow oop stays zero.
-    Label L_was_null;
-    shlq(r, LogMinObjAlignmentInBytes);
-    jccb(Assembler::equal, L_was_null);
-    addq(r, r12_heapbase);
-    bind(L_was_null);
-  } else if (Universe::narrow_oop_shift() != 0) {
-    // NULL base: only the shift is needed.
-    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-    shlq(r, LogMinObjAlignmentInBytes);
-  }
-  verify_oop(r, "broken oop in decode_heap_oop");
-}
-
-// Decode, in place, a narrow oop that the caller guarantees to be non-NULL.
-void MacroAssembler::decode_heap_oop_not_null(Register r) {
-  // Note: it will change flags
-  assert (UseCompressedOops, "should only be used for compressed headers");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  // Cannot assert, unverified entry point counts instructions (see .ad file)
-  // vtableStubs also counts instructions in pd_code_size_limit.
-  // Also do not verify_oop as this is called by verify_oop.
-  if (Universe::narrow_oop_shift() == 0) {
-    // Unshifted: nothing is emitted.
-    assert (Universe::narrow_oop_base() == NULL, "sanity");
-    return;
-  }
-  assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-  shlq(r, LogMinObjAlignmentInBytes);
-  if (Universe::narrow_oop_base() != NULL) {
-    addq(r, r12_heapbase);
-  }
-}
-
-// Decode the narrow oop in 'src', which the caller guarantees to be
-// non-NULL, into 'dst'.
-void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
-  // Note: it will change flags
-  assert (UseCompressedOops, "should only be used for compressed headers");
-  assert (Universe::heap() != NULL, "java heap should be initialized");
-  // Cannot assert, unverified entry point counts instructions (see .ad file)
-  // vtableStubs also counts instructions in pd_code_size_limit.
-  // Also do not verify_oop as this is called by verify_oop.
-  if (Universe::narrow_oop_shift() == 0) {
-    // Unshifted: a plain register copy, if anything at all.
-    assert (Universe::narrow_oop_base() == NULL, "sanity");
-    if (dst != src) {
-      movq(dst, src);
-    }
-    return;
-  }
-
-  assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-  if (LogMinObjAlignmentInBytes == Address::times_8) {
-    // A shift of 3 fits the times_8 scale: one lea computes base + src * 8.
-    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
-    return;
-  }
-
-  // Any other shift: copy, shift, and add the base when there is one.
-  if (dst != src) {
-    movq(dst, src);
-  }
-  shlq(dst, LogMinObjAlignmentInBytes);
-  if (Universe::narrow_oop_base() != NULL) {
-    addq(dst, r12_heapbase);
-  }
-}
-
-// Encode the klass pointer in 'r' in place: subtract the base held in
-// r12_heapbase when a narrow klass base is set, then shift right when a
-// narrow klass shift is set.
-void MacroAssembler::encode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?");
-#endif
-  const bool has_base  = (Universe::narrow_klass_base() != NULL);
-  const bool has_shift = (Universe::narrow_klass_shift() != 0);
-  if (has_base) {
-    subq(r, r12_heapbase);
-  }
-  if (has_shift) {
-    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-    shrq(r, LogKlassAlignmentInBytes);
-  }
-}
-
-// Encode the klass pointer in 'src' into 'dst'.  'src' is left untouched
-// unless it is the same register as 'dst'.
-void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?");
-#endif
-  const bool has_base  = (Universe::narrow_klass_base() != NULL);
-  const bool has_shift = (Universe::narrow_klass_shift() != 0);
-  if (dst != src) {
-    movq(dst, src);
-  }
-  if (has_base) {
-    subq(dst, r12_heapbase);
-  }
-  if (has_shift) {
-    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-    shrq(dst, LogKlassAlignmentInBytes);
-  }
-}
-
-// Decode the narrow klass pointer in 'r' in place.
-void MacroAssembler::decode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-  // Note: it will change flags
-  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
-  // Cannot assert, unverified entry point counts instructions (see .ad file)
-  // vtableStubs also counts instructions in pd_code_size_limit.
-  // Also do not verify_oop as this is called by verify_oop.
-  if (Universe::narrow_klass_shift() == 0) {
-    // Unshifted: nothing is emitted.
-    assert (Universe::narrow_klass_base() == NULL, "sanity");
-    return;
-  }
-  assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  shlq(r, LogKlassAlignmentInBytes);
-  if (Universe::narrow_klass_base() != NULL) {
-    addq(r, r12_heapbase);
-  }
-}
-
-// Decode the narrow klass pointer in 'src' into 'dst'.
-void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-  // Note: it will change flags
-  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
-  // Cannot assert, unverified entry point counts instructions (see .ad file)
-  // vtableStubs also counts instructions in pd_code_size_limit.
-  // Also do not verify_oop as this is called by verify_oop.
-  if (Universe::narrow_klass_shift() == 0) {
-    // Unshifted: a plain register copy, if anything at all.
-    assert (Universe::narrow_klass_base() == NULL, "sanity");
-    if (dst != src) {
-      movq(dst, src);
-    }
-    return;
-  }
-  // Shifted: one lea computes base + src * 8.
-  assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
-  leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
-}
-
-void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
- assert (UseCompressedOops, "should only be used for compressed headers");
- assert (Universe::heap() != NULL, "java heap should be initialized");
- assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
- int oop_index = oop_recorder()->find_index(obj);
- RelocationHolder rspec = oop_Relocation::spec(oop_index);
- mov_narrow_oop(dst, oop_index, rspec);
-}
-
-void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
- assert (UseCompressedOops, "should only be used for compressed headers");
- assert (Universe::heap() != NULL, "java heap should be initialized");
- assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
- int oop_index = oop_recorder()->find_index(obj);
- RelocationHolder rspec = oop_Relocation::spec(oop_index);
- mov_narrow_oop(dst, oop_index, rspec);
-}
-
-void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
- assert (UseCompressedKlassPointers, "should only be used for compressed headers");
- assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
- int klass_index = oop_recorder()->find_index(k);
- RelocationHolder rspec = metadata_Relocation::spec(klass_index);
- mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
-}
-
-void MacroAssembler::set_narrow_klass(Address dst, Klass* k) {
- assert (UseCompressedKlassPointers, "should only be used for compressed headers");
- assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
- int klass_index = oop_recorder()->find_index(k);
- RelocationHolder rspec = metadata_Relocation::spec(klass_index);
- mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
-}
-
-void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
- assert (UseCompressedOops, "should only be used for compressed headers");
- assert (Universe::heap() != NULL, "java heap should be initialized");
- assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
- int oop_index = oop_recorder()->find_index(obj);
- RelocationHolder rspec = oop_Relocation::spec(oop_index);
- Assembler::cmp_narrow_oop(dst, oop_index, rspec);
-}
-
-void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
- assert (UseCompressedOops, "should only be used for compressed headers");
- assert (Universe::heap() != NULL, "java heap should be initialized");
- assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
- int oop_index = oop_recorder()->find_index(obj);
- RelocationHolder rspec = oop_Relocation::spec(oop_index);
- Assembler::cmp_narrow_oop(dst, oop_index, rspec);
-}
-
-void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) {
- assert (UseCompressedKlassPointers, "should only be used for compressed headers");
- assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
- int klass_index = oop_recorder()->find_index(k);
- RelocationHolder rspec = metadata_Relocation::spec(klass_index);
- Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
-}
-
-void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) {
- assert (UseCompressedKlassPointers, "should only be used for compressed headers");
- assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
- int klass_index = oop_recorder()->find_index(k);
- RelocationHolder rspec = metadata_Relocation::spec(klass_index);
- Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
-}
-
-void MacroAssembler::reinit_heapbase() {
- if (UseCompressedOops || UseCompressedKlassPointers) {
- movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
- }
-}
-#endif // _LP64
-
-
-// C2 compiled method's prolog code.
-void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {
-
- // WARNING: Initial instruction MUST be 5 bytes or longer so that
- // NativeJump::patch_verified_entry will be able to patch out the entry
- // code safely. The push to verify stack depth is ok at 5 bytes,
- // the frame allocation can be either 3 or 6 bytes. So if we don't do
- // stack bang then we must use the 6 byte frame allocation even if
- // we have no frame. :-(
-
- assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
- // Remove word for return addr
- framesize -= wordSize;
-
- // Calls to C2R adapters often do not accept exceptional returns.
- // We require that their callers must bang for them. But be careful, because
- // some VM calls (such as call site linkage) can use several kilobytes of
- // stack. But the stack safety zone should account for that.
- // See bugs 4446381, 4468289, 4497237.
- if (stack_bang) {
- generate_stack_overflow_check(framesize);
-
- // We always push rbp, so that on return to interpreter rbp, will be
- // restored correctly and we can correct the stack.
- push(rbp);
- // Remove word for ebp
- framesize -= wordSize;
-
- // Create frame
- if (framesize) {
- subptr(rsp, framesize);
- }
- } else {
- // Create frame (force generation of a 4 byte immediate value)
- subptr_imm32(rsp, framesize);
-
- // Save RBP register now.
- framesize -= wordSize;
- movptr(Address(rsp, framesize), rbp);
- }
-
- if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
- framesize -= wordSize;
- movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
- }
-
-#ifndef _LP64
- // If method sets FPU control word do it now
- if (fp_mode_24b) {
- fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
- }
- if (UseSSE >= 2 && VerifyFPU) {
- verify_FPU(0, "FPU stack must be clean on entry");
- }
-#endif
-
-#ifdef ASSERT
- if (VerifyStackAtCalls) {
- Label L;
- push(rax);
- mov(rax, rsp);
- andptr(rax, StackAlignmentInBytes-1);
- cmpptr(rax, StackAlignmentInBytes-wordSize);
- pop(rax);
- jcc(Assembler::equal, L);
- STOP("Stack is not properly aligned!");
- bind(L);
- }
-#endif
-
-}
-
-
-// IndexOf for constant substrings with size >= 8 chars
-// which don't need to be loaded through stack.
-void MacroAssembler::string_indexofC8(Register str1, Register str2,
- Register cnt1, Register cnt2,
- int int_cnt2, Register result,
- XMMRegister vec, Register tmp) {
- ShortBranchVerifier sbv(this);
- assert(UseSSE42Intrinsics, "SSE4.2 is required");
-
- // This method uses pcmpestri inxtruction with bound registers
- // inputs:
- // xmm - substring
- // rax - substring length (elements count)
- // mem - scanned string
- // rdx - string length (elements count)
- // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
- // outputs:
- // rcx - matched index in string
- assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
-
- Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
- RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
- MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
-
- // Note, inline_string_indexOf() generates checks:
- // if (substr.count > string.count) return -1;
- // if (substr.count == 0) return 0;
- assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");
-
- // Load substring.
- movdqu(vec, Address(str2, 0));
- movl(cnt2, int_cnt2);
- movptr(result, str1); // string addr
-
- if (int_cnt2 > 8) {
- jmpb(SCAN_TO_SUBSTR);
-
- // Reload substr for rescan, this code
- // is executed only for large substrings (> 8 chars)
- bind(RELOAD_SUBSTR);
- movdqu(vec, Address(str2, 0));
- negptr(cnt2); // Jumped here with negative cnt2, convert to positive
-
- bind(RELOAD_STR);
- // We came here after the beginning of the substring was
- // matched but the rest of it was not so we need to search
- // again. Start from the next element after the previous match.
-
- // cnt2 is number of substring reminding elements and
- // cnt1 is number of string reminding elements when cmp failed.
- // Restored cnt1 = cnt1 - cnt2 + int_cnt2
- subl(cnt1, cnt2);
- addl(cnt1, int_cnt2);
- movl(cnt2, int_cnt2); // Now restore cnt2
-
- decrementl(cnt1); // Shift to next element
- cmpl(cnt1, cnt2);
- jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
-
- addptr(result, 2);
-
- } // (int_cnt2 > 8)
-
- // Scan string for start of substr in 16-byte vectors
- bind(SCAN_TO_SUBSTR);
- pcmpestri(vec, Address(result, 0), 0x0d);
- jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
- subl(cnt1, 8);
- jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
- cmpl(cnt1, cnt2);
- jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
- addptr(result, 16);
- jmpb(SCAN_TO_SUBSTR);
-
- // Found a potential substr
- bind(FOUND_CANDIDATE);
- // Matched whole vector if first element matched (tmp(rcx) == 0).
- if (int_cnt2 == 8) {
- jccb(Assembler::overflow, RET_FOUND); // OF == 1
- } else { // int_cnt2 > 8
- jccb(Assembler::overflow, FOUND_SUBSTR);
- }
- // After pcmpestri tmp(rcx) contains matched element index
- // Compute start addr of substr
- lea(result, Address(result, tmp, Address::times_2));
-
- // Make sure string is still long enough
- subl(cnt1, tmp);
- cmpl(cnt1, cnt2);
- if (int_cnt2 == 8) {
- jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
- } else { // int_cnt2 > 8
- jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
- }
- // Left less then substring.
-
- bind(RET_NOT_FOUND);
- movl(result, -1);
- jmpb(EXIT);
-
- if (int_cnt2 > 8) {
- // This code is optimized for the case when whole substring
- // is matched if its head is matched.
- bind(MATCH_SUBSTR_HEAD);
- pcmpestri(vec, Address(result, 0), 0x0d);
- // Reload only string if does not match
- jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
-
- Label CONT_SCAN_SUBSTR;
- // Compare the rest of substring (> 8 chars).
- bind(FOUND_SUBSTR);
- // First 8 chars are already matched.
- negptr(cnt2);
- addptr(cnt2, 8);
-
- bind(SCAN_SUBSTR);
- subl(cnt1, 8);
- cmpl(cnt2, -8); // Do not read beyond substring
- jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
- // Back-up strings to avoid reading beyond substring:
- // cnt1 = cnt1 - cnt2 + 8
- addl(cnt1, cnt2); // cnt2 is negative
- addl(cnt1, 8);
- movl(cnt2, 8); negptr(cnt2);
- bind(CONT_SCAN_SUBSTR);
- if (int_cnt2 < (int)G) {
- movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
- pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
- } else {
- // calculate index in register to avoid integer overflow (int_cnt2*2)
- movl(tmp, int_cnt2);
- addptr(tmp, cnt2);
- movdqu(vec, Address(str2, tmp, Address::times_2, 0));
- pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
- }
- // Need to reload strings pointers if not matched whole vector
- jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
- addptr(cnt2, 8);
- jcc(Assembler::negative, SCAN_SUBSTR);
- // Fall through if found full substring
-
- } // (int_cnt2 > 8)
-
- bind(RET_FOUND);
- // Found result if we matched full small substring.
- // Compute substr offset
- subptr(result, str1);
- shrl(result, 1); // index
- bind(EXIT);
-
-} // string_indexofC8
-
-// Small strings are loaded through stack if they cross page boundary.
-void MacroAssembler::string_indexof(Register str1, Register str2,
- Register cnt1, Register cnt2,
- int int_cnt2, Register result,
- XMMRegister vec, Register tmp) {
- ShortBranchVerifier sbv(this);
- assert(UseSSE42Intrinsics, "SSE4.2 is required");
- //
- // int_cnt2 is length of small (< 8 chars) constant substring
- // or (-1) for non constant substring in which case its length
- // is in cnt2 register.
- //
- // Note, inline_string_indexOf() generates checks:
- // if (substr.count > string.count) return -1;
- // if (substr.count == 0) return 0;
- //
- assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
-
- // This method uses pcmpestri inxtruction with bound registers
- // inputs:
- // xmm - substring
- // rax - substring length (elements count)
- // mem - scanned string
- // rdx - string length (elements count)
- // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
- // outputs:
- // rcx - matched index in string
- assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
-
- Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
- RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
- FOUND_CANDIDATE;
-
- { //========================================================
- // We don't know where these strings are located
- // and we can't read beyond them. Load them through stack.
- Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
-
- movptr(tmp, rsp); // save old SP
-
- if (int_cnt2 > 0) { // small (< 8 chars) constant substring
- if (int_cnt2 == 1) { // One char
- load_unsigned_short(result, Address(str2, 0));
- movdl(vec, result); // move 32 bits
- } else if (int_cnt2 == 2) { // Two chars
- movdl(vec, Address(str2, 0)); // move 32 bits
- } else if (int_cnt2 == 4) { // Four chars
- movq(vec, Address(str2, 0)); // move 64 bits
- } else { // cnt2 = { 3, 5, 6, 7 }
- // Array header size is 12 bytes in 32-bit VM
- // + 6 bytes for 3 chars == 18 bytes,
- // enough space to load vec and shift.
- assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
- movdqu(vec, Address(str2, (int_cnt2*2)-16));
- psrldq(vec, 16-(int_cnt2*2));
- }
- } else { // not constant substring
- cmpl(cnt2, 8);
- jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
-
- // We can read beyond string if srt+16 does not cross page boundary
- // since heaps are aligned and mapped by pages.
- assert(os::vm_page_size() < (int)G, "default page should be small");
- movl(result, str2); // We need only low 32 bits
- andl(result, (os::vm_page_size()-1));
- cmpl(result, (os::vm_page_size()-16));
- jccb(Assembler::belowEqual, CHECK_STR);
-
- // Move small strings to stack to allow load 16 bytes into vec.
- subptr(rsp, 16);
- int stk_offset = wordSize-2;
- push(cnt2);
-
- bind(COPY_SUBSTR);
- load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
- movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
- decrement(cnt2);
- jccb(Assembler::notZero, COPY_SUBSTR);
-
- pop(cnt2);
- movptr(str2, rsp); // New substring address
- } // non constant
-
- bind(CHECK_STR);
- cmpl(cnt1, 8);
- jccb(Assembler::aboveEqual, BIG_STRINGS);
-
- // Check cross page boundary.
- movl(result, str1); // We need only low 32 bits
- andl(result, (os::vm_page_size()-1));
- cmpl(result, (os::vm_page_size()-16));
- jccb(Assembler::belowEqual, BIG_STRINGS);
-
- subptr(rsp, 16);
- int stk_offset = -2;
- if (int_cnt2 < 0) { // not constant
- push(cnt2);
- stk_offset += wordSize;
- }
- movl(cnt2, cnt1);
-
- bind(COPY_STR);
- load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
- movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
- decrement(cnt2);
- jccb(Assembler::notZero, COPY_STR);
-
- if (int_cnt2 < 0) { // not constant
- pop(cnt2);
- }
- movptr(str1, rsp); // New string address
-
- bind(BIG_STRINGS);
- // Load substring.
- if (int_cnt2 < 0) { // -1
- movdqu(vec, Address(str2, 0));
- push(cnt2); // substr count
- push(str2); // substr addr
- push(str1); // string addr
- } else {
- // Small (< 8 chars) constant substrings are loaded already.
- movl(cnt2, int_cnt2);
- }
- push(tmp); // original SP
-
- } // Finished loading
-
- //========================================================
- // Start search
- //
-
- movptr(result, str1); // string addr
-
- if (int_cnt2 < 0) { // Only for non constant substring
- jmpb(SCAN_TO_SUBSTR);
-
- // SP saved at sp+0
- // String saved at sp+1*wordSize
- // Substr saved at sp+2*wordSize
- // Substr count saved at sp+3*wordSize
-
- // Reload substr for rescan, this code
- // is executed only for large substrings (> 8 chars)
- bind(RELOAD_SUBSTR);
- movptr(str2, Address(rsp, 2*wordSize));
- movl(cnt2, Address(rsp, 3*wordSize));
- movdqu(vec, Address(str2, 0));
- // We came here after the beginning of the substring was
- // matched but the rest of it was not so we need to search
- // again. Start from the next element after the previous match.
- subptr(str1, result); // Restore counter
- shrl(str1, 1);
- addl(cnt1, str1);
- decrementl(cnt1); // Shift to next element
- cmpl(cnt1, cnt2);
- jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
-
- addptr(result, 2);
- } // non constant
-
- // Scan string for start of substr in 16-byte vectors
- bind(SCAN_TO_SUBSTR);
- assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
- pcmpestri(vec, Address(result, 0), 0x0d);
- jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
- subl(cnt1, 8);
- jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
- cmpl(cnt1, cnt2);
- jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
- addptr(result, 16);
-
- bind(ADJUST_STR);
- cmpl(cnt1, 8); // Do not read beyond string
- jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
- // Back-up string to avoid reading beyond string.
- lea(result, Address(result, cnt1, Address::times_2, -16));
- movl(cnt1, 8);
- jmpb(SCAN_TO_SUBSTR);
-
- // Found a potential substr
- bind(FOUND_CANDIDATE);
- // After pcmpestri tmp(rcx) contains matched element index
-
- // Make sure string is still long enough
- subl(cnt1, tmp);
- cmpl(cnt1, cnt2);
- jccb(Assembler::greaterEqual, FOUND_SUBSTR);
- // Left less then substring.
-
- bind(RET_NOT_FOUND);
- movl(result, -1);
- jmpb(CLEANUP);
-
- bind(FOUND_SUBSTR);
- // Compute start addr of substr
- lea(result, Address(result, tmp, Address::times_2));
-
- if (int_cnt2 > 0) { // Constant substring
- // Repeat search for small substring (< 8 chars)
- // from new point without reloading substring.
- // Have to check that we don't read beyond string.
- cmpl(tmp, 8-int_cnt2);
- jccb(Assembler::greater, ADJUST_STR);
- // Fall through if matched whole substring.
- } else { // non constant
- assert(int_cnt2 == -1, "should be != 0");
-
- addl(tmp, cnt2);
- // Found result if we matched whole substring.
- cmpl(tmp, 8);
- jccb(Assembler::lessEqual, RET_FOUND);
-
- // Repeat search for small substring (<= 8 chars)
- // from new point 'str1' without reloading substring.
- cmpl(cnt2, 8);
- // Have to check that we don't read beyond string.
- jccb(Assembler::lessEqual, ADJUST_STR);
-
- Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
- // Compare the rest of substring (> 8 chars).
- movptr(str1, result);
-
- cmpl(tmp, cnt2);
- // First 8 chars are already matched.
- jccb(Assembler::equal, CHECK_NEXT);
-
- bind(SCAN_SUBSTR);
- pcmpestri(vec, Address(str1, 0), 0x0d);
- // Need to reload strings pointers if not matched whole vector
- jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
-
- bind(CHECK_NEXT);
- subl(cnt2, 8);
- jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
- addptr(str1, 16);
- addptr(str2, 16);
- subl(cnt1, 8);
- cmpl(cnt2, 8); // Do not read beyond substring
- jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
- // Back-up strings to avoid reading beyond substring.
- lea(str2, Address(str2, cnt2, Address::times_2, -16));
- lea(str1, Address(str1, cnt2, Address::times_2, -16));
- subl(cnt1, cnt2);
- movl(cnt2, 8);
- addl(cnt1, 8);
- bind(CONT_SCAN_SUBSTR);
- movdqu(vec, Address(str2, 0));
- jmpb(SCAN_SUBSTR);
-
- bind(RET_FOUND_LONG);
- movptr(str1, Address(rsp, wordSize));
- } // non constant
-
- bind(RET_FOUND);
- // Compute substr offset
- subptr(result, str1);
- shrl(result, 1); // index
-
- bind(CLEANUP);
- pop(rsp); // restore SP
-
-} // string_indexof
-
-// Compare strings.
-void MacroAssembler::string_compare(Register str1, Register str2,
- Register cnt1, Register cnt2, Register result,
- XMMRegister vec1) {
- ShortBranchVerifier sbv(this);
- Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
-
- // Compute the minimum of the string lengths and the
- // difference of the string lengths (stack).
- // Do the conditional move stuff
- movl(result, cnt1);
- subl(cnt1, cnt2);
- push(cnt1);
- cmov32(Assembler::lessEqual, cnt2, result);
-
- // Is the minimum length zero?
- testl(cnt2, cnt2);
- jcc(Assembler::zero, LENGTH_DIFF_LABEL);
-
- // Load first characters
- load_unsigned_short(result, Address(str1, 0));
- load_unsigned_short(cnt1, Address(str2, 0));
-
- // Compare first characters
- subl(result, cnt1);
- jcc(Assembler::notZero, POP_LABEL);
- decrementl(cnt2);
- jcc(Assembler::zero, LENGTH_DIFF_LABEL);
-
- {
- // Check after comparing first character to see if strings are equivalent
- Label LSkip2;
- // Check if the strings start at same location
- cmpptr(str1, str2);
- jccb(Assembler::notEqual, LSkip2);
-
- // Check if the length difference is zero (from stack)
- cmpl(Address(rsp, 0), 0x0);
- jcc(Assembler::equal, LENGTH_DIFF_LABEL);
-
- // Strings might not be equivalent
- bind(LSkip2);
- }
-
- Address::ScaleFactor scale = Address::times_2;
- int stride = 8;
-
- // Advance to next element
- addptr(str1, 16/stride);
- addptr(str2, 16/stride);
-
- if (UseSSE42Intrinsics) {
- Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
- int pcmpmask = 0x19;
- // Setup to compare 16-byte vectors
- movl(result, cnt2);
- andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
- jccb(Assembler::zero, COMPARE_TAIL);
-
- lea(str1, Address(str1, result, scale));
- lea(str2, Address(str2, result, scale));
- negptr(result);
-
- // pcmpestri
- // inputs:
- // vec1- substring
- // rax - negative string length (elements count)
- // mem - scaned string
- // rdx - string length (elements count)
- // pcmpmask - cmp mode: 11000 (string compare with negated result)
- // + 00 (unsigned bytes) or + 01 (unsigned shorts)
- // outputs:
- // rcx - first mismatched element index
- assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
-
- bind(COMPARE_WIDE_VECTORS);
- movdqu(vec1, Address(str1, result, scale));
- pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
- // After pcmpestri cnt1(rcx) contains mismatched element index
-
- jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1
- addptr(result, stride);
- subptr(cnt2, stride);
- jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
-
- // compare wide vectors tail
- testl(result, result);
- jccb(Assembler::zero, LENGTH_DIFF_LABEL);
-
- movl(cnt2, stride);
- movl(result, stride);
- negptr(result);
- movdqu(vec1, Address(str1, result, scale));
- pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
- jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
-
- // Mismatched characters in the vectors
- bind(VECTOR_NOT_EQUAL);
- addptr(result, cnt1);
- movptr(cnt2, result);
- load_unsigned_short(result, Address(str1, cnt2, scale));
- load_unsigned_short(cnt1, Address(str2, cnt2, scale));
- subl(result, cnt1);
- jmpb(POP_LABEL);
-
- bind(COMPARE_TAIL); // limit is zero
- movl(cnt2, result);
- // Fallthru to tail compare
- }
-
- // Shift str2 and str1 to the end of the arrays, negate min
- lea(str1, Address(str1, cnt2, scale, 0));
- lea(str2, Address(str2, cnt2, scale, 0));
- negptr(cnt2);
-
- // Compare the rest of the elements
- bind(WHILE_HEAD_LABEL);
- load_unsigned_short(result, Address(str1, cnt2, scale, 0));
- load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
- subl(result, cnt1);
- jccb(Assembler::notZero, POP_LABEL);
- increment(cnt2);
- jccb(Assembler::notZero, WHILE_HEAD_LABEL);
-
- // Strings are equal up to min length. Return the length difference.
- bind(LENGTH_DIFF_LABEL);
- pop(result);
- jmpb(DONE_LABEL);
-
- // Discard the stored length difference
- bind(POP_LABEL);
- pop(cnt1);
-
- // That's it
- bind(DONE_LABEL);
-}
-
-// Compare char[] arrays aligned to 4 bytes or substrings.
-void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
- Register limit, Register result, Register chr,
- XMMRegister vec1, XMMRegister vec2) {
- ShortBranchVerifier sbv(this);
- Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
-
- int length_offset = arrayOopDesc::length_offset_in_bytes();
- int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
-
- // Check the input args
- cmpptr(ary1, ary2);
- jcc(Assembler::equal, TRUE_LABEL);
-
- if (is_array_equ) {
- // Need additional checks for arrays_equals.
- testptr(ary1, ary1);
- jcc(Assembler::zero, FALSE_LABEL);
- testptr(ary2, ary2);
- jcc(Assembler::zero, FALSE_LABEL);
-
- // Check the lengths
- movl(limit, Address(ary1, length_offset));
- cmpl(limit, Address(ary2, length_offset));
- jcc(Assembler::notEqual, FALSE_LABEL);
- }
-
- // count == 0
- testl(limit, limit);
- jcc(Assembler::zero, TRUE_LABEL);
-
- if (is_array_equ) {
- // Load array address
- lea(ary1, Address(ary1, base_offset));
- lea(ary2, Address(ary2, base_offset));
- }
-
- shll(limit, 1); // byte count != 0
- movl(result, limit); // copy
-
- if (UseSSE42Intrinsics) {
- // With SSE4.2, use double quad vector compare
- Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
-
- // Compare 16-byte vectors
- andl(result, 0x0000000e); // tail count (in bytes)
- andl(limit, 0xfffffff0); // vector count (in bytes)
- jccb(Assembler::zero, COMPARE_TAIL);
-
- lea(ary1, Address(ary1, limit, Address::times_1));
- lea(ary2, Address(ary2, limit, Address::times_1));
- negptr(limit);
-
- bind(COMPARE_WIDE_VECTORS);
- movdqu(vec1, Address(ary1, limit, Address::times_1));
- movdqu(vec2, Address(ary2, limit, Address::times_1));
- pxor(vec1, vec2);
-
- ptest(vec1, vec1);
- jccb(Assembler::notZero, FALSE_LABEL);
- addptr(limit, 16);
- jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
-
- testl(result, result);
- jccb(Assembler::zero, TRUE_LABEL);
-
- movdqu(vec1, Address(ary1, result, Address::times_1, -16));
- movdqu(vec2, Address(ary2, result, Address::times_1, -16));
- pxor(vec1, vec2);
-
- ptest(vec1, vec1);
- jccb(Assembler::notZero, FALSE_LABEL);
- jmpb(TRUE_LABEL);
-
- bind(COMPARE_TAIL); // limit is zero
- movl(limit, result);
- // Fallthru to tail compare
- }
-
- // Compare 4-byte vectors
- andl(limit, 0xfffffffc); // vector count (in bytes)
- jccb(Assembler::zero, COMPARE_CHAR);
-
- lea(ary1, Address(ary1, limit, Address::times_1));
- lea(ary2, Address(ary2, limit, Address::times_1));
- negptr(limit);
-
- bind(COMPARE_VECTORS);
- movl(chr, Address(ary1, limit, Address::times_1));
- cmpl(chr, Address(ary2, limit, Address::times_1));
- jccb(Assembler::notEqual, FALSE_LABEL);
- addptr(limit, 4);
- jcc(Assembler::notZero, COMPARE_VECTORS);
-
- // Compare trailing char (final 2 bytes), if any
- bind(COMPARE_CHAR);
- testl(result, 0x2); // tail char
- jccb(Assembler::zero, TRUE_LABEL);
- load_unsigned_short(chr, Address(ary1, 0));
- load_unsigned_short(limit, Address(ary2, 0));
- cmpl(chr, limit);
- jccb(Assembler::notEqual, FALSE_LABEL);
-
- bind(TRUE_LABEL);
- movl(result, 1); // return true
- jmpb(DONE);
-
- bind(FALSE_LABEL);
- xorl(result, result); // return false
-
- // That's it
- bind(DONE);
-}
-
-void MacroAssembler::generate_fill(BasicType t, bool aligned,
- Register to, Register value, Register count,
- Register rtmp, XMMRegister xtmp) {
- ShortBranchVerifier sbv(this);
- assert_different_registers(to, value, count, rtmp);
- Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
- Label L_fill_2_bytes, L_fill_4_bytes;
-
- int shift = -1;
- switch (t) {
- case T_BYTE:
- shift = 2;
- break;
- case T_SHORT:
- shift = 1;
- break;
- case T_INT:
- shift = 0;
- break;
- default: ShouldNotReachHere();
- }
-
- if (t == T_BYTE) {
- andl(value, 0xff);
- movl(rtmp, value);
- shll(rtmp, 8);
- orl(value, rtmp);
- }
- if (t == T_SHORT) {
- andl(value, 0xffff);
- }
- if (t == T_BYTE || t == T_SHORT) {
- movl(rtmp, value);
- shll(rtmp, 16);
- orl(value, rtmp);
- }
-
- cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
- jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
- if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
- // align source address at 4 bytes address boundary
- if (t == T_BYTE) {
- // One byte misalignment happens only for byte arrays
- testptr(to, 1);
- jccb(Assembler::zero, L_skip_align1);
- movb(Address(to, 0), value);
- increment(to);
- decrement(count);
- BIND(L_skip_align1);
- }
- // Two bytes misalignment happens only for byte and short (char) arrays
- testptr(to, 2);
- jccb(Assembler::zero, L_skip_align2);
- movw(Address(to, 0), value);
- addptr(to, 2);
- subl(count, 1<<(shift-1));
- BIND(L_skip_align2);
- }
- if (UseSSE < 2) {
- Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
- // Fill 32-byte chunks
- subl(count, 8 << shift);
- jcc(Assembler::less, L_check_fill_8_bytes);
- align(16);
-
- BIND(L_fill_32_bytes_loop);
-
- for (int i = 0; i < 32; i += 4) {
- movl(Address(to, i), value);
- }
-
- addptr(to, 32);
- subl(count, 8 << shift);
- jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
- BIND(L_check_fill_8_bytes);
- addl(count, 8 << shift);
- jccb(Assembler::zero, L_exit);
- jmpb(L_fill_8_bytes);
-
- //
- // length is too short, just fill qwords
- //
- BIND(L_fill_8_bytes_loop);
- movl(Address(to, 0), value);
- movl(Address(to, 4), value);
- addptr(to, 8);
- BIND(L_fill_8_bytes);
- subl(count, 1 << (shift + 1));
- jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
- // fall through to fill 4 bytes
- } else {
- Label L_fill_32_bytes;
- if (!UseUnalignedLoadStores) {
- // align to 8 bytes, we know we are 4 byte aligned to start
- testptr(to, 4);
- jccb(Assembler::zero, L_fill_32_bytes);
- movl(Address(to, 0), value);
- addptr(to, 4);
- subl(count, 1<<shift);
- }
- BIND(L_fill_32_bytes);
- {
- assert( UseSSE >= 2, "supported cpu only" );
- Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
- // Fill 32-byte chunks
- movdl(xtmp, value);
- pshufd(xtmp, xtmp, 0);
-
- subl(count, 8 << shift);
- jcc(Assembler::less, L_check_fill_8_bytes);
- align(16);
-
- BIND(L_fill_32_bytes_loop);
-
- if (UseUnalignedLoadStores) {
- movdqu(Address(to, 0), xtmp);
- movdqu(Address(to, 16), xtmp);
- } else {
- movq(Address(to, 0), xtmp);
- movq(Address(to, 8), xtmp);
- movq(Address(to, 16), xtmp);
- movq(Address(to, 24), xtmp);
- }
-
- addptr(to, 32);
- subl(count, 8 << shift);
- jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
- BIND(L_check_fill_8_bytes);
- addl(count, 8 << shift);
- jccb(Assembler::zero, L_exit);
- jmpb(L_fill_8_bytes);
-
- //
- // length is too short, just fill qwords
- //
- BIND(L_fill_8_bytes_loop);
- movq(Address(to, 0), xtmp);
- addptr(to, 8);
- BIND(L_fill_8_bytes);
- subl(count, 1 << (shift + 1));
- jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
- }
- }
- // fill trailing 4 bytes
- BIND(L_fill_4_bytes);
- testl(count, 1<<shift);
- jccb(Assembler::zero, L_fill_2_bytes);
- movl(Address(to, 0), value);
- if (t == T_BYTE || t == T_SHORT) {
- addptr(to, 4);
- BIND(L_fill_2_bytes);
- // fill trailing 2 bytes
- testl(count, 1<<(shift-1));
- jccb(Assembler::zero, L_fill_byte);
- movw(Address(to, 0), value);
- if (t == T_BYTE) {
- addptr(to, 2);
- BIND(L_fill_byte);
- // fill trailing byte
- testl(count, 1);
- jccb(Assembler::zero, L_exit);
- movb(Address(to, 0), value);
- } else {
- BIND(L_fill_byte);
- }
- } else {
- BIND(L_fill_2_bytes);
- }
- BIND(L_exit);
-}
-#undef BIND
-#undef BLOCK_COMMENT
-
-
-Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
- switch (cond) {
- // Note some conditions are synonyms for others
- case Assembler::zero: return Assembler::notZero;
- case Assembler::notZero: return Assembler::zero;
- case Assembler::less: return Assembler::greaterEqual;
- case Assembler::lessEqual: return Assembler::greater;
- case Assembler::greater: return Assembler::lessEqual;
- case Assembler::greaterEqual: return Assembler::less;
- case Assembler::below: return Assembler::aboveEqual;
- case Assembler::belowEqual: return Assembler::above;
- case Assembler::above: return Assembler::belowEqual;
- case Assembler::aboveEqual: return Assembler::below;
- case Assembler::overflow: return Assembler::noOverflow;
- case Assembler::noOverflow: return Assembler::overflow;
- case Assembler::negative: return Assembler::positive;
- case Assembler::positive: return Assembler::negative;
- case Assembler::parity: return Assembler::noParity;
- case Assembler::noParity: return Assembler::parity;
- }
- ShouldNotReachHere(); return Assembler::overflow;
-}
-
-SkipIfEqual::SkipIfEqual(
- MacroAssembler* masm, const bool* flag_addr, bool value) {
- _masm = masm;
- _masm->cmp8(ExternalAddress((address)flag_addr), value);
- _masm->jcc(Assembler::equal, _label);
-}
-
-SkipIfEqual::~SkipIfEqual() {
- _masm->bind(_label);
-}
diff --git a/src/cpu/x86/vm/assembler_x86.hpp b/src/cpu/x86/vm/assembler_x86.hpp
index 8a9bbaf42..a48aeda8d 100644
--- a/src/cpu/x86/vm/assembler_x86.hpp
+++ b/src/cpu/x86/vm/assembler_x86.hpp
@@ -25,6 +25,8 @@
#ifndef CPU_X86_VM_ASSEMBLER_X86_HPP
#define CPU_X86_VM_ASSEMBLER_X86_HPP
+#include "asm/register.hpp"
+
class BiasedLockingCounters;
// Contains all the definitions needed for x86 assembly code generation.
@@ -706,8 +708,6 @@ private:
void check_relocation(RelocationHolder const& rspec, int format);
#endif
- inline void emit_long64(jlong x);
-
void emit_data(jint data, relocInfo::relocType rtype, int format);
void emit_data(jint data, RelocationHolder const& rspec, int format);
void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
@@ -916,7 +916,7 @@ private:
void cdqq();
- void cld() { emit_byte(0xfc); }
+ void cld();
void clflush(Address adr);
@@ -963,10 +963,7 @@ private:
void comiss(XMMRegister dst, XMMRegister src);
// Identify processor type and features
- void cpuid() {
- emit_byte(0x0F);
- emit_byte(0xA2);
- }
+ void cpuid();
// Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
void cvtsd2ss(XMMRegister dst, XMMRegister src);
@@ -1211,11 +1208,7 @@ private:
void leaq(Register dst, Address src);
- void lfence() {
- emit_byte(0x0F);
- emit_byte(0xAE);
- emit_byte(0xE8);
- }
+ void lfence();
void lock();
@@ -1523,7 +1516,7 @@ private:
void sqrtss(XMMRegister dst, Address src);
void sqrtss(XMMRegister dst, XMMRegister src);
- void std() { emit_byte(0xfd); }
+ void std();
void stmxcsr( Address dst );
@@ -1580,11 +1573,7 @@ private:
void xchgq(Register dst, Register src);
// Get Value of Extended Control Register
- void xgetbv() {
- emit_byte(0x0F);
- emit_byte(0x01);
- emit_byte(0xD0);
- }
+ void xgetbv();
void xorl(Register dst, int32_t imm32);
void xorl(Register dst, Address src);
@@ -1781,1114 +1770,4 @@ private:
};
-
-// MacroAssembler extends Assembler by frequently used macros.
-//
-// Instructions for which a 'better' code sequence exists depending
-// on arguments should also go in here.
-
-class MacroAssembler: public Assembler {
- friend class LIR_Assembler;
- friend class Runtime1; // as_Address()
-
- protected:
-
- Address as_Address(AddressLiteral adr);
- Address as_Address(ArrayAddress adr);
-
- // Support for VM calls
- //
- // This is the base routine called by the different versions of call_VM_leaf. The interpreter
- // may customize this version by overriding it for its purposes (e.g., to save/restore
- // additional registers when doing a VM call).
-#ifdef CC_INTERP
- // c++ interpreter never wants to use interp_masm version of call_VM
- #define VIRTUAL
-#else
- #define VIRTUAL virtual
-#endif
-
- VIRTUAL void call_VM_leaf_base(
- address entry_point, // the entry point
- int number_of_arguments // the number of arguments to pop after the call
- );
-
- // This is the base routine called by the different versions of call_VM. The interpreter
- // may customize this version by overriding it for its purposes (e.g., to save/restore
- // additional registers when doing a VM call).
- //
- // If no java_thread register is specified (noreg) than rdi will be used instead. call_VM_base
- // returns the register which contains the thread upon return. If a thread register has been
- // specified, the return value will correspond to that register. If no last_java_sp is specified
- // (noreg) than rsp will be used instead.
- VIRTUAL void call_VM_base( // returns the register containing the thread upon return
- Register oop_result, // where an oop-result ends up if any; use noreg otherwise
- Register java_thread, // the thread if computed before ; use noreg otherwise
- Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise
- address entry_point, // the entry point
- int number_of_arguments, // the number of arguments (w/o thread) to pop after the call
- bool check_exceptions // whether to check for pending exceptions after return
- );
-
- // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
- // The implementation is only non-empty for the InterpreterMacroAssembler,
- // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
- virtual void check_and_handle_popframe(Register java_thread);
- virtual void check_and_handle_earlyret(Register java_thread);
-
- void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
-
- // helpers for FPU flag access
- // tmp is a temporary register, if none is available use noreg
- void save_rax (Register tmp);
- void restore_rax(Register tmp);
-
- public:
- MacroAssembler(CodeBuffer* code) : Assembler(code) {}
-
- // Support for NULL-checks
- //
- // Generates code that causes a NULL OS exception if the content of reg is NULL.
- // If the accessed location is M[reg + offset] and the offset is known, provide the
- // offset. No explicit code generation is needed if the offset is within a certain
- // range (0 <= offset <= page_size).
-
- void null_check(Register reg, int offset = -1);
- static bool needs_explicit_null_check(intptr_t offset);
-
- // Required platform-specific helpers for Label::patch_instructions.
- // They _shadow_ the declarations in AbstractAssembler, which are undefined.
- void pd_patch_instruction(address branch, address target);
-#ifndef PRODUCT
- static void pd_print_patched_instruction(address branch);
-#endif
-
- // The following 4 methods return the offset of the appropriate move instruction
-
- // Support for fast byte/short loading with zero extension (depending on particular CPU)
- int load_unsigned_byte(Register dst, Address src);
- int load_unsigned_short(Register dst, Address src);
-
- // Support for fast byte/short loading with sign extension (depending on particular CPU)
- int load_signed_byte(Register dst, Address src);
- int load_signed_short(Register dst, Address src);
-
- // Support for sign-extension (hi:lo = extend_sign(lo))
- void extend_sign(Register hi, Register lo);
-
- // Load and store values by size and signed-ness
- void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
- void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
-
- // Support for inc/dec with optimal instruction selection depending on value
-
- void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
- void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; }
-
- void decrementl(Address dst, int value = 1);
- void decrementl(Register reg, int value = 1);
-
- void decrementq(Register reg, int value = 1);
- void decrementq(Address dst, int value = 1);
-
- void incrementl(Address dst, int value = 1);
- void incrementl(Register reg, int value = 1);
-
- void incrementq(Register reg, int value = 1);
- void incrementq(Address dst, int value = 1);
-
-
- // Support optimal SSE move instructions.
- void movflt(XMMRegister dst, XMMRegister src) {
- if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
- else { movss (dst, src); return; }
- }
- void movflt(XMMRegister dst, Address src) { movss(dst, src); }
- void movflt(XMMRegister dst, AddressLiteral src);
- void movflt(Address dst, XMMRegister src) { movss(dst, src); }
-
- void movdbl(XMMRegister dst, XMMRegister src) {
- if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; }
- else { movsd (dst, src); return; }
- }
-
- void movdbl(XMMRegister dst, AddressLiteral src);
-
- void movdbl(XMMRegister dst, Address src) {
- if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; }
- else { movlpd(dst, src); return; }
- }
- void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }
-
- void incrementl(AddressLiteral dst);
- void incrementl(ArrayAddress dst);
-
- // Alignment
- void align(int modulus);
-
- // A 5 byte nop that is safe for patching (see patch_verified_entry)
- void fat_nop();
-
- // Stack frame creation/removal
- void enter();
- void leave();
-
- // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information)
- // The pointer will be loaded into the thread register.
- void get_thread(Register thread);
-
-
- // Support for VM calls
- //
- // It is imperative that all calls into the VM are handled via the call_VM macros.
- // They make sure that the stack linkage is setup correctly. call_VM's correspond
- // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
-
-
- void call_VM(Register oop_result,
- address entry_point,
- bool check_exceptions = true);
- void call_VM(Register oop_result,
- address entry_point,
- Register arg_1,
- bool check_exceptions = true);
- void call_VM(Register oop_result,
- address entry_point,
- Register arg_1, Register arg_2,
- bool check_exceptions = true);
- void call_VM(Register oop_result,
- address entry_point,
- Register arg_1, Register arg_2, Register arg_3,
- bool check_exceptions = true);
-
- // Overloadings with last_Java_sp
- void call_VM(Register oop_result,
- Register last_java_sp,
- address entry_point,
- int number_of_arguments = 0,
- bool check_exceptions = true);
- void call_VM(Register oop_result,
- Register last_java_sp,
- address entry_point,
- Register arg_1, bool
- check_exceptions = true);
- void call_VM(Register oop_result,
- Register last_java_sp,
- address entry_point,
- Register arg_1, Register arg_2,
- bool check_exceptions = true);
- void call_VM(Register oop_result,
- Register last_java_sp,
- address entry_point,
- Register arg_1, Register arg_2, Register arg_3,
- bool check_exceptions = true);
-
- void get_vm_result (Register oop_result, Register thread);
- void get_vm_result_2(Register metadata_result, Register thread);
-
- // These always tightly bind to MacroAssembler::call_VM_base
- // bypassing the virtual implementation
- void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
- void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
- void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
- void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
- void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);
-
- void call_VM_leaf(address entry_point,
- int number_of_arguments = 0);
- void call_VM_leaf(address entry_point,
- Register arg_1);
- void call_VM_leaf(address entry_point,
- Register arg_1, Register arg_2);
- void call_VM_leaf(address entry_point,
- Register arg_1, Register arg_2, Register arg_3);
-
- // These always tightly bind to MacroAssembler::call_VM_leaf_base
- // bypassing the virtual implementation
- void super_call_VM_leaf(address entry_point);
- void super_call_VM_leaf(address entry_point, Register arg_1);
- void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
- void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
- void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);
-
- // last Java Frame (fills frame anchor)
- void set_last_Java_frame(Register thread,
- Register last_java_sp,
- Register last_java_fp,
- address last_java_pc);
-
- // thread in the default location (r15_thread on 64bit)
- void set_last_Java_frame(Register last_java_sp,
- Register last_java_fp,
- address last_java_pc);
-
- void reset_last_Java_frame(Register thread, bool clear_fp, bool clear_pc);
-
- // thread in the default location (r15_thread on 64bit)
- void reset_last_Java_frame(bool clear_fp, bool clear_pc);
-
- // Stores
- void store_check(Register obj); // store check for obj - register is destroyed afterwards
- void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed)
-
-#ifndef SERIALGC
-
- void g1_write_barrier_pre(Register obj,
- Register pre_val,
- Register thread,
- Register tmp,
- bool tosca_live,
- bool expand_call);
-
- void g1_write_barrier_post(Register store_addr,
- Register new_val,
- Register thread,
- Register tmp,
- Register tmp2);
-
-#endif // SERIALGC
-
- // split store_check(Register obj) to enhance instruction interleaving
- void store_check_part_1(Register obj);
- void store_check_part_2(Register obj);
-
- // C 'boolean' to Java boolean: x == 0 ? 0 : 1
- void c2bool(Register x);
-
- // C++ bool manipulation
-
- void movbool(Register dst, Address src);
- void movbool(Address dst, bool boolconst);
- void movbool(Address dst, Register src);
- void testbool(Register dst);
-
- // oop manipulations
- void load_klass(Register dst, Register src);
- void store_klass(Register dst, Register src);
-
- void load_heap_oop(Register dst, Address src);
- void load_heap_oop_not_null(Register dst, Address src);
- void store_heap_oop(Address dst, Register src);
- void cmp_heap_oop(Register src1, Address src2, Register tmp = noreg);
-
- // Used for storing NULL. All other oop constants should be
- // stored using routines that take a jobject.
- void store_heap_oop_null(Address dst);
-
- void load_prototype_header(Register dst, Register src);
-
-#ifdef _LP64
- void store_klass_gap(Register dst, Register src);
-
- // This dummy is to prevent a call to store_heap_oop from
- // converting a zero (like NULL) into a Register by giving
- // the compiler two choices it can't resolve
-
- void store_heap_oop(Address dst, void* dummy);
-
- void encode_heap_oop(Register r);
- void decode_heap_oop(Register r);
- void encode_heap_oop_not_null(Register r);
- void decode_heap_oop_not_null(Register r);
- void encode_heap_oop_not_null(Register dst, Register src);
- void decode_heap_oop_not_null(Register dst, Register src);
-
- void set_narrow_oop(Register dst, jobject obj);
- void set_narrow_oop(Address dst, jobject obj);
- void cmp_narrow_oop(Register dst, jobject obj);
- void cmp_narrow_oop(Address dst, jobject obj);
-
- void encode_klass_not_null(Register r);
- void decode_klass_not_null(Register r);
- void encode_klass_not_null(Register dst, Register src);
- void decode_klass_not_null(Register dst, Register src);
- void set_narrow_klass(Register dst, Klass* k);
- void set_narrow_klass(Address dst, Klass* k);
- void cmp_narrow_klass(Register dst, Klass* k);
- void cmp_narrow_klass(Address dst, Klass* k);
-
- // if heap base register is used - reinit it with the correct value
- void reinit_heapbase();
-
- DEBUG_ONLY(void verify_heapbase(const char* msg);)
-
-#endif // _LP64
-
- // Int division/remainder for Java
- // (as idivl, but checks for special case as described in JVM spec.)
- // returns idivl instruction offset for implicit exception handling
- int corrected_idivl(Register reg);
-
- // Long division/remainder for Java
- // (as idivq, but checks for special case as described in JVM spec.)
- // returns idivq instruction offset for implicit exception handling
- int corrected_idivq(Register reg);
-
- void int3();
-
- // Long operation macros for a 32bit cpu
- // Long negation for Java
- void lneg(Register hi, Register lo);
-
- // Long multiplication for Java
- // (destroys contents of eax, ebx, ecx and edx)
- void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y
-
- // Long shifts for Java
- // (semantics as described in JVM spec.)
- void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f)
- void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f)
-
- // Long compare for Java
- // (semantics as described in JVM spec.)
- void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y)
-
-
- // misc
-
- // Sign extension
- void sign_extend_short(Register reg);
- void sign_extend_byte(Register reg);
-
- // Division by power of 2, rounding towards 0
- void division_with_shift(Register reg, int shift_value);
-
- // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
- //
- // CF (corresponds to C0) if x < y
- // PF (corresponds to C2) if unordered
- // ZF (corresponds to C3) if x = y
- //
- // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
- // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code)
- void fcmp(Register tmp);
- // Variant of the above which allows y to be further down the stack
- // and which only pops x and y if specified. If pop_right is
- // specified then pop_left must also be specified.
- void fcmp(Register tmp, int index, bool pop_left, bool pop_right);
-
- // Floating-point comparison for Java
- // Compares the top-most stack entries on the FPU stack and stores the result in dst.
- // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
- // (semantics as described in JVM spec.)
- void fcmp2int(Register dst, bool unordered_is_less);
- // Variant of the above which allows y to be further down the stack
- // and which only pops x and y if specified. If pop_right is
- // specified then pop_left must also be specified.
- void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right);
-
- // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards)
- // tmp is a temporary register, if none is available use noreg
- void fremr(Register tmp);
-
-
- // same as fcmp2int, but using SSE2
- void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
- void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
-
- // Inlined sin/cos generator for Java; must not use CPU instruction
- // directly on Intel as it does not have high enough precision
- // outside of the range [-pi/4, pi/4]. Extra argument indicate the
- // number of FPU stack slots in use; all but the topmost will
- // require saving if a slow case is necessary. Assumes argument is
- // on FP TOS; result is on FP TOS. No cpu registers are changed by
- // this code.
- void trigfunc(char trig, int num_fpu_regs_in_use = 1);
-
- // branch to L if FPU flag C2 is set/not set
- // tmp is a temporary register, if none is available use noreg
- void jC2 (Register tmp, Label& L);
- void jnC2(Register tmp, Label& L);
-
- // Pop ST (ffree & fincstp combined)
- void fpop();
-
- // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
- void push_fTOS();
-
- // pops double TOS element from CPU stack and pushes on FPU stack
- void pop_fTOS();
-
- void empty_FPU_stack();
-
- void push_IU_state();
- void pop_IU_state();
-
- void push_FPU_state();
- void pop_FPU_state();
-
- void push_CPU_state();
- void pop_CPU_state();
-
- // Round up to a power of two
- void round_to(Register reg, int modulus);
-
- // Callee saved registers handling
- void push_callee_saved_registers();
- void pop_callee_saved_registers();
-
- // allocation
- void eden_allocate(
- Register obj, // result: pointer to object after successful allocation
- Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
- int con_size_in_bytes, // object size in bytes if known at compile time
- Register t1, // temp register
- Label& slow_case // continuation point if fast allocation fails
- );
- void tlab_allocate(
- Register obj, // result: pointer to object after successful allocation
- Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
- int con_size_in_bytes, // object size in bytes if known at compile time
- Register t1, // temp register
- Register t2, // temp register
- Label& slow_case // continuation point if fast allocation fails
- );
- Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
- void incr_allocated_bytes(Register thread,
- Register var_size_in_bytes, int con_size_in_bytes,
- Register t1 = noreg);
-
- // interface method calling
- void lookup_interface_method(Register recv_klass,
- Register intf_klass,
- RegisterOrConstant itable_index,
- Register method_result,
- Register scan_temp,
- Label& no_such_interface);
-
- // virtual method calling
- void lookup_virtual_method(Register recv_klass,
- RegisterOrConstant vtable_index,
- Register method_result);
-
- // Test sub_klass against super_klass, with fast and slow paths.
-
- // The fast path produces a tri-state answer: yes / no / maybe-slow.
- // One of the three labels can be NULL, meaning take the fall-through.
- // If super_check_offset is -1, the value is loaded up from super_klass.
- // No registers are killed, except temp_reg.
- void check_klass_subtype_fast_path(Register sub_klass,
- Register super_klass,
- Register temp_reg,
- Label* L_success,
- Label* L_failure,
- Label* L_slow_path,
- RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
-
- // The rest of the type check; must be wired to a corresponding fast path.
- // It does not repeat the fast path logic, so don't use it standalone.
- // The temp_reg and temp2_reg can be noreg, if no temps are available.
- // Updates the sub's secondary super cache as necessary.
- // If set_cond_codes, condition codes will be Z on success, NZ on failure.
- void check_klass_subtype_slow_path(Register sub_klass,
- Register super_klass,
- Register temp_reg,
- Register temp2_reg,
- Label* L_success,
- Label* L_failure,
- bool set_cond_codes = false);
-
- // Simplified, combined version, good for typical uses.
- // Falls through on failure.
- void check_klass_subtype(Register sub_klass,
- Register super_klass,
- Register temp_reg,
- Label& L_success);
-
- // method handles (JSR 292)
- Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
-
- //----
- void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
-
- // Debugging
-
- // only if +VerifyOops
- // TODO: Make these macros with file and line like sparc version!
- void verify_oop(Register reg, const char* s = "broken oop");
- void verify_oop_addr(Address addr, const char * s = "broken oop addr");
-
- // TODO: verify method and klass metadata (compare against vptr?)
- void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
- void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
-
-#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
-#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
-
- // only if +VerifyFPU
- void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
-
- // prints msg, dumps registers and stops execution
- void stop(const char* msg);
-
- // prints msg and continues
- void warn(const char* msg);
-
- // dumps registers and other state
- void print_state();
-
- static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
- static void debug64(char* msg, int64_t pc, int64_t regs[]);
- static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip);
- static void print_state64(int64_t pc, int64_t regs[]);
-
- void os_breakpoint();
-
- void untested() { stop("untested"); }
-
- void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, 1024, "unimplemented: %s", what); stop(b); }
-
- void should_not_reach_here() { stop("should not reach here"); }
-
- void print_CPU_state();
-
- // Stack overflow checking
- void bang_stack_with_offset(int offset) {
- // stack grows down, caller passes positive offset
- assert(offset > 0, "must bang with negative offset");
- movl(Address(rsp, (-offset)), rax);
- }
-
- // Writes to stack successive pages until offset reached to check for
- // stack overflow + shadow pages. Also, clobbers tmp
- void bang_stack_size(Register size, Register tmp);
-
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset);
-
- // Support for serializing memory accesses between threads
- void serialize_memory(Register thread, Register tmp);
-
- void verify_tlab();
-
- // Biased locking support
- // lock_reg and obj_reg must be loaded up with the appropriate values.
- // swap_reg must be rax, and is killed.
- // tmp_reg is optional. If it is supplied (i.e., != noreg) it will
- // be killed; if not supplied, push/pop will be used internally to
- // allocate a temporary (inefficient, avoid if possible).
- // Optional slow case is for implementations (interpreter and C1) which branch to
- // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
- // Returns offset of first potentially-faulting instruction for null
- // check info (currently consumed only by C1). If
- // swap_reg_contains_mark is true then returns -1 as it is assumed
- // the calling code has already passed any potential faults.
- int biased_locking_enter(Register lock_reg, Register obj_reg,
- Register swap_reg, Register tmp_reg,
- bool swap_reg_contains_mark,
- Label& done, Label* slow_case = NULL,
- BiasedLockingCounters* counters = NULL);
- void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
-
-
- Condition negate_condition(Condition cond);
-
- // Instructions that use AddressLiteral operands. These instruction can handle 32bit/64bit
- // operands. In general the names are modified to avoid hiding the instruction in Assembler
- // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
- // here in MacroAssembler. The major exception to this rule is call
-
- // Arithmetics
-
-
- void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; }
- void addptr(Address dst, Register src);
-
- void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
- void addptr(Register dst, int32_t src);
- void addptr(Register dst, Register src);
- void addptr(Register dst, RegisterOrConstant src) {
- if (src.is_constant()) addptr(dst, (int) src.as_constant());
- else addptr(dst, src.as_register());
- }
-
- void andptr(Register dst, int32_t src);
- void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; }
-
- void cmp8(AddressLiteral src1, int imm);
-
- // renamed to drag out the casting of address to int32_t/intptr_t
- void cmp32(Register src1, int32_t imm);
-
- void cmp32(AddressLiteral src1, int32_t imm);
- // compare reg - mem, or reg - &mem
- void cmp32(Register src1, AddressLiteral src2);
-
- void cmp32(Register src1, Address src2);
-
-#ifndef _LP64
- void cmpklass(Address dst, Metadata* obj);
- void cmpklass(Register dst, Metadata* obj);
- void cmpoop(Address dst, jobject obj);
- void cmpoop(Register dst, jobject obj);
-#endif // _LP64
-
- // NOTE src2 must be the lval. This is NOT an mem-mem compare
- void cmpptr(Address src1, AddressLiteral src2);
-
- void cmpptr(Register src1, AddressLiteral src2);
-
- void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
- void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
- // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
-
- void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
- void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
-
- // cmp64 to avoild hiding cmpq
- void cmp64(Register src1, AddressLiteral src);
-
- void cmpxchgptr(Register reg, Address adr);
-
- void locked_cmpxchgptr(Register reg, AddressLiteral adr);
-
-
- void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
-
-
- void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
-
- void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); }
-
- void shlptr(Register dst, int32_t shift);
- void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); }
-
- void shrptr(Register dst, int32_t shift);
- void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); }
-
- void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); }
- void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); }
-
- void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
-
- void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
- void subptr(Register dst, int32_t src);
- // Force generation of a 4 byte immediate value even if it fits into 8bit
- void subptr_imm32(Register dst, int32_t src);
- void subptr(Register dst, Register src);
- void subptr(Register dst, RegisterOrConstant src) {
- if (src.is_constant()) subptr(dst, (int) src.as_constant());
- else subptr(dst, src.as_register());
- }
-
- void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
- void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
-
- void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
- void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
-
- void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; }
-
-
-
- // Helper functions for statistics gathering.
- // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
- void cond_inc32(Condition cond, AddressLiteral counter_addr);
- // Unconditional atomic increment.
- void atomic_incl(AddressLiteral counter_addr);
-
- void lea(Register dst, AddressLiteral adr);
- void lea(Address dst, AddressLiteral adr);
- void lea(Register dst, Address adr) { Assembler::lea(dst, adr); }
-
- void leal32(Register dst, Address src) { leal(dst, src); }
-
- // Import other testl() methods from the parent class or else
- // they will be hidden by the following overriding declaration.
- using Assembler::testl;
- void testl(Register dst, AddressLiteral src);
-
- void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
- void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
- void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
-
- void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
- void testptr(Register src1, Register src2);
-
- void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
- void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
-
- // Calls
-
- void call(Label& L, relocInfo::relocType rtype);
- void call(Register entry);
-
- // NOTE: this call tranfers to the effective address of entry NOT
- // the address contained by entry. This is because this is more natural
- // for jumps/calls.
- void call(AddressLiteral entry);
-
- // Emit the CompiledIC call idiom
- void ic_call(address entry);
-
- // Jumps
-
- // NOTE: these jumps tranfer to the effective address of dst NOT
- // the address contained by dst. This is because this is more natural
- // for jumps/calls.
- void jump(AddressLiteral dst);
- void jump_cc(Condition cc, AddressLiteral dst);
-
- // 32bit can do a case table jump in one instruction but we no longer allow the base
- // to be installed in the Address class. This jump will tranfers to the address
- // contained in the location described by entry (not the address of entry)
- void jump(ArrayAddress entry);
-
- // Floating
-
- void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
- void andpd(XMMRegister dst, AddressLiteral src);
-
- void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
- void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
- void andps(XMMRegister dst, AddressLiteral src);
-
- void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
- void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
- void comiss(XMMRegister dst, AddressLiteral src);
-
- void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
- void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
- void comisd(XMMRegister dst, AddressLiteral src);
-
- void fadd_s(Address src) { Assembler::fadd_s(src); }
- void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }
-
- void fldcw(Address src) { Assembler::fldcw(src); }
- void fldcw(AddressLiteral src);
-
- void fld_s(int index) { Assembler::fld_s(index); }
- void fld_s(Address src) { Assembler::fld_s(src); }
- void fld_s(AddressLiteral src);
-
- void fld_d(Address src) { Assembler::fld_d(src); }
- void fld_d(AddressLiteral src);
-
- void fld_x(Address src) { Assembler::fld_x(src); }
- void fld_x(AddressLiteral src);
-
- void fmul_s(Address src) { Assembler::fmul_s(src); }
- void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
-
- void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
- void ldmxcsr(AddressLiteral src);
-
- // compute pow(x,y) and exp(x) with x86 instructions. Don't cover
- // all corner cases and may result in NaN and require fallback to a
- // runtime call.
- void fast_pow();
- void fast_exp();
- void increase_precision();
- void restore_precision();
-
- // computes exp(x). Fallback to runtime call included.
- void exp_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(true, num_fpu_regs_in_use); }
- // computes pow(x,y). Fallback to runtime call included.
- void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(false, num_fpu_regs_in_use); }
-
-private:
-
- // call runtime as a fallback for trig functions and pow/exp.
- void fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use);
-
- // computes 2^(Ylog2X); Ylog2X in ST(0)
- void pow_exp_core_encoding();
-
- // computes pow(x,y) or exp(x). Fallback to runtime call included.
- void pow_or_exp(bool is_exp, int num_fpu_regs_in_use);
-
- // these are private because users should be doing movflt/movdbl
-
- void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); }
- void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
- void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); }
- void movss(XMMRegister dst, AddressLiteral src);
-
- void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); }
- void movlpd(XMMRegister dst, AddressLiteral src);
-
-public:
-
- void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
- void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); }
- void addsd(XMMRegister dst, AddressLiteral src);
-
- void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
- void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); }
- void addss(XMMRegister dst, AddressLiteral src);
-
- void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
- void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
- void divsd(XMMRegister dst, AddressLiteral src);
-
- void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); }
- void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
- void divss(XMMRegister dst, AddressLiteral src);
-
- // Move Unaligned Double Quadword
- void movdqu(Address dst, XMMRegister src) { Assembler::movdqu(dst, src); }
- void movdqu(XMMRegister dst, Address src) { Assembler::movdqu(dst, src); }
- void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); }
- void movdqu(XMMRegister dst, AddressLiteral src);
-
- void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
- void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
- void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
- void movsd(XMMRegister dst, AddressLiteral src);
-
- void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
- void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
- void mulsd(XMMRegister dst, AddressLiteral src);
-
- void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
- void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
- void mulss(XMMRegister dst, AddressLiteral src);
-
- void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); }
- void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); }
- void sqrtsd(XMMRegister dst, AddressLiteral src);
-
- void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); }
- void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); }
- void sqrtss(XMMRegister dst, AddressLiteral src);
-
- void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); }
- void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); }
- void subsd(XMMRegister dst, AddressLiteral src);
-
- void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); }
- void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); }
- void subss(XMMRegister dst, AddressLiteral src);
-
- void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
- void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
- void ucomiss(XMMRegister dst, AddressLiteral src);
-
- void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
- void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
- void ucomisd(XMMRegister dst, AddressLiteral src);
-
- // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
- void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
- void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
- void xorpd(XMMRegister dst, AddressLiteral src);
-
- // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
- void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
- void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
- void xorps(XMMRegister dst, AddressLiteral src);
-
- // Shuffle Bytes
- void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
- void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); }
- void pshufb(XMMRegister dst, AddressLiteral src);
- // AVX 3-operands instructions
-
- void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
- void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); }
- void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
- void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
- void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
- void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
- void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
- void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
- void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
-
- void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
- void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
- void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
-
- void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
- void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
- void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
- void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
- void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); }
- void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
- void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
- void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); }
- void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
- void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
- void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); }
- void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
- void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
- void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); }
- void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
- void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
- void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
- void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
- // AVX Vector instructions
-
- void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
- void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
- void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
-
- void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
- void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
- void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
-
- void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
- Assembler::vpxor(dst, nds, src, vector256);
- else
- Assembler::vxorpd(dst, nds, src, vector256);
- }
- void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
- Assembler::vpxor(dst, nds, src, vector256);
- else
- Assembler::vxorpd(dst, nds, src, vector256);
- }
-
- // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector.
- void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
- if (UseAVX > 1) // vinserti128h is available only in AVX2
- Assembler::vinserti128h(dst, nds, src);
- else
- Assembler::vinsertf128h(dst, nds, src);
- }
-
- // Data
-
- void cmov32( Condition cc, Register dst, Address src);
- void cmov32( Condition cc, Register dst, Register src);
-
- void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }
-
- void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
- void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
-
- void movoop(Register dst, jobject obj);
- void movoop(Address dst, jobject obj);
-
- void mov_metadata(Register dst, Metadata* obj);
- void mov_metadata(Address dst, Metadata* obj);
-
- void movptr(ArrayAddress dst, Register src);
- // can this do an lea?
- void movptr(Register dst, ArrayAddress src);
-
- void movptr(Register dst, Address src);
-
- void movptr(Register dst, AddressLiteral src);
-
- void movptr(Register dst, intptr_t src);
- void movptr(Register dst, Register src);
- void movptr(Address dst, intptr_t src);
-
- void movptr(Address dst, Register src);
-
- void movptr(Register dst, RegisterOrConstant src) {
- if (src.is_constant()) movptr(dst, src.as_constant());
- else movptr(dst, src.as_register());
- }
-
-#ifdef _LP64
- // Generally the next two are only used for moving NULL
- // Although there are situations in initializing the mark word where
- // they could be used. They are dangerous.
-
- // They only exist on LP64 so that int32_t and intptr_t are not the same
- // and we have ambiguous declarations.
-
- void movptr(Address dst, int32_t imm32);
- void movptr(Register dst, int32_t imm32);
-#endif // _LP64
-
- // to avoid hiding movl
- void mov32(AddressLiteral dst, Register src);
- void mov32(Register dst, AddressLiteral src);
-
- // to avoid hiding movb
- void movbyte(ArrayAddress dst, int src);
-
- // Import other mov() methods from the parent class or else
- // they will be hidden by the following overriding declaration.
- using Assembler::movdl;
- using Assembler::movq;
- void movdl(XMMRegister dst, AddressLiteral src);
- void movq(XMMRegister dst, AddressLiteral src);
-
- // Can push value or effective address
- void pushptr(AddressLiteral src);
-
- void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
- void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }
-
- void pushoop(jobject obj);
- void pushklass(Metadata* obj);
-
- // sign extend as need a l to ptr sized element
- void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
- void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
-
- // C2 compiled method's prolog code.
- void verified_entry(int framesize, bool stack_bang, bool fp_mode_24b);
-
- // IndexOf strings.
- // Small strings are loaded through stack if they cross page boundary.
- void string_indexof(Register str1, Register str2,
- Register cnt1, Register cnt2,
- int int_cnt2, Register result,
- XMMRegister vec, Register tmp);
-
- // IndexOf for constant substrings with size >= 8 elements
- // which don't need to be loaded through stack.
- void string_indexofC8(Register str1, Register str2,
- Register cnt1, Register cnt2,
- int int_cnt2, Register result,
- XMMRegister vec, Register tmp);
-
- // Smallest code: we don't need to load through stack,
- // check string tail.
-
- // Compare strings.
- void string_compare(Register str1, Register str2,
- Register cnt1, Register cnt2, Register result,
- XMMRegister vec1);
-
- // Compare char[] arrays.
- void char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
- Register limit, Register result, Register chr,
- XMMRegister vec1, XMMRegister vec2);
-
- // Fill primitive arrays
- void generate_fill(BasicType t, bool aligned,
- Register to, Register value, Register count,
- Register rtmp, XMMRegister xtmp);
-
-#undef VIRTUAL
-
-};
-
-/**
- * class SkipIfEqual:
- *
- * Instantiating this class will result in assembly code being output that will
- * jump around any code emitted between the creation of the instance and it's
- * automatic destruction at the end of a scope block, depending on the value of
- * the flag passed to the constructor, which will be checked at run-time.
- */
-class SkipIfEqual {
- private:
- MacroAssembler* _masm;
- Label _label;
-
- public:
- SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
- ~SkipIfEqual();
-};
-
-#ifdef ASSERT
-inline bool AbstractAssembler::pd_check_instruction_mark() { return true; }
-#endif
-
#endif // CPU_X86_VM_ASSEMBLER_X86_HPP
diff --git a/src/cpu/x86/vm/assembler_x86.inline.hpp b/src/cpu/x86/vm/assembler_x86.inline.hpp
index bf299c6da..7dbb09507 100644
--- a/src/cpu/x86/vm/assembler_x86.inline.hpp
+++ b/src/cpu/x86/vm/assembler_x86.inline.hpp
@@ -28,48 +28,6 @@
#include "asm/assembler.inline.hpp"
#include "asm/codeBuffer.hpp"
#include "code/codeCache.hpp"
-#include "runtime/handles.inline.hpp"
-
-inline void MacroAssembler::pd_patch_instruction(address branch, address target) {
- unsigned char op = branch[0];
- assert(op == 0xE8 /* call */ ||
- op == 0xE9 /* jmp */ ||
- op == 0xEB /* short jmp */ ||
- (op & 0xF0) == 0x70 /* short jcc */ ||
- op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */,
- "Invalid opcode at patch point");
-
- if (op == 0xEB || (op & 0xF0) == 0x70) {
- // short offset operators (jmp and jcc)
- char* disp = (char*) &branch[1];
- int imm8 = target - (address) &disp[1];
- guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset");
- *disp = imm8;
- } else {
- int* disp = (int*) &branch[(op == 0x0F)? 2: 1];
- int imm32 = target - (address) &disp[1];
- *disp = imm32;
- }
-}
-
-#ifndef PRODUCT
-inline void MacroAssembler::pd_print_patched_instruction(address branch) {
- const char* s;
- unsigned char op = branch[0];
- if (op == 0xE8) {
- s = "call";
- } else if (op == 0xE9 || op == 0xEB) {
- s = "jmp";
- } else if ((op & 0xF0) == 0x70) {
- s = "jcc";
- } else if (op == 0x0F) {
- s = "jcc";
- } else {
- s = "????";
- }
- tty->print("%s (unresolved)", s);
-}
-#endif // ndef PRODUCT
#ifndef _LP64
inline int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { return reg_enc; }
@@ -87,12 +45,6 @@ inline void Assembler::prefixq(Address adr, Register reg) {}
inline void Assembler::prefix(Address adr, XMMRegister reg) {}
inline void Assembler::prefixq(Address adr, XMMRegister reg) {}
-#else
-inline void Assembler::emit_long64(jlong x) {
- *(jlong*) _code_pos = x;
- _code_pos += sizeof(jlong);
- code_section()->set_end(_code_pos);
-}
#endif // _LP64
#endif // CPU_X86_VM_ASSEMBLER_X86_INLINE_HPP
diff --git a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
index 684ae866e..83146761c 100644
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
@@ -23,7 +23,8 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
#include "c1/c1_Compilation.hpp"
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
diff --git a/src/cpu/x86/vm/cppInterpreter_x86.cpp b/src/cpu/x86/vm/cppInterpreter_x86.cpp
index 64a9463f8..946c400e5 100644
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp
@@ -23,7 +23,7 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/cppInterpreter.hpp"
#include "interpreter/interpreter.hpp"
diff --git a/src/cpu/x86/vm/frame_x86.inline.hpp b/src/cpu/x86/vm/frame_x86.inline.hpp
index 55459c09d..ea8111b9f 100644
--- a/src/cpu/x86/vm/frame_x86.inline.hpp
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp
@@ -25,6 +25,8 @@
#ifndef CPU_X86_VM_FRAME_X86_INLINE_HPP
#define CPU_X86_VM_FRAME_X86_INLINE_HPP
+#include "code/codeCache.hpp"
+
// Inline functions for Intel frames:
// Constructors:
diff --git a/src/cpu/x86/vm/icBuffer_x86.cpp b/src/cpu/x86/vm/icBuffer_x86.cpp
index ede401d4d..62ecc447a 100644
--- a/src/cpu/x86/vm/icBuffer_x86.cpp
+++ b/src/cpu/x86/vm/icBuffer_x86.cpp
@@ -23,8 +23,8 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
#include "code/icBuffer.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/bytecodes.hpp"
diff --git a/src/cpu/x86/vm/icache_x86.cpp b/src/cpu/x86/vm/icache_x86.cpp
index 91d4f4da1..b9ec2f6d1 100644
--- a/src/cpu/x86/vm/icache_x86.cpp
+++ b/src/cpu/x86/vm/icache_x86.cpp
@@ -23,7 +23,7 @@
*/
#include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
#include "runtime/icache.hpp"
#define __ _masm->
diff --git a/src/cpu/x86/vm/interp_masm_x86_32.hpp b/src/cpu/x86/vm/interp_masm_x86_32.hpp
index 7f8463a9a..6dada56de 100644
--- a/src/cpu/x86/vm/interp_masm_x86_32.hpp
+++ b/src/cpu/x86/vm/interp_masm_x86_32.hpp
@@ -25,8 +25,10 @@
#ifndef CPU_X86_VM_INTERP_MASM_X86_32_HPP
#define CPU_X86_VM_INTERP_MASM_X86_32_HPP
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
#include "interpreter/invocationCounter.hpp"
+#include "runtime/frame.hpp"
// This file specializes the assember with interpreter-specific macros
diff --git a/src/cpu/x86/vm/interp_masm_x86_64.hpp b/src/cpu/x86/vm/interp_masm_x86_64.hpp
index eb8c4f0a4..66a001366 100644
--- a/src/cpu/x86/vm/interp_masm_x86_64.hpp
+++ b/src/cpu/x86/vm/interp_masm_x86_64.hpp
@@ -25,8 +25,10 @@
#ifndef CPU_X86_VM_INTERP_MASM_X86_64_HPP
#define CPU_X86_VM_INTERP_MASM_X86_64_HPP
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
#include "interpreter/invocationCounter.hpp"
+#include "runtime/frame.hpp"
// This file specializes the assember with interpreter-specific macros
diff --git a/src/cpu/x86/vm/interpreter_x86_32.cpp b/src/cpu/x86/vm/interpreter_x86_32.cpp
index bed8137e4..865801e3d 100644
--- a/src/cpu/x86/vm/interpreter_x86_32.cpp
+++ b/src/cpu/x86/vm/interpreter_x86_32.cpp
@@ -23,7 +23,7 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterGenerator.hpp"
diff --git a/src/cpu/x86/vm/interpreter_x86_64.cpp b/src/cpu/x86/vm/interpreter_x86_64.cpp
index 3b0a6b445..bc5229b99 100644
--- a/src/cpu/x86/vm/interpreter_x86_64.cpp
+++ b/src/cpu/x86/vm/interpreter_x86_64.cpp
@@ -23,7 +23,7 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterGenerator.hpp"
diff --git a/src/cpu/x86/vm/jniFastGetField_x86_32.cpp b/src/cpu/x86/vm/jniFastGetField_x86_32.cpp
index 57edd9b19..e4fb943a9 100644
--- a/src/cpu/x86/vm/jniFastGetField_x86_32.cpp
+++ b/src/cpu/x86/vm/jniFastGetField_x86_32.cpp
@@ -23,7 +23,7 @@
*/
#include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
#include "memory/resourceArea.hpp"
#include "prims/jniFastGetField.hpp"
#include "prims/jvm_misc.hpp"
diff --git a/src/cpu/x86/vm/jniFastGetField_x86_64.cpp b/src/cpu/x86/vm/jniFastGetField_x86_64.cpp
index 8b0d2e6fa..1f523c762 100644
--- a/src/cpu/x86/vm/jniFastGetField_x86_64.cpp
+++ b/src/cpu/x86/vm/jniFastGetField_x86_64.cpp
@@ -23,7 +23,7 @@
*/
#include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
#include "memory/resourceArea.hpp"
#include "prims/jniFastGetField.hpp"
#include "prims/jvm_misc.hpp"
diff --git a/src/cpu/x86/vm/macroAssembler_x86.cpp b/src/cpu/x86/vm/macroAssembler_x86.cpp
new file mode 100644
index 000000000..fa2d7fa43
--- /dev/null
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp
@@ -0,0 +1,6099 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/cardTableModRefBS.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#ifndef SERIALGC
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#endif
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* PRODUCT build: expands to nothing */
+#define STOP(error) stop(error) // PRODUCT build: calls stop() only
+#else
+#define BLOCK_COMMENT(str) block_comment(str) // non-PRODUCT build: forwards to block_comment()
+#define STOP(error) block_comment(error); stop(error) // NOTE: expands to two statements; not safe as the sole body of an unbraced if/else
+#endif
+// BIND calls bind(label), then passes the string "<label>:" to BLOCK_COMMENT. NOTE: also expands to two statements.
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+
+#ifdef ASSERT
+bool AbstractAssembler::pd_check_instruction_mark() { return true; } // compiled only under ASSERT; unconditionally returns true
+#endif
+
+static Assembler::Condition reverse[] = { // negation table: entry i is the opposite of the condition whose encoding is i
+ Assembler::noOverflow /* overflow = 0x0 */ ,
+ Assembler::overflow /* noOverflow = 0x1 */ ,
+ Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
+ Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
+ Assembler::notZero /* zero = 0x4, equal = 0x4 */ ,
+ Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ ,
+ Assembler::above /* belowEqual = 0x6 */ ,
+ Assembler::belowEqual /* above = 0x7 */ ,
+ Assembler::positive /* negative = 0x8 */ ,
+ Assembler::negative /* positive = 0x9 */ ,
+ Assembler::noParity /* parity = 0xa */ ,
+ Assembler::parity /* noParity = 0xb */ ,
+ Assembler::greaterEqual /* less = 0xc */ ,
+ Assembler::less /* greaterEqual = 0xd */ ,
+ Assembler::greater /* lessEqual = 0xe */ ,
+ Assembler::lessEqual /* greater = 0xf, */
+// Each entry's trailing comment names the condition (and its encoding) that the entry negates.
+};
+
+
+// Implementation of MacroAssembler
+
+// First, all the functions that have distinct implementations for 32-bit and
+// 64-bit, unless the difference is trivial (1 line or so).
+
+#ifndef _LP64
+
+// 32bit versions
+
+Address MacroAssembler::as_Address(AddressLiteral adr) { // 32-bit (!_LP64) version
+ return Address(adr.target(), adr.rspec()); // build the Address directly from the literal's target and relocation spec
+}
+
+Address MacroAssembler::as_Address(ArrayAddress adr) { // 32-bit (!_LP64) version
+ return Address::make_array(adr); // conversion is delegated entirely to Address::make_array
+}
+
+int MacroAssembler::biased_locking_enter(Register lock_reg,
+ Register obj_reg,
+ Register swap_reg,
+ Register tmp_reg,
+ bool swap_reg_contains_mark,
+ Label& done,
+ Label* slow_case,
+ BiasedLockingCounters* counters) {
+ assert(UseBiasedLocking, "why call this otherwise?");
+ assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
+ assert_different_registers(lock_reg, obj_reg, swap_reg);
+
+ if (PrintBiasedLockingStatistics && counters == NULL)
+ counters = BiasedLocking::counters();
+
+ bool need_tmp_reg = false;
+ if (tmp_reg == noreg) {
+ need_tmp_reg = true;
+ tmp_reg = lock_reg;
+ } else {
+ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
+ }
+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
+ Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes());
+ Address saved_mark_addr(lock_reg, 0);
+
+ // Biased locking
+ // See whether the lock is currently biased toward our thread and
+ // whether the epoch is still valid
+ // Note that the runtime guarantees sufficient alignment of JavaThread
+ // pointers to allow age to be placed into low bits
+ // First check to see whether biasing is even enabled for this object
+ Label cas_label;
+ int null_check_offset = -1;
+ if (!swap_reg_contains_mark) {
+ null_check_offset = offset();
+ movl(swap_reg, mark_addr);
+ }
+ if (need_tmp_reg) {
+ push(tmp_reg);
+ }
+ movl(tmp_reg, swap_reg);
+ andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+ cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
+ if (need_tmp_reg) {
+ pop(tmp_reg);
+ }
+ jcc(Assembler::notEqual, cas_label);
+ // The bias pattern is present in the object's header. Need to check
+ // whether the bias owner and the epoch are both still current.
+ // Note that because there is no current thread register on x86 we
+ // need to store off the mark word we read out of the object to
+ // avoid reloading it and needing to recheck invariants below. This
+ // store is unfortunate but it makes the overall code shorter and
+ // simpler.
+ movl(saved_mark_addr, swap_reg);
+ if (need_tmp_reg) {
+ push(tmp_reg);
+ }
+ get_thread(tmp_reg);
+ xorl(swap_reg, tmp_reg);
+ if (swap_reg_contains_mark) {
+ null_check_offset = offset();
+ }
+ movl(tmp_reg, klass_addr);
+ xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
+ andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
+ if (need_tmp_reg) {
+ pop(tmp_reg);
+ }
+ if (counters != NULL) {
+ cond_inc32(Assembler::zero,
+ ExternalAddress((address)counters->biased_lock_entry_count_addr()));
+ }
+ jcc(Assembler::equal, done);
+
+ Label try_revoke_bias;
+ Label try_rebias;
+
+ // At this point we know that the header has the bias pattern and
+ // that we are not the bias owner in the current epoch. We need to
+ // figure out more details about the state of the header in order to
+ // know what operations can be legally performed on the object's
+ // header.
+
+ // If the low three bits in the xor result aren't clear, that means
+ // the prototype header is no longer biased and we have to revoke
+ // the bias on this object.
+ testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
+ jcc(Assembler::notZero, try_revoke_bias);
+
+ // Biasing is still enabled for this data type. See whether the
+ // epoch of the current bias is still valid, meaning that the epoch
+ // bits of the mark word are equal to the epoch bits of the
+ // prototype header. (Note that the prototype header's epoch bits
+ // only change at a safepoint.) If not, attempt to rebias the object
+ // toward the current thread. Note that we must be absolutely sure
+ // that the current epoch is invalid in order to do this because
+ // otherwise the manipulations it performs on the mark word are
+ // illegal.
+ testl(swap_reg, markOopDesc::epoch_mask_in_place);
+ jcc(Assembler::notZero, try_rebias);
+
+ // The epoch of the current bias is still valid but we know nothing
+ // about the owner; it might be set or it might be clear. Try to
+ // acquire the bias of the object using an atomic operation. If this
+ // fails we will go in to the runtime to revoke the object's bias.
+ // Note that we first construct the presumed unbiased header so we
+ // don't accidentally blow away another thread's valid bias.
+ movl(swap_reg, saved_mark_addr);
+ andl(swap_reg,
+ markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+ if (need_tmp_reg) {
+ push(tmp_reg);
+ }
+ get_thread(tmp_reg);
+ orl(tmp_reg, swap_reg);
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(tmp_reg, Address(obj_reg, 0));
+ if (need_tmp_reg) {
+ pop(tmp_reg);
+ }
+ // If the biasing toward our thread failed, this means that
+ // another thread succeeded in biasing it toward itself and we
+ // need to revoke that bias. The revocation will occur in the
+ // interpreter runtime in the slow case.
+ if (counters != NULL) {
+ cond_inc32(Assembler::zero,
+ ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
+ }
+ if (slow_case != NULL) {
+ jcc(Assembler::notZero, *slow_case);
+ }
+ jmp(done);
+
+ bind(try_rebias);
+ // At this point we know the epoch has expired, meaning that the
+ // current "bias owner", if any, is actually invalid. Under these
+ // circumstances _only_, we are allowed to use the current header's
+ // value as the comparison value when doing the cas to acquire the
+ // bias in the current epoch. In other words, we allow transfer of
+ // the bias from one thread to another directly in this situation.
+ //
+ // FIXME: due to a lack of registers we currently blow away the age
+ // bits in this situation. Should attempt to preserve them.
+ if (need_tmp_reg) {
+ push(tmp_reg);
+ }
+ get_thread(tmp_reg);
+ movl(swap_reg, klass_addr);
+ orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
+ movl(swap_reg, saved_mark_addr);
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(tmp_reg, Address(obj_reg, 0));
+ if (need_tmp_reg) {
+ pop(tmp_reg);
+ }
+ // If the biasing toward our thread failed, then another thread
+ // succeeded in biasing it toward itself and we need to revoke that
+ // bias. The revocation will occur in the runtime in the slow case.
+ if (counters != NULL) {
+ cond_inc32(Assembler::zero,
+ ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
+ }
+ if (slow_case != NULL) {
+ jcc(Assembler::notZero, *slow_case);
+ }
+ jmp(done);
+
+ bind(try_revoke_bias);
+ // The prototype mark in the klass doesn't have the bias bit set any
+ // more, indicating that objects of this data type are not supposed
+ // to be biased any more. We are going to try to reset the mark of
+ // this object to the prototype value and fall through to the
+ // CAS-based locking scheme. Note that if our CAS fails, it means
+ // that another thread raced us for the privilege of revoking the
+ // bias of this particular object, so it's okay to continue in the
+ // normal locking code.
+ //
+ // FIXME: due to a lack of registers we currently blow away the age
+ // bits in this situation. Should attempt to preserve them.
+ movl(swap_reg, saved_mark_addr);
+ if (need_tmp_reg) {
+ push(tmp_reg);
+ }
+ movl(tmp_reg, klass_addr);
+ movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(tmp_reg, Address(obj_reg, 0));
+ if (need_tmp_reg) {
+ pop(tmp_reg);
+ }
+ // Fall through to the normal CAS-based lock, because no matter what
+ // the result of the above CAS, some thread must have succeeded in
+ // removing the bias bit from the object's header.
+ if (counters != NULL) {
+ cond_inc32(Assembler::zero,
+ ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
+ }
+
+ bind(cas_label);
+
+ return null_check_offset;
+}
+// 32-bit leaf call: emits a direct call to entry_point and then bumps rsp by
+// number_of_arguments machine words.
+void MacroAssembler::call_VM_leaf_base(address entry_point,
+                                       int number_of_arguments) {
+  const int argument_bytes = number_of_arguments * wordSize;
+  call(RuntimeAddress(entry_point));
+  increment(rsp, argument_bytes);
+}
+
+// Compares the memory operand src1 with the 32-bit value of the obj pointer,
+// emitted as an immediate that carries a metadata relocation.
+void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
+  const int32_t metadata_bits = (int32_t)obj;
+  cmp_literal32(src1, metadata_bits, metadata_Relocation::spec_for_immediate());
+}
+
+// Register flavour: compares src1 with the 32-bit value of the obj pointer.
+void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
+  const int32_t metadata_bits = (int32_t)obj;
+  cmp_literal32(src1, metadata_bits, metadata_Relocation::spec_for_immediate());
+}
+
+// Compares the memory operand src1 with the 32-bit value of the jobject
+// handle, emitted as an immediate that carries an oop relocation.
+void MacroAssembler::cmpoop(Address src1, jobject obj) {
+  const int32_t handle_bits = (int32_t)obj;
+  cmp_literal32(src1, handle_bits, oop_Relocation::spec_for_immediate());
+}
+
+// Register flavour: compares src1 with the 32-bit value of the jobject handle.
+void MacroAssembler::cmpoop(Register src1, jobject obj) {
+  const int32_t handle_bits = (int32_t)obj;
+  cmp_literal32(src1, handle_bits, oop_Relocation::spec_for_immediate());
+}
+
+// Fills hi with the sign of lo, so that hi:lo is the sign-extended value.
+void MacroAssembler::extend_sign(Register hi, Register lo) {
+  // According to Intel Doc. AP-526, "Integer Divide", p.18.
+  const bool use_cdq = VM_Version::is_P6() && hi == rdx && lo == rax;
+  if (!use_cdq) {
+    // Generic form: copy lo and smear its sign bit over all 32 bits.
+    movl(hi, lo);
+    sarl(hi, 31);
+    return;
+  }
+  cdql();
+}
+
+void MacroAssembler::jC2(Register tmp, Label& L) { // Emits a branch to L taken when the parity flag is set after the FPU status word has been copied into the CPU flags.
+ // set parity bit if FPU flag C2 is set (via rax)
+ save_rax(tmp); // fnstsw_ax writes ax; presumably rax is preserved through tmp (helper not visible here)
+ fwait(); fnstsw_ax();
+ sahf(); // loads the flags from ah
+ restore_rax(tmp);
+ // branch
+ jcc(Assembler::parity, L);
+}
+
+void MacroAssembler::jnC2(Register tmp, Label& L) { // Counterpart of jC2: emits a branch to L taken when the parity flag is clear.
+ // set parity bit if FPU flag C2 is set (via rax)
+ save_rax(tmp); // fnstsw_ax writes ax; presumably rax is preserved through tmp (helper not visible here)
+ fwait(); fnstsw_ax();
+ sahf(); // loads the flags from ah
+ restore_rax(tmp);
+ // branch when the parity flag is NOT set
+ jcc(Assembler::noParity, L);
+}
+
+// 32bit can do a case table jump in one instruction but we no longer allow the base
+// to be installed in the Address class, so the ArrayAddress is first converted
+// to a plain Address and the indirect jump goes through that.
+void MacroAssembler::jump(ArrayAddress entry) {
+  Address table_slot = as_Address(entry);
+  jmp(table_slot);
+}
+
+// Note (original): y_lo will be destroyed. NOTE(review): the code below only writes x_hi, which receives the result - confirm.
+void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { // Compares the longs x_hi:x_lo and y_hi:y_lo and leaves -1, 0 or 1 in x_hi.
+ // Long compare for Java (semantics as described in JVM spec.)
+ Label high, low, done;
+
+ cmpl(x_hi, y_hi); // the high words decide with a signed comparison
+ jcc(Assembler::less, low);
+ jcc(Assembler::greater, high);
+ // x_hi is the return register
+ xorl(x_hi, x_hi); // preset result 0; done before the next cmpl because xorl changes the flags
+ cmpl(x_lo, y_lo); // equal high words: the low words decide with an unsigned comparison
+ jcc(Assembler::below, low);
+ jcc(Assembler::equal, done);
+
+ bind(high);
+ xorl(x_hi, x_hi);
+ increment(x_hi); // result = 1
+ jmp(done);
+
+ bind(low);
+ xorl(x_hi, x_hi);
+ decrementl(x_hi); // result = -1
+
+ bind(done);
+}
+
+// Loads the target address of src into dst as a 32-bit literal that keeps the
+// relocation spec of src.
+void MacroAssembler::lea(Register dst, AddressLiteral src) {
+  const int32_t target_bits = (int32_t)src.target();
+  mov_literal32(dst, target_bits, src.rspec());
+}
+
+// Stores the target address of adr into the memory operand dst.
+void MacroAssembler::lea(Address dst, AddressLiteral adr) {
+  // Not emitted as leal(dst, as_Address(adr));
+  // see note in movl as to why we must use a move
+  const int32_t target_bits = (int32_t) adr.target();
+  mov_literal32(dst, target_bits, adr.rspec());
+}
+
+void MacroAssembler::leave() { // 32-bit frame teardown spelled out as two instructions.
+ mov(rsp, rbp); // drop everything below the frame pointer
+ pop(rbp); // reload rbp from the stack
+}
+
+void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { // 64-bit multiply on a 32-bit CPU; the code below writes rax, rbx, rcx and rdx.
+ // Multiplication of two Java long values stored on the stack
+ // as illustrated below. Result is in rdx:rax.
+ //
+ // rsp ---> [ ?? ] \ \
+ // .... | y_rsp_offset |
+ // [ y_lo ] / (in bytes) | x_rsp_offset
+ // [ y_hi ] | (in bytes)
+ // .... |
+ // [ x_lo ] /
+ // [ x_hi ]
+ // ....
+ //
+ // Basic idea: lo(result) = lo(x_lo * y_lo)
+ // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
+ Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
+ Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
+ Label quick;
+ // load x_hi, y_hi and check if quick
+ // multiplication is possible
+ movl(rbx, x_hi);
+ movl(rcx, y_hi);
+ movl(rax, rbx);
+ orl(rbx, rcx); // rbx = 0 <=> x_hi = 0 and y_hi = 0
+ jcc(Assembler::zero, quick); // if rbx = 0 do quick multiply
+ // do full multiplication
+ // 1st step
+ mull(y_lo); // x_hi * y_lo
+ movl(rbx, rax); // save lo(x_hi * y_lo) in rbx
+ // 2nd step
+ movl(rax, x_lo);
+ mull(rcx); // x_lo * y_hi
+ addl(rbx, rax); // add lo(x_lo * y_hi) to rbx
+ // 3rd step
+ bind(quick); // note: rbx = 0 if quick multiply!
+ movl(rax, x_lo);
+ mull(y_lo); // x_lo * y_lo
+ addl(rdx, rbx); // correct hi(x_lo * y_lo)
+}
+
+void MacroAssembler::lneg(Register hi, Register lo) { // Negates the 64-bit value held in the register pair hi:lo, in place.
+ negl(lo); // sets the carry flag when lo was non-zero
+ adcl(hi, 0); // fold that carry into hi before negating it
+ negl(hi);
+}
+
+void MacroAssembler::lshl(Register hi, Register lo) { // Shifts the 64-bit pair hi:lo left by the count in rcx; rcx is masked to 0..63 in place.
+ // Java shift left long support (semantics as described in JVM spec., p.305)
+ // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
+ // shift value is in rcx !
+ assert(hi != rcx, "must not use rcx");
+ assert(lo != rcx, "must not use rcx");
+ const Register s = rcx; // shift count
+ const int n = BitsPerWord;
+ Label L;
+ andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
+ cmpl(s, n); // if (s < n)
+ jcc(Assembler::less, L); // else (s >= n)
+ movl(hi, lo); // x := x << n
+ xorl(lo, lo);
+ // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
+ bind(L); // s (mod n) < n
+ shldl(hi, lo); // x := x << s
+ shll(lo);
+}
+
+
+void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { // Shifts the 64-bit pair hi:lo right by the count in rcx; sign_extension picks arithmetic (sarl) over logical (shrl) shifting.
+ // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
+ // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
+ assert(hi != rcx, "must not use rcx");
+ assert(lo != rcx, "must not use rcx");
+ const Register s = rcx; // shift count
+ const int n = BitsPerWord;
+ Label L;
+ andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
+ cmpl(s, n); // if (s < n)
+ jcc(Assembler::less, L); // else (s >= n)
+ movl(lo, hi); // x := x >> n
+ if (sign_extension) sarl(hi, 31); // hi becomes all sign bits
+ else xorl(hi, hi); // hi becomes zero
+ // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
+ bind(L); // s (mod n) < n
+ shrdl(lo, hi); // x := x >> s
+ if (sign_extension) sarl(hi);
+ else shrl(hi);
+}
+
+// Loads the 32-bit value of the jobject handle into dst as an immediate that
+// carries an oop relocation.
+void MacroAssembler::movoop(Register dst, jobject obj) {
+  const int32_t handle_bits = (int32_t)obj;
+  mov_literal32(dst, handle_bits, oop_Relocation::spec_for_immediate());
+}
+
+// Memory flavour: stores the same relocated immediate into dst.
+void MacroAssembler::movoop(Address dst, jobject obj) {
+  const int32_t handle_bits = (int32_t)obj;
+  mov_literal32(dst, handle_bits, oop_Relocation::spec_for_immediate());
+}
+
+// Loads the 32-bit value of the Metadata pointer into dst as an immediate
+// that carries a metadata relocation.
+void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
+  const int32_t metadata_bits = (int32_t)obj;
+  mov_literal32(dst, metadata_bits, metadata_Relocation::spec_for_immediate());
+}
+
+// Memory flavour: stores the same relocated immediate into dst.
+void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
+  const int32_t metadata_bits = (int32_t)obj;
+  mov_literal32(dst, metadata_bits, metadata_Relocation::spec_for_immediate());
+}
+
+// Pointer-sized move from an AddressLiteral (32-bit build): an lval literal
+// yields the address itself, anything else yields the word stored there.
+void MacroAssembler::movptr(Register dst, AddressLiteral src) {
+  if (!src.is_lval()) {
+    // rval: load the word found at the literal's address
+    movl(dst, as_Address(src));
+    return;
+  }
+  // lval: the address itself is the value
+  mov_literal32(dst, (intptr_t)src.target(), src.rspec());
+}
+
+// Stores src into the array element described by dst.
+void MacroAssembler::movptr(ArrayAddress dst, Register src) {
+  Address element = as_Address(dst);
+  movl(element, src);
+}
+
+// Loads the array element described by src into dst.
+void MacroAssembler::movptr(Register dst, ArrayAddress src) {
+  Address element = as_Address(src);
+  movl(dst, element);
+}
+
+// src should NEVER be a real pointer. Use AddressLiteral for true pointers
+void MacroAssembler::movptr(Address dst, intptr_t src) { // Stores the plain integer src into dst with a movl.
+ movl(dst, src);
+}
+
+
+// Pops rcx, rdx, rdi and rsi, i.e. the registers pushed by
+// push_callee_saved_registers() in the opposite order.
+void MacroAssembler::pop_callee_saved_registers() {
+  const Register restore_order[] = { rcx, rdx, rdi, rsi };
+  const int count = (int)(sizeof(restore_order) / sizeof(restore_order[0]));
+  for (int i = 0; i < count; i++) {
+    pop(restore_order[i]);
+  }
+}
+
+// Loads a double from the top of the CPU stack onto the FPU stack and
+// releases its two stack words.
+void MacroAssembler::pop_fTOS() {
+  const int double_bytes = 2 * wordSize;
+  fld_d(Address(rsp, 0));
+  addl(rsp, double_bytes);
+}
+
+// Pushes rsi, rdi, rdx and rcx, in that order.
+void MacroAssembler::push_callee_saved_registers() {
+  const Register save_order[] = { rsi, rdi, rdx, rcx };
+  const int count = (int)(sizeof(save_order) / sizeof(save_order[0]));
+  for (int i = 0; i < count; i++) {
+    push(save_order[i]);
+  }
+}
+
+// Reserves two stack words and stores the FPU top-of-stack there as a double,
+// popping it from the FPU stack.
+void MacroAssembler::push_fTOS() {
+  const int double_bytes = 2 * wordSize;
+  subl(rsp, double_bytes);
+  fstp_d(Address(rsp, 0));
+}
+
+
+// Pushes the 32-bit value of the jobject handle as an immediate that carries
+// an oop relocation.
+void MacroAssembler::pushoop(jobject obj) {
+  const int32_t handle_bits = (int32_t)obj;
+  push_literal32(handle_bits, oop_Relocation::spec_for_immediate());
+}
+
+// Pushes the 32-bit value of the Metadata pointer as an immediate that
+// carries a metadata relocation.
+void MacroAssembler::pushklass(Metadata* obj) {
+  const int32_t metadata_bits = (int32_t)obj;
+  push_literal32(metadata_bits, metadata_Relocation::spec_for_immediate());
+}
+
+// Pushes either the address denoted by src (lval) or the word stored at that
+// address (rval).
+void MacroAssembler::pushptr(AddressLiteral src) {
+  if (!src.is_lval()) {
+    pushl(as_Address(src));
+    return;
+  }
+  push_literal32((int32_t)src.target(), src.rspec());
+}
+
+// Materializes the "not zero" condition of the current flags as a full word
+// in dst (presumably 1 when ZF is clear and 0 otherwise; this relies on
+// set_byte_if_not_zero, defined elsewhere, being a setnz on dst's low byte).
+void MacroAssembler::set_word_if_not_zero(Register dst) {
+  // The condition byte must be produced before dst is widened: clearing dst
+  // with xorl(dst, dst) first would set ZF and overwrite the very flags
+  // being tested, so the result would always be 0.
+  set_byte_if_not_zero(dst);
+  // Zero-extend the byte result to the whole register; movzbl leaves the
+  // flags untouched.
+  movzbl(dst, dst);
+}
+
+static void pass_arg0(MacroAssembler* masm, Register arg) { // 32-bit: every pass_argN helper simply pushes its argument.
+ masm->push(arg);
+}
+
+static void pass_arg1(MacroAssembler* masm, Register arg) { // same as pass_arg0: push the argument
+ masm->push(arg);
+}
+
+static void pass_arg2(MacroAssembler* masm, Register arg) { // same as pass_arg0: push the argument
+ masm->push(arg);
+}
+
+static void pass_arg3(MacroAssembler* masm, Register arg) { // same as pass_arg0: push the argument
+ masm->push(arg);
+}
+
+#ifndef PRODUCT
+extern "C" void findpc(intptr_t x);
+#endif
+
+// Runtime target of the 32-bit stop() stub. The parameter list mirrors what
+// that stub leaves on the stack: the registers saved by pusha, the pushed
+// eip and the message pointer.
+//
+// With ShowMessageBoxOnError a message box is shown and, if confirmed, the
+// register state is printed and a breakpoint is hit; otherwise the message is
+// written to tty. In both cases the function then asserts with the message.
+void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
+  // In order to get locks to work, we need to fake a in_VM state
+  JavaThread* thread = JavaThread::current();
+  JavaThreadState saved_state = thread->thread_state();
+  thread->set_thread_state(_thread_in_vm);
+  if (ShowMessageBoxOnError) {
+    // The thread was already switched to _thread_in_vm above, so this branch
+    // does not re-declare thread / saved_state or repeat the state change.
+    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+      ttyLocker ttyl;
+      BytecodeCounter::print();
+    }
+    // To see where a verify_oop failed, get $ebx+40/X for this frame.
+    // This is the value of eip which points to where verify_oop will return.
+    if (os::message_box(msg, "Execution stopped, print registers?")) {
+      print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
+      BREAKPOINT;
+    }
+  } else {
+    ttyLocker ttyl;
+    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
+  }
+  // Don't assert holding the ttyLock
+  assert(false, err_msg("DEBUG MESSAGE: %s", msg));
+  // Restore the thread state that was current on entry.
+  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
+}
+
+void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) { // Prints eip, the eight register values, the words at rsp and a disassembly around eip to tty.
+ ttyLocker ttyl;
+ FlagSetting fs(Debugging, true); // Debugging is set to true while fs is in scope
+ tty->print_cr("eip = 0x%08x", eip);
+#ifndef PRODUCT
+ if ((WizardMode || Verbose) && PrintMiscellaneous) {
+ tty->cr();
+ findpc(eip);
+ tty->cr();
+ }
+#endif
+#define PRINT_REG(rax) \
+ { tty->print("%s = ", #rax); os::print_location(tty, rax); }
+ PRINT_REG(rax);
+ PRINT_REG(rbx);
+ PRINT_REG(rcx);
+ PRINT_REG(rdx);
+ PRINT_REG(rdi);
+ PRINT_REG(rsi);
+ PRINT_REG(rbp);
+ PRINT_REG(rsp);
+#undef PRINT_REG
+ // Print some words near top of stack.
+ int* dump_sp = (int*) rsp;
+ for (int col1 = 0; col1 < 8; col1++) { // first 8 words: one per line, passed through os::print_location
+ tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
+ os::print_location(tty, *dump_sp++);
+ }
+ for (int row = 0; row < 16; row++) { // next 16 rows: raw hex dump, 8 words per row
+ tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
+ for (int col = 0; col < 8; col++) {
+ tty->print(" 0x%08x", *dump_sp++);
+ }
+ tty->cr();
+ }
+ // Print some instructions around pc:
+ Disassembler::decode((address)eip-64, (address)eip);
+ tty->print_cr("--------");
+ Disassembler::decode((address)eip, (address)eip+32);
+}
+
+void MacroAssembler::stop(const char* msg) { // Emits code that pushes msg, eip and all registers, calls debug32 and then halts.
+ ExternalAddress message((address)msg);
+ // push address of message
+ pushptr(message.addr());
+ { Label L; call(L, relocInfo::none); bind(L); } // push eip
+ pusha(); // push registers
+ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); // the pushes above line up with debug32's parameter list
+ hlt();
+}
+
+void MacroAssembler::warn(const char* msg) { // Emits a call to warning(msg) bracketed by push_CPU_state / pop_CPU_state.
+ push_CPU_state();
+
+ ExternalAddress message((address) msg);
+ // push address of message
+ pushptr(message.addr());
+
+ call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
+ addl(rsp, wordSize); // discard argument
+ pop_CPU_state();
+}
+
+void MacroAssembler::print_state() { // Emits code that calls print_state32 with the current eip and registers, then restores them.
+ { Label L; call(L, relocInfo::none); bind(L); } // push eip
+ pusha(); // push registers
+
+ push_CPU_state();
+ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)));
+ pop_CPU_state();
+
+ popa();
+ addl(rsp, wordSize); // drop the eip word pushed at the top
+}
+
+#else // _LP64
+
+// 64 bit versions
+
+Address MacroAssembler::as_Address(AddressLiteral adr) { // Converts a reachable rval literal into an Address whose displacement is target - pc().
+ // amd64 always does this as a pc-rel
+ // we can be absolute or disp based on the instruction type
+ // jmp/call are displacements others are absolute
+ assert(!adr.is_lval(), "must be rval");
+ assert(reachable(adr), "must be");
+ return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
+
+}
+
+// Converts an ArrayAddress into a plain Address. The array base is loaded
+// into rscratch1 (which is therefore clobbered) and combined with the index,
+// scale and displacement of the ArrayAddress' index part.
+Address MacroAssembler::as_Address(ArrayAddress adr) {
+  AddressLiteral array_base = adr.base();
+  lea(rscratch1, array_base);
+  Address idx = adr.index();
+  assert(idx._disp == 0, "must not have disp"); // maybe it can?
+  return Address(rscratch1, idx._index, idx._scale, idx._disp);
+}
+
+// Emits the 64-bit biased-locking fast path for acquiring the lock of the
+// object in obj_reg.
+//
+//   lock_reg   - must differ from the other registers (asserted)
+//   obj_reg    - the object being locked
+//   swap_reg   - must be rax; holds the mark word and is the implicit
+//                compare operand of cmpxchgq
+//   tmp_reg    - scratch register, must be supplied
+//   swap_reg_contains_mark - when false the mark word is loaded from the
+//                object here and the code offset of that load is returned
+//   done       - bound by the caller; jumped to once the bias is held
+//   slow_case  - optional; jumped to when a bias-acquiring CAS fails
+//   counters   - optional statistics; defaults to BiasedLocking::counters()
+//                when PrintBiasedLockingStatistics is set
+//
+// Returns the offset of the mark-word load, or -1 when no load was emitted.
+// Control falls out of the emitted code (at cas_label) when the object is not
+// biasable or its bias has just been revoked.
+int MacroAssembler::biased_locking_enter(Register lock_reg,
+                                         Register obj_reg,
+                                         Register swap_reg,
+                                         Register tmp_reg,
+                                         bool swap_reg_contains_mark,
+                                         Label& done,
+                                         Label* slow_case,
+                                         BiasedLockingCounters* counters) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
+  assert(tmp_reg != noreg, "tmp_reg must be supplied");
+  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
+  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
+  Address saved_mark_addr(lock_reg, 0);
+
+  if (PrintBiasedLockingStatistics && counters == NULL)
+    counters = BiasedLocking::counters();
+
+  // Biased locking
+  // See whether the lock is currently biased toward our thread and
+  // whether the epoch is still valid
+  // Note that the runtime guarantees sufficient alignment of JavaThread
+  // pointers to allow age to be placed into low bits
+  // First check to see whether biasing is even enabled for this object
+  Label cas_label;
+  int null_check_offset = -1;
+  if (!swap_reg_contains_mark) {
+    null_check_offset = offset();
+    movq(swap_reg, mark_addr);
+  }
+  movq(tmp_reg, swap_reg);
+  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
+  jcc(Assembler::notEqual, cas_label);
+  // The bias pattern is present in the object's header. Need to check
+  // whether the bias owner and the epoch are both still current.
+  load_prototype_header(tmp_reg, obj_reg);
+  orq(tmp_reg, r15_thread);
+  xorq(tmp_reg, swap_reg);
+  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
+  if (counters != NULL) {
+    // The object is already biased toward this thread in the current epoch,
+    // so this is the plain biased-lock entry counter. The anonymously-biased
+    // counter belongs to the anonymous-bias CAS further down.
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->biased_lock_entry_count_addr()));
+  }
+  jcc(Assembler::equal, done);
+
+  Label try_revoke_bias;
+  Label try_rebias;
+
+  // At this point we know that the header has the bias pattern and
+  // that we are not the bias owner in the current epoch. We need to
+  // figure out more details about the state of the header in order to
+  // know what operations can be legally performed on the object's
+  // header.
+
+  // If the low three bits in the xor result aren't clear, that means
+  // the prototype header is no longer biased and we have to revoke
+  // the bias on this object.
+  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+  jcc(Assembler::notZero, try_revoke_bias);
+
+  // Biasing is still enabled for this data type. See whether the
+  // epoch of the current bias is still valid, meaning that the epoch
+  // bits of the mark word are equal to the epoch bits of the
+  // prototype header. (Note that the prototype header's epoch bits
+  // only change at a safepoint.) If not, attempt to rebias the object
+  // toward the current thread. Note that we must be absolutely sure
+  // that the current epoch is invalid in order to do this because
+  // otherwise the manipulations it performs on the mark word are
+  // illegal.
+  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
+  jcc(Assembler::notZero, try_rebias);
+
+  // The epoch of the current bias is still valid but we know nothing
+  // about the owner; it might be set or it might be clear. Try to
+  // acquire the bias of the object using an atomic operation. If this
+  // fails we will go in to the runtime to revoke the object's bias.
+  // Note that we first construct the presumed unbiased header so we
+  // don't accidentally blow away another thread's valid bias.
+  andq(swap_reg,
+       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+  movq(tmp_reg, swap_reg);
+  orq(tmp_reg, r15_thread);
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgq(tmp_reg, Address(obj_reg, 0));
+  // If the biasing toward our thread failed, this means that
+  // another thread succeeded in biasing it toward itself and we
+  // need to revoke that bias. The revocation will occur in the
+  // interpreter runtime in the slow case.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
+  }
+  if (slow_case != NULL) {
+    jcc(Assembler::notZero, *slow_case);
+  }
+  jmp(done);
+
+  bind(try_rebias);
+  // At this point we know the epoch has expired, meaning that the
+  // current "bias owner", if any, is actually invalid. Under these
+  // circumstances _only_, we are allowed to use the current header's
+  // value as the comparison value when doing the cas to acquire the
+  // bias in the current epoch. In other words, we allow transfer of
+  // the bias from one thread to another directly in this situation.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  load_prototype_header(tmp_reg, obj_reg);
+  orq(tmp_reg, r15_thread);
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgq(tmp_reg, Address(obj_reg, 0));
+  // If the biasing toward our thread failed, then another thread
+  // succeeded in biasing it toward itself and we need to revoke that
+  // bias. The revocation will occur in the runtime in the slow case.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
+  }
+  if (slow_case != NULL) {
+    jcc(Assembler::notZero, *slow_case);
+  }
+  jmp(done);
+
+  bind(try_revoke_bias);
+  // The prototype mark in the klass doesn't have the bias bit set any
+  // more, indicating that objects of this data type are not supposed
+  // to be biased any more. We are going to try to reset the mark of
+  // this object to the prototype value and fall through to the
+  // CAS-based locking scheme. Note that if our CAS fails, it means
+  // that another thread raced us for the privilege of revoking the
+  // bias of this particular object, so it's okay to continue in the
+  // normal locking code.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  load_prototype_header(tmp_reg, obj_reg);
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgq(tmp_reg, Address(obj_reg, 0));
+  // Fall through to the normal CAS-based lock, because no matter what
+  // the result of the above CAS, some thread must have succeeded in
+  // removing the bias bit from the object's header.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
+  }
+
+  bind(cas_label);
+
+  return null_check_offset;
+}
+
+void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { // 64-bit leaf call; emits one call on an aligned path and one on a path that first subtracts 8 from rsp.
+ Label L, E;
+
+#ifdef _WIN64
+ // Windows always allocates space for its register args
+ assert(num_args <= 4, "only register arguments supported");
+ subq(rsp, frame::arg_reg_save_area_bytes);
+#endif
+
+ // Align stack if necessary
+ testl(rsp, 15); // zero => rsp is already 16-byte aligned
+ jcc(Assembler::zero, L);
+
+ subq(rsp, 8); // misaligned path: adjust rsp by 8 around the call
+ {
+ call(RuntimeAddress(entry_point));
+ }
+ addq(rsp, 8);
+ jmp(E);
+
+ bind(L); // aligned path: call directly
+ {
+ call(RuntimeAddress(entry_point));
+ }
+
+ bind(E);
+
+#ifdef _WIN64
+ // restore stack pointer
+ addq(rsp, frame::arg_reg_save_area_bytes);
+#endif
+
+}
+
+// Compares src1 with the 64-bit word stored at the literal address src2.
+// When the literal is not reachable its address is first loaded into
+// rscratch1, which is clobbered in that case.
+void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
+  assert(!src2.is_lval(), "should use cmpptr");
+
+  if (!reachable(src2)) {
+    lea(rscratch1, src2);
+    Assembler::cmpq(src1, Address(rscratch1, 0));
+    return;
+  }
+  cmpq(src1, as_Address(src2));
+}
+
+int MacroAssembler::corrected_idivq(Register reg) { // Emits a 64-bit signed divide that skips the idivq for min_long / -1.
+ // Full implementation of Java ldiv and lrem; checks for special
+ // case as described in JVM spec., p.243 & p.271. The function
+ // returns the (pc) offset of the idivq instruction - may be needed
+ // for implicit exceptions.
+ //
+ // normal case special case
+ //
+ // input : rax: dividend min_long
+ // reg: divisor (may not be rax/rdx) -1
+ //
+ // output: rax: quotient (= rax idiv reg) min_long
+ // rdx: remainder (= rax irem reg) 0
+ assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
+ static const int64_t min_long = 0x8000000000000000;
+ Label normal_case, special_case;
+
+ // check for special case
+ cmp64(rax, ExternalAddress((address) &min_long)); // compares rax with the constant through its address
+ jcc(Assembler::notEqual, normal_case);
+ xorl(rdx, rdx); // prepare rdx for possible special case (where
+ // remainder = 0)
+ cmpq(reg, -1);
+ jcc(Assembler::equal, special_case);
+
+ // handle normal case
+ bind(normal_case);
+ cdqq();
+ int idivq_offset = offset(); // recorded immediately before the idivq is emitted
+ idivq(reg);
+
+ // normal and special case exit
+ bind(special_case);
+
+ return idivq_offset;
+}
+
+// Subtracts the constant value from reg, picking the emitted form by value:
+// negative values are forwarded to incrementq, 0 emits nothing, 1 uses decq
+// when UseIncDec is set, everything else uses subq.
+void MacroAssembler::decrementq(Register reg, int value) {
+  if (value == min_jint) {
+    // min_jint cannot be negated, so it is subtracted directly
+    subq(reg, value);
+  } else if (value < 0) {
+    incrementq(reg, -value);
+  } else if (value == 0) {
+    // nothing to emit
+  } else if (value == 1 && UseIncDec) {
+    decq(reg);
+  } else {
+    subq(reg, value);
+  }
+}
+
+// Memory-operand flavour with the same case analysis.
+void MacroAssembler::decrementq(Address dst, int value) {
+  if (value == min_jint) {
+    // min_jint cannot be negated, so it is subtracted directly
+    subq(dst, value);
+  } else if (value < 0) {
+    incrementq(dst, -value);
+  } else if (value == 0) {
+    // nothing to emit
+  } else if (value == 1 && UseIncDec) {
+    decq(dst);
+  } else {
+    subq(dst, value);
+  }
+}
+
+// Adds the constant value to reg, picking the emitted form by value:
+// negative values are forwarded to decrementq, 0 emits nothing, 1 uses incq
+// when UseIncDec is set, everything else uses addq.
+void MacroAssembler::incrementq(Register reg, int value) {
+  if (value == min_jint) {
+    // min_jint cannot be negated, so it is added directly
+    addq(reg, value);
+  } else if (value < 0) {
+    decrementq(reg, -value);
+  } else if (value == 0) {
+    // nothing to emit
+  } else if (value == 1 && UseIncDec) {
+    incq(reg);
+  } else {
+    addq(reg, value);
+  }
+}
+
+// Memory-operand flavour with the same case analysis.
+void MacroAssembler::incrementq(Address dst, int value) {
+  if (value == min_jint) {
+    // min_jint cannot be negated, so it is added directly
+    addq(dst, value);
+  } else if (value < 0) {
+    decrementq(dst, -value);
+  } else if (value == 0) {
+    // nothing to emit
+  } else if (value == 1 && UseIncDec) {
+    incq(dst);
+  } else {
+    addq(dst, value);
+  }
+}
+
+// 32bit can do a case table jump in one instruction but we no longer allow the base
+// to be installed in the Address class
+void MacroAssembler::jump(ArrayAddress entry) { // 64-bit: loads the table base into rscratch1 (clobbered) and jumps through base + index.
+ lea(rscratch1, entry.base());
+ Address dispatch = entry.index();
+ assert(dispatch._base == noreg, "must be"); // the index part must not bring its own base register
+ dispatch._base = rscratch1;
+ jmp(dispatch);
+}
+
+void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { // Not expected to be called on 64-bit; guarded by ShouldNotReachHere().
+ ShouldNotReachHere(); // 64bit doesn't use two regs
+ cmpq(x_lo, y_lo);
+}
+
+// Loads the target address of src into dst as a 64-bit literal that keeps the
+// relocation spec of src.
+void MacroAssembler::lea(Register dst, AddressLiteral src) {
+  const intptr_t target_bits = (intptr_t)src.target();
+  mov_literal64(dst, target_bits, src.rspec());
+}
+
+// Stores the target address of adr into the memory operand dst, going through
+// rscratch1 (clobbered).
+void MacroAssembler::lea(Address dst, AddressLiteral adr) {
+  const intptr_t target_bits = (intptr_t)adr.target();
+  mov_literal64(rscratch1, target_bits, adr.rspec());
+  movptr(dst, rscratch1);
+}
+
+void MacroAssembler::leave() { // 64-bit frame teardown as the single-byte LEAVE opcode.
+ // %%% is this really better? Why not on 32bit too?
+ emit_byte(0xC9); // LEAVE
+}
+
+void MacroAssembler::lneg(Register hi, Register lo) { // Not expected to be called on 64-bit; guarded by ShouldNotReachHere().
+ ShouldNotReachHere(); // 64bit doesn't use two regs
+ negq(lo);
+}
+
+// Loads the jobject handle value into dst as a 64-bit immediate that carries
+// an oop relocation.
+void MacroAssembler::movoop(Register dst, jobject obj) {
+  const intptr_t handle_bits = (intptr_t)obj;
+  mov_literal64(dst, handle_bits, oop_Relocation::spec_for_immediate());
+}
+
+// Memory flavour: the relocated immediate is loaded into rscratch1
+// (clobbered) and stored from there.
+void MacroAssembler::movoop(Address dst, jobject obj) {
+  const intptr_t handle_bits = (intptr_t)obj;
+  mov_literal64(rscratch1, handle_bits, oop_Relocation::spec_for_immediate());
+  movq(dst, rscratch1);
+}
+
+// Loads the Metadata pointer value into dst as a 64-bit immediate that
+// carries a metadata relocation.
+void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
+  const intptr_t metadata_bits = (intptr_t)obj;
+  mov_literal64(dst, metadata_bits, metadata_Relocation::spec_for_immediate());
+}
+
+// Memory flavour: the relocated immediate is loaded into rscratch1
+// (clobbered) and stored from there.
+void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
+  const intptr_t metadata_bits = (intptr_t)obj;
+  mov_literal64(rscratch1, metadata_bits, metadata_Relocation::spec_for_immediate());
+  movq(dst, rscratch1);
+}
+
+// Pointer-sized move from an AddressLiteral (64-bit build): an lval literal
+// yields the address itself, anything else yields the word stored there.
+// An unreachable rval goes through rscratch1, which is then clobbered.
+void MacroAssembler::movptr(Register dst, AddressLiteral src) {
+  if (src.is_lval()) {
+    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
+    return;
+  }
+  if (reachable(src)) {
+    movq(dst, as_Address(src));
+    return;
+  }
+  lea(rscratch1, src);
+  movq(dst, Address(rscratch1,0));
+}
+
+// Stores src into the array element described by dst.
+void MacroAssembler::movptr(ArrayAddress dst, Register src) {
+  Address element = as_Address(dst);
+  movq(element, src);
+}
+
+// Loads the array element described by src into dst.
+void MacroAssembler::movptr(Register dst, ArrayAddress src) {
+  Address element = as_Address(src);
+  movq(dst, element);
+}
+
+// src should NEVER be a real pointer. Use AddressLiteral for true pointers
+void MacroAssembler::movptr(Address dst, intptr_t src) { // Stores a 64-bit constant to memory via rscratch1 (clobbered).
+ mov64(rscratch1, src);
+ movq(dst, rscratch1);
+}
+
+// These are mostly for initializing NULL
+void MacroAssembler::movptr(Address dst, int32_t src) { // Stores a 32-bit immediate to memory with movslq.
+ movslq(dst, src);
+}
+
+void MacroAssembler::movptr(Register dst, int32_t src) { // Loads src, widened to intptr_t, into dst.
+ mov64(dst, (intptr_t)src);
+}
+
+void MacroAssembler::pushoop(jobject obj) { // 64-bit: materializes the oop in rscratch1 (clobbered) and pushes it.
+ movoop(rscratch1, obj);
+ push(rscratch1);
+}
+
+void MacroAssembler::pushklass(Metadata* obj) { // 64-bit: materializes the metadata pointer in rscratch1 (clobbered) and pushes it.
+ mov_metadata(rscratch1, obj);
+ push(rscratch1);
+}
+
+// Pushes either the address denoted by src (lval) or the word stored at that
+// address (rval). The address is always loaded into rscratch1 first, so that
+// register is clobbered.
+void MacroAssembler::pushptr(AddressLiteral src) {
+  lea(rscratch1, src);
+  if (!src.is_lval()) {
+    pushq(Address(rscratch1, 0));
+    return;
+  }
+  push(rscratch1);
+}
+
+// Clears the last-Java-frame fields of the thread in r15_thread: sp always,
+// fp and pc only when requested.
+void MacroAssembler::reset_last_Java_frame(bool clear_fp,
+                                           bool clear_pc) {
+  // we must set sp to zero to clear frame
+  const Address sp_slot(r15_thread, JavaThread::last_Java_sp_offset());
+  movptr(sp_slot, NULL_WORD);
+
+  // must clear fp, so that compiled frames are not confused; it is
+  // possible that we need it only for debugging
+  if (clear_fp) {
+    const Address fp_slot(r15_thread, JavaThread::last_Java_fp_offset());
+    movptr(fp_slot, NULL_WORD);
+  }
+
+  if (clear_pc) {
+    const Address pc_slot(r15_thread, JavaThread::last_Java_pc_offset());
+    movptr(pc_slot, NULL_WORD);
+  }
+}
+
+void MacroAssembler::set_last_Java_frame(Register last_java_sp, // Records sp (and optionally fp and pc) of the last Java frame in the thread in r15_thread.
+ Register last_java_fp,
+ address last_java_pc) {
+ // determine last_java_sp register
+ if (!last_java_sp->is_valid()) {
+ last_java_sp = rsp; // default to the current stack pointer
+ }
+
+ // last_java_fp is optional
+ if (last_java_fp->is_valid()) {
+ movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
+ last_java_fp);
+ }
+
+ // last_java_pc is optional
+ if (last_java_pc != NULL) {
+ Address java_pc(r15_thread,
+ JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
+ lea(rscratch1, InternalAddress(last_java_pc)); // clobbers rscratch1
+ movptr(java_pc, rscratch1);
+ }
+
+ movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); // sp is stored last, after the optional fp and pc stores
+}
+
+// 64-bit: arguments travel in the c_rargN registers, so each helper emits a
+// register move unless the value already sits in the right register.
+static void pass_arg0(MacroAssembler* masm, Register arg) {
+  if (c_rarg0 == arg) return;  // already in place
+  masm->mov(c_rarg0, arg);
+}
+
+static void pass_arg1(MacroAssembler* masm, Register arg) {
+  if (c_rarg1 == arg) return;  // already in place
+  masm->mov(c_rarg1, arg);
+}
+
+static void pass_arg2(MacroAssembler* masm, Register arg) {
+  if (c_rarg2 == arg) return;  // already in place
+  masm->mov(c_rarg2, arg);
+}
+
+static void pass_arg3(MacroAssembler* masm, Register arg) {
+  if (c_rarg3 == arg) return;  // already in place
+  masm->mov(c_rarg3, arg);
+}
+
+void MacroAssembler::stop(const char* msg) { // Emits code that saves all registers, calls debug64(msg, pc, regs) and then halts.
+ address rip = pc(); // code address at which the stop sequence starts
+ pusha(); // get regs on stack
+ lea(c_rarg0, ExternalAddress((address) msg));
+ lea(c_rarg1, InternalAddress(rip));
+ movq(c_rarg2, rsp); // pass pointer to regs array
+ andq(rsp, -16); // align stack as required by ABI
+ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
+ hlt();
+}
+
+void MacroAssembler::warn(const char* msg) { // Emits a call to warning(msg) inside a temporary rbp frame, bracketed by push_CPU_state / pop_CPU_state.
+ push(rbp);
+ movq(rbp, rsp); // remember the unaligned rsp so it can be restored below
+ andq(rsp, -16); // align stack as required by push_CPU_state and call
+ push_CPU_state(); // keeps alignment at 16 bytes
+ lea(c_rarg0, ExternalAddress((address) msg));
+ call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
+ pop_CPU_state();
+ mov(rsp, rbp);
+ pop(rbp);
+}
+
+void MacroAssembler::print_state() { // Emits code that calls print_state64(pc, regs) and afterwards restores the registers.
+ address rip = pc(); // code address at which the sequence starts
+ pusha(); // get regs on stack
+ push(rbp);
+ movq(rbp, rsp); // remember the unaligned rsp so it can be restored below
+ andq(rsp, -16); // align stack as required by push_CPU_state and call
+ push_CPU_state(); // keeps alignment at 16 bytes
+
+ lea(c_rarg0, InternalAddress(rip));
+ lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
+ call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);
+
+ pop_CPU_state();
+ mov(rsp, rbp);
+ pop(rbp);
+ popa();
+}
+
+#ifndef PRODUCT
+extern "C" void findpc(intptr_t x);
+#endif
+
+void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { // Runtime target of the 64-bit stop() stub: message, stop pc and pointer to the saved registers.
+ // In order to get locks to work, we need to fake a in_VM state
+ if (ShowMessageBoxOnError) {
+ JavaThread* thread = JavaThread::current();
+ JavaThreadState saved_state = thread->thread_state();
+ thread->set_thread_state(_thread_in_vm);
+#ifndef PRODUCT
+ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+ ttyLocker ttyl;
+ BytecodeCounter::print();
+ }
+#endif
+ // To see where a verify_oop failed, get $ebx+40/X for this frame.
+ // XXX correct this offset for amd64
+ // This is the value of eip which points to where verify_oop will return.
+ if (os::message_box(msg, "Execution stopped, print registers?")) {
+ print_state64(pc, regs);
+ BREAKPOINT;
+ assert(false, "start up GDB");
+ }
+ ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); // back to the thread state saved on entry
+ } else {
+ ttyLocker ttyl;
+ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
+ msg);
+ assert(false, err_msg("DEBUG MESSAGE: %s", msg));
+ }
+}
+
+void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) { // Prints rip, the 16 saved registers, the words at the saved rsp and a disassembly around pc to tty.
+ ttyLocker ttyl;
+ FlagSetting fs(Debugging, true); // Debugging is set to true while fs is in scope
+ tty->print_cr("rip = 0x%016lx", pc);
+#ifndef PRODUCT
+ tty->cr();
+ findpc(pc);
+ tty->cr();
+#endif
+#define PRINT_REG(rax, value) \
+ { tty->print("%s = ", #rax); os::print_location(tty, value); }
+ PRINT_REG(rax, regs[15]); // regs[] is indexed from r15 at [0] up to rax at [15]
+ PRINT_REG(rbx, regs[12]);
+ PRINT_REG(rcx, regs[14]);
+ PRINT_REG(rdx, regs[13]);
+ PRINT_REG(rdi, regs[8]);
+ PRINT_REG(rsi, regs[9]);
+ PRINT_REG(rbp, regs[10]);
+ PRINT_REG(rsp, regs[11]);
+ PRINT_REG(r8 , regs[7]);
+ PRINT_REG(r9 , regs[6]);
+ PRINT_REG(r10, regs[5]);
+ PRINT_REG(r11, regs[4]);
+ PRINT_REG(r12, regs[3]);
+ PRINT_REG(r13, regs[2]);
+ PRINT_REG(r14, regs[1]);
+ PRINT_REG(r15, regs[0]);
+#undef PRINT_REG
+ // Print some words near top of stack.
+ int64_t* rsp = (int64_t*) regs[11];
+ int64_t* dump_sp = rsp;
+ for (int col1 = 0; col1 < 8; col1++) { // first 8 words: one per line, passed through os::print_location
+ tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
+ os::print_location(tty, *dump_sp++);
+ }
+ for (int row = 0; row < 25; row++) { // next 25 rows: raw hex dump, 4 words per row
+ tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
+ for (int col = 0; col < 4; col++) {
+ tty->print(" 0x%016lx", *dump_sp++);
+ }
+ tty->cr();
+ }
+ // Print some instructions around pc:
+ Disassembler::decode((address)pc-64, (address)pc);
+ tty->print_cr("--------");
+ Disassembler::decode((address)pc, (address)pc+32);
+}
+
+#endif // _LP64
+
+// Now versions that are common to 32/64 bit
+
+void MacroAssembler::addptr(Register dst, int32_t imm32) {
+ LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
+}
+
+void MacroAssembler::addptr(Register dst, Register src) {
+ LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
+}
+
+void MacroAssembler::addptr(Address dst, Register src) {
+ LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
+}
+
+// addsd with a literal-address operand. When the literal is not reachable
+// the address is first loaded into rscratch1 (which is clobbered).
+void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
+  if (!reachable(src)) {
+    lea(rscratch1, src);
+    Assembler::addsd(dst, Address(rscratch1, 0));
+    return;
+  }
+  Assembler::addsd(dst, as_Address(src));
+}
+
+// addss with a literal-address operand. When the literal is not reachable
+// the address is first loaded into rscratch1 (which is clobbered).
+void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
+  if (!reachable(src)) {
+    lea(rscratch1, src);
+    addss(dst, Address(rscratch1, 0));
+    return;
+  }
+  addss(dst, as_Address(src));
+}
+
+// Pads with nops until the current code offset is a multiple of modulus.
+void MacroAssembler::align(int modulus) {
+  const int misalignment = offset() % modulus;
+  if (misalignment == 0) {
+    return;  // already aligned, nothing to emit
+  }
+  nop(modulus - misalignment);
+}
+
+// andpd with a literal-address operand (used in sign-masking with an aligned
+// address). When the literal is not reachable it is loaded into rscratch1.
+void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (!reachable(src)) {
+    lea(rscratch1, src);
+    Assembler::andpd(dst, Address(rscratch1, 0));
+    return;
+  }
+  Assembler::andpd(dst, as_Address(src));
+}
+
+// andps with a literal-address operand (used in sign-masking with an aligned
+// address). When the literal is not reachable it is loaded into rscratch1.
+void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (!reachable(src)) {
+    lea(rscratch1, src);
+    Assembler::andps(dst, Address(rscratch1, 0));
+    return;
+  }
+  Assembler::andps(dst, as_Address(src));
+}
+
+void MacroAssembler::andptr(Register dst, int32_t imm32) {
+ LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
+}
+
+// Increments the 32-bit counter at counter_addr, with a lock prefix on MP
+// systems, while preserving the caller's flags (pushf/popf).
+//
+// The address is resolved here rather than inside incrementl(AddressLiteral):
+// an unreachable literal has to be materialized with a lea first, and the
+// lock prefix must sit directly in front of the increment instruction, not in
+// front of that lea. This mirrors locked_cmpxchgptr. rscratch1 is clobbered
+// when the literal is not reachable.
+void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
+  pushf();
+  if (reachable(counter_addr)) {
+    if (os::is_MP())
+      lock();
+    incrementl(as_Address(counter_addr));
+  } else {
+    lea(rscratch1, counter_addr);
+    if (os::is_MP())
+      lock();
+    incrementl(Address(rscratch1, 0));
+  }
+  popf();
+}
+
+// Writes to stack successive pages until offset reached to check for
+// stack overflow + shadow pages. This clobbers tmp (and size, which is
+// counted down by the loop).
+void MacroAssembler::bang_stack_size(Register size, Register tmp) {
+  movptr(tmp, rsp);
+  // Bang stack for total size given plus shadow page size.
+  // Bang one page at a time because large size can bang beyond yellow and
+  // red zones.
+  Label loop;
+  bind(loop);
+  movl(Address(tmp, (-os::vm_page_size())), size );
+  subptr(tmp, os::vm_page_size());
+  subl(size, os::vm_page_size());
+  jcc(Assembler::greater, loop);
+
+  // Bang down shadow pages too.
+  // At this point (tmp-0) is the last address touched: it was written as
+  // (tmp-pagesize) and tmp was decremented afterwards. Start at i=1 so that
+  // page is not touched a second time and the remaining StackShadowPages-1
+  // pages below it are all banged.
+  for (int i = 1; i < StackShadowPages; i++) {
+    // this could be any sized move but this can be a debugging crumb
+    // so the bigger the better.
+    movptr(Address(tmp, (-i*os::vm_page_size())), size );
+  }
+}
+
+void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { // Jumps to done if obj_reg's mark word carries the biased-lock pattern; clobbers temp_reg.
+ assert(UseBiasedLocking, "why call this otherwise?");
+
+ // Check for biased locking unlock case, which is a no-op
+ // Note: we do not have to check the thread ID for two reasons.
+ // First, the interpreter checks for IllegalMonitorStateException at
+ // a higher level. Second, if the bias was revoked while we held the
+ // lock, the object could not be rebiased toward another thread, so
+ // the bias bit would be clear.
+ movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); // temp_reg = the object's mark word
+ andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); // keep only the biased-lock bits
+ cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
+ jcc(Assembler::equal, done); // biased: nothing to unlock; otherwise fall through into the caller's unlock code
+}
+
+void MacroAssembler::c2bool(Register x) { // Normalizes the C-style boolean in x to exactly 0 or 1.
+ // implements x == 0 ? 0 : 1
+ // note: must only look at least-significant byte of x
+ // since C-style booleans are stored in one byte
+ // only! (was bug)
+ andl(x, 0xFF); // isolate the low byte; the resulting zero flag feeds the setb below
+ setb(Assembler::notZero, x);
+}
+
+// Plain forward to Assembler::call(Label&); wouldn't be needed if the AddressLiteral version had a different name.
+void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
+ Assembler::call(L, rtype);
+}
+
+void MacroAssembler::call(Register entry) { // Plain forward to Assembler::call(Register): indirect call through entry.
+ Assembler::call(entry);
+}
+
+// Calls a literal address. A reachable target is called directly; otherwise
+// the target is loaded into rscratch1 and called indirectly.
+void MacroAssembler::call(AddressLiteral entry) {
+  if (!reachable(entry)) {
+    lea(rscratch1, entry);
+    Assembler::call(rscratch1);
+    return;
+  }
+  Assembler::call_literal(entry.target(), entry.rspec());
+}
+
+void MacroAssembler::ic_call(address entry) { // Emits a call to entry tagged with a virtual-call relocation; rax is preloaded with the non-oop word.
+ RelocationHolder rh = virtual_call_Relocation::spec(pc()); // pc() is taken before the movptr, so the relocation refers to that instruction's address
+ movptr(rax, (intptr_t)Universe::non_oop_word());
+ call(AddressLiteral(entry, rh));
+}
+
+// Implementation of call_VM versions
+
+// call_VM with no register arguments. The real work happens in a small
+// out-of-line stub reached through an intermediate call, so a return address
+// sits on the stack for call_VM_helper's last_Java_sp computation.
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             bool check_exceptions) {
+  Label stub, resume;
+  call(stub, relocInfo::none);
+  jmp(resume);
+
+  bind(stub);
+  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
+  ret(0);
+
+  bind(resume);
+}
+
+// call_VM with one register argument. The argument is passed inside the
+// out-of-line stub, just before call_VM_helper runs.
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             Register arg_1,
+                             bool check_exceptions) {
+  Label stub, resume;
+  call(stub, relocInfo::none);
+  jmp(resume);
+
+  bind(stub);
+  pass_arg1(this, arg_1);
+  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
+  ret(0);
+
+  bind(resume);
+}
+
+// call_VM with two register arguments. Arguments are passed last-to-first;
+// the assert rejects an arg_1 that lives in c_rarg2 (64-bit only), since
+// passing arg_2 first would smash it.
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             bool check_exceptions) {
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+
+  Label stub, resume;
+  call(stub, relocInfo::none);
+  jmp(resume);
+
+  bind(stub);
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
+  ret(0);
+
+  bind(resume);
+}
+
+// call_VM with three register arguments. Arguments are passed last-to-first;
+// the asserts reject any still-pending argument that lives in a register an
+// earlier pass would smash (64-bit only).
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             Register arg_3,
+                             bool check_exceptions) {
+  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+
+  Label stub, resume;
+  call(stub, relocInfo::none);
+  jmp(resume);
+
+  bind(stub);
+  pass_arg3(this, arg_3);
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
+  ret(0);
+
+  bind(resume);
+}
+
+void MacroAssembler::call_VM(Register oop_result, // call_VM with an explicit last_java_sp and an argument count; arguments must already be in place
+ Register last_java_sp,
+ address entry_point,
+ int number_of_arguments,
+ bool check_exceptions) {
+ Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); // 32-bit passes noreg, which call_VM_base resolves itself
+ call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result, // call_VM with an explicit last_java_sp and one register argument
+ Register last_java_sp,
+ address entry_point,
+ Register arg_1,
+ bool check_exceptions) {
+ pass_arg1(this, arg_1);
+ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); // delegates to the argument-count overload
+}
+
+// call_VM with an explicit last_java_sp and two register arguments.
+// Arguments are passed last-to-first; arg_1 must not live in c_rarg2
+// (64-bit only) or passing arg_2 would smash it.
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             bool check_exceptions) {
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
+}
+
+// call_VM with an explicit last_java_sp and three register arguments.
+// Arguments are passed last-to-first; the asserts reject any still-pending
+// argument that lives in a register an earlier pass would smash (64-bit only).
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             Register arg_3,
+                             bool check_exceptions) {
+  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+
+  pass_arg3(this, arg_3);
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
+}
+
+void MacroAssembler::super_call_VM(Register oop_result, // Same as the matching call_VM overload, but names MacroAssembler::call_VM_base explicitly
+ Register last_java_sp,
+ address entry_point,
+ int number_of_arguments,
+ bool check_exceptions) {
+ Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); // 32-bit passes noreg, which call_VM_base resolves itself
+ MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); // qualified call: always this class's implementation
+}
+
+void MacroAssembler::super_call_VM(Register oop_result, // super_call_VM with one register argument
+ Register last_java_sp,
+ address entry_point,
+ Register arg_1,
+ bool check_exceptions) {
+ pass_arg1(this, arg_1);
+ super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); // delegates to the argument-count overload
+}
+
+// super_call_VM with two register arguments. Arguments are passed
+// last-to-first; arg_1 must not live in c_rarg2 (64-bit only) or passing
+// arg_2 would smash it.
+void MacroAssembler::super_call_VM(Register oop_result,
+                                   Register last_java_sp,
+                                   address entry_point,
+                                   Register arg_1,
+                                   Register arg_2,
+                                   bool check_exceptions) {
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
+}
+
+// super_call_VM with three register arguments. Arguments are passed
+// last-to-first; the asserts reject any still-pending argument that lives in
+// a register an earlier pass would smash (64-bit only).
+void MacroAssembler::super_call_VM(Register oop_result,
+                                   Register last_java_sp,
+                                   address entry_point,
+                                   Register arg_1,
+                                   Register arg_2,
+                                   Register arg_3,
+                                   bool check_exceptions) {
+  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+
+  pass_arg3(this, arg_3);
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
+}
+
+void MacroAssembler::call_VM_base(Register oop_result, // Common core of all call_VM variants: sets the last Java frame, calls entry_point, then handles exceptions and the oop result.
+ Register java_thread, // may be noreg: a thread register is then chosen below
+ Register last_java_sp, // may be noreg: rsp is then used
+ address entry_point,
+ int number_of_arguments,
+ bool check_exceptions) {
+ // determine java_thread register
+ if (!java_thread->is_valid()) {
+#ifdef _LP64
+ java_thread = r15_thread;
+#else
+ java_thread = rdi;
+ get_thread(java_thread);
+#endif // LP64
+ }
+ // determine last_java_sp register
+ if (!last_java_sp->is_valid()) {
+ last_java_sp = rsp;
+ }
+ // debugging support
+ assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
+ LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
+#ifdef ASSERT
+ // TraceBytecodes does not use r12 but saves it over the call, so don't verify
+ // r12 is the heapbase.
+ LP64_ONLY(if ((UseCompressedOops || UseCompressedKlassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)
+#endif // ASSERT
+
+ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
+ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
+
+ // push java thread (becomes first argument of C function)
+
+ NOT_LP64(push(java_thread); number_of_arguments++); // 32-bit: the thread goes on the stack and counts as one more argument
+ LP64_ONLY(mov(c_rarg0, r15_thread)); // 64-bit: the thread goes in the first C argument register
+
+ // set last Java frame before call
+ assert(last_java_sp != rbp, "can't use ebp/rbp");
+
+ // Only interpreter should have to set fp
+ set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);
+
+ // do the call, remove parameters
+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
+
+ // restore the thread (cannot use the pushed argument since arguments
+ // may be overwritten by C code generated by an optimizing compiler);
+ // however can use the register value directly if it is callee saved.
+ if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) { // always taken on 64-bit
+ // rdi & rsi (also r15) are callee saved -> nothing to do
+#ifdef ASSERT
+ guarantee(java_thread != rax, "change this code");
+ push(rax); // rax is used as scratch for the check and restored afterwards
+ { Label L;
+ get_thread(rax);
+ cmpptr(java_thread, rax);
+ jcc(Assembler::equal, L);
+ STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
+ bind(L);
+ }
+ pop(rax);
+#endif
+ } else {
+ get_thread(java_thread);
+ }
+ // reset last Java frame
+ // Only interpreter should have to clear fp
+ reset_last_Java_frame(java_thread, true, false);
+
+#ifndef CC_INTERP
+ // C++ interp handles this in the interpreter
+ check_and_handle_popframe(java_thread);
+ check_and_handle_earlyret(java_thread);
+#endif /* CC_INTERP */
+
+ if (check_exceptions) {
+ // check for pending exceptions (java_thread is set upon return)
+ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
+#ifndef _LP64
+ jump_cc(Assembler::notEqual,
+ RuntimeAddress(StubRoutines::forward_exception_entry()));
+#else
+ // This used to conditionally jump to forward_exception however it is
+ // possible if we relocate that the branch will not reach. So we must jump
+ // around so we can always reach
+
+ Label ok;
+ jcc(Assembler::equal, ok);
+ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+ bind(ok);
+#endif // LP64
+ }
+
+ // get oop result if there is one and reset the value in the thread
+ if (oop_result->is_valid()) {
+ get_vm_result(oop_result, java_thread);
+ }
+}
+
+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { // Computes last_Java_sp into rax (clobbered) and forwards to call_VM_base.
+
+ // Calculate the value for last_Java_sp
+ // somewhat subtle. call_VM does an intermediate call
+ // which places a return address on the stack just under the
+ // stack pointer as the user finished with it. This allows
+ // us to retrieve last_Java_pc from last_Java_sp[-1].
+ // On 32bit we then have to push additional args on the stack to accomplish
+ // the actual requested call. On 64bit call_VM only can use register args
+ // so the only extra space is the return address that call_VM created.
+ // This hopefully explains the calculations here.
+
+#ifdef _LP64
+ // We've pushed one address, correct last_Java_sp
+ lea(rax, Address(rsp, wordSize));
+#else
+ lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); // skip the return address plus the stacked arguments
+#endif // LP64
+
+ call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); // noreg: let call_VM_base pick the thread register
+
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { // Leaf call with the arguments already in place; forwards to call_VM_leaf_base.
+ call_VM_leaf_base(entry_point, number_of_arguments);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { // Leaf call with one register argument.
+ pass_arg0(this, arg_0);
+ call_VM_leaf(entry_point, 1); // delegates to the argument-count overload
+}
+
+// Leaf call with two register arguments. Arguments are passed last-to-first;
+// arg_0 must not live in c_rarg1 (64-bit only) or passing arg_1 would smash it.
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  call_VM_leaf(entry_point, 2);
+}
+
+// Leaf call with three register arguments. Arguments are passed
+// last-to-first; the asserts reject any still-pending argument that lives in
+// a register an earlier pass would smash (64-bit only).
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
+  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  call_VM_leaf(entry_point, 3);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { // Leaf call with one argument that names MacroAssembler::call_VM_leaf_base explicitly.
+ pass_arg0(this, arg_0);
+ MacroAssembler::call_VM_leaf_base(entry_point, 1); // qualified call: always this class's implementation
+}
+
+// super_call_VM_leaf with two register arguments. Arguments are passed
+// last-to-first; arg_0 must not live in c_rarg1 (64-bit only) or passing
+// arg_1 would smash it.
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 2);
+}
+
+// super_call_VM_leaf with three register arguments. Arguments are passed
+// last-to-first; the asserts reject any still-pending argument that lives in
+// a register an earlier pass would smash (64-bit only).
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
+  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 3);
+}
+
+// super_call_VM_leaf with four register arguments. Arguments are passed
+// last-to-first; the asserts reject any still-pending argument that lives in
+// a register an earlier pass would smash (64-bit only).
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
+  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
+  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
+  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+
+  pass_arg3(this, arg_3);
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 4);
+}
+
+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { // Moves the thread's vm_result field into oop_result and clears the field.
+ movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
+ movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD); // reset the field in the thread
+ verify_oop(oop_result, "broken oop in call_VM_base");
+}
+
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { // Moves the thread's vm_result_2 field into metadata_result and clears the field.
+ movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
+ movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD); // reset the field; unlike get_vm_result there is no verify_oop here
+}
+
+void MacroAssembler::check_and_handle_earlyret(Register java_thread) { // Emits nothing here; call_VM_base invokes it unless CC_INTERP is defined. NOTE(review): presumably a hook overridden elsewhere - confirm.
+}
+
+void MacroAssembler::check_and_handle_popframe(Register java_thread) { // Emits nothing here; call_VM_base invokes it unless CC_INTERP is defined. NOTE(review): presumably a hook overridden elsewhere - confirm.
+}
+
+// 32-bit compare of the memory at a literal address with an immediate.
+// When the literal is not reachable it is loaded into rscratch1 first.
+void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
+  if (!reachable(src1)) {
+    lea(rscratch1, src1);
+    cmpl(Address(rscratch1, 0), imm);
+    return;
+  }
+  cmpl(as_Address(src1), imm);
+}
+
+// 32-bit compare of a register with the memory at a literal address.
+// lval literals are rejected (use cmpptr for those). When the literal is
+// not reachable it is loaded into rscratch1 first.
+void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
+  assert(!src2.is_lval(), "use cmpptr");
+  if (!reachable(src2)) {
+    lea(rscratch1, src2);
+    cmpl(src1, Address(rscratch1, 0));
+    return;
+  }
+  cmpl(src1, as_Address(src2));
+}
+
+void MacroAssembler::cmp32(Register src1, int32_t imm) { // Plain forward to Assembler::cmpl(Register, int32_t).
+ Assembler::cmpl(src1, imm);
+}
+
+void MacroAssembler::cmp32(Register src1, Address src2) { // Plain forward to Assembler::cmpl(Register, Address).
+ Assembler::cmpl(src1, src2);
+}
+
+// Compares two doubles with ucomisd and leaves -1, 0 or +1 in dst.
+// An unordered result yields -1 when unordered_is_less is set, +1 otherwise.
+void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
+  ucomisd(opr1, opr2);
+
+  Label done;
+  // Preload the answer shared by "unordered" and the strict comparison on
+  // the same side; movl leaves the flags from ucomisd untouched.
+  movl(dst, unordered_is_less ? -1 : 1);
+  jcc(Assembler::parity, done);
+  jcc(unordered_is_less ? Assembler::below : Assembler::above, done);
+  movl(dst, 0);
+  jcc(Assembler::equal, done);
+  // Only the opposite strict comparison is left.
+  if (unordered_is_less) {
+    increment(dst);
+  } else {
+    decrementl(dst);
+  }
+  bind(done);
+}
+
+// Compares two floats with ucomiss and leaves -1, 0 or +1 in dst.
+// An unordered result yields -1 when unordered_is_less is set, +1 otherwise.
+void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
+  ucomiss(opr1, opr2);
+
+  Label done;
+  // Preload the answer shared by "unordered" and the strict comparison on
+  // the same side; movl leaves the flags from ucomiss untouched.
+  movl(dst, unordered_is_less ? -1 : 1);
+  jcc(Assembler::parity, done);
+  jcc(unordered_is_less ? Assembler::below : Assembler::above, done);
+  movl(dst, 0);
+  jcc(Assembler::equal, done);
+  // Only the opposite strict comparison is left.
+  if (unordered_is_less) {
+    increment(dst);
+  } else {
+    decrementl(dst);
+  }
+  bind(done);
+}
+
+
+// Byte compare of the memory at a literal address with an immediate.
+// When the literal is not reachable it is loaded into rscratch1 first.
+void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
+  if (!reachable(src1)) {
+    lea(rscratch1, src1);
+    cmpb(Address(rscratch1, 0), imm);
+    return;
+  }
+  cmpb(as_Address(src1), imm);
+}
+
+void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { // Pointer-width compare of src1 with a literal: the address itself if src2 is an lval, else the memory it names.
+#ifdef _LP64
+ if (src2.is_lval()) {
+ movptr(rscratch1, src2); // compare against the literal address itself; clobbers rscratch1
+ Assembler::cmpq(src1, rscratch1);
+ } else if (reachable(src2)) {
+ cmpq(src1, as_Address(src2));
+ } else {
+ lea(rscratch1, src2); // not reachable: go through rscratch1
+ Assembler::cmpq(src1, Address(rscratch1, 0));
+ }
+#else
+ if (src2.is_lval()) {
+ cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
+ } else {
+ cmpl(src1, as_Address(src2));
+ }
+#endif // _LP64
+}
+
+void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { // Pointer-width compare of memory at src1 with the literal address src2 (src2 must be an lval).
+ assert(src2.is_lval(), "not a mem-mem compare");
+#ifdef _LP64
+ // moves src2's literal address
+ movptr(rscratch1, src2); // clobbers rscratch1
+ Assembler::cmpq(src1, rscratch1);
+#else
+ cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
+#endif // _LP64
+}
+
+void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { // Pointer-width cmpxchg on a literal address, lock-prefixed on MP systems.
+ if (reachable(adr)) {
+ if (os::is_MP())
+ lock();
+ cmpxchgptr(reg, as_Address(adr));
+ } else {
+ lea(rscratch1, adr); // emitted before lock() so the prefix sits directly in front of the cmpxchg; clobbers rscratch1
+ if (os::is_MP())
+ lock();
+ cmpxchgptr(reg, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
+ LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
+}
+
+// comisd with a literal-address operand. When the literal is not reachable
+// the address is first loaded into rscratch1 (which is clobbered).
+void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
+  if (!reachable(src)) {
+    lea(rscratch1, src);
+    Assembler::comisd(dst, Address(rscratch1, 0));
+    return;
+  }
+  Assembler::comisd(dst, as_Address(src));
+}
+
+// comiss with a literal-address operand. When the literal is not reachable
+// the address is first loaded into rscratch1 (which is clobbered).
+void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
+  if (!reachable(src)) {
+    lea(rscratch1, src);
+    Assembler::comiss(dst, Address(rscratch1, 0));
+    return;
+  }
+  Assembler::comiss(dst, as_Address(src));
+}
+
+
+// Increments the counter at counter_addr (via atomic_incl) only when cond
+// holds: the increment is skipped by branching on the negated condition.
+void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
+  Label skip;
+  jcc(negate_condition(cond), skip);
+  atomic_incl(counter_addr);
+  bind(skip);
+}
+
+int MacroAssembler::corrected_idivl(Register reg) { // Emits a 32-bit signed divide of rax by reg that bypasses idivl for min_int / -1; returns the idivl's code offset.
+ // Full implementation of Java idiv and irem; checks for
+ // special case as described in JVM spec., p.243 & p.271.
+ // The function returns the (pc) offset of the idivl
+ // instruction - may be needed for implicit exceptions.
+ //
+ // normal case special case
+ //
+ // input : rax: dividend min_int
+ // reg: divisor (may not be rax/rdx) -1
+ //
+ // output: rax: quotient (= rax idiv reg) min_int
+ // rdx: remainder (= rax irem reg) 0
+ assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
+ const int min_int = 0x80000000;
+ Label normal_case, special_case;
+
+ // check for special case
+ cmpl(rax, min_int);
+ jcc(Assembler::notEqual, normal_case);
+ xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
+ cmpl(reg, -1);
+ jcc(Assembler::equal, special_case); // min_int / -1: skip the idivl, rax already holds min_int and rdx is 0
+
+ // handle normal case
+ bind(normal_case);
+ cdql(); // sign-extend rax into rdx:rax, as idivl requires
+ int idivl_offset = offset(); // code offset of the idivl emitted next
+ idivl(reg);
+
+ // normal and special case exit
+ bind(special_case);
+
+ return idivl_offset;
+}
+
+
+
+// Subtracts the constant value from the 32-bit register reg, choosing the
+// cheapest encoding: nothing for 0, decl for 1 (when UseIncDec), an increment
+// for negative values, subl otherwise.
+void MacroAssembler::decrementl(Register reg, int value) {
+  if (value == min_jint) {
+    // handled before the negative case: -min_jint is not representable
+    subl(reg, value);
+  } else if (value < 0) {
+    incrementl(reg, -value);
+  } else if (value == 0) {
+    // nothing to emit
+  } else if (value == 1 && UseIncDec) {
+    decl(reg);
+  } else {
+    subl(reg, value);
+  }
+}
+
+// Subtracts the constant value from the 32-bit memory operand dst, choosing
+// the cheapest encoding: nothing for 0, decl for 1 (when UseIncDec), an
+// increment for negative values, subl otherwise.
+void MacroAssembler::decrementl(Address dst, int value) {
+  if (value == min_jint) {
+    // handled before the negative case: -min_jint is not representable
+    subl(dst, value);
+  } else if (value < 0) {
+    incrementl(dst, -value);
+  } else if (value == 0) {
+    // nothing to emit
+  } else if (value == 1 && UseIncDec) {
+    decl(dst);
+  } else {
+    subl(dst, value);
+  }
+}
+
+// Signed division of reg by 2^shift_value using an arithmetic shift.
+// A negative dividend is first biased by 2^shift_value - 1 before the shift;
+// non-negative dividends are shifted directly.
+void MacroAssembler::division_with_shift (Register reg, int shift_value) {
+  assert (shift_value > 0, "illegal shift value");
+  Label non_negative;
+  testl(reg, reg);
+  jcc(Assembler::positive, non_negative);
+
+  const int bias = (1 << shift_value) - 1;
+  if (bias == 1) {
+    incrementl(reg);
+  } else {
+    addl(reg, bias);
+  }
+
+  bind(non_negative);
+  sarl(reg, shift_value);
+}
+
+// divsd with a literal-address operand. When the literal is not reachable
+// the address is first loaded into rscratch1 (which is clobbered).
+void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
+  if (!reachable(src)) {
+    lea(rscratch1, src);
+    Assembler::divsd(dst, Address(rscratch1, 0));
+    return;
+  }
+  Assembler::divsd(dst, as_Address(src));
+}
+
+// divss with a literal-address operand. When the literal is not reachable
+// the address is first loaded into rscratch1 (which is clobbered).
+void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
+  if (!reachable(src)) {
+    lea(rscratch1, src);
+    Assembler::divss(dst, Address(rscratch1, 0));
+    return;
+  }
+  Assembler::divss(dst, as_Address(src));
+}
+
+// !defined(COMPILER2) is because of stupid core builds
+#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
+// Marks all eight x87 stack slots as free: a single emms when MMX is
+// available, otherwise one ffree per slot from slot 7 down to slot 0.
+void MacroAssembler::empty_FPU_stack() {
+  if (VM_Version::supports_mmx()) {
+    emms();
+    return;
+  }
+  for (int slot = 7; slot >= 0; slot--) {
+    ffree(slot);
+  }
+}
+#endif // !LP64 || C1 || !C2
+
+
+// Inline bump-pointer allocation from the shared heap top: defines obj (must be rax), preserves var_size_in_bytes, clobbers t1.
+void MacroAssembler::eden_allocate(Register obj,
+ Register var_size_in_bytes, // size register, or noreg to use con_size_in_bytes instead
+ int con_size_in_bytes,
+ Register t1,
+ Label& slow_case) { // taken whenever the inline path cannot be used or does not fit
+ assert(obj == rax, "obj must be in rax, for cmpxchg");
+ assert_different_registers(obj, var_size_in_bytes, t1);
+ if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+ jmp(slow_case); // no inline allocation in this configuration: always go slow
+ } else {
+ Register end = t1;
+ Label retry;
+ bind(retry);
+ ExternalAddress heap_top((address) Universe::heap()->top_addr());
+ movptr(obj, heap_top); // obj = current heap top
+ if (var_size_in_bytes == noreg) {
+ lea(end, Address(obj, con_size_in_bytes));
+ } else {
+ lea(end, Address(obj, var_size_in_bytes, Address::times_1));
+ }
+ // if end < obj then we wrapped around => object too long => slow case
+ cmpptr(end, obj);
+ jcc(Assembler::below, slow_case);
+ cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
+ jcc(Assembler::above, slow_case); // would run past the heap end
+ // Compare obj with the top addr, and if still equal, store the new top addr in
+ // end at the address of the top addr pointer. Sets ZF if was equal, and clears
+ // it otherwise. Use lock prefix for atomicity on MPs.
+ locked_cmpxchgptr(end, heap_top);
+ jcc(Assembler::notEqual, retry); // the top moved in the meantime: start over
+ }
+}
+
+void MacroAssembler::enter() { // Frame prologue: save the caller's rbp, then make rbp point at the new frame.
+ push(rbp);
+ mov(rbp, rsp);
+}
+
+// A 5 byte nop that is safe for patching (see patch_verified_entry).
+// Uses the address-nop form when UseAddressNop is set, otherwise a plain
+// nop preceded by four segment-override prefixes.
+void MacroAssembler::fat_nop() {
+  if (UseAddressNop) {
+    addr_nop_5();
+    return;
+  }
+  emit_byte(0x26); // es:
+  emit_byte(0x2e); // cs:
+  emit_byte(0x64); // fs:
+  emit_byte(0x65); // gs:
+  emit_byte(0x90); // nop
+}
+
+void MacroAssembler::fcmp(Register tmp) { // Compares ST(0) with ST(1) and pops both; see the four-argument overload.
+ fcmp(tmp, 1, true, true);
+}
+
+void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { // x87 compare of ST(0) with ST(index) into eflags; tmp is needed only when cmov is not supported.
+ assert(!pop_right || pop_left, "usage error");
+ if (VM_Version::supports_cmov()) {
+ assert(tmp == noreg, "unneeded temp");
+ if (pop_left) {
+ fucomip(index);
+ } else {
+ fucomi(index);
+ }
+ if (pop_right) {
+ fpop();
+ }
+ } else {
+ assert(tmp != noreg, "need temp");
+ if (pop_left) {
+ if (pop_right) {
+ fcompp(); // takes no index operand, so index is not used on this path
+ } else {
+ fcomp(index);
+ }
+ } else {
+ fcom(index);
+ }
+ // convert FPU condition into eflags condition via rax
+ save_rax(tmp);
+ fwait(); fnstsw_ax();
+ sahf();
+ restore_rax(tmp);
+ }
+ // condition codes set as follows:
+ //
+ // CF (corresponds to C0) if x < y
+ // PF (corresponds to C2) if unordered
+ // ZF (corresponds to C3) if x = y
+}
+
+void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { // Compares ST(0) with ST(1), pops both, and leaves -1/0/+1 in dst.
+ fcmp2int(dst, unordered_is_less, 1, true, true);
+}
+
+// Compares ST(0) with ST(index) via fcmp and leaves -1, 0 or +1 in dst.
+// An unordered result yields -1 when unordered_is_less is set, +1 otherwise.
+// dst doubles as fcmp's temporary when cmov is not supported.
+void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
+  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
+
+  Label done;
+  // Preload the answer shared by "unordered" and the strict comparison on
+  // the same side; movl leaves the flags from fcmp untouched.
+  movl(dst, unordered_is_less ? -1 : 1);
+  jcc(Assembler::parity, done);
+  jcc(unordered_is_less ? Assembler::below : Assembler::above, done);
+  movl(dst, 0);
+  jcc(Assembler::equal, done);
+  // Only the opposite strict comparison is left.
+  if (unordered_is_less) {
+    increment(dst);
+  } else {
+    decrementl(dst);
+  }
+  bind(done);
+}
+
+void MacroAssembler::fld_d(AddressLiteral src) { // fld_d from a literal address; unlike the SSE helpers in this file there is no unreachable-address fallback.
+ fld_d(as_Address(src));
+}
+
+void MacroAssembler::fld_s(AddressLiteral src) { // fld_s from a literal address; unlike the SSE helpers in this file there is no unreachable-address fallback.
+ fld_s(as_Address(src));
+}
+
+void MacroAssembler::fld_x(AddressLiteral src) { // fld_x from a literal address; unlike the SSE helpers in this file there is no unreachable-address fallback.
+ Assembler::fld_x(as_Address(src));
+}
+
+void MacroAssembler::fldcw(AddressLiteral src) { // fldcw from a literal address; unlike the SSE helpers in this file there is no unreachable-address fallback.
+ Assembler::fldcw(as_Address(src));
+}
+
+void MacroAssembler::pow_exp_core_encoding() { // Replaces X on top of the x87 stack with 2^X, or with NaN when the exponent is out of range.
+ // kills rax, rcx, rdx
+ subptr(rsp,sizeof(jdouble)); // scratch slot for int(X) and later for the constructed double
+ // computes 2^X. Stack: X ...
+ // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
+ // keep it on the thread's stack to compute 2^int(X) later
+ // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1
+ // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
+ fld_s(0); // Stack: X X ...
+ frndint(); // Stack: int(X) X ...
+ fsuba(1); // Stack: int(X) X-int(X) ...
+ fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ...
+ f2xm1(); // Stack: 2^(X-int(X))-1 ...
+ fld1(); // Stack: 1 2^(X-int(X))-1 ...
+ faddp(1); // Stack: 2^(X-int(X))
+ // computes 2^(int(X)): add exponent bias (1023) to int(X), then
+ // shift int(X)+1023 to exponent position.
+ // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11
+ // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
+ // values so detect them and set result to NaN.
+ movl(rax,Address(rsp,0));
+ movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
+ addl(rax, 1023);
+ movl(rdx,rax);
+ shll(rax,20); // exponent field starts at bit 20 of the double's high word
+ // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
+ addl(rdx,1);
+ // Check that 1 < int(X)+1023+1 < 2048
+ // in 3 steps:
+ // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
+ // 2- (int(X)+1023+1) != 0
+ // 3- (int(X)+1023+1) != 1
+ // Do 2- first because addl just updated the flags.
+ cmov32(Assembler::equal,rax,rcx);
+ cmpl(rdx,1);
+ cmov32(Assembler::equal,rax,rcx);
+ testl(rdx,rcx);
+ cmov32(Assembler::notEqual,rax,rcx);
+ movl(Address(rsp,4),rax); // high word: exponent bits (or the NaN pattern)
+ movl(Address(rsp,0),0); // low word: zero
+ fmul_d(Address(rsp,0)); // Stack: 2^X ...
+ addptr(rsp,sizeof(jdouble));
+}
+
+void MacroAssembler::increase_precision() { // Sets bits 0x300 in the x87 control word and leaves the original word on the stack for restore_precision; clobbers rax.
+ subptr(rsp, BytesPerWord); // slot that keeps the original control word
+ fnstcw(Address(rsp, 0));
+ movl(rax, Address(rsp, 0));
+ orl(rax, 0x300); // bits 8-9 of the control word: the precision-control field per the x87 documentation
+ push(rax); // temporary copy of the modified word, only needed as a memory operand for fldcw
+ fldcw(Address(rsp, 0));
+ pop(rax); // rsp is back at the saved original control word
+}
+
+void MacroAssembler::restore_precision() { // Undoes increase_precision: reloads the control word saved at the top of the stack and releases its slot.
+ fldcw(Address(rsp, 0));
+ addptr(rsp, BytesPerWord);
+}
+
+void MacroAssembler::fast_pow() { // Replaces the two operands on top of the x87 stack with X^Y (NaN if the fast path fails).
+ // computes X^Y = 2^(Y * log2(X))
+ // if fast computation is not possible, result is NaN. Requires
+ // fallback from user of this macro.
+ // increase precision for intermediate steps of the computation
+ increase_precision();
+ fyl2x(); // Stack: (Y*log2(X)) ...
+ pow_exp_core_encoding(); // Stack: 2^(Y*log2(X)) = X^Y ...
+ restore_precision();
+}
+
+void MacroAssembler::fast_exp() { // Replaces X on top of the x87 stack with exp(X) (NaN if the fast path fails).
+ // computes exp(X) = 2^(X * log2(e))
+ // if fast computation is not possible, result is NaN. Requires
+ // fallback from user of this macro.
+ // increase precision for intermediate steps of the computation
+ increase_precision();
+ fldl2e(); // Stack: log2(e) X ...
+ fmulp(1); // Stack: (X*log2(e)) ...
+ pow_exp_core_encoding(); // Stack: 2^(X*log2(e)) = exp(X) ...
+ restore_precision();
+}
+
+ void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
+ // Emits x87 code for exp(X) (is_exp) or X^Y (!is_exp) with a runtime-call fallback; kills rax, rcx, rdx
+ // pow and exp need 2 extra registers on the fpu stack.
+ Label slow_case, done;
+ Register tmp = noreg;
+ if (!VM_Version::supports_cmov()) {
+ // without cmov, fcmp needs a temporary register: hand it rdx
+ tmp = rdx;
+ }
+ Register tmp2 = rax;
+ Register tmp3 = rcx;
+
+ if (is_exp) {
+ // Stack: X
+ fld_s(0); // duplicate argument for runtime call. Stack: X X
+ fast_exp(); // Stack: exp(X) X
+ fcmp(tmp, 0, false, false); // Stack: exp(X) X
+ // exp(X) not equal to itself: exp(X) is NaN go to slow case.
+ jcc(Assembler::parity, slow_case);
+ // get rid of duplicate argument. Stack: exp(X)
+ if (num_fpu_regs_in_use > 0) {
+ fxch();
+ fpop();
+ } else {
+ ffree(1);
+ }
+ jmp(done);
+ } else {
+ // Stack: X Y
+ Label x_negative, y_odd;
+
+ fldz(); // Stack: 0 X Y
+ fcmp(tmp, 1, true, false); // Stack: X Y
+ jcc(Assembler::above, x_negative);
+
+ // X >= 0
+
+ fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y
+ fld_s(1); // Stack: X Y X Y
+ fast_pow(); // Stack: X^Y X Y
+ fcmp(tmp, 0, false, false); // Stack: X^Y X Y
+ // X^Y not equal to itself: X^Y is NaN go to slow case.
+ jcc(Assembler::parity, slow_case);
+ // get rid of duplicate arguments. Stack: X^Y
+ if (num_fpu_regs_in_use > 0) {
+ fxch(); fpop();
+ fxch(); fpop();
+ } else {
+ ffree(2);
+ ffree(1);
+ }
+ jmp(done);
+
+ // X <= 0
+ bind(x_negative);
+
+ fld_s(1); // Stack: Y X Y
+ frndint(); // Stack: int(Y) X Y
+ fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
+ jcc(Assembler::notEqual, slow_case);
+
+ subptr(rsp, 8); // reserve 8 bytes on the thread's stack to receive int(Y)
+
+ // For X^Y, when X < 0, Y has to be an integer and the final
+ // result depends on whether it's odd or even. We just checked
+ // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
+ // integer to test its parity. If int(Y) is huge and doesn't fit
+ // in the 64 bit integer range, the integer indefinite value will
+ // end up in the gp registers. Huge numbers are all even, the
+ // integer indefinite number is even so it's fine.
+
+#ifdef ASSERT
+ // Let's check we don't end up with an integer indefinite number
+ // when not expected. First test for huge numbers: check whether
+ // int(Y)+1 == int(Y) which is true for very large numbers and
+ // those are all even. A 64 bit integer is guaranteed to not
+ // overflow for numbers where y+1 != y (when precision is set to
+ // double precision).
+ Label y_not_huge;
+
+ fld1(); // Stack: 1 int(Y) X Y
+ fadd(1); // Stack: 1+int(Y) int(Y) X Y
+
+#ifdef _LP64
+ // trip to memory to force the precision down from double extended
+ // precision
+ fstp_d(Address(rsp, 0));
+ fld_d(Address(rsp, 0));
+#endif
+
+ fcmp(tmp, 1, true, false); // Stack: int(Y) X Y
+#endif
+
+ // move int(Y) as 64 bit integer to thread's stack
+ fistp_d(Address(rsp,0)); // Stack: X Y
+
+#ifdef ASSERT
+ jcc(Assembler::notEqual, y_not_huge);
+
+ // Y is huge so we know it's even. It may not fit in a 64 bit
+ // integer and we don't want the debug code below to see the
+ // integer indefinite value so overwrite int(Y) on the thread's
+ // stack with 0.
+ movl(Address(rsp, 0), 0);
+ movl(Address(rsp, 4), 0);
+
+ bind(y_not_huge);
+#endif
+
+ fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y
+ fld_s(1); // Stack: X Y X Y
+ fabs(); // Stack: abs(X) Y X Y
+ fast_pow(); // Stack: abs(X)^Y X Y
+ fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
+ // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case (after rsp has been rebalanced by the pops below).
+
+ pop(tmp2); // int(Y) stored by fistp_d above: all 64 bits on LP64, the low 32 bits otherwise
+ NOT_LP64(pop(tmp3)); // 32-bit only: the upper 32 bits of int(Y)
+ jcc(Assembler::parity, slow_case);
+
+#ifdef ASSERT
+ // Check that int(Y) is not integer indefinite value (int
+ // overflow). Shouldn't happen because for values that would
+ // overflow, 1+int(Y)==int(Y) which was tested earlier.
+#ifndef _LP64
+ {
+ Label integer;
+ testl(tmp2, tmp2);
+ jcc(Assembler::notZero, integer);
+ cmpl(tmp3, 0x80000000);
+ jcc(Assembler::notZero, integer);
+ STOP("integer indefinite value shouldn't be seen here");
+ bind(integer);
+ }
+#else
+ {
+ Label integer;
+ mov(tmp3, tmp2); // preserve tmp2 for parity check below
+ shlq(tmp3, 1);
+ jcc(Assembler::carryClear, integer);
+ jcc(Assembler::notZero, integer);
+ STOP("integer indefinite value shouldn't be seen here");
+ bind(integer);
+ }
+#endif
+#endif
+
+ // get rid of duplicate arguments. Stack: X^Y
+ if (num_fpu_regs_in_use > 0) {
+ fxch(); fpop();
+ fxch(); fpop();
+ } else {
+ ffree(2);
+ ffree(1);
+ }
+
+ testl(tmp2, 1);
+ jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
+ // X <= 0, Y odd: X^Y = -abs(X)^Y
+
+ fchs(); // Stack: -abs(X)^Y
+ jmp(done);
+ }
+
+ // slow case: runtime call
+ bind(slow_case);
+
+ fpop(); // pop incorrect result or int(Y)
+
+ fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
+ is_exp ? 1 : 2, num_fpu_regs_in_use);
+
+ // Come here with result in F-TOS
+ bind(done);
+ }
+
+ void MacroAssembler::fpop() {
+ ffree(); // free an FPU register using ffree's default operand (presumably ST0 - declared elsewhere, confirm)
+ fincstp(); // then increment the FPU stack-top pointer; together these act as a pop that discards the value
+ }
+
+ void MacroAssembler::fremr(Register tmp) {
+ save_rax(tmp); // fnstsw_ax below writes ax: keep rax in tmp, or on the stack when tmp == noreg
+ { Label L;
+ bind(L);
+ fprem();
+ fwait(); fnstsw_ax();
+#ifdef _LP64
+ testl(rax, 0x400); // bit 10 of the FPU status word (condition flag C2)
+ jcc(Assembler::notEqual, L); // bit set: repeat fprem
+#else
+ sahf(); // load the status-word flags from ah into EFLAGS (C2 lands in the parity flag)
+ jcc(Assembler::parity, L); // flag set: repeat fprem
+#endif // _LP64
+ }
+ restore_rax(tmp);
+ // Result is in ST0.
+ // Note: fxch & fpop to get rid of ST1
+ // (otherwise FPU stack could overflow eventually)
+ fxch(1);
+ fpop();
+ }
+
+
+ void MacroAssembler::incrementl(AddressLiteral dst) {
+   // Increments the 32-bit value at a literal address. When the literal is not
+   // reachable its address is first loaded into rscratch1 (which is clobbered).
+   if (!reachable(dst)) {
+     lea(rscratch1, dst);
+     incrementl(Address(rscratch1, 0));
+     return;
+   }
+   incrementl(as_Address(dst));
+ }
+
+ void MacroAssembler::incrementl(ArrayAddress dst) {
+ incrementl(as_Address(dst)); // convert the array address to a plain Address and reuse that overload
+ }
+
+ void MacroAssembler::incrementl(Register reg, int value) {
+   // Adds a compile-time constant to a 32-bit register, choosing the emitted
+   // instruction from the constant's value.
+   if (value == min_jint) {
+     // min_jint is handled before the negative case, which would negate it
+     addl(reg, value);
+   } else if (value < 0) {
+     decrementl(reg, -value);
+   } else if (value == 0) {
+     // nothing to emit
+   } else if (value == 1 && UseIncDec) {
+     incl(reg);
+   } else {
+     addl(reg, value);
+   }
+ }
+
+ void MacroAssembler::incrementl(Address dst, int value) {
+   // Adds a compile-time constant to a 32-bit memory operand, choosing the
+   // emitted instruction from the constant's value.
+   if (value == min_jint) {
+     // min_jint is handled before the negative case, which would negate it
+     addl(dst, value);
+   } else if (value < 0) {
+     decrementl(dst, -value);
+   } else if (value == 0) {
+     // nothing to emit
+   } else if (value == 1 && UseIncDec) {
+     incl(dst);
+   } else {
+     addl(dst, value);
+   }
+ }
+
+ void MacroAssembler::jump(AddressLiteral dst) {
+   // Unconditional jump to a literal target. An unreachable target is loaded
+   // into rscratch1 (clobbering it) and jumped to indirectly.
+   if (!reachable(dst)) {
+     lea(rscratch1, dst);
+     jmp(rscratch1);
+     return;
+   }
+   jmp_literal(dst.target(), dst.rspec());
+ }
+
+ void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
+ if (reachable(dst)) {
+ InstructionMark im(this);
+ relocate(dst.reloc());
+ const int short_size = 2; // bytes emitted by the short form below: opcode + 8-bit displacement
+ const int long_size = 6; // bytes emitted by the long form below: two opcode bytes + 32-bit displacement
+ int offs = (intptr_t)dst.target() - ((intptr_t)pc()); // distance from the current pc() to the target
+ if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
+ // 0111 tttn #8-bit disp
+ emit_byte(0x70 | cc);
+ emit_byte((offs - short_size) & 0xFF);
+ } else {
+ // 0000 1111 1000 tttn #32-bit disp
+ emit_byte(0x0F);
+ emit_byte(0x80 | cc);
+ emit_long(offs - long_size);
+ }
+ } else {
+#ifdef ASSERT
+ warning("reversing conditional branch");
+#endif /* ASSERT */
+ Label skip;
+ jccb(reverse[cc], skip); // branch on reverse[cc] around the indirect jump emitted next
+ lea(rscratch1, dst); // unreachable target: load its address into rscratch1 (clobbered)
+ Assembler::jmp(rscratch1);
+ bind(skip);
+ }
+ }
+
+ void MacroAssembler::ldmxcsr(AddressLiteral src) {
+   // ldmxcsr from a literal address; rscratch1 is clobbered when src is not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::ldmxcsr(Address(rscratch1, 0));
+     return;
+   }
+   Assembler::ldmxcsr(as_Address(src));
+ }
+
+ int MacroAssembler::load_signed_byte(Register dst, Address src) {
+   // Loads a sign-extended byte into dst and returns the code offset recorded
+   // for the load instruction.
+   const bool use_movsx = LP64_ONLY(true ||) VM_Version::is_P6();
+   if (!use_movsx) {
+     // zero-extending load followed by a shift pair that sign-extends bit 7
+     const int load_pos = load_unsigned_byte(dst, src);
+     shll(dst, 24);
+     sarl(dst, 24);
+     return load_pos;
+   }
+   const int load_pos = offset();
+   movsbl(dst, src); // movsxb
+   return load_pos;
+ }
+
+// Note: load_signed_short used to be called load_signed_word.
+// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
+// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
+// The term "word" in HotSpot means a 32- or 64-bit machine word.
+ int MacroAssembler::load_signed_short(Register dst, Address src) {
+   // Loads a sign-extended 16-bit value into dst and returns the code offset
+   // recorded for the load instruction.
+   const bool use_movsx = LP64_ONLY(true ||) VM_Version::is_P6();
+   if (!use_movsx) {
+     // zero-extending load followed by a shift pair that sign-extends bit 15
+     const int load_pos = load_unsigned_short(dst, src);
+     shll(dst, 16);
+     sarl(dst, 16);
+     return load_pos;
+   }
+   // This is dubious: a signed 16 => 64 bit extension looks safe, yet the
+   // 64-bit port has always used the 32-bit form, which suggests callers only
+   // rely on the low 32 bits.
+   const int load_pos = offset();
+   movswl(dst, src); // movsxw
+   return load_pos;
+ }
+
+ int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
+   // Loads a zero-extended byte into dst and returns the code offset recorded
+   // for the load instruction.
+   // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
+   // and "3.9 Partial Register Penalties", p. 22).
+   const bool use_movzx = LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst);
+   if (!use_movzx) {
+     // clear dst first, then load only the low byte; the returned offset is
+     // that of the movb, not of the xorl
+     xorl(dst, dst);
+     const int load_pos = offset();
+     movb(dst, src);
+     return load_pos;
+   }
+   const int load_pos = offset();
+   movzbl(dst, src); // movzxb
+   return load_pos;
+ }
+
+// Note: load_unsigned_short used to be called load_unsigned_word.
+ int MacroAssembler::load_unsigned_short(Register dst, Address src) {
+   // Loads a zero-extended 16-bit value into dst and returns the code offset
+   // recorded for the load instruction.
+   // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
+   // and "3.9 Partial Register Penalties", p. 22).
+   const bool use_movzx = LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst);
+   if (!use_movzx) {
+     // clear dst first, then load only the low 16 bits; the returned offset
+     // is that of the movw, not of the xorl
+     xorl(dst, dst);
+     const int load_pos = offset();
+     movw(dst, src);
+     return load_pos;
+   }
+   const int load_pos = offset();
+   movzwl(dst, src); // movzxw
+   return load_pos;
+ }
+
+ void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
+ switch (size_in_bytes) { // is_signed only matters for the 1- and 2-byte cases
+#ifndef _LP64
+ case 8:
+ assert(dst2 != noreg, "second dest register required");
+ movl(dst, src); // word at src -> dst. NOTE(review): if src is addressed through dst, the next load uses a clobbered register - confirm callers avoid that
+ movl(dst2, src.plus_disp(BytesPerInt)); // word at src + BytesPerInt -> dst2
+ break;
+#else
+ case 8: movq(dst, src); break;
+#endif
+ case 4: movl(dst, src); break;
+ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
+ case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
+ default: ShouldNotReachHere(); // only sizes 1, 2, 4 and 8 are handled
+ }
+ }
+
+ void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
+ switch (size_in_bytes) { // src2 is only used for the 8-byte case of the non-LP64 build
+#ifndef _LP64
+ case 8:
+ assert(src2 != noreg, "second source register required");
+ movl(dst, src); // src -> word at dst
+ movl(dst.plus_disp(BytesPerInt), src2); // src2 -> word at dst + BytesPerInt
+ break;
+#else
+ case 8: movq(dst, src); break;
+#endif
+ case 4: movl(dst, src); break;
+ case 2: movw(dst, src); break;
+ case 1: movb(dst, src); break;
+ default: ShouldNotReachHere(); // only sizes 1, 2, 4 and 8 are handled
+ }
+ }
+
+ void MacroAssembler::mov32(AddressLiteral dst, Register src) {
+   // 32-bit store of src to a literal address; rscratch1 is clobbered when
+   // dst is not reachable.
+   if (!reachable(dst)) {
+     lea(rscratch1, dst);
+     movl(Address(rscratch1, 0), src);
+     return;
+   }
+   movl(as_Address(dst), src);
+ }
+
+ void MacroAssembler::mov32(Register dst, AddressLiteral src) {
+   // 32-bit load into dst from a literal address; rscratch1 is clobbered when
+   // src is not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     movl(dst, Address(rscratch1, 0));
+     return;
+   }
+   movl(dst, as_Address(src));
+ }
+
+// C++ bool manipulation
+
+ void MacroAssembler::movbool(Register dst, Address src) {
+   // Loads a C++ bool from memory with a move whose width matches sizeof(bool).
+   switch (sizeof(bool)) {
+     case 1:
+       movb(dst, src);
+       break;
+     case 2:
+       movw(dst, src);
+       break;
+     case 4:
+       movl(dst, src);
+       break;
+     default:
+       // unsupported
+       ShouldNotReachHere();
+   }
+ }
+
+ void MacroAssembler::movbool(Address dst, bool boolconst) {
+   // Stores a constant C++ bool to memory with a move whose width matches
+   // sizeof(bool).
+   switch (sizeof(bool)) {
+     case 1:
+       movb(dst, (int) boolconst);
+       break;
+     case 2:
+       movw(dst, (int) boolconst);
+       break;
+     case 4:
+       movl(dst, (int) boolconst);
+       break;
+     default:
+       // unsupported
+       ShouldNotReachHere();
+   }
+ }
+
+ void MacroAssembler::movbool(Address dst, Register src) {
+   // Stores a C++ bool held in src to memory with a move whose width matches
+   // sizeof(bool).
+   switch (sizeof(bool)) {
+     case 1:
+       movb(dst, src);
+       break;
+     case 2:
+       movw(dst, src);
+       break;
+     case 4:
+       movl(dst, src);
+       break;
+     default:
+       // unsupported
+       ShouldNotReachHere();
+   }
+ }
+
+ void MacroAssembler::movbyte(ArrayAddress dst, int src) {
+ movb(as_Address(dst), src); // byte store of the immediate src to the array element address
+ }
+
+ void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
+   // movdl from a literal address; rscratch1 is clobbered when src is not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     movdl(dst, Address(rscratch1, 0));
+     return;
+   }
+   movdl(dst, as_Address(src));
+ }
+
+ void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
+   // movq from a literal address; rscratch1 is clobbered when src is not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     movq(dst, Address(rscratch1, 0));
+     return;
+   }
+   movq(dst, as_Address(src));
+ }
+
+ void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
+   // Loads a double from a literal address, using movsd when
+   // UseXmmLoadAndClearUpper is set and movlpd otherwise. rscratch1 is
+   // clobbered when src is not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     if (!UseXmmLoadAndClearUpper) {
+       movlpd(dst, Address(rscratch1, 0));
+     } else {
+       movsd (dst, Address(rscratch1, 0));
+     }
+     return;
+   }
+   if (!UseXmmLoadAndClearUpper) {
+     movlpd(dst, as_Address(src));
+   } else {
+     movsd (dst, as_Address(src));
+   }
+ }
+
+ void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
+   // Loads a float from a literal address with movss; rscratch1 is clobbered
+   // when src is not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     movss(dst, Address(rscratch1, 0));
+     return;
+   }
+   movss(dst, as_Address(src));
+ }
+
+ void MacroAssembler::movptr(Register dst, Register src) {
+ LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); // register-to-register move: movq on LP64 builds, movl otherwise
+ }
+
+ void MacroAssembler::movptr(Register dst, Address src) {
+ LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); // load from memory: movq on LP64 builds, movl otherwise
+ }
+
+// src should NEVER be a real pointer. Use AddressLiteral for true pointers
+ void MacroAssembler::movptr(Register dst, intptr_t src) {
+ LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); // load an immediate: mov64 on LP64 builds, movl otherwise
+ }
+
+ void MacroAssembler::movptr(Address dst, Register src) {
+ LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); // store to memory: movq on LP64 builds, movl otherwise
+ }
+
+ void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
+   // movdqu from a literal address; rscratch1 is clobbered when src is not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::movdqu(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::movdqu(dst, as_Address(src));
+ }
+
+ void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
+   // movsd from a literal address; rscratch1 is clobbered when src is not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::movsd(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::movsd(dst, as_Address(src));
+ }
+
+ void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
+   // movss from a literal address; rscratch1 is clobbered when src is not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::movss(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::movss(dst, as_Address(src));
+ }
+
+ void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
+   // mulsd with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::mulsd(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::mulsd(dst, as_Address(src));
+ }
+
+ void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
+   // mulss with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::mulss(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::mulss(dst, as_Address(src));
+ }
+
+ void MacroAssembler::null_check(Register reg, int offset) {
+   // Emits an explicit null check of reg only when
+   // needs_explicit_null_check(offset) says one is required.
+   if (!needs_explicit_null_check(offset)) {
+     // nothing to do, (later) access of M[reg + offset]
+     // will provoke OS NULL exception if reg = NULL
+     return;
+   }
+   // Provoke the OS NULL exception when reg == NULL by reading M[reg]; the
+   // compare changes no registers other than the condition codes.
+   // NOTE: cmpl is plenty here to provoke a segv
+   cmpptr(rax, Address(reg, 0));
+   // Note: testl(rax, Address(reg, 0)) might give shorter code, but that
+   // form of testl would have to be implemented first.
+ }
+
+ void MacroAssembler::os_breakpoint() {
+ // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
+ // (e.g., MSVC can't call ps() otherwise)
+ call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
+ }
+
+ void MacroAssembler::pop_CPU_state() {
+ pop_FPU_state(); // reverse order of push_CPU_state: the FPU state was pushed last
+ pop_IU_state(); // then the integer state
+ }
+
+ void MacroAssembler::pop_FPU_state() {
+ NOT_LP64(frstor(Address(rsp, 0));) // non-LP64: counterpart of the fnsave in push_FPU_state
+ LP64_ONLY(fxrstor(Address(rsp, 0));) // LP64: counterpart of the fxsave in push_FPU_state
+ addptr(rsp, FPUStateSizeInWords * wordSize); // release the save area reserved by push_FPU_state
+ }
+
+ void MacroAssembler::pop_IU_state() {
+ popa(); // undo the pusha from push_IU_state
+ LP64_ONLY(addq(rsp, 8)); // drop the 8 alignment bytes push_IU_state reserved on LP64
+ popf(); // flags were pushed first, so they are popped last
+ }
+
+// Save Integer and Float state
+// Warning: Stack must be 16 byte aligned (64bit)
+ void MacroAssembler::push_CPU_state() {
+ push_IU_state(); // integer state first
+ push_FPU_state(); // FPU state on top of it; pop_CPU_state restores in the reverse order
+ }
+
+ void MacroAssembler::push_FPU_state() {
+ subptr(rsp, FPUStateSizeInWords * wordSize); // reserve the save area on the stack
+#ifndef _LP64
+ fnsave(Address(rsp, 0)); // restored by frstor in pop_FPU_state
+ fwait();
+#else
+ fxsave(Address(rsp, 0)); // restored by fxrstor in pop_FPU_state
+#endif // LP64
+ }
+
+ void MacroAssembler::push_IU_state() {
+ // Push flags first because pusha kills them
+ pushf();
+ // Make sure rsp stays 16-byte aligned (pop_IU_state adds these 8 bytes back)
+ LP64_ONLY(subq(rsp, 8));
+ pusha();
+ }
+
+ void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
+ // determine java_thread register: when the caller passed no valid register, rdi is loaded via get_thread (and clobbered)
+ if (!java_thread->is_valid()) {
+ java_thread = rdi;
+ get_thread(java_thread);
+ }
+ // we must set sp to zero to clear frame
+ movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
+ if (clear_fp) { // optionally clear the recorded frame pointer
+ movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
+ }
+
+ if (clear_pc) // optionally clear the recorded pc
+ movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
+
+ }
+
+ void MacroAssembler::restore_rax(Register tmp) {
+   // Counterpart of save_rax: brings rax back from the stack (tmp == noreg)
+   // or from tmp; nothing is emitted when tmp is rax itself.
+   if (tmp == noreg) {
+     pop(rax);
+     return;
+   }
+   if (tmp != rax) {
+     mov(rax, tmp);
+   }
+ }
+
+ void MacroAssembler::round_to(Register reg, int modulus) {
+ addptr(reg, modulus - 1); // bias upwards so the mask below rounds up rather than down
+ andptr(reg, -modulus); // mask off the low bits (presumably modulus must be a power of two - confirm with callers)
+ }
+
+ void MacroAssembler::save_rax(Register tmp) {
+   // Preserves rax on the stack (tmp == noreg) or in tmp; nothing is emitted
+   // when tmp is rax itself. Undone by restore_rax with the same argument.
+   if (tmp == noreg) {
+     push(rax);
+     return;
+   }
+   if (tmp != rax) {
+     mov(tmp, rax);
+   }
+ }
+
+// Write serialization page so VM thread can do a pseudo remote membar.
+// We use the current thread pointer to calculate a thread specific
+// offset to write to within the page. This minimizes bus traffic
+// due to cache line collision.
+ void MacroAssembler::serialize_memory(Register thread, Register tmp) {
+ movl(tmp, thread); // 32-bit copy of the thread pointer; tmp is clobbered
+ shrl(tmp, os::get_serialize_page_shift_count());
+ andl(tmp, (os::vm_page_size() - sizeof(int))); // keep the offset inside the page, leaving room for an int-sized store
+
+ Address index(noreg, tmp, Address::times_1);
+ ExternalAddress page(os::get_memory_serialize_page());
+
+ // Size of store must match masking code above
+ movl(as_Address(ArrayAddress(page, index)), tmp);
+ }
+
+// Calls to C land
+//
+// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
+// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
+// has to be reset to 0. This is required to allow proper stack traversal.
+ void MacroAssembler::set_last_Java_frame(Register java_thread,
+ Register last_java_sp,
+ Register last_java_fp,
+ address last_java_pc) {
+ // determine java_thread register: when the caller passed no valid register, rdi is loaded via get_thread (and clobbered)
+ if (!java_thread->is_valid()) {
+ java_thread = rdi;
+ get_thread(java_thread);
+ }
+ // determine last_java_sp register: defaults to rsp
+ if (!last_java_sp->is_valid()) {
+ last_java_sp = rsp;
+ }
+
+ // last_java_fp is optional
+
+ if (last_java_fp->is_valid()) {
+ movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
+ }
+
+ // last_java_pc is optional
+
+ if (last_java_pc != NULL) {
+ lea(Address(java_thread,
+ JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
+ InternalAddress(last_java_pc));
+
+ }
+ movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); // sp is written last, after fp and pc
+ }
+
+ void MacroAssembler::shlptr(Register dst, int imm8) {
+ LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8)); // left shift: shlq on LP64 builds, shll otherwise
+ }
+
+ void MacroAssembler::shrptr(Register dst, int imm8) {
+ LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8)); // right shift: shrq on LP64 builds, shrl otherwise
+ }
+
+ void MacroAssembler::sign_extend_byte(Register reg) {
+   // Sign-extends the low byte of reg in place.
+   const bool use_movsx =
+       LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register());
+   if (use_movsx) {
+     movsbl(reg, reg); // movsxb
+     return;
+   }
+   // shift the byte to the top of the 32-bit register, then shift it back
+   // arithmetically
+   shll(reg, 24);
+   sarl(reg, 24);
+ }
+
+ void MacroAssembler::sign_extend_short(Register reg) {
+   // Sign-extends the low 16 bits of reg in place.
+   const bool use_movsx = LP64_ONLY(true ||) VM_Version::is_P6();
+   if (use_movsx) {
+     movswl(reg, reg); // movsxw
+     return;
+   }
+   // shift the 16-bit value to the top of the 32-bit register, then shift it
+   // back arithmetically
+   shll(reg, 16);
+   sarl(reg, 16);
+ }
+
+ void MacroAssembler::testl(Register dst, AddressLiteral src) {
+ assert(reachable(src), "Address should be reachable"); // unlike its sibling wrappers, this one has no rscratch1 fallback
+ testl(dst, as_Address(src));
+ }
+
+ void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
+   // sqrtsd with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::sqrtsd(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::sqrtsd(dst, as_Address(src));
+ }
+
+ void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
+   // sqrtss with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::sqrtss(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::sqrtss(dst, as_Address(src));
+ }
+
+ void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
+   // subsd with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::subsd(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::subsd(dst, as_Address(src));
+ }
+
+ void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
+   // subss with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::subss(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::subss(dst, as_Address(src));
+ }
+
+ void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
+   // ucomisd with a literal memory operand; rscratch1 is clobbered when src
+   // is not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::ucomisd(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::ucomisd(dst, as_Address(src));
+ }
+
+ void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
+   // ucomiss with a literal memory operand; rscratch1 is clobbered when src
+   // is not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::ucomiss(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::ucomiss(dst, as_Address(src));
+ }
+
+ void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
+   // xorpd with a literal memory operand (used for sign-bit flipping). Unless
+   // AVX is enabled the literal must be 16-byte aligned, which is asserted.
+   // rscratch1 is clobbered when src is not reachable.
+   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::xorpd(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::xorpd(dst, as_Address(src));
+ }
+
+ void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
+   // xorps with a literal memory operand (used for sign-bit flipping). Unless
+   // AVX is enabled the literal must be 16-byte aligned, which is asserted.
+   // rscratch1 is clobbered when src is not reachable.
+   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::xorps(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::xorps(dst, as_Address(src));
+ }
+
+ void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
+   // pshufb with a literal memory operand. Unless AVX is enabled the literal
+   // must be 16-byte aligned, which is asserted. rscratch1 is clobbered when
+   // src is not reachable.
+   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     Assembler::pshufb(dst, Address(rscratch1, 0));
+     return;
+   }
+   Assembler::pshufb(dst, as_Address(src));
+ }
+
+// AVX 3-operands instructions
+
+ void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+   // vaddsd with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     vaddsd(dst, nds, Address(rscratch1, 0));
+     return;
+   }
+   vaddsd(dst, nds, as_Address(src));
+ }
+
+ void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+   // vaddss with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     vaddss(dst, nds, Address(rscratch1, 0));
+     return;
+   }
+   vaddss(dst, nds, as_Address(src));
+ }
+
+ void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+   // vandpd with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable. vector256 is passed through unchanged.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     vandpd(dst, nds, Address(rscratch1, 0), vector256);
+     return;
+   }
+   vandpd(dst, nds, as_Address(src), vector256);
+ }
+
+ void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+   // vandps with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable. vector256 is passed through unchanged.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     vandps(dst, nds, Address(rscratch1, 0), vector256);
+     return;
+   }
+   vandps(dst, nds, as_Address(src), vector256);
+ }
+
+ void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+   // vdivsd with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     vdivsd(dst, nds, Address(rscratch1, 0));
+     return;
+   }
+   vdivsd(dst, nds, as_Address(src));
+ }
+
+ void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+   // vdivss with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     vdivss(dst, nds, Address(rscratch1, 0));
+     return;
+   }
+   vdivss(dst, nds, as_Address(src));
+ }
+
+ void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+   // vmulsd with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     vmulsd(dst, nds, Address(rscratch1, 0));
+     return;
+   }
+   vmulsd(dst, nds, as_Address(src));
+ }
+
+ void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+   // vmulss with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     vmulss(dst, nds, Address(rscratch1, 0));
+     return;
+   }
+   vmulss(dst, nds, as_Address(src));
+ }
+
+ void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+   // vsubsd with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     vsubsd(dst, nds, Address(rscratch1, 0));
+     return;
+   }
+   vsubsd(dst, nds, as_Address(src));
+ }
+
+ void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+   // vsubss with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     vsubss(dst, nds, Address(rscratch1, 0));
+     return;
+   }
+   vsubss(dst, nds, as_Address(src));
+ }
+
+ void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+   // vxorpd with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable. vector256 is passed through unchanged.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     vxorpd(dst, nds, Address(rscratch1, 0), vector256);
+     return;
+   }
+   vxorpd(dst, nds, as_Address(src), vector256);
+ }
+
+ void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+   // vxorps with a literal memory operand; rscratch1 is clobbered when src is
+   // not reachable. vector256 is passed through unchanged.
+   if (!reachable(src)) {
+     lea(rscratch1, src);
+     vxorps(dst, nds, Address(rscratch1, 0), vector256);
+     return;
+   }
+   vxorps(dst, nds, as_Address(src), vector256);
+ }
+
+
+//////////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+ void MacroAssembler::g1_write_barrier_pre(Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp,
+ bool tosca_live,
+ bool expand_call) {
+
+ // If expand_call is true then we expand the call_VM_leaf macro
+ // directly to skip generating the check by
+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
+
+#ifdef _LP64
+ assert(thread == r15_thread, "must be");
+#endif // _LP64
+
+ Label done;
+ Label runtime;
+
+ assert(pre_val != noreg, "check this code");
+
+ if (obj != noreg) {
+ assert_different_registers(obj, pre_val, tmp);
+ assert(pre_val != rax, "check this code");
+ }
+
+ Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ PtrQueue::byte_offset_of_active()));
+ Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ PtrQueue::byte_offset_of_index()));
+ Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ PtrQueue::byte_offset_of_buf()));
+
+
+ // Is marking active?
+ if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+ cmpl(in_progress, 0);
+ } else {
+ assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
+ cmpb(in_progress, 0);
+ }
+ jcc(Assembler::equal, done); // marking not active: nothing to do
+
+ // Do we need to load the previous value? (only when the caller supplied obj)
+ if (obj != noreg) {
+ load_heap_oop(pre_val, Address(obj, 0));
+ }
+
+ // Is the previous value null?
+ cmpptr(pre_val, (int32_t) NULL_WORD);
+ jcc(Assembler::equal, done);
+
+ // Can we store original value in the thread's buffer?
+ // Is index == 0?
+ // (The index field is typed as size_t.)
+
+ movptr(tmp, index); // tmp := *index_adr
+ cmpptr(tmp, 0); // tmp == 0?
+ jcc(Assembler::equal, runtime); // If yes, goto runtime
+
+ subptr(tmp, wordSize); // tmp := tmp - wordSize
+ movptr(index, tmp); // *index_adr := tmp
+ addptr(tmp, buffer); // tmp := tmp + *buffer_adr
+
+ // Record the previous value
+ movptr(Address(tmp, 0), pre_val);
+ jmp(done);
+
+ bind(runtime);
+ // save the live input values
+ if(tosca_live) push(rax);
+
+ if (obj != noreg && obj != rax)
+ push(obj);
+
+ if (pre_val != rax)
+ push(pre_val);
+
+ // Calling the runtime using the regular call_VM_leaf mechanism generates
+ // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
+ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
+ //
+ // If we are generating the pre-barrier without a frame (e.g. in the
+ // intrinsified Reference.get() routine) then ebp might be pointing to
+ // the caller frame and so this check will most likely fail at runtime.
+ //
+ // Expanding the call directly bypasses the generation of the check.
+ // So when we do not have a full interpreter frame on the stack
+ // expand_call should be passed true.
+
+ NOT_LP64( push(thread); )
+
+ if (expand_call) {
+ LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
+ pass_arg1(this, thread);
+ pass_arg0(this, pre_val);
+ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
+ } else {
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+ }
+
+ NOT_LP64( pop(thread); )
+
+ // restore the live input values (reverse order of the pushes above)
+ if (pre_val != rax)
+ pop(pre_val);
+
+ if (obj != noreg && obj != rax)
+ pop(obj);
+
+ if(tosca_live) pop(rax);
+
+ bind(done);
+ }
+
+ void MacroAssembler::g1_write_barrier_post(Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp,
+ Register tmp2) {
+#ifdef _LP64
+ assert(thread == r15_thread, "must be");
+#endif // _LP64
+
+ Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+ PtrQueue::byte_offset_of_index()));
+ Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+ PtrQueue::byte_offset_of_buf()));
+
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+ Label done;
+ Label runtime;
+
+ // Does store cross heap regions?
+
+ movptr(tmp, store_addr);
+ xorptr(tmp, new_val);
+ shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
+ jcc(Assembler::equal, done); // zero after the shift: both addresses agree above the region-size bits
+
+ // crosses regions, storing NULL?
+
+ cmpptr(new_val, (int32_t) NULL_WORD);
+ jcc(Assembler::equal, done);
+
+ // storing region crossing non-NULL, is card already dirty?
+
+ ExternalAddress cardtable((address) ct->byte_map_base);
+ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+#ifdef _LP64
+ const Register card_addr = tmp;
+
+ movq(card_addr, store_addr);
+ shrq(card_addr, CardTableModRefBS::card_shift);
+
+ lea(tmp2, cardtable);
+
+ // get the address of the card
+ addq(card_addr, tmp2);
+#else
+ const Register card_index = tmp;
+
+ movl(card_index, store_addr);
+ shrl(card_index, CardTableModRefBS::card_shift);
+
+ Address index(noreg, card_index, Address::times_1);
+ const Register card_addr = tmp; // same register as card_index: from the lea below on, tmp holds the card address
+ lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
+#endif
+ cmpb(Address(card_addr, 0), 0);
+ jcc(Assembler::equal, done); // card already holds 0, the value written below to dirty it
+
+ // storing a region crossing, non-NULL oop, card is clean.
+ // dirty card and log.
+
+ movb(Address(card_addr, 0), 0);
+
+ cmpl(queue_index, 0);
+ jcc(Assembler::equal, runtime); // index is 0: take the runtime path instead of storing into the buffer
+ subl(queue_index, wordSize);
+ movptr(tmp2, buffer);
+#ifdef _LP64
+ movslq(rscratch1, queue_index);
+ addq(tmp2, rscratch1);
+ movq(Address(tmp2, 0), card_addr);
+#else
+ addl(tmp2, queue_index);
+ movl(Address(tmp2, 0), card_index); // card_index aliases tmp, which holds the card address at this point
+#endif
+ jmp(done);
+
+ bind(runtime);
+ // save the live input values
+ push(store_addr);
+ push(new_val);
+#ifdef _LP64
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
+#else
+ push(thread);
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+ pop(thread);
+#endif
+ pop(new_val); // restore the saved inputs in reverse order
+ pop(store_addr);
+
+ bind(done);
+ }
+
+#endif // SERIALGC
+//////////////////////////////////////////////////////////////////////////////////
+
+
+ void MacroAssembler::store_check(Register obj) {
+ // Does a store check for the oop in register obj. The content of
+ // register obj is destroyed afterwards (part 1 shifts it in place).
+ store_check_part_1(obj);
+ store_check_part_2(obj);
+ }
+
+ void MacroAssembler::store_check(Register obj, Address dst) {
+ store_check(obj); // dst is not used; obj is destroyed as in the one-argument overload
+ }
+
+
+ // split the store check operation so that other instructions can be scheduled in between
+ void MacroAssembler::store_check_part_1(Register obj) {
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
+ shrptr(obj, CardTableModRefBS::card_shift); // turn the address in obj into a card index, in place
+ }
+
+ void MacroAssembler::store_check_part_2(Register obj) {
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
+ CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+ // The calculation for byte_map_base is as follows:
+ // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
+ // So this essentially converts an address to a displacement and
+ // it will never need to be relocated. On 64bit however the value may be too
+ // large for a 32bit displacement
+
+ intptr_t disp = (intptr_t) ct->byte_map_base;
+ if (is_simm32(disp)) {
+ Address cardtable(noreg, obj, Address::times_1, disp);
+ movb(cardtable, 0); // write 0 to the card byte at disp + obj (obj holds the card index from part 1)
+ } else {
+ // By doing it as an ExternalAddress disp could be converted to a rip-relative
+ // displacement and done in a single instruction given favorable mapping and
+ // a smarter version of as_Address. Worst case it is two instructions which
+ // is no worse off than loading disp into a register and doing as a simple
+ // Address() as above.
+ // We can't do as ExternalAddress as the only style since if disp == 0 we'll
+ // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
+ // in some cases we'll get a single instruction version.
+
+ ExternalAddress cardtable((address)disp);
+ Address index(noreg, obj, Address::times_1);
+ movb(as_Address(ArrayAddress(cardtable, index)), 0);
+ }
+ }
+
+ void MacroAssembler::subptr(Register dst, int32_t imm32) {
+ LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); // subtract an immediate: subq on LP64 builds, subl otherwise
+ }
+
+// Force generation of a 4 byte immediate value even if it fits into 8bit
+ void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
+ LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); // subq_imm32 on LP64 builds, subl_imm32 otherwise
+ }
+
+ void MacroAssembler::subptr(Register dst, Register src) {
+ LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); // subtract a register: subq on LP64 builds, subl otherwise
+ }
+
+// C++ bool manipulation
+ void MacroAssembler::testbool(Register dst) {
+   // Tests a C++ bool held in dst with an instruction whose width matches
+   // sizeof(bool).
+   switch (sizeof(bool)) {
+     case 1:
+       testb(dst, 0xff);
+       break;
+     case 4:
+       testl(dst, dst);
+       break;
+     case 2:
+       // testw implementation needed for two byte bools
+     default:
+       // unsupported
+       ShouldNotReachHere();
+   }
+ }
+
+ void MacroAssembler::testptr(Register dst, Register src) {
+ LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); // register test: testq on LP64 builds, testl otherwise
+ }
+
+// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
+//
+// Emits an allocation out of the current thread's TLAB: obj receives the old
+// tlab top, the new top (obj + size) is written back, and control transfers
+// to slow_case when the new top would lie above tlab end.
+// The size is con_size_in_bytes when var_size_in_bytes == noreg, otherwise
+// the contents of var_size_in_bytes.
+// t1 is used to hold the thread on 32-bit builds; t2 holds the new top.
+void MacroAssembler::tlab_allocate(Register obj,
+ Register var_size_in_bytes,
+ int con_size_in_bytes,
+ Register t1,
+ Register t2,
+ Label& slow_case) {
+ assert_different_registers(obj, t1, t2);
+ assert_different_registers(obj, var_size_in_bytes, t1);
+ Register end = t2;
+ // 32-bit builds load the thread into t1; LP64 builds use r15_thread.
+ Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);
+
+ verify_tlab();
+
+ NOT_LP64(get_thread(thread));
+
+ // obj = tlab top; end = obj + requested size
+ movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
+ if (var_size_in_bytes == noreg) {
+ lea(end, Address(obj, con_size_in_bytes));
+ } else {
+ lea(end, Address(obj, var_size_in_bytes, Address::times_1));
+ }
+ // unsigned compare: take the slow case if the new top passes tlab end
+ cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
+ jcc(Assembler::above, slow_case);
+
+ // update the tlab top pointer
+ movptr(Address(thread, JavaThread::tlab_top_offset()), end);
+
+ // recover var_size_in_bytes if necessary
+ // (when it aliases 'end' it now holds obj + size, so subtract obj again)
+ if (var_size_in_bytes == end) {
+ subptr(var_size_in_bytes, obj);
+ }
+ verify_tlab();
+}
+
+// Preserves rbx, and rdx.
+//
+// Emits the TLAB refill sequence. Uses rax (top), rcx (t1) and rsi (t2) as
+// scratch, plus rdi for the thread on 32-bit builds.
+// - If the free space left in the TLAB exceeds the thread's refill waste
+//   limit, the TLAB is retained: the limit is raised and control goes to
+//   try_eden.
+// - Otherwise the TLAB is discarded: any remaining space is covered with an
+//   int array, a new TLAB is allocated with eden_allocate (which may branch
+//   to slow_case), the thread's start/top/end fields are updated and control
+//   goes to retry.
+// Returns the register holding the thread, for use by the caller.
+Register MacroAssembler::tlab_refill(Label& retry,
+ Label& try_eden,
+ Label& slow_case) {
+ Register top = rax;
+ Register t1 = rcx;
+ Register t2 = rsi;
+ Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
+ assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
+ Label do_refill, discard_tlab;
+
+ if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+ // No allocation in the shared eden.
+ jmp(slow_case);
+ }
+
+ NOT_LP64(get_thread(thread_reg));
+
+ movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
+ movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
+
+ // calculate amount of free space
+ // (t1 = (end - top) >> LogHeapWordSize, i.e. free space in heap words)
+ subptr(t1, top);
+ shrptr(t1, LogHeapWordSize);
+
+ // Retain tlab and allocate object in shared space if
+ // the amount free in the tlab is too large to discard.
+ cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
+ jcc(Assembler::lessEqual, discard_tlab);
+
+ // Retain
+ // %%% yuck as movptr...
+ movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
+ addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
+ if (TLABStats) {
+ // increment number of slow_allocations
+ addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
+ }
+ jmp(try_eden);
+
+ bind(discard_tlab);
+ if (TLABStats) {
+ // increment number of refills
+ addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
+ // accumulate wastage -- t1 is amount free in tlab
+ addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
+ }
+
+ // if tlab is currently allocated (top or end != null) then
+ // fill [top, end + alignment_reserve) with array object
+ testptr(top, top);
+ jcc(Assembler::zero, do_refill);
+
+ // set up the mark word
+ movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
+ // set the length to the remaining space
+ // (free words minus the int-array header, plus the alignment reserve,
+ // then scaled from heap words to jint elements)
+ subptr(t1, typeArrayOopDesc::header_size(T_INT));
+ addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
+ shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
+ movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
+ // set klass to intArrayKlass
+ // dubious reloc why not an oop reloc?
+ movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
+ // store klass last. concurrent gcs assumes klass length is valid if
+ // klass field is not null.
+ store_klass(top, t1);
+
+ // account for the bytes used in the discarded tlab: t1 = top - tlab start
+ movptr(t1, top);
+ subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
+ incr_allocated_bytes(thread_reg, t1, 0);
+
+ // refill the tlab with an eden allocation
+ bind(do_refill);
+ // t1 = tlab size converted from heap words to bytes
+ movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
+ shlptr(t1, LogHeapWordSize);
+ // allocate new tlab, address returned in top
+ eden_allocate(top, t1, 0, t2, slow_case);
+
+ // Check that t1 was preserved in eden_allocate.
+#ifdef ASSERT
+ if (UseTLAB) {
+ Label ok;
+ Register tsize = rsi;
+ assert_different_registers(tsize, thread_reg, t1);
+ // tsize is saved around the check and restored on the success path
+ push(tsize);
+ movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
+ shlptr(tsize, LogHeapWordSize);
+ cmpptr(t1, tsize);
+ jcc(Assembler::equal, ok);
+ STOP("assert(t1 != tlab size)");
+ should_not_reach_here();
+
+ bind(ok);
+ pop(tsize);
+ }
+#endif
+ // publish the new tlab: start = top = new block, end = start + size - reserve
+ movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
+ movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
+ addptr(top, t1);
+ subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
+ movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
+ verify_tlab();
+ jmp(retry);
+
+ return thread_reg; // for use by caller
+}
+
+// Emits code that adds an allocation size to the thread's allocated_bytes
+// counter. The size is var_size_in_bytes when that register is valid,
+// otherwise the constant con_size_in_bytes.
+// If 'thread' is not a valid register, r15_thread is used on LP64 builds; on
+// 32-bit builds the thread is loaded into t1, which must then be valid.
+void MacroAssembler::incr_allocated_bytes(Register thread,
+ Register var_size_in_bytes,
+ int con_size_in_bytes,
+ Register t1) {
+ if (!thread->is_valid()) {
+#ifdef _LP64
+ thread = r15_thread;
+#else
+ assert(t1->is_valid(), "need temp reg");
+ thread = t1;
+ get_thread(thread);
+#endif
+ }
+
+#ifdef _LP64
+ // single 64-bit add
+ if (var_size_in_bytes->is_valid()) {
+ addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
+ } else {
+ addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
+ }
+#else
+ // 32-bit add to the low word, then propagate the carry into the word at +4
+ if (var_size_in_bytes->is_valid()) {
+ addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
+ } else {
+ addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
+ }
+ adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
+#endif
+}
+
+// Emits a call to a runtime floating-point routine whose arguments are on the
+// x87 FPU stack, preserving the surrounding register state.
+//   runtime_entry        - address of the routine to call
+//   nb_args              - number of double arguments taken from the FPU
+//                          stack (at most 2 on LP64, see the assert below)
+//   num_fpu_regs_in_use  - number of FPU stack slots live at the call site;
+//                          when > 1 the whole FPU stack is spilled and
+//                          restored around the call
+// Sequence: pusha, save XMM (and upper YMM halves when applicable), spill the
+// FPU stack if needed, store the arguments to the stack (and load them into
+// xmm0/xmm1 on LP64), call, reload the result onto the FPU stack, then undo
+// everything in reverse order.
+void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
+ pusha();
+
+ // if we are coming from c1, xmm registers may be live
+ int off = 0;
+ if (UseSSE == 1) {
+ // one jdouble-sized slot per register, written with single-precision moves
+ subptr(rsp, sizeof(jdouble)*8);
+ movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
+ movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
+ movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
+ movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
+ movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
+ movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
+ movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
+ movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
+ } else if (UseSSE >= 2) {
+#ifdef COMPILER2
+ if (MaxVectorSize > 16) {
+ assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+ // Save upper half of YMM registers
+ subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+ vextractf128h(Address(rsp, 0),xmm0);
+ vextractf128h(Address(rsp, 16),xmm1);
+ vextractf128h(Address(rsp, 32),xmm2);
+ vextractf128h(Address(rsp, 48),xmm3);
+ vextractf128h(Address(rsp, 64),xmm4);
+ vextractf128h(Address(rsp, 80),xmm5);
+ vextractf128h(Address(rsp, 96),xmm6);
+ vextractf128h(Address(rsp,112),xmm7);
+#ifdef _LP64
+ vextractf128h(Address(rsp,128),xmm8);
+ vextractf128h(Address(rsp,144),xmm9);
+ vextractf128h(Address(rsp,160),xmm10);
+ vextractf128h(Address(rsp,176),xmm11);
+ vextractf128h(Address(rsp,192),xmm12);
+ vextractf128h(Address(rsp,208),xmm13);
+ vextractf128h(Address(rsp,224),xmm14);
+ vextractf128h(Address(rsp,240),xmm15);
+#endif
+ }
+#endif
+ // Save whole 128bit (16 bytes) XMM registers
+ // (16 registers on LP64, 8 otherwise; this area sits below the YMM area)
+ subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+ movdqu(Address(rsp,off++*16),xmm0);
+ movdqu(Address(rsp,off++*16),xmm1);
+ movdqu(Address(rsp,off++*16),xmm2);
+ movdqu(Address(rsp,off++*16),xmm3);
+ movdqu(Address(rsp,off++*16),xmm4);
+ movdqu(Address(rsp,off++*16),xmm5);
+ movdqu(Address(rsp,off++*16),xmm6);
+ movdqu(Address(rsp,off++*16),xmm7);
+#ifdef _LP64
+ movdqu(Address(rsp,off++*16),xmm8);
+ movdqu(Address(rsp,off++*16),xmm9);
+ movdqu(Address(rsp,off++*16),xmm10);
+ movdqu(Address(rsp,off++*16),xmm11);
+ movdqu(Address(rsp,off++*16),xmm12);
+ movdqu(Address(rsp,off++*16),xmm13);
+ movdqu(Address(rsp,off++*16),xmm14);
+ movdqu(Address(rsp,off++*16),xmm15);
+#endif
+ }
+
+ // Preserve registers across runtime call
+ int incoming_argument_and_return_value_offset = -1;
+ if (num_fpu_regs_in_use > 1) {
+ // Must preserve all other FPU regs (could alternatively convert
+ // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
+ // FPU state, but can not trust C compiler)
+ NEEDS_CLEANUP;
+ // NOTE that in this case we also push the incoming argument(s) to
+ // the stack and restore it later; we also use this stack slot to
+ // hold the return value from dsin, dcos etc.
+ for (int i = 0; i < num_fpu_regs_in_use; i++) {
+ subptr(rsp, sizeof(jdouble));
+ fstp_d(Address(rsp, 0));
+ }
+ // the first value popped (the old F-TOS) now lives in the highest slot
+ incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
+ // reload the arguments onto the FPU stack so the common code below can pop them
+ for (int i = nb_args-1; i >= 0; i--) {
+ fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
+ }
+ }
+
+ // move the arguments from the FPU stack into a fresh stack area
+ subptr(rsp, nb_args*sizeof(jdouble));
+ for (int i = 0; i < nb_args; i++) {
+ fstp_d(Address(rsp, i*sizeof(jdouble)));
+ }
+
+#ifdef _LP64
+ // on LP64 the arguments are additionally loaded into xmm0/xmm1
+ if (nb_args > 0) {
+ movdbl(xmm0, Address(rsp, 0));
+ }
+ if (nb_args > 1) {
+ movdbl(xmm1, Address(rsp, sizeof(jdouble)));
+ }
+ assert(nb_args <= 2, "unsupported number of args");
+#endif // _LP64
+
+ // NOTE: we must not use call_VM_leaf here because that requires a
+ // complete interpreter frame in debug mode -- same bug as 4387334
+ // MacroAssembler::call_VM_leaf_base is perfectly safe and will
+ // do proper 64bit abi
+
+ NEEDS_CLEANUP;
+ // Need to add stack banging before this runtime call if it needs to
+ // be taken; however, there is no generic stack banging routine at
+ // the MacroAssembler level
+
+ MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
+
+#ifdef _LP64
+ // move the result from xmm0 to F-TOS through the stack
+ movsd(Address(rsp, 0), xmm0);
+ fld_d(Address(rsp, 0));
+#endif // _LP64
+ addptr(rsp, sizeof(jdouble) * nb_args);
+ if (num_fpu_regs_in_use > 1) {
+ // Must save return value to stack and then restore entire FPU
+ // stack except incoming arguments
+ fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
+ for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
+ fld_d(Address(rsp, 0));
+ addptr(rsp, sizeof(jdouble));
+ }
+ // finally reload the return value so it ends up in F-TOS
+ fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
+ addptr(rsp, sizeof(jdouble) * nb_args);
+ }
+
+ off = 0;
+ if (UseSSE == 1) {
+ movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
+ movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
+ movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
+ movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
+ movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
+ movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
+ movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
+ movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
+ addptr(rsp, sizeof(jdouble)*8);
+ } else if (UseSSE >= 2) {
+ // Restore whole 128bit (16 bytes) XMM registers
+ movdqu(xmm0, Address(rsp,off++*16));
+ movdqu(xmm1, Address(rsp,off++*16));
+ movdqu(xmm2, Address(rsp,off++*16));
+ movdqu(xmm3, Address(rsp,off++*16));
+ movdqu(xmm4, Address(rsp,off++*16));
+ movdqu(xmm5, Address(rsp,off++*16));
+ movdqu(xmm6, Address(rsp,off++*16));
+ movdqu(xmm7, Address(rsp,off++*16));
+#ifdef _LP64
+ movdqu(xmm8, Address(rsp,off++*16));
+ movdqu(xmm9, Address(rsp,off++*16));
+ movdqu(xmm10, Address(rsp,off++*16));
+ movdqu(xmm11, Address(rsp,off++*16));
+ movdqu(xmm12, Address(rsp,off++*16));
+ movdqu(xmm13, Address(rsp,off++*16));
+ movdqu(xmm14, Address(rsp,off++*16));
+ movdqu(xmm15, Address(rsp,off++*16));
+#endif
+ addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+#ifdef COMPILER2
+ if (MaxVectorSize > 16) {
+ // Restore upper half of YMM registers.
+ vinsertf128h(xmm0, Address(rsp, 0));
+ vinsertf128h(xmm1, Address(rsp, 16));
+ vinsertf128h(xmm2, Address(rsp, 32));
+ vinsertf128h(xmm3, Address(rsp, 48));
+ vinsertf128h(xmm4, Address(rsp, 64));
+ vinsertf128h(xmm5, Address(rsp, 80));
+ vinsertf128h(xmm6, Address(rsp, 96));
+ vinsertf128h(xmm7, Address(rsp,112));
+#ifdef _LP64
+ vinsertf128h(xmm8, Address(rsp,128));
+ vinsertf128h(xmm9, Address(rsp,144));
+ vinsertf128h(xmm10, Address(rsp,160));
+ vinsertf128h(xmm11, Address(rsp,176));
+ vinsertf128h(xmm12, Address(rsp,192));
+ vinsertf128h(xmm13, Address(rsp,208));
+ vinsertf128h(xmm14, Address(rsp,224));
+ vinsertf128h(xmm15, Address(rsp,240));
+#endif
+ addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+ }
+#endif
+ }
+ popa();
+}
+
+// pi/4, the bound below which trigfunc uses the FPU instruction directly.
+static const double pi_4 = 0.7853981633974483;
+
+// Emits code computing sin, cos or tan of the value in F-TOS.
+//   trig                 - 's', 'c' or 't' selects the function
+//   num_fpu_regs_in_use  - forwarded to fp_runtime_fallback for the slow case
+// When |x| <= pi/4 (and the pi_4 constant is reachable) the fsin/fcos/ftan
+// instruction is emitted; otherwise the matching SharedRuntime routine is
+// called through fp_runtime_fallback.
+void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
+ // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
+ // was attempted in this code; unfortunately it appears that the
+ // switch to 80-bit precision and back causes this to be
+ // unprofitable compared with simply performing a runtime call if
+ // the argument is out of the (-pi/4, pi/4) range.
+
+ Register tmp = noreg;
+ if (!VM_Version::supports_cmov()) {
+ // fcmp needs a temporary so preserve rbx,
+ tmp = rbx;
+ push(tmp);
+ }
+
+ Label slow_case, done;
+
+ ExternalAddress pi4_adr = (address)&pi_4;
+ // if the constant is not reachable no fast path is emitted at all and
+ // every call takes the runtime route below
+ if (reachable(pi4_adr)) {
+ // x ?<= pi/4
+ fld_d(pi4_adr);
+ fld_s(1); // Stack: X PI/4 X
+ fabs(); // Stack: |X| PI/4 X
+ fcmp(tmp);
+ jcc(Assembler::above, slow_case);
+
+ // fastest case: -pi/4 <= x <= pi/4
+ switch(trig) {
+ case 's':
+ fsin();
+ break;
+ case 'c':
+ fcos();
+ break;
+ case 't':
+ ftan();
+ break;
+ default:
+ assert(false, "bad intrinsic");
+ break;
+ }
+ jmp(done);
+ }
+
+ // slow case: runtime call
+ bind(slow_case);
+
+ switch(trig) {
+ case 's':
+ {
+ fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
+ }
+ break;
+ case 'c':
+ {
+ fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
+ }
+ break;
+ case 't':
+ {
+ fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
+ }
+ break;
+ default:
+ assert(false, "bad intrinsic");
+ break;
+ }
+
+ // Come here with result in F-TOS
+ bind(done);
+
+ // restore rbx if it was saved for fcmp above
+ if (tmp != noreg) {
+ pop(tmp);
+ }
+}
+
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+//
+// recv_klass and scan_temp are overwritten by the emitted code. A
+// non-constant itable_index must be passed in method_result (asserted below).
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+ Register intf_klass,
+ RegisterOrConstant itable_index,
+ Register method_result,
+ Register scan_temp,
+ Label& L_no_such_interface) {
+ assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
+ assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+ "caller must use same register for non-constant itable index as for method");
+
+ // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+ int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
+ int itentry_off = itableMethodEntry::method_offset_in_bytes();
+ int scan_step = itableOffsetEntry::size() * wordSize;
+ int vte_size = vtableEntry::size() * wordSize;
+ Address::ScaleFactor times_vte_scale = Address::times_ptr;
+ assert(vte_size == wordSize, "else adjust times_vte_scale");
+
+ movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
+
+ // %%% Could store the aligned, prescaled offset in the klassoop.
+ // scan_temp = recv_klass + vtable_base + vtable_length * entry size
+ lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
+ if (HeapWordsPerLong > 1) {
+ // Round up to align_object_offset boundary
+ // see code for InstanceKlass::start_of_itable!
+ round_to(scan_temp, BytesPerLong);
+ }
+
+ // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+ lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
+
+ // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
+ // if (scan->interface() == intf) {
+ // result = (klass + scan->offset() + itable_index);
+ // }
+ // }
+ Label search, found_method;
+
+ // The compare is emitted twice: once peeled ahead of the loop (peel == 1,
+ // branching forward to found_method on a hit) and once as the loop test
+ // (peel == 0, branching back to search on a miss).
+ for (int peel = 1; peel >= 0; peel--) {
+ movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
+ cmpptr(intf_klass, method_result);
+
+ if (peel) {
+ jccb(Assembler::equal, found_method);
+ } else {
+ jccb(Assembler::notEqual, search);
+ // (invert the test to fall through to found_method...)
+ }
+
+ if (!peel) break;
+
+ bind(search);
+
+ // Check that the previous entry is non-null. A null entry means that
+ // the receiver class doesn't implement the interface, and wasn't the
+ // same as when the caller was compiled.
+ testptr(method_result, method_result);
+ jcc(Assembler::zero, L_no_such_interface);
+ addptr(scan_temp, scan_step);
+ }
+
+ bind(found_method);
+
+ // Got a hit.
+ // scan_temp = offset stored in the matching itableOffsetEntry; the method is
+ // then loaded from the already-adjusted recv_klass plus that offset.
+ movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
+ movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
+}
+
+
+// virtual method calling
+// Emits a load of the method stored in the vtable entry selected by
+// vtable_index of the klass in recv_klass into method_result.
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register method_result) {
+  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
+  // displacement of the method field of vtable entry 0, relative to the klass
+  const int method_disp = InstanceKlass::vtable_start_offset() * wordSize
+                        + vtableEntry::method_offset_in_bytes();
+  movptr(method_result,
+         Address(recv_klass, vtable_index, Address::times_ptr, method_disp));
+}
+
+
+// Emits a complete subtype check of sub_klass against super_klass: the fast
+// path first, then the slow path. Branches to L_success when the check
+// succeeds and falls through (at the locally bound L_failure) when it fails.
+// temp_reg is a scratch register handed to both paths.
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+ Register super_klass,
+ Register temp_reg,
+ Label& L_success) {
+ Label L_failure;
+ // fast path: NULL slow-path label means "fall through into the slow path"
+ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
+ // slow path: NULL failure label means "fall through on failure"
+ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
+ bind(L_failure);
+}
+
+
+// Emits the fast part of a subtype check of sub_klass against super_klass.
+//   L_success / L_failure / L_slow_path - branch targets for the three
+//       outcomes; at most one may be NULL, meaning "fall through".
+//   super_check_offset - a constant or register holding super_klass's super
+//       check offset; a constant of -1 means it is loaded from super_klass
+//       into temp_reg by the emitted code.
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+ Register super_klass,
+ Register temp_reg,
+ Label* L_success,
+ Label* L_failure,
+ Label* L_slow_path,
+ RegisterOrConstant super_check_offset) {
+ assert_different_registers(sub_klass, super_klass, temp_reg);
+ bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
+ if (super_check_offset.is_register()) {
+ assert_different_registers(sub_klass, super_klass,
+ super_check_offset.as_register());
+ } else if (must_load_sco) {
+ assert(temp_reg != noreg, "supply either a temp or a register offset");
+ }
+
+ // A NULL label is redirected to L_fallthrough, which is bound at the end.
+ Label L_fallthrough;
+ int label_nulls = 0;
+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+ assert(label_nulls <= 1, "at most one NULL in the batch");
+
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
+ Address super_check_offset_addr(super_klass, sco_offset);
+
+ // Hacked jcc, which "knows" that L_fallthrough, at least, is in
+ // range of a jccb. If this routine grows larger, reconsider at
+ // least some of these.
+#define local_jcc(assembler_cond, label) \
+ if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \
+ else jcc( assembler_cond, label) /*omit semi*/
+
+ // Hacked jmp, which may only be used just before L_fallthrough.
+#define final_jmp(label) \
+ if (&(label) == &L_fallthrough) { /*do nothing*/ } \
+ else jmp(label) /*omit semi*/
+
+ // If the pointers are equal, we are done (e.g., String[] elements).
+ // This self-check enables sharing of secondary supertype arrays among
+ // non-primary types such as array-of-interface. Otherwise, each such
+ // type would need its own customized SSA.
+ // We move this check to the front of the fast path because many
+ // type checks are in fact trivially successful in this manner,
+ // so we get a nicely predicted branch right at the start of the check.
+ cmpptr(sub_klass, super_klass);
+ local_jcc(Assembler::equal, *L_success);
+
+ // Check the supertype display:
+ if (must_load_sco) {
+ // Positive movl does right thing on LP64.
+ movl(temp_reg, super_check_offset_addr);
+ super_check_offset = RegisterOrConstant(temp_reg);
+ }
+ Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
+ cmpptr(super_klass, super_check_addr); // load displayed supertype
+
+ // This check has worked decisively for primary supers.
+ // Secondary supers are sought in the super_cache ('super_cache_addr').
+ // (Secondary supers are interfaces and very deeply nested subtypes.)
+ // This works in the same check above because of a tricky aliasing
+ // between the super_cache and the primary super display elements.
+ // (The 'super_check_addr' can address either, as the case requires.)
+ // Note that the cache is updated below if it does not help us find
+ // what we need immediately.
+ // So if it was a primary super, we can just fail immediately.
+ // Otherwise, it's the slow path for us (no success at this point).
+
+ if (super_check_offset.is_register()) {
+ // Offset only known at run time: on a miss, compare the offset with
+ // sc_offset to choose between the slow path and immediate failure.
+ local_jcc(Assembler::equal, *L_success);
+ cmpl(super_check_offset.as_register(), sc_offset);
+ if (L_failure == &L_fallthrough) {
+ local_jcc(Assembler::equal, *L_slow_path);
+ } else {
+ local_jcc(Assembler::notEqual, *L_failure);
+ final_jmp(*L_slow_path);
+ }
+ } else if (super_check_offset.as_constant() == sc_offset) {
+ // Need a slow path; fast failure is impossible.
+ if (L_slow_path == &L_fallthrough) {
+ local_jcc(Assembler::equal, *L_success);
+ } else {
+ local_jcc(Assembler::notEqual, *L_slow_path);
+ final_jmp(*L_success);
+ }
+ } else {
+ // No slow path; it's a fast decision.
+ if (L_failure == &L_fallthrough) {
+ local_jcc(Assembler::equal, *L_success);
+ } else {
+ local_jcc(Assembler::notEqual, *L_failure);
+ final_jmp(*L_success);
+ }
+ }
+
+ bind(L_fallthrough);
+
+#undef local_jcc
+#undef final_jmp
+}
+
+
+// Emits the slow part of a subtype check: a linear scan of sub_klass's
+// secondary supers array for super_klass using repne_scan.
+//   temp_reg / temp2_reg - registers the caller allows to be clobbered; rax,
+//       rcx and rdi are pushed and popped around the scan unless they are
+//       one of these temps.
+//   L_success / L_failure - branch targets; at most one may be NULL, meaning
+//       "fall through".
+//   set_cond_codes - when true, asserts that rdi was not spilled (i.e. rdi
+//       is one of the temps); see the comment at the check below.
+// On success the emitted code also stores super_klass into sub_klass's
+// secondary super cache.
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+ Register super_klass,
+ Register temp_reg,
+ Register temp2_reg,
+ Label* L_success,
+ Label* L_failure,
+ bool set_cond_codes) {
+ assert_different_registers(sub_klass, super_klass, temp_reg);
+ if (temp2_reg != noreg)
+ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
+#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
+
+ // A NULL label is redirected to L_fallthrough, which is bound at the end.
+ Label L_fallthrough;
+ int label_nulls = 0;
+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+ assert(label_nulls <= 1, "at most one NULL in the batch");
+
+ // a couple of useful fields in sub_klass:
+ int ss_offset = in_bytes(Klass::secondary_supers_offset());
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+ Address secondary_supers_addr(sub_klass, ss_offset);
+ Address super_cache_addr( sub_klass, sc_offset);
+
+ // Do a linear scan of the secondary super-klass chain.
+ // This code is rarely used, so simplicity is a virtue here.
+ // The repne_scan instruction uses fixed registers, which we must spill.
+ // Don't worry too much about pre-existing connections with the input regs.
+
+ assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
+ assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
+
+ // Get super_klass value into rax (even if it was in rdi or rcx).
+ bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
+ if (super_klass != rax || UseCompressedOops) {
+ if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
+ mov(rax, super_klass);
+ }
+ if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
+ if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
+
+#ifndef PRODUCT
+ // non-product builds count how often this slow path is executed
+ int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
+ ExternalAddress pst_counter_addr((address) pst_counter);
+ NOT_LP64( incrementl(pst_counter_addr) );
+ LP64_ONLY( lea(rcx, pst_counter_addr) );
+ LP64_ONLY( incrementl(Address(rcx, 0)) );
+#endif //PRODUCT
+
+ // We will consult the secondary-super array.
+ movptr(rdi, secondary_supers_addr);
+ // Load the array length. (Positive movl does right thing on LP64.)
+ movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes()));
+ // Skip to start of data.
+ addptr(rdi, Array<Klass*>::base_offset_in_bytes());
+
+ // Scan RCX words at [RDI] for an occurrence of RAX.
+ // Set NZ/Z based on last compare.
+ // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
+ // not change flags (only scas instruction which is repeated sets flags).
+ // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
+
+ testptr(rax,rax); // Set Z = 0
+ repne_scan();
+
+ // Unspill the temp. registers:
+ // (reverse order of the pushes above)
+ if (pushed_rdi) pop(rdi);
+ if (pushed_rcx) pop(rcx);
+ if (pushed_rax) pop(rax);
+
+ if (set_cond_codes) {
+ // Special hack for the AD files: rdi is guaranteed non-zero.
+ assert(!pushed_rdi, "rdi must be left non-NULL");
+ // Also, the condition codes are properly set Z/NZ on succeed/failure.
+ }
+
+ // short branch when failure is the local fall-through label
+ if (L_failure == &L_fallthrough)
+ jccb(Assembler::notEqual, *L_failure);
+ else jcc(Assembler::notEqual, *L_failure);
+
+ // Success. Cache the super we found and proceed in triumph.
+ movptr(super_cache_addr, super_klass);
+
+ if (L_success != &L_fallthrough) {
+ jmp(*L_success);
+ }
+
+#undef IS_A_TEMP
+
+ bind(L_fallthrough);
+}
+
+
+// 32-bit conditional move from memory: dst = src if condition cc holds.
+// Uses cmovl when the CPU supports it; otherwise emits a short branch on the
+// negated condition around a plain movl.
+void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
+  if (!VM_Version::supports_cmov()) {
+    Label skip_move;
+    jccb(negate_condition(cc), skip_move);
+    movl(dst, src);
+    bind(skip_move);
+    return;
+  }
+  cmovl(cc, dst, src);
+}
+
+// 32-bit conditional register move: dst = src if condition cc holds.
+// Uses cmovl when the CPU supports it; otherwise emits a short branch on the
+// negated condition around a plain movl.
+void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
+  if (!VM_Version::supports_cmov()) {
+    Label skip_move;
+    jccb(negate_condition(cc), skip_move);
+    movl(dst, src);
+    bind(skip_move);
+    return;
+  }
+  cmovl(cc, dst, src);
+}
+
+// Emits a call to the verify_oop subroutine for the value in 'reg'; emits
+// nothing unless VerifyOops is set. 's' is a caller-supplied description that
+// is embedded in the message passed to the subroutine.
+// The message buffer is allocated with new[] here and not freed in this
+// function, since its address is baked into the emitted code.
+void MacroAssembler::verify_oop(Register reg, const char* s) {
+ if (!VerifyOops) return;
+
+ // Pass register number to verify_oop_subroutine
+ char* b = new char[strlen(s) + 50];
+ sprintf(b, "verify_oop: %s: %s", reg->name(), s);
+ BLOCK_COMMENT("verify_oop {");
+#ifdef _LP64
+ push(rscratch1); // save r10, trashed by movptr()
+#endif
+ push(rax); // save rax,
+ push(reg); // pass register argument
+ ExternalAddress buffer((address) b);
+ // avoid using pushptr, as it modifies scratch registers
+ // and our contract is not to modify anything
+ movptr(rax, buffer.addr());
+ push(rax);
+ // call indirectly to solve generation ordering problem
+ movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
+ call(rax);
+ // Caller pops the arguments (oop, message) and restores rax, r10
+ BLOCK_COMMENT("} verify_oop");
+}
+
+
+// Produces an operand for a value that may not be known yet when the code is
+// generated.
+//   delayed_value_addr - location of the value; 0 means "not yet set"
+//   tmp                - register receiving the value when it has to be
+//                        loaded at run time
+//   offset             - constant added to the value
+// If *delayed_value_addr is already nonzero at generation time, the result is
+// the constant value + offset and no code is emitted. Otherwise code is
+// emitted that loads the value from delayed_value_addr into tmp (adding
+// offset if nonzero) and the result is tmp. Debug builds additionally emit a
+// check that the loaded value is nonzero.
+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
+                                                      Register tmp,
+                                                      int offset) {
+  intptr_t value = *delayed_value_addr;
+  if (value != 0)
+    return RegisterOrConstant(value + offset);
+
+  // load indirectly to solve generation ordering problem
+  movptr(tmp, ExternalAddress((address) delayed_value_addr));
+
+#ifdef ASSERT
+  { Label L;
+    testptr(tmp, tmp);
+    if (WizardMode) {
+      jcc(Assembler::notZero, L);
+      char* buf = new char[40];
+      // The space before INTPTR_FORMAT is required: without it C++11 and
+      // later parse the macro name as a user-defined-literal suffix.
+      // NOTE(review): this prints delayed_value_addr[1], not [0] -- kept as
+      // in the original; confirm which slot is intended.
+      sprintf(buf, "DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]);
+      STOP(buf);
+    } else {
+      jccb(Assembler::notZero, L);
+      hlt();
+    }
+    bind(L);
+  }
+#endif
+
+  if (offset != 0)
+    addptr(tmp, offset);
+
+  return RegisterOrConstant(tmp);
+}
+
+
+// Builds the rsp-relative Address of an interpreter argument slot.
+//   arg_slot          - slot number, either a constant or a register
+//   extra_slot_offset - additional slots added to the base expression offset
+// A constant slot is folded into the displacement; a register slot becomes
+// the index register, scaled by the interpreter stack element size. One
+// extra word is added to skip the return PC on the stack.
+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
+ int extra_slot_offset) {
+ // cf. TemplateTable::prepare_invoke(), if (load_receiver).
+ int stackElementSize = Interpreter::stackElementSize;
+ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
+#ifdef ASSERT
+ // sanity check: consecutive expression slots are one stack element apart
+ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
+ assert(offset1 - offset == stackElementSize, "correct arithmetic");
+#endif
+ Register scale_reg = noreg;
+ Address::ScaleFactor scale_factor = Address::no_scale;
+ if (arg_slot.is_constant()) {
+ offset += arg_slot.as_constant() * stackElementSize;
+ } else {
+ scale_reg = arg_slot.as_register();
+ scale_factor = Address::times(stackElementSize);
+ }
+ offset += wordSize; // return PC is on stack
+ return Address(rsp, scale_reg, scale_factor, offset);
+}
+
+
+// Emits a call to the verify_oop subroutine for the value stored at 'addr';
+// emits nothing unless VerifyOops is set. 's' is a caller-supplied
+// description that is embedded in the message passed to the subroutine.
+// The message buffer is allocated with new[] here and not freed in this
+// function, since its address is baked into the emitted code.
+void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
+ if (!VerifyOops) return;
+
+ // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
+ // Pass register number to verify_oop_subroutine
+ char* b = new char[strlen(s) + 50];
+ sprintf(b, "verify_oop_addr: %s", s);
+
+#ifdef _LP64
+ push(rscratch1); // save r10, trashed by movptr()
+#endif
+ push(rax); // save rax,
+ // addr may contain rsp so we will have to adjust it based on the push
+ // we just did (and on 64 bit we do two pushes)
+ // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
+ // stores rax into addr which is backwards of what was intended.
+ if (addr.uses(rsp)) {
+ // rax = effective address computed after the pushes; add back the words
+ // pushed above (one, or two on LP64) to reach the intended slot
+ lea(rax, addr);
+ pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
+ } else {
+ pushptr(addr);
+ }
+
+ ExternalAddress buffer((address) b);
+ // pass msg argument
+ // avoid using pushptr, as it modifies scratch registers
+ // and our contract is not to modify anything
+ movptr(rax, buffer.addr());
+ push(rax);
+
+ // call indirectly to solve generation ordering problem
+ movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
+ call(rax);
+ // Caller pops the arguments (addr, message) and restores rax, r10.
+}
+
+// Debug-only (ASSERT builds, with UseTLAB and VerifyOops set): emits checks
+// that the current thread's TLAB pointers satisfy start <= top <= end, and
+// stops with a message otherwise. rsi (and rbx on 32-bit builds) are saved
+// and restored around the checks.
+void MacroAssembler::verify_tlab() {
+#ifdef ASSERT
+ if (UseTLAB && VerifyOops) {
+ Label next, ok;
+ Register t1 = rsi;
+ Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
+
+ push(t1);
+ NOT_LP64(push(thread_reg));
+ NOT_LP64(get_thread(thread_reg));
+
+ // check top >= start (unsigned compare)
+ movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
+ cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
+ jcc(Assembler::aboveEqual, next);
+ STOP("assert(top >= start)");
+ should_not_reach_here();
+
+ bind(next);
+ // check end >= top (unsigned compare)
+ movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
+ cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
+ jcc(Assembler::aboveEqual, ok);
+ STOP("assert(top <= end)");
+ should_not_reach_here();
+
+ bind(ok);
+ NOT_LP64(pop(thread_reg));
+ pop(t1);
+ }
+#endif
+}
+
+// Decoder for an FPU control word held in the low 16 bits of _value.
+// The accessors extract the rounding/precision control fields and the six
+// exception mask bits; print() writes a one-line summary to stdout.
+class ControlWord {
+ public:
+  int32_t _value;
+
+  // two-bit fields
+  int  rounding_control() const  { return (_value & 0x0C00) >> 10; }
+  int  precision_control() const { return (_value & 0x0300) >> 8; }
+  // single-bit exception masks, bits 5..0
+  bool precision() const         { return (_value & 0x20) != 0; }
+  bool underflow() const         { return (_value & 0x10) != 0; }
+  bool overflow() const          { return (_value & 0x08) != 0; }
+  bool zero_divide() const       { return (_value & 0x04) != 0; }
+  bool denormalized() const      { return (_value & 0x02) != 0; }
+  bool invalid() const           { return (_value & 0x01) != 0; }
+
+  void print() const {
+    // names indexed by the two-bit field values
+    static const char* const rc_names[4] = { "round near", "round down", "round up ", "chop " };
+    static const char* const pc_names[4] = { "24 bits ", "reserved", "53 bits ", "64 bits " };
+    const char* rc = rc_names[rounding_control()];
+    const char* pc = pc_names[precision_control()];
+    // flags: upper case when the bit is set, lower case when clear
+    const char f[9] = {
+      ' ',
+      ' ',
+      precision()    ? 'P' : 'p',
+      underflow()    ? 'U' : 'u',
+      overflow()     ? 'O' : 'o',
+      zero_divide()  ? 'Z' : 'z',
+      denormalized() ? 'D' : 'd',
+      invalid()      ? 'I' : 'i',
+      '\x0'
+    };
+    // output
+    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
+  }
+
+};
+
+// Decoder for an FPU status word held in the low 16 bits of _value.
+// The accessors extract the busy bit, the condition codes C3..C0, the
+// three-bit top-of-stack field and the exception flags; print() writes a
+// one-line summary to stdout.
+class StatusWord {
+ public:
+  int32_t _value;
+
+  bool busy() const         { return (_value & 0x8000) != 0; }
+  bool C3() const           { return (_value & 0x4000) != 0; }
+  bool C2() const           { return (_value & 0x0400) != 0; }
+  bool C1() const           { return (_value & 0x0200) != 0; }
+  bool C0() const           { return (_value & 0x0100) != 0; }
+  int  top() const          { return (_value & 0x3800) >> 11; }
+  bool error_status() const { return (_value & 0x80) != 0; }
+  bool stack_fault() const  { return (_value & 0x40) != 0; }
+  bool precision() const    { return (_value & 0x20) != 0; }
+  bool underflow() const    { return (_value & 0x10) != 0; }
+  bool overflow() const     { return (_value & 0x08) != 0; }
+  bool zero_divide() const  { return (_value & 0x04) != 0; }
+  bool denormalized() const { return (_value & 0x02) != 0; }
+  bool invalid() const      { return (_value & 0x01) != 0; }
+
+  void print() const {
+    // condition codes: the digit when set, '-' when clear
+    char c[5] = "----";
+    if (C3()) c[0] = '3';
+    if (C2()) c[1] = '2';
+    if (C1()) c[2] = '1';
+    if (C0()) c[3] = '0';
+    // flags: the letter when set, '-' when clear
+    char f[9] = "--------";
+    if (error_status()) f[0] = 'E';
+    if (stack_fault())  f[1] = 'S';
+    if (precision())    f[2] = 'P';
+    if (underflow())    f[3] = 'U';
+    if (overflow())     f[4] = 'O';
+    if (zero_divide())  f[5] = 'Z';
+    if (denormalized()) f[6] = 'D';
+    if (invalid())      f[7] = 'I';
+    // output
+    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
+  }
+
+};
+
+// Saved x87 FPU tag word: one two-bit tag per register, packed from bit 0.
+class TagWord {
+ public:
+  int32_t _value;
+
+  // Returns the two-bit tag stored for register index i.
+  int tag_at(int i) const {
+    const int shift = i * 2;
+    return (_value >> shift) & 0x3;
+  }
+
+  // Prints the low 16 bits as four hex digits.
+  void print() const {
+    printf("%04x", _value & 0xFFFF);
+  }
+
+};
+
+// View of one saved x87 register: two 32-bit mantissa halves (_m0 low,
+// _m1 high, as printed) followed by a 16-bit sign/exponent word.
+class FPU_Register {
+ public:
+  int32_t _m0;
+  int32_t _m1;
+  int16_t _ex;
+
+  // True when all three fields match the pattern checked here:
+  // _ex == -1, _m1 == 0xC0000000, _m0 == 0.
+  bool is_indefinite() const {
+    if (_ex != -1) return false;
+    if (_m1 != (int32_t)0xC0000000) return false;
+    return _m0 == 0;
+  }
+
+  // Prints sign, exponent word and mantissa in hex, tagging values whose
+  // exponent word is 0x7FFF or all ones as "NaN".
+  void print() const {
+    const bool negative      = (_ex < 0);
+    const bool exp_saturated = (_ex == 0x7FFF) || (_ex == (int16_t)-1);
+    const char sign  = negative ? '-' : '+';
+    const char* kind = exp_saturated ? "NaN" : " ";
+    printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind);
+  }
+
+};
+
+// Saved x87 FPU state: control, status and tag words, the error/data
+// pointer fields, and the raw bytes of the eight 10-byte registers.
+class FPU_State {
+ public:
+  enum {
+    register_size       = 10,
+    number_of_registers = 8,
+    register_mask       = 7
+  };
+
+  ControlWord _control_word;
+  StatusWord  _status_word;
+  TagWord     _tag_word;
+  int32_t     _error_offset;
+  int32_t     _error_selector;
+  int32_t     _data_offset;
+  int32_t     _data_selector;
+  int8_t      _register[register_size * number_of_registers];
+
+  // Tag of stack slot ST(i): the tag word is indexed by (top + i) mod 8.
+  int tag_for_st(int i) const {
+    const int tag_index = (_status_word.top() + i) & register_mask;
+    return _tag_word.tag_at(tag_index);
+  }
+
+  // Pointer to the i-th 10-byte register image inside _register.
+  FPU_Register* st(int i) const {
+    return (FPU_Register*)(_register + register_size * i);
+  }
+
+  // Maps a two-bit tag to its name; any other value is a fatal error.
+  const char* tag_as_string(int tag) const {
+    if (tag == 0) return "valid";
+    if (tag == 1) return "zero";
+    if (tag == 2) return "special";
+    if (tag == 3) return "empty";
+    ShouldNotReachHere();
+    return NULL;
+  }
+
+  // Dumps all eight registers (the one that is ST0 is marked with '*'),
+  // followed by the control, status and tag words.
+  void print() const {
+    // print computation registers
+    const int top = _status_word.top();
+    for (int reg = 0; reg < number_of_registers; reg++) {
+      const int st_index = (reg - top) & register_mask;
+      const char marker = (st_index == 0) ? '*' : ' ';
+      printf("%c r%d = ST%d = ", marker, reg, st_index);
+      st(st_index)->print();
+      printf(" %s\n", tag_as_string(_tag_word.tag_at(reg)));
+    }
+    printf("\n");
+    // print control registers
+    printf("ctrl = ");
+    _control_word.print();
+    printf("\n");
+    printf("stat = ");
+    _status_word.print();
+    printf("\n");
+    printf("tags = ");
+    _tag_word.print();
+    printf("\n");
+  }
+
+};
+
+// Read-only decoder for a saved flags register value.
+class Flag_Register {
+ public:
+  int32_t _value;
+
+  bool overflow() const        { return (_value & (1 << 11)) != 0; }
+  bool direction() const       { return (_value & (1 << 10)) != 0; }
+  bool sign() const            { return (_value & (1 << 7)) != 0; }
+  bool zero() const            { return (_value & (1 << 6)) != 0; }
+  bool auxiliary_carry() const { return (_value & (1 << 4)) != 0; }
+  bool parity() const          { return (_value & (1 << 2)) != 0; }
+  bool carry() const           { return (_value & (1 << 0)) != 0; }
+
+  // Prints the raw value followed by one letter per set flag ('-' if clear).
+  void print() const {
+    // flags
+    const char f[8] = {
+      overflow()        ? 'O' : '-',
+      direction()       ? 'D' : '-',
+      sign()            ? 'S' : '-',
+      zero()            ? 'Z' : '-',
+      auxiliary_carry() ? 'A' : '-',
+      parity()          ? 'P' : '-',
+      carry()           ? 'C' : '-',
+      '\x0'
+    };
+    // output
+    printf("%08x flags = %s", _value, f);
+  }
+
+};
+
+// One saved 32-bit integer register value.
+class IU_Register {
+ public:
+  int32_t _value;
+
+  // Prints the value as 8 hex digits followed by its signed decimal form.
+  void print() const {
+    const int32_t v = _value;
+    printf("%08x %11d", v, v);
+  }
+
+};
+
+// Saved integer-unit state: the flags register followed by the eight
+// general-purpose registers.
+// NOTE(review): presumably the field order mirrors the layout produced by
+// push_CPU_state() -- confirm before reordering any member.
+class IU_State {
+ public:
+  Flag_Register _eflags;
+  IU_Register   _rdi;
+  IU_Register   _rsi;
+  IU_Register   _rbp;
+  IU_Register   _rsp;
+  IU_Register   _rbx;
+  IU_Register   _rdx;
+  IU_Register   _rcx;
+  IU_Register   _rax;
+
+  // Prints every register on its own line, then the flags.
+  // The labels for rax, rbx and rbp used to carry a stray comma
+  // ("rax, = "); all labels now share the same "<name> = " form.
+  void print() const {
+    // computation registers
+    printf("rax = "); _rax.print(); printf("\n");
+    printf("rbx = "); _rbx.print(); printf("\n");
+    printf("rcx = "); _rcx.print(); printf("\n");
+    printf("rdx = "); _rdx.print(); printf("\n");
+    printf("rdi = "); _rdi.print(); printf("\n");
+    printf("rsi = "); _rsi.print(); printf("\n");
+    printf("rbp = "); _rbp.print(); printf("\n");
+    printf("rsp = "); _rsp.print(); printf("\n");
+    printf("\n");
+    // control registers
+    printf("flgs = "); _eflags.print(); printf("\n");
+  }
+};
+
+
+// Complete saved CPU state: FPU state first, integer-unit state second.
+class CPU_State {
+ public:
+  FPU_State _fpu_state;
+  IU_State  _iu_state;
+
+  // Prints the integer state and then the FPU state between two rules.
+  void print() const {
+    const char* const rule = "--------------------------------------------------\n";
+    printf("%s", rule);
+    _iu_state.print();
+    printf("\n");
+    _fpu_state.print();
+    printf("%s", rule);
+  }
+
+};
+
+
+// Runtime entry point whose address is called from the code emitted by
+// MacroAssembler::print_CPU_state(); prints the state it is handed.
+static void _print_CPU_state(CPU_State* state) {
+  const CPU_State& saved = *state;
+  saved.print();
+}
+
+
+// Emits code that saves the CPU state, calls _print_CPU_state with the
+// current rsp as its single stack argument, and then restores the state.
+void MacroAssembler::print_CPU_state() {
+ push_CPU_state();
+ push(rsp); // pass CPU state
+ call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
+ addptr(rsp, wordSize); // discard argument
+ pop_CPU_state();
+}
+
+
+// Runtime check called from the code emitted by MacroAssembler::verify_FPU.
+//
+//   stack_depth  expected number of occupied FPU stack slots; a negative
+//                value selects the relaxed "leaf call" check below
+//   s            message prefix used in diagnostics
+//   state        saved CPU state to inspect
+//
+// Returns true when the check passes. On failure it prints a diagnostic and
+// the full CPU state, asserts, and returns false.
+static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
+  static int counter = 0;
+  FPU_State* fs = &state->_fpu_state;
+  counter++;
+  // For leaf calls, only verify that the top few elements remain empty.
+  // We only need 1 empty at the top for C2 code.
+  if (stack_depth < 0) {
+    if (fs->tag_for_st(7) != 3) {
+      printf("FPR7 not empty\n");
+      state->print();
+      assert(false, "error");
+      return false;
+    }
+    return true;                // All other stack states do not matter
+  }
+
+  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
+         "bad FPU control word");
+
+  // compute stack depth: count leading non-empty slots (tag < 3), then
+  // require every remaining slot to be empty (tag == 3)
+  int i = 0;
+  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++;
+  int d = i;
+  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
+  // verify findings
+  if (i != FPU_State::number_of_registers) {
+    // stack not contiguous
+    printf("%s: stack not contiguous at ST%d\n", s, i);
+    state->print();
+    assert(false, "error");
+    return false;
+  }
+  // check if computed stack depth corresponds to expected stack depth.
+  // stack_depth is known to be non-negative here: every negative value
+  // returned from the leaf-call check above, so the former
+  // "<= -stack_depth elements" branch could never execute and was removed.
+  if (d != stack_depth) {
+    // wrong stack depth
+    printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
+    state->print();
+    assert(false, "error");
+    return false;
+  }
+  // everything is cool
+  return true;
+}
+
+
+// Emits a call to _verify_FPU(stack_depth, s, saved CPU state) and an int3
+// that is reached when the helper returns zero. Emits nothing unless
+// VerifyFPU is set.
+void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
+ if (!VerifyFPU) return;
+ push_CPU_state();
+ push(rsp); // pass CPU state
+ ExternalAddress msg((address) s);
+ // pass message string s
+ pushptr(msg.addr());
+ push(stack_depth); // pass stack depth
+ call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
+ addptr(rsp, 3 * wordSize); // discard arguments
+ // check for error
+ { Label L;
+ // a non-zero result in rax skips the breakpoint
+ testl(rax, rax);
+ jcc(Assembler::notZero, L);
+ int3(); // break if error condition
+ bind(L);
+ }
+ pop_CPU_state();
+}
+
+// Loads the klass field of the object in src into dst, decoding it when
+// compressed klass pointers are in use (64-bit only).
+void MacroAssembler::load_klass(Register dst, Register src) {
+#ifdef _LP64
+  if (UseCompressedKlassPointers) {
+    // 32-bit narrow klass: load, then expand in place.
+    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+    decode_klass_not_null(dst);
+    return;
+  }
+#endif
+  movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+}
+
+// Loads into dst the prototype header of the klass of the object in src.
+void MacroAssembler::load_prototype_header(Register dst, Register src) {
+#ifdef _LP64
+  if (UseCompressedKlassPointers) {
+    assert (Universe::heap() != NULL, "java heap should be initialized");
+    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+    if (Universe::narrow_klass_shift() == 0) {
+      // Unshifted narrow klass: use it directly as the base address.
+      movq(dst, Address(dst, Klass::prototype_header_offset()));
+    } else {
+      // Fold the decode (r12_heapbase + narrow * 8) into the addressing mode.
+      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
+      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
+    }
+    return;
+  }
+#endif
+  movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+  movptr(dst, Address(dst, Klass::prototype_header_offset()));
+}
+
+// Stores the klass in src into the klass field of the object in dst.
+// With compressed klass pointers src is encoded in place first, so its
+// original value is not preserved.
+void MacroAssembler::store_klass(Register dst, Register src) {
+#ifdef _LP64
+  if (UseCompressedKlassPointers) {
+    encode_klass_not_null(src);
+    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
+    return;
+  }
+#endif
+  movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
+}
+
+// Loads a heap oop from src into dst, decoding it when compressed oops
+// are in use (64-bit only).
+void MacroAssembler::load_heap_oop(Register dst, Address src) {
+#ifdef _LP64
+  // FIXME: Must change all places where we try to load the klass.
+  if (UseCompressedOops) {
+    movl(dst, src);
+    decode_heap_oop(dst);
+    return;
+  }
+#endif
+  movptr(dst, src);
+}
+
+// Variant of load_heap_oop for oops known to be non-null.
+// Doesn't do verification, generates fixed size code.
+void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
+#ifdef _LP64
+  if (UseCompressedOops) {
+    movl(dst, src);
+    decode_heap_oop_not_null(dst);
+    return;
+  }
+#endif
+  movptr(dst, src);
+}
+
+// Stores the oop in src to dst. With compressed oops src is encoded in
+// place before the 32-bit store, so dst must not be addressed through src.
+void MacroAssembler::store_heap_oop(Address dst, Register src) {
+#ifdef _LP64
+  if (UseCompressedOops) {
+    assert(!dst.uses(src), "not enough registers");
+    encode_heap_oop(src);
+    movl(dst, src);
+    return;
+  }
+#endif
+  movptr(dst, src);
+}
+
+// Compares the oop in src1 with the heap oop stored at src2.
+// With compressed oops the memory operand is loaded and decoded into tmp
+// first; if tmp is noreg, rax is borrowed and preserved with push/pop.
+void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
+  assert_different_registers(src1, tmp);
+#ifdef _LP64
+  if (UseCompressedOops) {
+    const bool borrow_rax = (tmp == noreg);
+    if (borrow_rax) {
+      tmp = rax;
+      push(tmp);
+      // The push moved rsp, so an rsp-relative src2 would now be off.
+      assert(!src2.uses(rsp), "can't push");
+    }
+    load_heap_oop(tmp, src2);
+    cmpptr(src1, tmp);
+    if (borrow_rax) {
+      pop(tmp);
+    }
+    return;
+  }
+#endif
+  cmpptr(src1, src2);
+}
+
+// Used for storing NULLs: writes NULL_WORD to the oop slot at dst.
+void MacroAssembler::store_heap_oop_null(Address dst) {
+#ifdef _LP64
+  if (!UseCompressedOops) {
+    // Uncompressed 64-bit slot: store the sign-extended 32-bit immediate.
+    movslq(dst, (int32_t)NULL_WORD);
+    return;
+  }
+#endif
+  // 32-bit VM, or a compressed (32-bit) oop slot.
+  movl(dst, (int32_t)NULL_WORD);
+}
+
+#ifdef _LP64
+// Stores the 32-bit value in src into the klass gap of the object in dst.
+// Emits nothing unless compressed klass pointers are in use.
+void MacroAssembler::store_klass_gap(Register dst, Register src) {
+  if (!UseCompressedKlassPointers) {
+    return;
+  }
+  // Store to klass gap in destination
+  movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
+}
+
+#ifdef ASSERT
+// Debug-only: when CheckCompressedOops is set, emits a check that
+// r12_heapbase equals the value stored at Universe::narrow_ptrs_base_addr()
+// and stops with msg if it does not.
+void MacroAssembler::verify_heapbase(const char* msg) {
+  assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  if (!CheckCompressedOops) {
+    return;
+  }
+  Label heapbase_ok;
+  push(rscratch1); // cmpptr trashes rscratch1
+  cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
+  jcc(Assembler::equal, heapbase_ok);
+  STOP(msg);
+  bind(heapbase_ok);
+  pop(rscratch1);
+}
+#endif
+
+// Compresses the (possibly null) oop in r in place.
+// Algorithm must match oop.inline.hpp encode_heap_oop.
+void MacroAssembler::encode_heap_oop(Register r) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
+#endif
+  verify_oop(r, "broken oop in encode_heap_oop");
+  if (Universe::narrow_oop_base() != NULL) {
+    // Replace a zero r with r12_heapbase so the subtraction below yields 0.
+    testq(r, r);
+    cmovq(Assembler::equal, r, r12_heapbase);
+    subq(r, r12_heapbase);
+    shrq(r, LogMinObjAlignmentInBytes);
+    return;
+  }
+  // No base to subtract: only the optional shift is needed.
+  if (Universe::narrow_oop_shift() != 0) {
+    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    shrq(r, LogMinObjAlignmentInBytes);
+  }
+}
+
+// Compresses in place an oop in r that the caller guarantees is non-null.
+// Debug builds with CheckCompressedOops emit a runtime null check.
+void MacroAssembler::encode_heap_oop_not_null(Register r) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
+  if (CheckCompressedOops) {
+    Label not_null;
+    testq(r, r);
+    jcc(Assembler::notEqual, not_null);
+    STOP("null oop passed to encode_heap_oop_not_null");
+    bind(not_null);
+  }
+#endif
+  verify_oop(r, "broken oop in encode_heap_oop_not_null");
+  const bool has_base  = (Universe::narrow_oop_base() != NULL);
+  const bool has_shift = (Universe::narrow_oop_shift() != 0);
+  if (has_base) {
+    subq(r, r12_heapbase);
+  }
+  if (has_shift) {
+    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    shrq(r, LogMinObjAlignmentInBytes);
+  }
+}
+
+// Two-register form: compresses the non-null oop in src into dst.
+// src is left untouched unless dst and src are the same register.
+void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
+  if (CheckCompressedOops) {
+    Label not_null;
+    testq(src, src);
+    jcc(Assembler::notEqual, not_null);
+    STOP("null oop passed to encode_heap_oop_not_null2");
+    bind(not_null);
+  }
+#endif
+  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
+  const bool has_base  = (Universe::narrow_oop_base() != NULL);
+  const bool has_shift = (Universe::narrow_oop_shift() != 0);
+  if (dst != src) {
+    movq(dst, src);
+  }
+  if (has_base) {
+    subq(dst, r12_heapbase);
+  }
+  if (has_shift) {
+    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    shrq(dst, LogMinObjAlignmentInBytes);
+  }
+}
+
+// Expands in place the (possibly null) compressed oop in r.
+void MacroAssembler::decode_heap_oop(Register r) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
+#endif
+  if (Universe::narrow_oop_base() != NULL) {
+    Label is_null;
+    shlq(r, LogMinObjAlignmentInBytes);
+    // Branch on the flags left by the shift: a zero result skips the add.
+    jccb(Assembler::equal, is_null);
+    addq(r, r12_heapbase);
+    bind(is_null);
+  } else if (Universe::narrow_oop_shift() != 0) {
+    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    shlq(r, LogMinObjAlignmentInBytes);
+  }
+  verify_oop(r, "broken oop in decode_heap_oop");
+}
+
+// Expands in place a compressed oop in r that is known to be non-null.
+void MacroAssembler::decode_heap_oop_not_null(Register r) {
+  // Note: it will change flags
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  // Cannot assert, unverified entry point counts instructions (see .ad file)
+  // vtableStubs also counts instructions in pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_oop_shift() == 0) {
+    // Unshifted mode emits no code at all.
+    assert (Universe::narrow_oop_base() == NULL, "sanity");
+    return;
+  }
+  assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+  shlq(r, LogMinObjAlignmentInBytes);
+  if (Universe::narrow_oop_base() != NULL) {
+    addq(r, r12_heapbase);
+  }
+}
+
+// Two-register form: expands the non-null compressed oop in src into dst.
+void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
+  // Note: it will change flags
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  // Cannot assert, unverified entry point counts instructions (see .ad file)
+  // vtableStubs also counts instructions in pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_oop_shift() == 0) {
+    // Unshifted mode: the narrow value is already the address.
+    assert (Universe::narrow_oop_base() == NULL, "sanity");
+    if (dst != src) {
+      movq(dst, src);
+    }
+    return;
+  }
+  assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+  if (LogMinObjAlignmentInBytes == Address::times_8) {
+    // A single lea computes r12_heapbase + src * 8.
+    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+    return;
+  }
+  // Other shift amounts: copy, shift, then add the base if there is one.
+  if (dst != src) {
+    movq(dst, src);
+  }
+  shlq(dst, LogMinObjAlignmentInBytes);
+  if (Universe::narrow_oop_base() != NULL) {
+    addq(dst, r12_heapbase);
+  }
+}
+
+// Compresses in place the klass pointer in r.
+void MacroAssembler::encode_klass_not_null(Register r) {
+  assert(Metaspace::is_initialized(), "metaspace should be initialized");
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?");
+#endif
+  const bool has_base  = (Universe::narrow_klass_base() != NULL);
+  const bool has_shift = (Universe::narrow_klass_shift() != 0);
+  if (has_base) {
+    subq(r, r12_heapbase);
+  }
+  if (has_shift) {
+    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+    shrq(r, LogKlassAlignmentInBytes);
+  }
+}
+
+// Two-register form: compresses the klass pointer in src into dst.
+void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
+  assert(Metaspace::is_initialized(), "metaspace should be initialized");
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?");
+#endif
+  const bool has_base  = (Universe::narrow_klass_base() != NULL);
+  const bool has_shift = (Universe::narrow_klass_shift() != 0);
+  if (dst != src) {
+    movq(dst, src);
+  }
+  if (has_base) {
+    subq(dst, r12_heapbase);
+  }
+  if (has_shift) {
+    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+    shrq(dst, LogKlassAlignmentInBytes);
+  }
+}
+
+// Expands in place the compressed klass pointer in r.
+void MacroAssembler::decode_klass_not_null(Register r) {
+  assert(Metaspace::is_initialized(), "metaspace should be initialized");
+  // Note: it will change flags
+  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  // Cannot assert, unverified entry point counts instructions (see .ad file)
+  // vtableStubs also counts instructions in pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_klass_shift() == 0) {
+    // Unshifted mode emits no code at all.
+    assert (Universe::narrow_klass_base() == NULL, "sanity");
+    return;
+  }
+  assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+  shlq(r, LogKlassAlignmentInBytes);
+  if (Universe::narrow_klass_base() != NULL) {
+    addq(r, r12_heapbase);
+  }
+}
+
+// Two-register form: expands the compressed klass pointer in src into dst.
+void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
+  assert(Metaspace::is_initialized(), "metaspace should be initialized");
+  // Note: it will change flags
+  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  // Cannot assert, unverified entry point counts instructions (see .ad file)
+  // vtableStubs also counts instructions in pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_klass_shift() == 0) {
+    // Unshifted mode: the narrow value is already the address.
+    assert (Universe::narrow_klass_base() == NULL, "sanity");
+    if (dst != src) {
+      movq(dst, src);
+    }
+    return;
+  }
+  assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+  assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
+  // A single lea computes r12_heapbase + src * 8.
+  leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+}
+
+// Emits a move of the narrow oop for obj into register dst, tagged with an
+// oop relocation for obj's index in the oop recorder.
+void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  const int recorded_index = oop_recorder()->find_index(obj);
+  RelocationHolder reloc = oop_Relocation::spec(recorded_index);
+  mov_narrow_oop(dst, recorded_index, reloc);
+}
+
+// Emits a store of the narrow oop for obj to memory at dst, tagged with an
+// oop relocation for obj's index in the oop recorder.
+void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  const int recorded_index = oop_recorder()->find_index(obj);
+  RelocationHolder reloc = oop_Relocation::spec(recorded_index);
+  mov_narrow_oop(dst, recorded_index, reloc);
+}
+
+// Emits a move of the encoded klass k into register dst, tagged with a
+// metadata relocation for k's index in the oop recorder.
+void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
+  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  const int recorded_index = oop_recorder()->find_index(k);
+  RelocationHolder reloc = metadata_Relocation::spec(recorded_index);
+  // The immediate is the encoded klass itself, not the recorder index.
+  mov_narrow_oop(dst, oopDesc::encode_klass(k), reloc);
+}
+
+// Emits a store of the encoded klass k to memory at dst, tagged with a
+// metadata relocation for k's index in the oop recorder.
+void MacroAssembler::set_narrow_klass(Address dst, Klass* k) {
+  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  const int recorded_index = oop_recorder()->find_index(k);
+  RelocationHolder reloc = metadata_Relocation::spec(recorded_index);
+  // The immediate is the encoded klass itself, not the recorder index.
+  mov_narrow_oop(dst, oopDesc::encode_klass(k), reloc);
+}
+
+// Emits a compare of register dst against the narrow oop for obj, tagged
+// with an oop relocation for obj's index in the oop recorder.
+void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  const int recorded_index = oop_recorder()->find_index(obj);
+  RelocationHolder reloc = oop_Relocation::spec(recorded_index);
+  Assembler::cmp_narrow_oop(dst, recorded_index, reloc);
+}
+
+// Emits a compare of memory at dst against the narrow oop for obj, tagged
+// with an oop relocation for obj's index in the oop recorder.
+void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  const int recorded_index = oop_recorder()->find_index(obj);
+  RelocationHolder reloc = oop_Relocation::spec(recorded_index);
+  Assembler::cmp_narrow_oop(dst, recorded_index, reloc);
+}
+
+// Emits a compare of register dst against the encoded klass k, tagged with
+// a metadata relocation for k's index in the oop recorder.
+void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) {
+  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  const int recorded_index = oop_recorder()->find_index(k);
+  RelocationHolder reloc = metadata_Relocation::spec(recorded_index);
+  // The immediate is the encoded klass itself, not the recorder index.
+  Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), reloc);
+}
+
+// Emits a compare of memory at dst against the encoded klass k, tagged with
+// a metadata relocation for k's index in the oop recorder.
+void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) {
+  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  const int recorded_index = oop_recorder()->find_index(k);
+  RelocationHolder reloc = metadata_Relocation::spec(recorded_index);
+  // The immediate is the encoded klass itself, not the recorder index.
+  Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), reloc);
+}
+
+// Reloads r12_heapbase from Universe::narrow_ptrs_base_addr(). Emits
+// nothing when neither oops nor klass pointers are compressed.
+void MacroAssembler::reinit_heapbase() {
+  const bool uses_heapbase = UseCompressedOops || UseCompressedKlassPointers;
+  if (!uses_heapbase) {
+    return;
+  }
+  movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
+}
+#endif // _LP64
+
+
+// C2 compiled method's prolog code.
+//
+// framesize    frame size in bytes including the return address slot;
+//              must be a multiple of StackAlignmentInBytes (asserted)
+// stack_bang   when true, a stack overflow check is generated first and
+//              rbp is saved with a push; otherwise the frame is allocated
+//              with a 4-byte-immediate subtract and rbp is stored into it
+// fp_mode_24b  32-bit only: load the 24-bit FPU control word on entry
+void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {
+
+ // WARNING: Initial instruction MUST be 5 bytes or longer so that
+ // NativeJump::patch_verified_entry will be able to patch out the entry
+ // code safely. The push to verify stack depth is ok at 5 bytes,
+ // the frame allocation can be either 3 or 6 bytes. So if we don't do
+ // stack bang then we must use the 6 byte frame allocation even if
+ // we have no frame. :-(
+
+ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ // Remove word for return addr
+ framesize -= wordSize;
+
+ // Calls to C2R adapters often do not accept exceptional returns.
+ // We require that their callers must bang for them. But be careful, because
+ // some VM calls (such as call site linkage) can use several kilobytes of
+ // stack. But the stack safety zone should account for that.
+ // See bugs 4446381, 4468289, 4497237.
+ if (stack_bang) {
+ generate_stack_overflow_check(framesize);
+
+ // We always push rbp, so that on return to interpreter rbp, will be
+ // restored correctly and we can correct the stack.
+ push(rbp);
+ // Remove word for ebp
+ framesize -= wordSize;
+
+ // Create frame
+ if (framesize) {
+ subptr(rsp, framesize);
+ }
+ } else {
+ // Create frame (force generation of a 4 byte immediate value)
+ subptr_imm32(rsp, framesize);
+
+ // Save RBP register now.
+ framesize -= wordSize;
+ movptr(Address(rsp, framesize), rbp);
+ }
+
+ if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
+ // the cookie goes into the slot just below the saved rbp
+ framesize -= wordSize;
+ movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
+ }
+
+#ifndef _LP64
+ // If method sets FPU control word do it now
+ if (fp_mode_24b) {
+ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
+ }
+ if (UseSSE >= 2 && VerifyFPU) {
+ verify_FPU(0, "FPU stack must be clean on entry");
+ }
+#endif
+
+#ifdef ASSERT
+ if (VerifyStackAtCalls) {
+ // Debug check: after the push below, rsp modulo StackAlignmentInBytes
+ // must equal StackAlignmentInBytes - wordSize; rax is preserved.
+ Label L;
+ push(rax);
+ mov(rax, rsp);
+ andptr(rax, StackAlignmentInBytes-1);
+ cmpptr(rax, StackAlignmentInBytes-wordSize);
+ pop(rax);
+ jcc(Assembler::equal, L);
+ STOP("Stack is not properly aligned!");
+ bind(L);
+ }
+#endif
+
+}
+
+
+// IndexOf for constant substrings with size >= 8 chars
+// which don't need to be loaded through stack.
+//
+// str1/cnt1 - scanned string address and element count (cnt1 must be rdx)
+// str2/cnt2 - substring address and element count register (cnt2 must be rax)
+// int_cnt2  - constant substring length, >= 8
+// result    - receives the element index of the match, or -1 if not found
+// vec       - XMM scratch holding the current 16 bytes of the substring
+// tmp       - must be rcx (pcmpestri index output)
+void MacroAssembler::string_indexofC8(Register str1, Register str2,
+ Register cnt1, Register cnt2,
+ int int_cnt2, Register result,
+ XMMRegister vec, Register tmp) {
+ ShortBranchVerifier sbv(this);
+ assert(UseSSE42Intrinsics, "SSE4.2 is required");
+
+ // This method uses pcmpestri instruction with bound registers
+ // inputs:
+ // xmm - substring
+ // rax - substring length (elements count)
+ // mem - scanned string
+ // rdx - string length (elements count)
+ // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
+ // outputs:
+ // rcx - matched index in string
+ assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+
+ Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
+ RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
+ MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
+
+ // Note, inline_string_indexOf() generates checks:
+ // if (substr.count > string.count) return -1;
+ // if (substr.count == 0) return 0;
+ assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");
+
+ // Load substring.
+ movdqu(vec, Address(str2, 0));
+ movl(cnt2, int_cnt2);
+ movptr(result, str1); // string addr
+
+ if (int_cnt2 > 8) {
+ jmpb(SCAN_TO_SUBSTR);
+
+ // Reload substr for rescan, this code
+ // is executed only for large substrings (> 8 chars)
+ bind(RELOAD_SUBSTR);
+ movdqu(vec, Address(str2, 0));
+ negptr(cnt2); // Jumped here with negative cnt2, convert to positive
+
+ bind(RELOAD_STR);
+ // We came here after the beginning of the substring was
+ // matched but the rest of it was not so we need to search
+ // again. Start from the next element after the previous match.
+
+ // cnt2 is number of substring remaining elements and
+ // cnt1 is number of string remaining elements when cmp failed.
+ // Restored cnt1 = cnt1 - cnt2 + int_cnt2
+ subl(cnt1, cnt2);
+ addl(cnt1, int_cnt2);
+ movl(cnt2, int_cnt2); // Now restore cnt2
+
+ decrementl(cnt1); // Shift to next element
+ cmpl(cnt1, cnt2);
+ jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
+
+ addptr(result, 2);
+
+ } // (int_cnt2 > 8)
+
+ // Scan string for start of substr in 16-byte vectors
+ bind(SCAN_TO_SUBSTR);
+ pcmpestri(vec, Address(result, 0), 0x0d);
+ jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
+ subl(cnt1, 8);
+ jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
+ cmpl(cnt1, cnt2);
+ jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
+ addptr(result, 16);
+ jmpb(SCAN_TO_SUBSTR);
+
+ // Found a potential substr
+ bind(FOUND_CANDIDATE);
+ // Matched whole vector if first element matched (tmp(rcx) == 0).
+ if (int_cnt2 == 8) {
+ jccb(Assembler::overflow, RET_FOUND); // OF == 1
+ } else { // int_cnt2 > 8
+ jccb(Assembler::overflow, FOUND_SUBSTR);
+ }
+ // After pcmpestri tmp(rcx) contains matched element index
+ // Compute start addr of substr
+ lea(result, Address(result, tmp, Address::times_2));
+
+ // Make sure string is still long enough
+ subl(cnt1, tmp);
+ cmpl(cnt1, cnt2);
+ if (int_cnt2 == 8) {
+ jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
+ } else { // int_cnt2 > 8
+ jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
+ }
+ // Left less than substring.
+
+ bind(RET_NOT_FOUND);
+ movl(result, -1);
+ jmpb(EXIT);
+
+ if (int_cnt2 > 8) {
+ // This code is optimized for the case when whole substring
+ // is matched if its head is matched.
+ bind(MATCH_SUBSTR_HEAD);
+ pcmpestri(vec, Address(result, 0), 0x0d);
+ // Reload only string if does not match
+ jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
+
+ Label CONT_SCAN_SUBSTR;
+ // Compare the rest of substring (> 8 chars).
+ bind(FOUND_SUBSTR);
+ // First 8 chars are already matched.
+ negptr(cnt2);
+ addptr(cnt2, 8);
+
+ bind(SCAN_SUBSTR);
+ subl(cnt1, 8);
+ cmpl(cnt2, -8); // Do not read beyond substring
+ jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
+ // Back-up strings to avoid reading beyond substring:
+ // cnt1 = cnt1 - cnt2 + 8
+ addl(cnt1, cnt2); // cnt2 is negative
+ addl(cnt1, 8);
+ movl(cnt2, 8); negptr(cnt2);
+ bind(CONT_SCAN_SUBSTR);
+ if (int_cnt2 < (int)G) {
+ movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
+ pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
+ } else {
+ // calculate index in register to avoid integer overflow (int_cnt2*2)
+ movl(tmp, int_cnt2);
+ addptr(tmp, cnt2);
+ movdqu(vec, Address(str2, tmp, Address::times_2, 0));
+ pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
+ }
+ // Need to reload strings pointers if not matched whole vector
+ jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
+ addptr(cnt2, 8);
+ jcc(Assembler::negative, SCAN_SUBSTR);
+ // Fall through if found full substring
+
+ } // (int_cnt2 > 8)
+
+ bind(RET_FOUND);
+ // Found result if we matched full small substring.
+ // Compute substr offset
+ subptr(result, str1);
+ shrl(result, 1); // index
+ bind(EXIT);
+
+} // string_indexofC8
+
+// Small strings are loaded through stack if they cross page boundary.
+void MacroAssembler::string_indexof(Register str1, Register str2,
+ Register cnt1, Register cnt2,
+ int int_cnt2, Register result,
+ XMMRegister vec, Register tmp) {
+ ShortBranchVerifier sbv(this);
+ assert(UseSSE42Intrinsics, "SSE4.2 is required");
+ //
+ // int_cnt2 is length of small (< 8 chars) constant substring
+ // or (-1) for non constant substring in which case its length
+ // is in cnt2 register.
+ //
+ // Note, inline_string_indexOf() generates checks:
+ // if (substr.count > string.count) return -1;
+ // if (substr.count == 0) return 0;
+ //
+ assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
+
+ // This method uses pcmpestri inxtruction with bound registers
+ // inputs:
+ // xmm - substring
+ // rax - substring length (elements count)
+ // mem - scanned string
+ // rdx - string length (elements count)
+ // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
+ // outputs:
+ // rcx - matched index in string
+ assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+
+ Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
+ RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
+ FOUND_CANDIDATE;
+
+ { //========================================================
+ // We don't know where these strings are located
+ // and we can't read beyond them. Load them through stack.
+ Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
+
+ movptr(tmp, rsp); // save old SP
+
+ if (int_cnt2 > 0) { // small (< 8 chars) constant substring
+ if (int_cnt2 == 1) { // One char
+ load_unsigned_short(result, Address(str2, 0));
+ movdl(vec, result); // move 32 bits
+ } else if (int_cnt2 == 2) { // Two chars
+ movdl(vec, Address(str2, 0)); // move 32 bits
+ } else if (int_cnt2 == 4) { // Four chars
+ movq(vec, Address(str2, 0)); // move 64 bits
+ } else { // cnt2 = { 3, 5, 6, 7 }
+ // Array header size is 12 bytes in 32-bit VM
+ // + 6 bytes for 3 chars == 18 bytes,
+ // enough space to load vec and shift.
+ assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
+ movdqu(vec, Address(str2, (int_cnt2*2)-16));
+ psrldq(vec, 16-(int_cnt2*2));
+ }
+ } else { // not constant substring
+ cmpl(cnt2, 8);
+ jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
+
+ // We can read beyond string if srt+16 does not cross page boundary
+ // since heaps are aligned and mapped by pages.
+ assert(os::vm_page_size() < (int)G, "default page should be small");
+ movl(result, str2); // We need only low 32 bits
+ andl(result, (os::vm_page_size()-1));
+ cmpl(result, (os::vm_page_size()-16));
+ jccb(Assembler::belowEqual, CHECK_STR);
+
+ // Move small strings to stack to allow load 16 bytes into vec.
+ subptr(rsp, 16);
+ int stk_offset = wordSize-2;
+ push(cnt2);
+
+ bind(COPY_SUBSTR);
+ load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
+ movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
+ decrement(cnt2);
+ jccb(Assembler::notZero, COPY_SUBSTR);
+
+ pop(cnt2);
+ movptr(str2, rsp); // New substring address
+ } // non constant
+
+ bind(CHECK_STR);
+ cmpl(cnt1, 8);
+ jccb(Assembler::aboveEqual, BIG_STRINGS);
+
+ // Check cross page boundary.
+ movl(result, str1); // We need only low 32 bits
+ andl(result, (os::vm_page_size()-1));
+ cmpl(result, (os::vm_page_size()-16));
+ jccb(Assembler::belowEqual, BIG_STRINGS);
+
+ subptr(rsp, 16);
+ int stk_offset = -2;
+ if (int_cnt2 < 0) { // not constant
+ push(cnt2);
+ stk_offset += wordSize;
+ }
+ movl(cnt2, cnt1);
+
+ bind(COPY_STR);
+ load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
+ movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
+ decrement(cnt2);
+ jccb(Assembler::notZero, COPY_STR);
+
+ if (int_cnt2 < 0) { // not constant
+ pop(cnt2);
+ }
+ movptr(str1, rsp); // New string address
+
+ bind(BIG_STRINGS);
+ // Load substring.
+ if (int_cnt2 < 0) { // -1
+ movdqu(vec, Address(str2, 0));
+ push(cnt2); // substr count
+ push(str2); // substr addr
+ push(str1); // string addr
+ } else {
+ // Small (< 8 chars) constant substrings are loaded already.
+ movl(cnt2, int_cnt2);
+ }
+ push(tmp); // original SP
+
+ } // Finished loading
+
+ //========================================================
+ // Start search
+ //
+
+ movptr(result, str1); // string addr
+
+ if (int_cnt2 < 0) { // Only for non constant substring
+ jmpb(SCAN_TO_SUBSTR);
+
+ // SP saved at sp+0
+ // String saved at sp+1*wordSize
+ // Substr saved at sp+2*wordSize
+ // Substr count saved at sp+3*wordSize
+
+ // Reload substr for rescan, this code
+ // is executed only for large substrings (> 8 chars)
+ bind(RELOAD_SUBSTR);
+ movptr(str2, Address(rsp, 2*wordSize));
+ movl(cnt2, Address(rsp, 3*wordSize));
+ movdqu(vec, Address(str2, 0));
+ // We came here after the beginning of the substring was
+ // matched but the rest of it was not so we need to search
+ // again. Start from the next element after the previous match.
+ subptr(str1, result); // Restore counter
+ shrl(str1, 1);
+ addl(cnt1, str1);
+ decrementl(cnt1); // Shift to next element
+ cmpl(cnt1, cnt2);
+ jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
+
+ addptr(result, 2);
+ } // non constant
+
+ // Scan string for start of substr in 16-byte vectors
+ bind(SCAN_TO_SUBSTR);
+ assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+ pcmpestri(vec, Address(result, 0), 0x0d);
+ jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
+ subl(cnt1, 8);
+ jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
+ cmpl(cnt1, cnt2);
+ jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring
+ addptr(result, 16);
+
+ bind(ADJUST_STR);
+ cmpl(cnt1, 8); // Do not read beyond string
+ jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
+ // Back-up string to avoid reading beyond string.
+ lea(result, Address(result, cnt1, Address::times_2, -16));
+ movl(cnt1, 8);
+ jmpb(SCAN_TO_SUBSTR);
+
+ // Found a potential substr
+ bind(FOUND_CANDIDATE);
+ // After pcmpestri tmp(rcx) contains matched element index
+
+ // Make sure string is still long enough
+ subl(cnt1, tmp);
+ cmpl(cnt1, cnt2);
+ jccb(Assembler::greaterEqual, FOUND_SUBSTR);
+ // Fewer chars are left than the substring needs.
+
+ bind(RET_NOT_FOUND);
+ movl(result, -1);
+ jmpb(CLEANUP);
+
+ bind(FOUND_SUBSTR);
+ // Compute start addr of substr
+ lea(result, Address(result, tmp, Address::times_2));
+
+ if (int_cnt2 > 0) { // Constant substring
+ // Repeat search for small substring (< 8 chars)
+ // from new point without reloading substring.
+ // Have to check that we don't read beyond string.
+ cmpl(tmp, 8-int_cnt2);
+ jccb(Assembler::greater, ADJUST_STR);
+ // Fall through if matched whole substring.
+ } else { // non constant
+ assert(int_cnt2 == -1, "should be != 0");
+
+ addl(tmp, cnt2);
+ // Found result if we matched whole substring.
+ cmpl(tmp, 8);
+ jccb(Assembler::lessEqual, RET_FOUND);
+
+ // Repeat search for small substring (<= 8 chars)
+ // from new point 'str1' without reloading substring.
+ cmpl(cnt2, 8);
+ // Have to check that we don't read beyond string.
+ jccb(Assembler::lessEqual, ADJUST_STR);
+
+ Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
+ // Compare the rest of substring (> 8 chars).
+ movptr(str1, result);
+
+ cmpl(tmp, cnt2);
+ // First 8 chars are already matched.
+ jccb(Assembler::equal, CHECK_NEXT);
+
+ bind(SCAN_SUBSTR);
+ pcmpestri(vec, Address(str1, 0), 0x0d);
+ // Need to reload strings pointers if not matched whole vector
+ jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
+
+ bind(CHECK_NEXT);
+ subl(cnt2, 8);
+ jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
+ addptr(str1, 16);
+ addptr(str2, 16);
+ subl(cnt1, 8);
+ cmpl(cnt2, 8); // Do not read beyond substring
+ jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
+ // Back-up strings to avoid reading beyond substring.
+ lea(str2, Address(str2, cnt2, Address::times_2, -16));
+ lea(str1, Address(str1, cnt2, Address::times_2, -16));
+ subl(cnt1, cnt2);
+ movl(cnt2, 8);
+ addl(cnt1, 8);
+ bind(CONT_SCAN_SUBSTR);
+ movdqu(vec, Address(str2, 0));
+ jmpb(SCAN_SUBSTR);
+
+ bind(RET_FOUND_LONG);
+ movptr(str1, Address(rsp, wordSize));
+ } // non constant
+
+ bind(RET_FOUND);
+ // Compute substr offset
+ subptr(result, str1);
+ shrl(result, 1); // index
+
+ bind(CLEANUP);
+ pop(rsp); // restore SP
+
+} // string_indexof
+
+// Compare two strings of 16-bit chars: result gets the difference of the first mismatching chars, or else the length difference cnt1 - cnt2.
+void MacroAssembler::string_compare(Register str1, Register str2,
+ Register cnt1, Register cnt2, Register result,
+ XMMRegister vec1) {
+ ShortBranchVerifier sbv(this);
+ Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
+ // str1/str2 are advanced and cnt1/cnt2 are overwritten by the code below.
+ // Compute the minimum of the string lengths and the
+ // difference of the string lengths (stack).
+ // Do the conditional move stuff
+ movl(result, cnt1);
+ subl(cnt1, cnt2);
+ push(cnt1);
+ cmov32(Assembler::lessEqual, cnt2, result); // cnt2 = min(cnt1, cnt2)
+
+ // Is the minimum length zero?
+ testl(cnt2, cnt2);
+ jcc(Assembler::zero, LENGTH_DIFF_LABEL);
+
+ // Load first characters
+ load_unsigned_short(result, Address(str1, 0));
+ load_unsigned_short(cnt1, Address(str2, 0));
+
+ // Compare first characters
+ subl(result, cnt1);
+ jcc(Assembler::notZero, POP_LABEL);
+ decrementl(cnt2);
+ jcc(Assembler::zero, LENGTH_DIFF_LABEL);
+
+ {
+ // Check after comparing first character to see if strings are equivalent
+ Label LSkip2;
+ // Check if the strings start at same location
+ cmpptr(str1, str2);
+ jccb(Assembler::notEqual, LSkip2);
+
+ // Check if the length difference is zero (from stack)
+ cmpl(Address(rsp, 0), 0x0);
+ jcc(Assembler::equal, LENGTH_DIFF_LABEL);
+
+ // Strings might not be equivalent
+ bind(LSkip2);
+ }
+
+ Address::ScaleFactor scale = Address::times_2;
+ int stride = 8;
+ // The first char was already compared above, so step over it.
+ // Advance to next element (16/stride == 2 bytes == one char)
+ addptr(str1, 16/stride);
+ addptr(str2, 16/stride);
+
+ if (UseSSE42Intrinsics) {
+ Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
+ int pcmpmask = 0x19;
+ // Setup to compare 16-byte vectors
+ movl(result, cnt2);
+ andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
+ jccb(Assembler::zero, COMPARE_TAIL);
+
+ lea(str1, Address(str1, result, scale));
+ lea(str2, Address(str2, result, scale));
+ negptr(result);
+ // str1/str2 now point past the last char; result is the negative index of the next char to compare.
+ // pcmpestri
+ // inputs:
+ // vec1- substring
+ // rax - negative string length (elements count)
+ // mem - scanned string
+ // rdx - string length (elements count)
+ // pcmpmask - cmp mode: 11000 (string compare with negated result)
+ // + 00 (unsigned bytes) or + 01 (unsigned shorts)
+ // outputs:
+ // rcx - first mismatched element index
+ assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
+
+ bind(COMPARE_WIDE_VECTORS);
+ movdqu(vec1, Address(str1, result, scale));
+ pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
+ // After pcmpestri cnt1(rcx) contains mismatched element index
+
+ jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1
+ addptr(result, stride);
+ subptr(cnt2, stride);
+ jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+ // compare wide vectors tail: result is zero when no chars remain past the last full vector
+ testl(result, result);
+ jccb(Assembler::zero, LENGTH_DIFF_LABEL);
+ // Otherwise compare the last 'stride' chars of both strings; this overlaps chars already compared.
+ movl(cnt2, stride);
+ movl(result, stride);
+ negptr(result);
+ movdqu(vec1, Address(str1, result, scale));
+ pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
+ jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
+
+ // Mismatched characters in the vectors
+ bind(VECTOR_NOT_EQUAL);
+ addptr(result, cnt1);
+ movptr(cnt2, result);
+ load_unsigned_short(result, Address(str1, cnt2, scale));
+ load_unsigned_short(cnt1, Address(str2, cnt2, scale));
+ subl(result, cnt1);
+ jmpb(POP_LABEL);
+
+ bind(COMPARE_TAIL); // vector count is zero; restore the remaining char count
+ movl(cnt2, result);
+ // Fallthru to tail compare
+ }
+
+ // Shift str2 and str1 to the end of the arrays, negate min
+ lea(str1, Address(str1, cnt2, scale, 0));
+ lea(str2, Address(str2, cnt2, scale, 0));
+ negptr(cnt2);
+
+ // Compare the rest of the elements
+ bind(WHILE_HEAD_LABEL);
+ load_unsigned_short(result, Address(str1, cnt2, scale, 0));
+ load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
+ subl(result, cnt1);
+ jccb(Assembler::notZero, POP_LABEL);
+ increment(cnt2);
+ jccb(Assembler::notZero, WHILE_HEAD_LABEL);
+
+ // Strings are equal up to min length. Return the length difference.
+ bind(LENGTH_DIFF_LABEL);
+ pop(result);
+ jmpb(DONE_LABEL);
+
+ // Discard the stored length difference; result already holds the char difference
+ bind(POP_LABEL);
+ pop(cnt1);
+
+ // That's it
+ bind(DONE_LABEL);
+}
+
+// Compare char[] arrays aligned to 4 bytes or substrings; result is set to 1 if they are equal and to 0 otherwise.
+void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
+ Register limit, Register result, Register chr,
+ XMMRegister vec1, XMMRegister vec2) {
+ ShortBranchVerifier sbv(this);
+ Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
+ // With is_array_equ, ary1/ary2 are array references and limit is loaded from the length field; otherwise limit is the char count on entry.
+ int length_offset = arrayOopDesc::length_offset_in_bytes();
+ int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
+
+ // Check the input args
+ cmpptr(ary1, ary2);
+ jcc(Assembler::equal, TRUE_LABEL);
+
+ if (is_array_equ) {
+ // Need additional checks for arrays_equals.
+ testptr(ary1, ary1);
+ jcc(Assembler::zero, FALSE_LABEL);
+ testptr(ary2, ary2);
+ jcc(Assembler::zero, FALSE_LABEL);
+
+ // Check the lengths
+ movl(limit, Address(ary1, length_offset));
+ cmpl(limit, Address(ary2, length_offset));
+ jcc(Assembler::notEqual, FALSE_LABEL);
+ }
+
+ // count == 0
+ testl(limit, limit);
+ jcc(Assembler::zero, TRUE_LABEL);
+
+ if (is_array_equ) {
+ // Load array address
+ lea(ary1, Address(ary1, base_offset));
+ lea(ary2, Address(ary2, base_offset));
+ }
+
+ shll(limit, 1); // byte count != 0
+ movl(result, limit); // copy
+ // result keeps a copy of the byte count for the tail checks; limit is narrowed to what each loop handles.
+ if (UseSSE42Intrinsics) {
+ // With SSE4.2, use double quad vector compare
+ Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+
+ // Compare 16-byte vectors
+ andl(result, 0x0000000e); // tail count (in bytes)
+ andl(limit, 0xfffffff0); // vector count (in bytes)
+ jccb(Assembler::zero, COMPARE_TAIL);
+
+ lea(ary1, Address(ary1, limit, Address::times_1));
+ lea(ary2, Address(ary2, limit, Address::times_1));
+ negptr(limit);
+ // ary1/ary2 now point past the vector part; limit is a negative byte offset counting up to zero.
+ bind(COMPARE_WIDE_VECTORS);
+ movdqu(vec1, Address(ary1, limit, Address::times_1));
+ movdqu(vec2, Address(ary2, limit, Address::times_1));
+ pxor(vec1, vec2);
+
+ ptest(vec1, vec1);
+ jccb(Assembler::notZero, FALSE_LABEL);
+ addptr(limit, 16);
+ jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+ testl(result, result);
+ jccb(Assembler::zero, TRUE_LABEL);
+ // Compare the final 16 bytes of both arrays; they cover the tail and overlap the last full vector.
+ movdqu(vec1, Address(ary1, result, Address::times_1, -16));
+ movdqu(vec2, Address(ary2, result, Address::times_1, -16));
+ pxor(vec1, vec2);
+
+ ptest(vec1, vec1);
+ jccb(Assembler::notZero, FALSE_LABEL);
+ jmpb(TRUE_LABEL);
+
+ bind(COMPARE_TAIL); // limit is zero
+ movl(limit, result);
+ // Fallthru to tail compare
+ }
+
+ // Compare 4-byte vectors
+ andl(limit, 0xfffffffc); // vector count (in bytes)
+ jccb(Assembler::zero, COMPARE_CHAR);
+
+ lea(ary1, Address(ary1, limit, Address::times_1));
+ lea(ary2, Address(ary2, limit, Address::times_1));
+ negptr(limit);
+
+ bind(COMPARE_VECTORS);
+ movl(chr, Address(ary1, limit, Address::times_1));
+ cmpl(chr, Address(ary2, limit, Address::times_1));
+ jccb(Assembler::notEqual, FALSE_LABEL);
+ addptr(limit, 4);
+ jcc(Assembler::notZero, COMPARE_VECTORS);
+
+ // Compare trailing char (final 2 bytes), if any; ary1/ary2 point just past the 4-byte part here
+ bind(COMPARE_CHAR);
+ testl(result, 0x2); // tail char
+ jccb(Assembler::zero, TRUE_LABEL);
+ load_unsigned_short(chr, Address(ary1, 0));
+ load_unsigned_short(limit, Address(ary2, 0));
+ cmpl(chr, limit);
+ jccb(Assembler::notEqual, FALSE_LABEL);
+
+ bind(TRUE_LABEL);
+ movl(result, 1); // return true
+ jmpb(DONE);
+
+ bind(FALSE_LABEL);
+ xorl(result, result); // return false
+
+ // That's it
+ bind(DONE);
+}
+// Emit code that stores 'value' into 'count' elements of type t (T_BYTE, T_SHORT or T_INT) starting at address 'to'; rtmp and xtmp are temporaries.
+void MacroAssembler::generate_fill(BasicType t, bool aligned,
+ Register to, Register value, Register count,
+ Register rtmp, XMMRegister xtmp) {
+ ShortBranchVerifier sbv(this);
+ assert_different_registers(to, value, count, rtmp);
+ Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
+ Label L_fill_2_bytes, L_fill_4_bytes;
+ // shift = log2(elements per 4 bytes), so N << shift is the element count of N 4-byte words.
+ int shift = -1;
+ switch (t) {
+ case T_BYTE:
+ shift = 2;
+ break;
+ case T_SHORT:
+ shift = 1;
+ break;
+ case T_INT:
+ shift = 0;
+ break;
+ default: ShouldNotReachHere();
+ }
+ // Replicate the element so that it fills all 32 bits of value.
+ if (t == T_BYTE) {
+ andl(value, 0xff);
+ movl(rtmp, value);
+ shll(rtmp, 8);
+ orl(value, rtmp);
+ }
+ if (t == T_SHORT) {
+ andl(value, 0xffff);
+ }
+ if (t == T_BYTE || t == T_SHORT) {
+ movl(rtmp, value);
+ shll(rtmp, 16);
+ orl(value, rtmp);
+ }
+
+ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
+ jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
+ if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
+ // align destination address 'to' at a 4-byte boundary
+ if (t == T_BYTE) {
+ // One byte misalignment happens only for byte arrays
+ testptr(to, 1);
+ jccb(Assembler::zero, L_skip_align1);
+ movb(Address(to, 0), value);
+ increment(to);
+ decrement(count);
+ BIND(L_skip_align1);
+ }
+ // Two bytes misalignment happens only for byte and short (char) arrays
+ testptr(to, 2);
+ jccb(Assembler::zero, L_skip_align2);
+ movw(Address(to, 0), value);
+ addptr(to, 2);
+ subl(count, 1<<(shift-1)); // 2 bytes worth of elements
+ BIND(L_skip_align2);
+ }
+ if (UseSSE < 2) {
+ Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
+ // Fill 32-byte chunks
+ subl(count, 8 << shift);
+ jcc(Assembler::less, L_check_fill_8_bytes);
+ align(16);
+
+ BIND(L_fill_32_bytes_loop);
+ // Eight 4-byte stores per iteration.
+ for (int i = 0; i < 32; i += 4) {
+ movl(Address(to, i), value);
+ }
+
+ addptr(to, 32);
+ subl(count, 8 << shift);
+ jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
+ BIND(L_check_fill_8_bytes);
+ addl(count, 8 << shift); // undo the last subtraction; count is the number of elements left
+ jccb(Assembler::zero, L_exit);
+ jmpb(L_fill_8_bytes);
+
+ //
+ // length is too short, just fill qwords
+ //
+ BIND(L_fill_8_bytes_loop);
+ movl(Address(to, 0), value);
+ movl(Address(to, 4), value);
+ addptr(to, 8);
+ BIND(L_fill_8_bytes);
+ subl(count, 1 << (shift + 1));
+ jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
+ // fall through to fill 4 bytes
+ } else {
+ Label L_fill_32_bytes;
+ if (!UseUnalignedLoadStores) {
+ // align to 8 bytes, we know we are 4 byte aligned to start
+ testptr(to, 4);
+ jccb(Assembler::zero, L_fill_32_bytes);
+ movl(Address(to, 0), value);
+ addptr(to, 4);
+ subl(count, 1<<shift);
+ }
+ BIND(L_fill_32_bytes);
+ {
+ assert( UseSSE >= 2, "supported cpu only" );
+ Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
+ // Fill 32-byte chunks; xtmp gets the 32-bit value copied into each of its dwords
+ movdl(xtmp, value);
+ pshufd(xtmp, xtmp, 0);
+
+ subl(count, 8 << shift);
+ jcc(Assembler::less, L_check_fill_8_bytes);
+ align(16);
+
+ BIND(L_fill_32_bytes_loop);
+
+ if (UseUnalignedLoadStores) {
+ movdqu(Address(to, 0), xtmp);
+ movdqu(Address(to, 16), xtmp);
+ } else {
+ movq(Address(to, 0), xtmp);
+ movq(Address(to, 8), xtmp);
+ movq(Address(to, 16), xtmp);
+ movq(Address(to, 24), xtmp);
+ }
+
+ addptr(to, 32);
+ subl(count, 8 << shift);
+ jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
+ BIND(L_check_fill_8_bytes);
+ addl(count, 8 << shift); // undo the last subtraction; count is the number of elements left
+ jccb(Assembler::zero, L_exit);
+ jmpb(L_fill_8_bytes);
+
+ //
+ // length is too short, just fill qwords
+ //
+ BIND(L_fill_8_bytes_loop);
+ movq(Address(to, 0), xtmp);
+ addptr(to, 8);
+ BIND(L_fill_8_bytes);
+ subl(count, 1 << (shift + 1));
+ jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
+ }
+ }
+ // fill trailing 4 bytes (the loops above subtract whole 8-byte multiples, so the low bits of count are intact)
+ BIND(L_fill_4_bytes);
+ testl(count, 1<<shift);
+ jccb(Assembler::zero, L_fill_2_bytes);
+ movl(Address(to, 0), value);
+ if (t == T_BYTE || t == T_SHORT) {
+ addptr(to, 4);
+ BIND(L_fill_2_bytes);
+ // fill trailing 2 bytes
+ testl(count, 1<<(shift-1));
+ jccb(Assembler::zero, L_fill_byte);
+ movw(Address(to, 0), value);
+ if (t == T_BYTE) {
+ addptr(to, 2);
+ BIND(L_fill_byte);
+ // fill trailing byte
+ testl(count, 1);
+ jccb(Assembler::zero, L_exit);
+ movb(Address(to, 0), value);
+ } else {
+ BIND(L_fill_byte); // nothing smaller than 2 bytes to fill for T_SHORT; bind the label here
+ }
+ } else {
+ BIND(L_fill_2_bytes); // T_INT has no sub-word tail; bind the label here
+ }
+ BIND(L_exit);
+}
+#undef BIND
+#undef BLOCK_COMMENT
+
+// Return the opposite of cond, i.e. the condition paired with it below (zero <-> notZero, less <-> greaterEqual, ...).
+Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
+ switch (cond) {
+ // Note some conditions are synonyms for others
+ case Assembler::zero: return Assembler::notZero;
+ case Assembler::notZero: return Assembler::zero;
+ case Assembler::less: return Assembler::greaterEqual;
+ case Assembler::lessEqual: return Assembler::greater;
+ case Assembler::greater: return Assembler::lessEqual;
+ case Assembler::greaterEqual: return Assembler::less;
+ case Assembler::below: return Assembler::aboveEqual;
+ case Assembler::belowEqual: return Assembler::above;
+ case Assembler::above: return Assembler::belowEqual;
+ case Assembler::aboveEqual: return Assembler::below;
+ case Assembler::overflow: return Assembler::noOverflow;
+ case Assembler::noOverflow: return Assembler::overflow;
+ case Assembler::negative: return Assembler::positive;
+ case Assembler::positive: return Assembler::negative;
+ case Assembler::parity: return Assembler::noParity;
+ case Assembler::noParity: return Assembler::parity;
+ }
+ ShouldNotReachHere(); return Assembler::overflow; // only reached for a cond not listed above; the return value is a placeholder
+}
+// Emit a cmp8 of the flag at flag_addr against value, followed by a jump to _label taken when they are equal.
+SkipIfEqual::SkipIfEqual(
+ MacroAssembler* masm, const bool* flag_addr, bool value) {
+ _masm = masm;
+ _masm->cmp8(ExternalAddress((address)flag_addr), value);
+ _masm->jcc(Assembler::equal, _label);
+}
+// Bind _label at the current code position, which becomes the target of the jump emitted by the constructor.
+SkipIfEqual::~SkipIfEqual() {
+ _masm->bind(_label);
+}
diff --git a/src/cpu/x86/vm/macroAssembler_x86.hpp b/src/cpu/x86/vm/macroAssembler_x86.hpp
new file mode 100644
index 000000000..36d4d2311
--- /dev/null
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp
@@ -0,0 +1,1172 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_X86_VM_MACROASSEMBLER_X86_HPP
+#define CPU_X86_VM_MACROASSEMBLER_X86_HPP
+
+#include "asm/assembler.hpp"
+
+
+// MacroAssembler extends Assembler by frequently used macros.
+//
+// Instructions for which a 'better' code sequence exists depending
+// on arguments should also go in here.
+
+class MacroAssembler: public Assembler {
+ friend class LIR_Assembler;
+ friend class Runtime1; // as_Address()
+
+ protected:
+
+ Address as_Address(AddressLiteral adr);
+ Address as_Address(ArrayAddress adr);
+
+ // Support for VM calls
+ //
+ // This is the base routine called by the different versions of call_VM_leaf. The interpreter
+ // may customize this version by overriding it for its purposes (e.g., to save/restore
+ // additional registers when doing a VM call).
+#ifdef CC_INTERP
+ // c++ interpreter never wants to use interp_masm version of call_VM
+ #define VIRTUAL
+#else
+ #define VIRTUAL virtual
+#endif
+
+ VIRTUAL void call_VM_leaf_base(
+ address entry_point, // the entry point
+ int number_of_arguments // the number of arguments to pop after the call
+ );
+
+ // This is the base routine called by the different versions of call_VM. The interpreter
+ // may customize this version by overriding it for its purposes (e.g., to save/restore
+ // additional registers when doing a VM call).
+ //
+ // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base
+ // returns the register which contains the thread upon return. If a thread register has been
+ // specified, the return value will correspond to that register. If no last_java_sp is specified
+ // (noreg) then rsp will be used instead.
+ VIRTUAL void call_VM_base( // returns the register containing the thread upon return
+ Register oop_result, // where an oop-result ends up if any; use noreg otherwise
+ Register java_thread, // the thread if computed before ; use noreg otherwise
+ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise
+ address entry_point, // the entry point
+ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call
+ bool check_exceptions // whether to check for pending exceptions after return
+ );
+
+ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
+ // The implementation is only non-empty for the InterpreterMacroAssembler,
+ // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
+ virtual void check_and_handle_popframe(Register java_thread);
+ virtual void check_and_handle_earlyret(Register java_thread);
+
+ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
+
+ // helpers for FPU flag access
+ // tmp is a temporary register, if none is available use noreg
+ void save_rax (Register tmp);
+ void restore_rax(Register tmp);
+
+ public:
+ MacroAssembler(CodeBuffer* code) : Assembler(code) {}
+
+ // Support for NULL-checks
+ //
+ // Generates code that causes a NULL OS exception if the content of reg is NULL.
+ // If the accessed location is M[reg + offset] and the offset is known, provide the
+ // offset. No explicit code generation is needed if the offset is within a certain
+ // range (0 <= offset <= page_size).
+
+ void null_check(Register reg, int offset = -1);
+ static bool needs_explicit_null_check(intptr_t offset);
+
+ // Required platform-specific helpers for Label::patch_instructions.
+ // They _shadow_ the declarations in AbstractAssembler, which are undefined.
+ void pd_patch_instruction(address branch, address target) {
+ unsigned char op = branch[0];
+ assert(op == 0xE8 /* call */ ||
+ op == 0xE9 /* jmp */ ||
+ op == 0xEB /* short jmp */ ||
+ (op & 0xF0) == 0x70 /* short jcc */ ||
+ op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */,
+ "Invalid opcode at patch point");
+ // Rewrite the displacement of the branch instruction at 'branch' so that it targets 'target'.
+ if (op == 0xEB || (op & 0xF0) == 0x70) {
+ // short offset operators (jmp and jcc)
+ char* disp = (char*) &branch[1];
+ int imm8 = target - (address) &disp[1]; // relative to the byte following the 8-bit displacement
+ guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset");
+ *disp = imm8;
+ } else {
+ int* disp = (int*) &branch[(op == 0x0F)? 2: 1]; // displacement follows a 2-byte (0x0F-prefixed) or 1-byte opcode
+ int imm32 = target - (address) &disp[1]; // relative to the byte following the 32-bit displacement
+ *disp = imm32;
+ }
+ }
+ // Non-PRODUCT helper: print the mnemonic ("call", "jmp", "jcc" or "????") chosen from the opcode byte at 'branch', followed by " (unresolved)".
+#ifndef PRODUCT
+ static void pd_print_patched_instruction(address branch) {
+ const char* s;
+ unsigned char op = branch[0];
+ if (op == 0xE8) {
+ s = "call";
+ } else if (op == 0xE9 || op == 0xEB) {
+ s = "jmp";
+ } else if ((op & 0xF0) == 0x70) {
+ s = "jcc";
+ } else if (op == 0x0F) { // any 0x0F-prefixed opcode is reported as jcc; the second byte is not inspected
+ s = "jcc";
+ } else {
+ s = "????";
+ }
+ tty->print("%s (unresolved)", s);
+ }
+#endif
+
+ // The following 4 methods return the offset of the appropriate move instruction
+
+ // Support for fast byte/short loading with zero extension (depending on particular CPU)
+ int load_unsigned_byte(Register dst, Address src);
+ int load_unsigned_short(Register dst, Address src);
+
+ // Support for fast byte/short loading with sign extension (depending on particular CPU)
+ int load_signed_byte(Register dst, Address src);
+ int load_signed_short(Register dst, Address src);
+
+ // Support for sign-extension (hi:lo = extend_sign(lo))
+ void extend_sign(Register hi, Register lo);
+
+ // Load and store values by size and signed-ness
+ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
+ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
+
+ // Support for inc/dec with optimal instruction selection depending on value
+ // increment(): forwards to incrementq under LP64_ONLY and to incrementl under NOT_LP64.
+ void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
+ void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; } // decrementq under LP64_ONLY, decrementl under NOT_LP64
+
+ void decrementl(Address dst, int value = 1);
+ void decrementl(Register reg, int value = 1);
+
+ void decrementq(Register reg, int value = 1);
+ void decrementq(Address dst, int value = 1);
+
+ void incrementl(Address dst, int value = 1);
+ void incrementl(Register reg, int value = 1);
+
+ void incrementq(Register reg, int value = 1);
+ void incrementq(Address dst, int value = 1);
+
+
+ // Support optimal SSE move instructions. movflt: register-to-register uses movaps when UseXmmRegToRegMoveAll is set, otherwise movss; memory forms use movss.
+ void movflt(XMMRegister dst, XMMRegister src) {
+ if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
+ else { movss (dst, src); return; }
+ }
+ void movflt(XMMRegister dst, Address src) { movss(dst, src); }
+ void movflt(XMMRegister dst, AddressLiteral src);
+ void movflt(Address dst, XMMRegister src) { movss(dst, src); }
+ // movdbl: register-to-register uses movapd when UseXmmRegToRegMoveAll is set, otherwise movsd.
+ void movdbl(XMMRegister dst, XMMRegister src) {
+ if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; }
+ else { movsd (dst, src); return; }
+ }
+
+ void movdbl(XMMRegister dst, AddressLiteral src);
+ // Load from memory: movsd when UseXmmLoadAndClearUpper is set, otherwise movlpd.
+ void movdbl(XMMRegister dst, Address src) {
+ if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; }
+ else { movlpd(dst, src); return; }
+ }
+ void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }
+
+ void incrementl(AddressLiteral dst);
+ void incrementl(ArrayAddress dst);
+
+ // Alignment
+ void align(int modulus);
+
+ // A 5 byte nop that is safe for patching (see patch_verified_entry)
+ void fat_nop();
+
+ // Stack frame creation/removal
+ void enter();
+ void leave();
+
+ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information)
+ // The pointer will be loaded into the thread register.
+ void get_thread(Register thread);
+
+
+ // Support for VM calls
+ //
+ // It is imperative that all calls into the VM are handled via the call_VM macros.
+ // They make sure that the stack linkage is setup correctly. call_VM's correspond
+ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
+
+
+ void call_VM(Register oop_result,
+ address entry_point,
+ bool check_exceptions = true);
+ void call_VM(Register oop_result,
+ address entry_point,
+ Register arg_1,
+ bool check_exceptions = true);
+ void call_VM(Register oop_result,
+ address entry_point,
+ Register arg_1, Register arg_2,
+ bool check_exceptions = true);
+ void call_VM(Register oop_result,
+ address entry_point,
+ Register arg_1, Register arg_2, Register arg_3,
+ bool check_exceptions = true);
+
+ // Overloadings with last_Java_sp
+ void call_VM(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ int number_of_arguments = 0,
+ bool check_exceptions = true);
+ void call_VM(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ Register arg_1, bool
+ check_exceptions = true);
+ void call_VM(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ Register arg_1, Register arg_2,
+ bool check_exceptions = true);
+ void call_VM(Register oop_result,
+ Register last_java_sp,
+ address entry_point,
+ Register arg_1, Register arg_2, Register arg_3,
+ bool check_exceptions = true);
+
+ void get_vm_result (Register oop_result, Register thread);
+ void get_vm_result_2(Register metadata_result, Register thread);
+
+ // These always tightly bind to MacroAssembler::call_VM_base
+ // bypassing the virtual implementation
+ void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
+ void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
+ void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
+ void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
+ void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);
+
+ void call_VM_leaf(address entry_point,
+ int number_of_arguments = 0);
+ void call_VM_leaf(address entry_point,
+ Register arg_1);
+ void call_VM_leaf(address entry_point,
+ Register arg_1, Register arg_2);
+ void call_VM_leaf(address entry_point,
+ Register arg_1, Register arg_2, Register arg_3);
+
+ // These always tightly bind to MacroAssembler::call_VM_leaf_base
+ // bypassing the virtual implementation
+ void super_call_VM_leaf(address entry_point);
+ void super_call_VM_leaf(address entry_point, Register arg_1);
+ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
+ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
+ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);
+
+ // last Java Frame (fills frame anchor)
+ void set_last_Java_frame(Register thread,
+ Register last_java_sp,
+ Register last_java_fp,
+ address last_java_pc);
+
+ // thread in the default location (r15_thread on 64bit)
+ void set_last_Java_frame(Register last_java_sp,
+ Register last_java_fp,
+ address last_java_pc);
+
+ void reset_last_Java_frame(Register thread, bool clear_fp, bool clear_pc);
+
+ // thread in the default location (r15_thread on 64bit)
+ void reset_last_Java_frame(bool clear_fp, bool clear_pc);
+
+ // Stores
+ void store_check(Register obj); // store check for obj - register is destroyed afterwards
+ void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed)
+
+#ifndef SERIALGC
+
+ void g1_write_barrier_pre(Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp,
+ bool tosca_live,
+ bool expand_call);
+
+ void g1_write_barrier_post(Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp,
+ Register tmp2);
+
+#endif // SERIALGC
+
+ // split store_check(Register obj) to enhance instruction interleaving
+ void store_check_part_1(Register obj);
+ void store_check_part_2(Register obj);
+
+ // C 'boolean' to Java boolean: x == 0 ? 0 : 1
+ void c2bool(Register x);
+
+ // C++ bool manipulation
+
+ void movbool(Register dst, Address src);
+ void movbool(Address dst, bool boolconst);
+ void movbool(Address dst, Register src);
+ void testbool(Register dst);
+
+ // oop manipulations
+ void load_klass(Register dst, Register src);
+ void store_klass(Register dst, Register src);
+
+ void load_heap_oop(Register dst, Address src);
+ void load_heap_oop_not_null(Register dst, Address src);
+ void store_heap_oop(Address dst, Register src);
+ void cmp_heap_oop(Register src1, Address src2, Register tmp = noreg);
+
+ // Used for storing NULL. All other oop constants should be
+ // stored using routines that take a jobject.
+ void store_heap_oop_null(Address dst);
+
+ void load_prototype_header(Register dst, Register src);
+
+#ifdef _LP64
+ void store_klass_gap(Register dst, Register src);
+
+ // This dummy is to prevent a call to store_heap_oop from
+ // converting a zero (like NULL) into a Register by giving
+ // the compiler two choices it can't resolve
+
+ void store_heap_oop(Address dst, void* dummy);
+
+ void encode_heap_oop(Register r);
+ void decode_heap_oop(Register r);
+ void encode_heap_oop_not_null(Register r);
+ void decode_heap_oop_not_null(Register r);
+ void encode_heap_oop_not_null(Register dst, Register src);
+ void decode_heap_oop_not_null(Register dst, Register src);
+
+ void set_narrow_oop(Register dst, jobject obj);
+ void set_narrow_oop(Address dst, jobject obj);
+ void cmp_narrow_oop(Register dst, jobject obj);
+ void cmp_narrow_oop(Address dst, jobject obj);
+
+ void encode_klass_not_null(Register r);
+ void decode_klass_not_null(Register r);
+ void encode_klass_not_null(Register dst, Register src);
+ void decode_klass_not_null(Register dst, Register src);
+ void set_narrow_klass(Register dst, Klass* k);
+ void set_narrow_klass(Address dst, Klass* k);
+ void cmp_narrow_klass(Register dst, Klass* k);
+ void cmp_narrow_klass(Address dst, Klass* k);
+
+ // if heap base register is used - reinit it with the correct value
+ void reinit_heapbase();
+
+ DEBUG_ONLY(void verify_heapbase(const char* msg);)
+
+#endif // _LP64
+
+ // Int division/remainder for Java
+ // (as idivl, but checks for special case as described in JVM spec.)
+ // returns idivl instruction offset for implicit exception handling
+ int corrected_idivl(Register reg);
+
+ // Long division/remainder for Java
+ // (as idivq, but checks for special case as described in JVM spec.)
+ // returns idivq instruction offset for implicit exception handling
+ int corrected_idivq(Register reg);
+
+ void int3();
+
+ // Long operation macros for a 32bit cpu
+ // Long negation for Java
+ void lneg(Register hi, Register lo);
+
+ // Long multiplication for Java
+ // (destroys contents of eax, ebx, ecx and edx)
+ void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y
+
+ // Long shifts for Java
+ // (semantics as described in JVM spec.)
+ void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f)
+ void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f)
+
+ // Long compare for Java
+ // (semantics as described in JVM spec.)
+ void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y)
+
+
+ // misc
+
+ // Sign extension
+ void sign_extend_short(Register reg);
+ void sign_extend_byte(Register reg);
+
+ // Division by power of 2, rounding towards 0
+ void division_with_shift(Register reg, int shift_value);
+
+ // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
+ //
+ // CF (corresponds to C0) if x < y
+ // PF (corresponds to C2) if unordered
+ // ZF (corresponds to C3) if x = y
+ //
+ // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
+ // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code)
+ void fcmp(Register tmp);
+ // Variant of the above which allows y to be further down the stack
+ // and which only pops x and y if specified. If pop_right is
+ // specified then pop_left must also be specified.
+ void fcmp(Register tmp, int index, bool pop_left, bool pop_right);
+
+ // Floating-point comparison for Java
+ // Compares the top-most stack entries on the FPU stack and stores the result in dst.
+ // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
+ // (semantics as described in JVM spec.)
+ void fcmp2int(Register dst, bool unordered_is_less);
+ // Variant of the above which allows y to be further down the stack
+ // and which only pops x and y if specified. If pop_right is
+ // specified then pop_left must also be specified.
+ void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right);
+
+ // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards)
+ // tmp is a temporary register, if none is available use noreg
+ void fremr(Register tmp);
+
+
+ // same as fcmp2int, but using SSE2
+ void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
+ void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
+
+ // Inlined sin/cos generator for Java; must not use CPU instruction
+ // directly on Intel as it does not have high enough precision
+ // outside of the range [-pi/4, pi/4]. The extra argument indicates the
+ // number of FPU stack slots in use; all but the topmost will
+ // require saving if a slow case is necessary. Assumes argument is
+ // on FP TOS; result is on FP TOS. No cpu registers are changed by
+ // this code.
+ void trigfunc(char trig, int num_fpu_regs_in_use = 1);
+
+ // branch to L if FPU flag C2 is set/not set
+ // tmp is a temporary register, if none is available use noreg
+ void jC2 (Register tmp, Label& L);
+ void jnC2(Register tmp, Label& L);
+
+ // Pop ST (ffree & fincstp combined)
+ void fpop();
+
+ // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
+ void push_fTOS();
+
+ // pops double TOS element from CPU stack and pushes on FPU stack
+ void pop_fTOS();
+
+ void empty_FPU_stack();
+
+ void push_IU_state();
+ void pop_IU_state();
+
+ void push_FPU_state();
+ void pop_FPU_state();
+
+ void push_CPU_state();
+ void pop_CPU_state();
+
+ // Round up to a power of two
+ void round_to(Register reg, int modulus);
+
+ // Callee saved registers handling
+ void push_callee_saved_registers();
+ void pop_callee_saved_registers();
+
+ // allocation
+ void eden_allocate(
+ Register obj, // result: pointer to object after successful allocation
+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
+ int con_size_in_bytes, // object size in bytes if known at compile time
+ Register t1, // temp register
+ Label& slow_case // continuation point if fast allocation fails
+ );
+ void tlab_allocate(
+ Register obj, // result: pointer to object after successful allocation
+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
+ int con_size_in_bytes, // object size in bytes if known at compile time
+ Register t1, // temp register
+ Register t2, // temp register
+ Label& slow_case // continuation point if fast allocation fails
+ );
+ Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
+ void incr_allocated_bytes(Register thread,
+ Register var_size_in_bytes, int con_size_in_bytes,
+ Register t1 = noreg);
+
+ // interface method calling
+ void lookup_interface_method(Register recv_klass,
+ Register intf_klass,
+ RegisterOrConstant itable_index,
+ Register method_result,
+ Register scan_temp,
+ Label& no_such_interface);
+
+ // virtual method calling
+ void lookup_virtual_method(Register recv_klass,
+ RegisterOrConstant vtable_index,
+ Register method_result);
+
+ // Test sub_klass against super_klass, with fast and slow paths.
+
+ // The fast path produces a tri-state answer: yes / no / maybe-slow.
+ // One of the three labels can be NULL, meaning take the fall-through.
+ // If super_check_offset is -1, the value is loaded up from super_klass.
+ // No registers are killed, except temp_reg.
+ void check_klass_subtype_fast_path(Register sub_klass,
+ Register super_klass,
+ Register temp_reg,
+ Label* L_success,
+ Label* L_failure,
+ Label* L_slow_path,
+ RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
+
+ // The rest of the type check; must be wired to a corresponding fast path.
+ // It does not repeat the fast path logic, so don't use it standalone.
+ // The temp_reg and temp2_reg can be noreg, if no temps are available.
+ // Updates the sub's secondary super cache as necessary.
+ // If set_cond_codes, condition codes will be Z on success, NZ on failure.
+ void check_klass_subtype_slow_path(Register sub_klass,
+ Register super_klass,
+ Register temp_reg,
+ Register temp2_reg,
+ Label* L_success,
+ Label* L_failure,
+ bool set_cond_codes = false);
+
+ // Simplified, combined version, good for typical uses.
+ // Falls through on failure.
+ void check_klass_subtype(Register sub_klass,
+ Register super_klass,
+ Register temp_reg,
+ Label& L_success);
+
+ // method handles (JSR 292)
+ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
+
+ //----
+ void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
+
+ // Debugging
+
+ // only if +VerifyOops
+ // TODO: Make these macros with file and line like sparc version!
+ void verify_oop(Register reg, const char* s = "broken oop");
+ void verify_oop_addr(Address addr, const char * s = "broken oop addr");
+
+ // TODO: verify method and klass metadata (compare against vptr?)
+ // Currently a no-op: the body is empty, so all four arguments are ignored.
+ // Reached through the verify_method_ptr(reg) macro, which supplies msg, file and line.
+ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
+ // Currently a no-op: the body is empty, so all four arguments are ignored.
+ // Reached through the verify_klass_ptr(reg) macro, which supplies msg, file and line.
+ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
+
+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
+#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
+
+ // only if +VerifyFPU
+ void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
+
+ // prints msg, dumps registers and stops execution
+ void stop(const char* msg);
+
+ // prints msg and continues
+ void warn(const char* msg);
+
+ // dumps registers and other state
+ void print_state();
+
+ static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
+ static void debug64(char* msg, int64_t pc, int64_t regs[]);
+ static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip);
+ static void print_state64(int64_t pc, int64_t regs[]);
+
+ void os_breakpoint();
+
+ // Calls stop() with the fixed message "untested".
+ void untested() {
+ stop("untested");
+ }
+
+ // Formats "unimplemented: <what>" into a freshly allocated 1024-byte buffer
+ // and passes that buffer to stop(). The buffer is not released here.
+ // NOTE(review): presumably stop() keeps the pointer for later use -- confirm
+ // before adding a delete[].
+ void unimplemented(const char* what = "") {
+ const int msg_capacity = 1024;
+ char* msg = new char[msg_capacity];
+ jio_snprintf(msg, msg_capacity, "unimplemented: %s", what);
+ stop(msg);
+ }
+
+ // Calls stop() with the fixed message "should not reach here".
+ void should_not_reach_here() {
+ stop("should not reach here");
+ }
+
+ void print_CPU_state();
+
+ // Stack overflow checking
+ // Touches the stack word at rsp - offset by storing rax to it.
+ // offset: distance below rsp to touch; must be strictly positive because the
+ // value is negated when the address is formed.
+ void bang_stack_with_offset(int offset) {
+ // stack grows down, caller passes positive offset
+ assert(offset > 0, "must bang with positive offset (applied as rsp - offset)");
+ movl(Address(rsp, (-offset)), rax);
+ }
+
+ // Writes to stack successive pages until offset reached to check for
+ // stack overflow + shadow pages. Also, clobbers tmp
+ void bang_stack_size(Register size, Register tmp);
+
+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
+ Register tmp,
+ int offset);
+
+ // Support for serializing memory accesses between threads
+ void serialize_memory(Register thread, Register tmp);
+
+ void verify_tlab();
+
+ // Biased locking support
+ // lock_reg and obj_reg must be loaded up with the appropriate values.
+ // swap_reg must be rax, and is killed.
+ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will
+ // be killed; if not supplied, push/pop will be used internally to
+ // allocate a temporary (inefficient, avoid if possible).
+ // Optional slow case is for implementations (interpreter and C1) which branch to
+ // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
+ // Returns offset of first potentially-faulting instruction for null
+ // check info (currently consumed only by C1). If
+ // swap_reg_contains_mark is true then returns -1 as it is assumed
+ // the calling code has already passed any potential faults.
+ int biased_locking_enter(Register lock_reg, Register obj_reg,
+ Register swap_reg, Register tmp_reg,
+ bool swap_reg_contains_mark,
+ Label& done, Label* slow_case = NULL,
+ BiasedLockingCounters* counters = NULL);
+ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
+
+
+ Condition negate_condition(Condition cond);
+
+ // Instructions that use AddressLiteral operands. These instructions can handle 32bit/64bit
+ // operands. In general the names are modified to avoid hiding the instruction in Assembler
+ // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
+ // here in MacroAssembler. The major exception to this rule is call.
+
+ // Arithmetics
+
+
+ // Pointer-sized add. The inline overloads select addq through LP64_ONLY and
+ // addl through NOT_LP64; the overloads without a body are only declared here.
+ void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; }
+ void addptr(Address dst, Register src);
+
+ void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
+ void addptr(Register dst, int32_t src);
+ void addptr(Register dst, Register src);
+ // Adds src to dst, dispatching on whether src holds a register or a constant.
+ // A constant is cast to int before being passed on.
+ void addptr(Register dst, RegisterOrConstant src) {
+ if (!src.is_constant()) {
+ addptr(dst, src.as_register());
+ } else {
+ addptr(dst, (int) src.as_constant());
+ }
+ }
+
+ void andptr(Register dst, int32_t src);
+ void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; }
+
+ void cmp8(AddressLiteral src1, int imm);
+
+ // renamed to drag out the casting of address to int32_t/intptr_t
+ void cmp32(Register src1, int32_t imm);
+
+ void cmp32(AddressLiteral src1, int32_t imm);
+ // compare reg - mem, or reg - &mem
+ void cmp32(Register src1, AddressLiteral src2);
+
+ void cmp32(Register src1, Address src2);
+
+#ifndef _LP64
+ void cmpklass(Address dst, Metadata* obj);
+ void cmpklass(Register dst, Metadata* obj);
+ void cmpoop(Address dst, jobject obj);
+ void cmpoop(Register dst, jobject obj);
+#endif // _LP64
+
+ // NOTE src2 must be the lval. This is NOT an mem-mem compare
+ void cmpptr(Address src1, AddressLiteral src2);
+
+ void cmpptr(Register src1, AddressLiteral src2);
+
+ // Pointer-sized compare: each inline overload selects cmpq through LP64_ONLY
+ // and cmpl through NOT_LP64. The (Address, Register) form below is commented
+ // out and therefore not available.
+ void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
+ void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
+ // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
+
+ void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
+ void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
+
+ // cmp64 to avoid hiding cmpq
+ void cmp64(Register src1, AddressLiteral src);
+
+ void cmpxchgptr(Register reg, Address adr);
+
+ void locked_cmpxchgptr(Register reg, AddressLiteral adr);
+
+
+ // Pointer-sized arithmetic helpers. Every inline body below selects the
+ // q-suffixed instruction through LP64_ONLY and the l-suffixed one through
+ // NOT_LP64; overloads without a body are only declared here.
+
+ // Pointer-sized multiply (imulq / imull).
+ void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
+
+
+ // Pointer-sized negate (negq / negl).
+ void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
+
+ // Pointer-sized bitwise not (notq / notl).
+ void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); }
+
+ // Pointer-sized shift left; the one-argument form uses shlq / shll without an explicit count.
+ void shlptr(Register dst, int32_t shift);
+ void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); }
+
+ // Pointer-sized shift right; the one-argument form uses shrq / shrl without an explicit count.
+ void shrptr(Register dst, int32_t shift);
+ void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); }
+
+ // Pointer-sized sarq / sarl, with and without an explicit count.
+ void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); }
+ void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); }
+
+ void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
+
+ void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
+ void subptr(Register dst, int32_t src);
+ // Force generation of a 4 byte immediate value even if it fits into 8bit
+ void subptr_imm32(Register dst, int32_t src);
+ void subptr(Register dst, Register src);
+ // Subtracts src from dst, dispatching on whether src holds a register or a
+ // constant. A constant is cast to int before being passed on.
+ void subptr(Register dst, RegisterOrConstant src) {
+ if (!src.is_constant()) {
+ subptr(dst, src.as_register());
+ } else {
+ subptr(dst, (int) src.as_constant());
+ }
+ }
+
+ // Pointer-sized sbb: sbbq through LP64_ONLY, sbbl through NOT_LP64.
+ void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
+ void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
+
+ // Pointer-sized xchg: xchgq through LP64_ONLY, xchgl through NOT_LP64.
+ void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
+ void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
+
+ // Pointer-sized xadd: xaddq through LP64_ONLY, xaddl through NOT_LP64.
+ void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; }
+
+
+
+ // Helper functions for statistics gathering.
+ // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
+ void cond_inc32(Condition cond, AddressLiteral counter_addr);
+ // Unconditional atomic increment.
+ void atomic_incl(AddressLiteral counter_addr);
+
+ void lea(Register dst, AddressLiteral adr);
+ void lea(Address dst, AddressLiteral adr);
+ void lea(Register dst, Address adr) { Assembler::lea(dst, adr); }
+
+ void leal32(Register dst, Address src) { leal(dst, src); }
+
+ // Import other testl() methods from the parent class or else
+ // they will be hidden by the following overriding declaration.
+ using Assembler::testl;
+ void testl(Register dst, AddressLiteral src);
+
+ // Pointer-sized or: orq through LP64_ONLY, orl through NOT_LP64.
+ void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
+ void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
+ void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
+
+ // Pointer-sized test: the immediate form selects testq / testl; the
+ // register-register form is only declared here.
+ void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
+ void testptr(Register src1, Register src2);
+
+ // Pointer-sized xor: xorq through LP64_ONLY, xorl through NOT_LP64.
+ void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
+ void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
+
+ // Calls
+
+ void call(Label& L, relocInfo::relocType rtype);
+ void call(Register entry);
+
+ // NOTE: this call transfers to the effective address of entry NOT
+ // the address contained by entry. This is because this is more natural
+ // for jumps/calls.
+ void call(AddressLiteral entry);
+
+ // Emit the CompiledIC call idiom
+ void ic_call(address entry);
+
+ // Jumps
+
+ // NOTE: these jumps transfer to the effective address of dst NOT
+ // the address contained by dst. This is because this is more natural
+ // for jumps/calls.
+ void jump(AddressLiteral dst);
+ void jump_cc(Condition cc, AddressLiteral dst);
+
+ // 32bit can do a case table jump in one instruction but we no longer allow the base
+ // to be installed in the Address class. This jump transfers to the address
+ // contained in the location described by entry (not the address of entry)
+ void jump(ArrayAddress entry);
+
+ // Floating
+
+ // Each inline overload in this group forwards its arguments unchanged to the
+ // Assembler method of the same name; fadd_s and fmul_s with an AddressLiteral
+ // first convert it with as_Address(). AddressLiteral overloads without a body
+ // are only declared here.
+ // NOTE(review): the forwarding overloads presumably exist so that the
+ // AddressLiteral overloads do not hide the Assembler ones -- confirm.
+ void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
+ void andpd(XMMRegister dst, AddressLiteral src);
+
+ void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
+ void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
+ void andps(XMMRegister dst, AddressLiteral src);
+
+ void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
+ void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
+ void comiss(XMMRegister dst, AddressLiteral src);
+
+ void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
+ void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
+ void comisd(XMMRegister dst, AddressLiteral src);
+
+ void fadd_s(Address src) { Assembler::fadd_s(src); }
+ void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }
+
+ void fldcw(Address src) { Assembler::fldcw(src); }
+ void fldcw(AddressLiteral src);
+
+ void fld_s(int index) { Assembler::fld_s(index); }
+ void fld_s(Address src) { Assembler::fld_s(src); }
+ void fld_s(AddressLiteral src);
+
+ void fld_d(Address src) { Assembler::fld_d(src); }
+ void fld_d(AddressLiteral src);
+
+ void fld_x(Address src) { Assembler::fld_x(src); }
+ void fld_x(AddressLiteral src);
+
+ void fmul_s(Address src) { Assembler::fmul_s(src); }
+ void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
+
+ void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
+ void ldmxcsr(AddressLiteral src);
+
+ // Compute pow(x,y) and exp(x) with x86 instructions. These do not cover
+ // all corner cases: they may produce NaN, in which case a fallback to a
+ // runtime call is required.
+ void fast_pow();
+ void fast_exp();
+ void increase_precision();
+ void restore_precision();
+
+ // computes exp(x). Fallback to runtime call included.
+ void exp_with_fallback(int num_fpu_regs_in_use) {
+ const bool is_exp = true; // select the exp(x) flavour of pow_or_exp
+ pow_or_exp(is_exp, num_fpu_regs_in_use);
+ }
+ // computes pow(x,y). Fallback to runtime call included.
+ void pow_with_fallback(int num_fpu_regs_in_use) {
+ const bool is_exp = false; // select the pow(x,y) flavour of pow_or_exp
+ pow_or_exp(is_exp, num_fpu_regs_in_use);
+ }
+
+private:
+
+ // call runtime as a fallback for trig functions and pow/exp.
+ void fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use);
+
+ // computes 2^(Ylog2X); Ylog2X in ST(0)
+ void pow_exp_core_encoding();
+
+ // computes pow(x,y) or exp(x). Fallback to runtime call included.
+ void pow_or_exp(bool is_exp, int num_fpu_regs_in_use);
+
+ // these are private because users should be doing movflt/movdbl
+
+ // Inline overloads forward unchanged to Assembler::movss / Assembler::movlpd;
+ // the AddressLiteral overloads are only declared here.
+ void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); }
+ void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
+ void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); }
+ void movss(XMMRegister dst, AddressLiteral src);
+
+ void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); }
+ void movlpd(XMMRegister dst, AddressLiteral src);
+
+public:
+
+ // Pattern for every instruction in this group: the XMMRegister and Address
+ // overloads forward unchanged to the Assembler method of the same name, and
+ // the AddressLiteral overload is only declared here.
+ void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
+ void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); }
+ void addsd(XMMRegister dst, AddressLiteral src);
+
+ void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
+ void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); }
+ void addss(XMMRegister dst, AddressLiteral src);
+
+ void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
+ void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
+ void divsd(XMMRegister dst, AddressLiteral src);
+
+ void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); }
+ void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
+ void divss(XMMRegister dst, AddressLiteral src);
+
+ // Move Unaligned Double Quadword
+ void movdqu(Address dst, XMMRegister src) { Assembler::movdqu(dst, src); }
+ void movdqu(XMMRegister dst, Address src) { Assembler::movdqu(dst, src); }
+ void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); }
+ void movdqu(XMMRegister dst, AddressLiteral src);
+
+ void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
+ void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
+ void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
+ void movsd(XMMRegister dst, AddressLiteral src);
+
+ void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
+ void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
+ void mulsd(XMMRegister dst, AddressLiteral src);
+
+ void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
+ void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
+ void mulss(XMMRegister dst, AddressLiteral src);
+
+ void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); }
+ void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); }
+ void sqrtsd(XMMRegister dst, AddressLiteral src);
+
+ void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); }
+ void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); }
+ void sqrtss(XMMRegister dst, AddressLiteral src);
+
+ void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); }
+ void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); }
+ void subsd(XMMRegister dst, AddressLiteral src);
+
+ void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); }
+ void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); }
+ void subss(XMMRegister dst, AddressLiteral src);
+
+ void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
+ void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
+ void ucomiss(XMMRegister dst, AddressLiteral src);
+
+ void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
+ void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
+ void ucomisd(XMMRegister dst, AddressLiteral src);
+
+ // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
+ void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
+ void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
+ void xorpd(XMMRegister dst, AddressLiteral src);
+
+ // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
+ void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
+ void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
+ void xorps(XMMRegister dst, AddressLiteral src);
+
+ // Shuffle Bytes
+ void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
+ void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); }
+ void pshufb(XMMRegister dst, AddressLiteral src);
+ // AVX 3-operands instructions
+
+ // Pattern for every instruction in this group: the XMMRegister and Address
+ // source overloads forward all arguments unchanged (including vector256 where
+ // present) to the Assembler method of the same name, and the AddressLiteral
+ // overload is only declared here.
+ void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
+ void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); }
+ void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
+ void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
+ void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
+ void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
+ void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+ void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
+ void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
+ void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+ void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
+ void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
+ void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
+ void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); }
+ void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
+ void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); }
+ void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
+ void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); }
+ void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
+ void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); }
+ void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
+ void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
+ void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ // AVX Vector instructions
+
+ // The XMMRegister and Address source overloads forward all four arguments
+ // unchanged to Assembler::vxorpd / Assembler::vxorps; the AddressLiteral
+ // overloads are only declared here.
+ void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
+ void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
+ void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+ void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
+ void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
+ void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+ // XOR with a register source. vpxor 256 bit is available only in AVX2, so a
+ // 256-bit request with UseAVX <= 1 is emitted as vxorpd instead.
+ void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ const bool use_vxorpd_instead = vector256 && !(UseAVX > 1);
+ if (use_vxorpd_instead) {
+ Assembler::vxorpd(dst, nds, src, vector256);
+ } else {
+ Assembler::vpxor(dst, nds, src, vector256);
+ }
+ }
+ // XOR with a memory source. vpxor 256 bit is available only in AVX2, so a
+ // 256-bit request with UseAVX <= 1 is emitted as vxorpd instead.
+ void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ const bool use_vxorpd_instead = vector256 && !(UseAVX > 1);
+ if (use_vxorpd_instead) {
+ Assembler::vxorpd(dst, nds, src, vector256);
+ } else {
+ Assembler::vpxor(dst, nds, src, vector256);
+ }
+ }
+
+ // Move packed integer values from low 128 bit to high 128 bit in 256 bit vector.
+ void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ // vinserti128h is available only in AVX2; otherwise vinsertf128h is used
+ const bool have_avx2 = UseAVX > 1;
+ if (!have_avx2) {
+ Assembler::vinsertf128h(dst, nds, src);
+ return;
+ }
+ Assembler::vinserti128h(dst, nds, src);
+ }
+
+ // Data
+
+ void cmov32( Condition cc, Register dst, Address src);
+ void cmov32( Condition cc, Register dst, Register src);
+
+ // cmov is a plain alias: it forwards to the register form of cmovptr.
+ void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }
+
+ // Pointer-sized conditional move: cmovq through LP64_ONLY, cmov32 through NOT_LP64.
+ void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
+ void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
+
+ void movoop(Register dst, jobject obj);
+ void movoop(Address dst, jobject obj);
+
+ void mov_metadata(Register dst, Metadata* obj);
+ void mov_metadata(Address dst, Metadata* obj);
+
+ void movptr(ArrayAddress dst, Register src);
+ // can this do an lea?
+ void movptr(Register dst, ArrayAddress src);
+
+ void movptr(Register dst, Address src);
+
+ void movptr(Register dst, AddressLiteral src);
+
+ void movptr(Register dst, intptr_t src);
+ void movptr(Register dst, Register src);
+ void movptr(Address dst, intptr_t src);
+
+ void movptr(Address dst, Register src);
+
+ // Moves src into dst, dispatching on whether src holds a register or a
+ // constant. The constant is passed on as returned by as_constant(), without a cast.
+ void movptr(Register dst, RegisterOrConstant src) {
+ if (!src.is_constant()) {
+ movptr(dst, src.as_register());
+ } else {
+ movptr(dst, src.as_constant());
+ }
+ }
+
+#ifdef _LP64
+ // Generally the next two are only used for moving NULL
+ // Although there are situations in initializing the mark word where
+ // they could be used. They are dangerous.
+
+ // They exist only on LP64, where int32_t and intptr_t are distinct types;
+ // elsewhere the two would coincide and the declarations would be ambiguous.
+
+ void movptr(Address dst, int32_t imm32);
+ void movptr(Register dst, int32_t imm32);
+#endif // _LP64
+
+ // to avoid hiding movl
+ void mov32(AddressLiteral dst, Register src);
+ void mov32(Register dst, AddressLiteral src);
+
+ // to avoid hiding movb
+ void movbyte(ArrayAddress dst, int src);
+
+ // Import other mov() methods from the parent class or else
+ // they will be hidden by the following overriding declaration.
+ using Assembler::movdl;
+ using Assembler::movq;
+ void movdl(XMMRegister dst, AddressLiteral src);
+ void movq(XMMRegister dst, AddressLiteral src);
+
+ // Can push value or effective address
+ void pushptr(AddressLiteral src);
+
+ // Pointer-sized push from memory: pushq through LP64_ONLY, pushl through NOT_LP64.
+ void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
+ // Pointer-sized pop to memory: popq through LP64_ONLY, popl through NOT_LP64.
+ void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }
+
+ void pushoop(jobject obj);
+ void pushklass(Metadata* obj);
+
+ // sign-extend an l (32-bit) value to a ptr-sized element, as needed
+ // Memory form: movslq through LP64_ONLY, a plain movl through NOT_LP64.
+ void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
+ // Register form: movslq through LP64_ONLY; the NOT_LP64 path emits a movl
+ // only when dst and src are different registers.
+ void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
+
+ // C2 compiled method's prolog code.
+ void verified_entry(int framesize, bool stack_bang, bool fp_mode_24b);
+
+ // IndexOf strings.
+ // Small strings are loaded through stack if they cross page boundary.
+ void string_indexof(Register str1, Register str2,
+ Register cnt1, Register cnt2,
+ int int_cnt2, Register result,
+ XMMRegister vec, Register tmp);
+
+ // IndexOf for constant substrings with size >= 8 elements
+ // which don't need to be loaded through stack.
+ void string_indexofC8(Register str1, Register str2,
+ Register cnt1, Register cnt2,
+ int int_cnt2, Register result,
+ XMMRegister vec, Register tmp);
+
+ // Smallest code: we don't need to load through stack,
+ // check string tail.
+
+ // Compare strings.
+ void string_compare(Register str1, Register str2,
+ Register cnt1, Register cnt2, Register result,
+ XMMRegister vec1);
+
+ // Compare char[] arrays.
+ void char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
+ Register limit, Register result, Register chr,
+ XMMRegister vec1, XMMRegister vec2);
+
+ // Fill primitive arrays
+ void generate_fill(BasicType t, bool aligned,
+ Register to, Register value, Register count,
+ Register rtmp, XMMRegister xtmp);
+
+#undef VIRTUAL
+
+};
+
+/**
+ * class SkipIfEqual:
+ *
+ * Instantiating this class will result in assembly code being output that will
+ * jump around any code emitted between the creation of the instance and its
+ * automatic destruction at the end of a scope block, depending on the value of
+ * the flag passed to the constructor, which will be checked at run-time.
+ */
+class SkipIfEqual {
+ public:
+ // Takes the assembler to work with, the address of the flag to test and the
+ // value to compare it against. Only the declaration is visible here.
+ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
+ ~SkipIfEqual();
+
+ private:
+ // NOTE(review): presumably the assembler handed to the constructor -- confirm in the .cpp.
+ MacroAssembler* _masm;
+ // NOTE(review): presumably the jump target marking the end of the skipped code -- confirm in the .cpp.
+ Label _label;
+};
+
+#endif // CPU_X86_VM_MACROASSEMBLER_X86_HPP
diff --git a/src/cpu/x86/vm/metaspaceShared_x86_32.cpp b/src/cpu/x86/vm/metaspaceShared_x86_32.cpp
index a43fafdd3..c2956a52a 100644
--- a/src/cpu/x86/vm/metaspaceShared_x86_32.cpp
+++ b/src/cpu/x86/vm/metaspaceShared_x86_32.cpp
@@ -23,7 +23,8 @@
*/
#include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/codeBuffer.hpp"
#include "memory/metaspaceShared.hpp"
// Generate the self-patching vtable method:
diff --git a/src/cpu/x86/vm/metaspaceShared_x86_64.cpp b/src/cpu/x86/vm/metaspaceShared_x86_64.cpp
index 2ef2abf6a..4ff6cc955 100644
--- a/src/cpu/x86/vm/metaspaceShared_x86_64.cpp
+++ b/src/cpu/x86/vm/metaspaceShared_x86_64.cpp
@@ -23,7 +23,8 @@
*/
#include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/codeBuffer.hpp"
#include "memory/metaspaceShared.hpp"
// Generate the self-patching vtable method:
diff --git a/src/cpu/x86/vm/methodHandles_x86.cpp b/src/cpu/x86/vm/methodHandles_x86.cpp
index 88ec6b719..7da3a2c42 100644
--- a/src/cpu/x86/vm/methodHandles_x86.cpp
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp
@@ -23,6 +23,7 @@
*/
#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "memory/allocation.inline.hpp"
diff --git a/src/cpu/x86/vm/nativeInst_x86.cpp b/src/cpu/x86/vm/nativeInst_x86.cpp
index 1cf509992..dccd7e0b7 100644
--- a/src/cpu/x86/vm/nativeInst_x86.cpp
+++ b/src/cpu/x86/vm/nativeInst_x86.cpp
@@ -23,7 +23,7 @@
*/
#include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
#include "memory/resourceArea.hpp"
#include "nativeInst_x86.hpp"
#include "oops/oop.inline.hpp"
diff --git a/src/cpu/x86/vm/relocInfo_x86.cpp b/src/cpu/x86/vm/relocInfo_x86.cpp
index 1023695e8..d4a929613 100644
--- a/src/cpu/x86/vm/relocInfo_x86.cpp
+++ b/src/cpu/x86/vm/relocInfo_x86.cpp
@@ -23,8 +23,7 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.inline.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
#include "code/relocInfo.hpp"
#include "nativeInst_x86.hpp"
#include "oops/oop.inline.hpp"
diff --git a/src/cpu/x86/vm/runtime_x86_32.cpp b/src/cpu/x86/vm/runtime_x86_32.cpp
index c932f9fa2..1cc10d766 100644
--- a/src/cpu/x86/vm/runtime_x86_32.cpp
+++ b/src/cpu/x86/vm/runtime_x86_32.cpp
@@ -24,12 +24,11 @@
#include "precompiled.hpp"
#ifdef COMPILER2
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
#include "classfile/systemDictionary.hpp"
#include "code/vmreg.hpp"
#include "interpreter/interpreter.hpp"
-#include "nativeInst_x86.hpp"
#include "opto/runtime.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/sharedRuntime.hpp"
diff --git a/src/cpu/x86/vm/runtime_x86_64.cpp b/src/cpu/x86/vm/runtime_x86_64.cpp
index 8f53518bb..0c39aea84 100644
--- a/src/cpu/x86/vm/runtime_x86_64.cpp
+++ b/src/cpu/x86/vm/runtime_x86_64.cpp
@@ -24,12 +24,11 @@
#include "precompiled.hpp"
#ifdef COMPILER2
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
#include "classfile/systemDictionary.hpp"
#include "code/vmreg.hpp"
#include "interpreter/interpreter.hpp"
-#include "nativeInst_x86.hpp"
#include "opto/runtime.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/sharedRuntime.hpp"
diff --git a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
index 7c3116a48..dc705421c 100644
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
@@ -23,8 +23,8 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
diff --git a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
index 271a045fb..50255eeef 100644
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
@@ -23,8 +23,8 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
diff --git a/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
index bcb408b5b..52e3f4169 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -23,8 +23,8 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_x86.hpp"
#include "oops/instanceOop.hpp"
diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
index d205f9639..48f4af8dc 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -23,8 +23,8 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_x86.hpp"
#include "oops/instanceOop.hpp"
diff --git a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
index 4aff1d966..19a2a45c9 100644
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
@@ -23,7 +23,7 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterGenerator.hpp"
diff --git a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
index 75318ab42..e2b46fc78 100644
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
@@ -23,7 +23,7 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterGenerator.hpp"
diff --git a/src/cpu/x86/vm/templateTable_x86_32.cpp b/src/cpu/x86/vm/templateTable_x86_32.cpp
index 24d138e49..e2a20531f 100644
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp
@@ -23,7 +23,7 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
+#include "asm/macroAssembler.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "interpreter/templateTable.hpp"
diff --git a/src/cpu/x86/vm/templateTable_x86_64.cpp b/src/cpu/x86/vm/templateTable_x86_64.cpp
index bb9b86f24..eedab0b4b 100644
--- a/src/cpu/x86/vm/templateTable_x86_64.cpp
+++ b/src/cpu/x86/vm/templateTable_x86_64.cpp
@@ -23,6 +23,7 @@
*/
#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "interpreter/templateTable.hpp"
diff --git a/src/cpu/x86/vm/vm_version_x86.cpp b/src/cpu/x86/vm/vm_version_x86.cpp
index 21bb57335..f48e66012 100644
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -23,7 +23,8 @@
*/
#include "precompiled.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/java.hpp"
#include "runtime/stubCodeGenerator.hpp"
diff --git a/src/cpu/x86/vm/vtableStubs_x86_32.cpp b/src/cpu/x86/vm/vtableStubs_x86_32.cpp
index edf1ab1bf..c470004eb 100644
--- a/src/cpu/x86/vm/vtableStubs_x86_32.cpp
+++ b/src/cpu/x86/vm/vtableStubs_x86_32.cpp
@@ -23,8 +23,7 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
#include "code/vtableStubs.hpp"
#include "interp_masm_x86_32.hpp"
#include "memory/resourceArea.hpp"
diff --git a/src/cpu/x86/vm/vtableStubs_x86_64.cpp b/src/cpu/x86/vm/vtableStubs_x86_64.cpp
index 867ebfa15..0dc056cdb 100644
--- a/src/cpu/x86/vm/vtableStubs_x86_64.cpp
+++ b/src/cpu/x86/vm/vtableStubs_x86_64.cpp
@@ -23,8 +23,7 @@
*/
#include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "assembler_x86.inline.hpp"
+#include "asm/macroAssembler.hpp"
#include "code/vtableStubs.hpp"
#include "interp_masm_x86_64.hpp"
#include "memory/resourceArea.hpp"