aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkvn <none@none>2012-01-06 20:09:20 -0800
committerkvn <none@none>2012-01-06 20:09:20 -0800
commitba9ecd28c25c047bf812b22781d1c0dde87ecd6c (patch)
tree1560ec8ef1d967c16f2a9dc74b2c4f9b9873bd18
parent9b76e3c59f71bf75c52003143e2649a5a0536340 (diff)
parent3998d29352106095850894600e989573054abab2 (diff)
Merge
-rw-r--r--make/bsd/makefiles/adlc.make9
-rw-r--r--make/linux/makefiles/adlc.make9
-rw-r--r--make/solaris/makefiles/adlc.make9
-rw-r--r--make/windows/makefiles/adlc.make16
-rw-r--r--src/cpu/sparc/vm/assembler_sparc.cpp18
-rw-r--r--src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp4
-rw-r--r--src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp15
-rw-r--r--src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp4
-rw-r--r--src/cpu/sparc/vm/c1_Runtime1_sparc.cpp13
-rw-r--r--src/cpu/sparc/vm/cppInterpreter_sparc.cpp4
-rw-r--r--src/cpu/sparc/vm/methodHandles_sparc.cpp2
-rw-r--r--src/cpu/sparc/vm/sparc.ad10
-rw-r--r--src/cpu/sparc/vm/stubGenerator_sparc.cpp9
-rw-r--r--src/cpu/sparc/vm/templateInterpreter_sparc.cpp4
-rw-r--r--src/cpu/sparc/vm/templateTable_sparc.cpp12
-rw-r--r--src/cpu/x86/vm/assembler_x86.cpp1312
-rw-r--r--src/cpu/x86/vm/assembler_x86.hpp268
-rw-r--r--src/cpu/x86/vm/assembler_x86.inline.hpp1
-rw-r--r--src/cpu/x86/vm/c1_CodeStubs_x86.cpp4
-rw-r--r--src/cpu/x86/vm/c1_LIRAssembler_x86.cpp21
-rw-r--r--src/cpu/x86/vm/c1_MacroAssembler_x86.cpp2
-rw-r--r--src/cpu/x86/vm/c1_Runtime1_x86.cpp20
-rw-r--r--src/cpu/x86/vm/cppInterpreter_x86.cpp6
-rw-r--r--src/cpu/x86/vm/methodHandles_x86.cpp2
-rw-r--r--src/cpu/x86/vm/nativeInst_x86.cpp20
-rw-r--r--src/cpu/x86/vm/nativeInst_x86.hpp3
-rw-r--r--src/cpu/x86/vm/register_definitions_x86.cpp2
-rw-r--r--src/cpu/x86/vm/stubGenerator_x86_32.cpp12
-rw-r--r--src/cpu/x86/vm/stubGenerator_x86_64.cpp12
-rw-r--r--src/cpu/x86/vm/templateInterpreter_x86_32.cpp4
-rw-r--r--src/cpu/x86/vm/templateInterpreter_x86_64.cpp6
-rw-r--r--src/cpu/x86/vm/templateTable_x86_32.cpp10
-rw-r--r--src/cpu/x86/vm/templateTable_x86_64.cpp12
-rw-r--r--src/cpu/x86/vm/vm_version_x86.cpp77
-rw-r--r--src/cpu/x86/vm/vm_version_x86.hpp67
-rw-r--r--src/cpu/x86/vm/x86.ad777
-rw-r--r--src/cpu/x86/vm/x86_32.ad2605
-rw-r--r--src/cpu/x86/vm/x86_64.ad1687
-rw-r--r--src/share/vm/adlc/formssel.cpp4
-rw-r--r--src/share/vm/asm/assembler.cpp1
-rw-r--r--src/share/vm/asm/assembler.hpp27
-rw-r--r--src/share/vm/c1/c1_LIR.cpp3
-rw-r--r--src/share/vm/c1/c1_LIRGenerator.cpp3
-rw-r--r--src/share/vm/c1/c1_Optimizer.cpp30
-rw-r--r--src/share/vm/oops/arrayKlass.hpp2
-rw-r--r--src/share/vm/oops/instanceKlass.hpp6
-rw-r--r--src/share/vm/oops/klass.cpp2
-rw-r--r--src/share/vm/oops/klass.hpp26
-rw-r--r--src/share/vm/oops/klassOop.hpp8
-rw-r--r--src/share/vm/oops/objArrayKlass.hpp2
-rw-r--r--src/share/vm/opto/callnode.hpp4
-rw-r--r--src/share/vm/opto/classes.hpp1
-rw-r--r--src/share/vm/opto/compile.cpp27
-rw-r--r--src/share/vm/opto/escape.cpp36
-rw-r--r--src/share/vm/opto/graphKit.cpp19
-rw-r--r--src/share/vm/opto/library_call.cpp52
-rw-r--r--src/share/vm/opto/macro.cpp120
-rw-r--r--src/share/vm/opto/matcher.cpp21
-rw-r--r--src/share/vm/opto/memnode.cpp29
-rw-r--r--src/share/vm/opto/memnode.hpp14
-rw-r--r--src/share/vm/opto/node.hpp5
-rw-r--r--src/share/vm/opto/parse1.cpp2
-rw-r--r--src/share/vm/opto/parseHelper.cpp6
-rw-r--r--src/share/vm/runtime/advancedThresholdPolicy.cpp13
-rw-r--r--src/share/vm/runtime/arguments.cpp7
-rw-r--r--src/share/vm/runtime/globals.hpp3
-rw-r--r--src/share/vm/shark/sharkIntrinsics.cpp10
-rw-r--r--src/share/vm/shark/sharkTopLevelBlock.cpp10
-rw-r--r--test/compiler/7123108/Test7123108.java60
-rw-r--r--test/compiler/7125879/Test7125879.java51
70 files changed, 4103 insertions, 3569 deletions
diff --git a/make/bsd/makefiles/adlc.make b/make/bsd/makefiles/adlc.make
index 69797ab73..7686c4886 100644
--- a/make/bsd/makefiles/adlc.make
+++ b/make/bsd/makefiles/adlc.make
@@ -39,9 +39,16 @@ OS = $(Platform_os_family)
SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad
-SOURCES.AD = \
+ifeq ("${Platform_arch_model}", "${Platform_arch}")
+ SOURCES.AD = \
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
$(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+else
+ SOURCES.AD = \
+ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
+ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
+ $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+endif
EXEC = $(OUTDIR)/adlc
diff --git a/make/linux/makefiles/adlc.make b/make/linux/makefiles/adlc.make
index 0c15c1c65..33a28eef8 100644
--- a/make/linux/makefiles/adlc.make
+++ b/make/linux/makefiles/adlc.make
@@ -39,9 +39,16 @@ OS = $(Platform_os_family)
SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad
-SOURCES.AD = \
+ifeq ("${Platform_arch_model}", "${Platform_arch}")
+ SOURCES.AD = \
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
$(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+else
+ SOURCES.AD = \
+ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
+ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
+ $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+endif
EXEC = $(OUTDIR)/adlc
diff --git a/make/solaris/makefiles/adlc.make b/make/solaris/makefiles/adlc.make
index 4bcecf607..b14a18794 100644
--- a/make/solaris/makefiles/adlc.make
+++ b/make/solaris/makefiles/adlc.make
@@ -40,9 +40,16 @@ OS = $(Platform_os_family)
SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad
-SOURCES.AD = \
+ifeq ("${Platform_arch_model}", "${Platform_arch}")
+ SOURCES.AD = \
$(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
$(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+else
+ SOURCES.AD = \
+ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
+ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
+ $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+endif
EXEC = $(OUTDIR)/adlc
diff --git a/make/windows/makefiles/adlc.make b/make/windows/makefiles/adlc.make
index d03e73373..de607ec52 100644
--- a/make/windows/makefiles/adlc.make
+++ b/make/windows/makefiles/adlc.make
@@ -53,6 +53,17 @@ CPP_INCLUDE_DIRS=\
/I "$(WorkSpace)\src\os\windows\vm" \
/I "$(WorkSpace)\src\cpu\$(Platform_arch)\vm"
+!if "$(Platform_arch_model)" == "$(Platform_arch)"
+SOURCES_AD=\
+ $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \
+ $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad
+!else
+SOURCES_AD=\
+ $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \
+ $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch).ad \
+ $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad
+!endif
+
# NOTE! If you add any files here, you must also update GENERATED_NAMES_IN_DIR
# and ProjectCreatorIDEOptions in projectcreator.make.
GENERATED_NAMES=\
@@ -105,7 +116,6 @@ $(GENERATED_NAMES_IN_DIR): $(Platform_arch_model).ad adlc.exe
$(ADLC) $(ADLCFLAGS) $(Platform_arch_model).ad
mv $(GENERATED_NAMES) $(AdlcOutDir)/
-$(Platform_arch_model).ad: $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad
+$(Platform_arch_model).ad: $(SOURCES_AD)
rm -f $(Platform_arch_model).ad
- cat $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \
- $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad >$(Platform_arch_model).ad
+ cat $(SOURCES_AD) >$(Platform_arch_model).ad
diff --git a/src/cpu/sparc/vm/assembler_sparc.cpp b/src/cpu/sparc/vm/assembler_sparc.cpp
index 8fe11550f..71c8e0745 100644
--- a/src/cpu/sparc/vm/assembler_sparc.cpp
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp
@@ -3036,10 +3036,8 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
Label* L_failure,
Label* L_slow_path,
RegisterOrConstant super_check_offset) {
- int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::secondary_super_cache_offset_in_bytes());
- int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::super_check_offset_offset_in_bytes());
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
bool need_slow_path = (must_load_sco ||
@@ -3159,10 +3157,8 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
assert(label_nulls <= 1, "at most one NULL in the batch");
// a couple of useful fields in sub_klass:
- int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::secondary_supers_offset_in_bytes());
- int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::secondary_super_cache_offset_in_bytes());
+ int ss_offset = in_bytes(Klass::secondary_supers_offset());
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
// Do a linear scan of the secondary super-klass chain.
// This code is rarely used, so simplicity is a virtue here.
@@ -3336,7 +3332,7 @@ void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label);
load_klass(obj_reg, temp_reg);
- ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
+ ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
or3(G2_thread, temp_reg, temp_reg);
xor3(mark_reg, temp_reg, temp_reg);
andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
@@ -3413,7 +3409,7 @@ void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
// FIXME: due to a lack of registers we currently blow away the age
// bits in this situation. Should attempt to preserve them.
load_klass(obj_reg, temp_reg);
- ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
+ ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
or3(G2_thread, temp_reg, temp_reg);
casn(mark_addr.base(), mark_reg, temp_reg);
// If the biasing toward our thread failed, this means that
@@ -3443,7 +3439,7 @@ void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
// FIXME: due to a lack of registers we currently blow away the age
// bits in this situation. Should attempt to preserve them.
load_klass(obj_reg, temp_reg);
- ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
+ ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
casn(mark_addr.base(), mark_reg, temp_reg);
// Fall through to the normal CAS-based lock, because no matter what
// the result of the above CAS, some thread must have succeeded in
diff --git a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp
index a54b68c45..837488c23 100644
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp
@@ -302,7 +302,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
assert(_obj != noreg, "must be a valid register");
assert(_oop_index >= 0, "must have oop index");
__ load_heap_oop(_obj, java_lang_Class::klass_offset_in_bytes(), G3);
- __ ld_ptr(G3, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc), G3);
+ __ ld_ptr(G3, in_bytes(instanceKlass::init_thread_offset()), G3);
__ cmp_and_brx_short(G2_thread, G3, Assembler::notEqual, Assembler::pn, call_patch);
// load_klass patches may execute the patched code before it's
@@ -471,7 +471,7 @@ void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
__ load_klass(src_reg, tmp_reg);
- Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc));
+ Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
__ ld(ref_type_adr, tmp_reg);
// _reference_type field is of type ReferenceType (enum)
diff --git a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
index 3605c0a29..96953eebf 100644
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
@@ -2202,8 +2202,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
} else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
__ load_klass(dst, tmp);
}
- int lh_offset = klassOopDesc::header_size() * HeapWordSize +
- Klass::layout_helper_offset_in_bytes();
+ int lh_offset = in_bytes(Klass::layout_helper_offset());
__ lduw(tmp, lh_offset, tmp2);
@@ -2238,12 +2237,10 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
__ mov(length, len);
__ load_klass(dst, tmp);
- int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
- objArrayKlass::element_klass_offset_in_bytes());
+ int ek_offset = in_bytes(objArrayKlass::element_klass_offset());
__ ld_ptr(tmp, ek_offset, super_k);
- int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::super_check_offset_offset_in_bytes());
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
__ lduw(super_k, sco_offset, chk_off);
__ call_VM_leaf(tmp, copyfunc_addr);
@@ -2456,7 +2453,7 @@ void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
op->klass()->as_register() == G5, "must be");
if (op->init_check()) {
__ ldub(op->klass()->as_register(),
- instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc),
+ in_bytes(instanceKlass::init_state_offset()),
op->tmp1()->as_register());
add_debug_info_for_null_check_here(op->stub()->info());
__ cmp(op->tmp1()->as_register(), instanceKlass::fully_initialized);
@@ -2627,7 +2624,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
} else {
bool need_slow_path = true;
if (k->is_loaded()) {
- if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
+ if ((int) k->super_check_offset() != in_bytes(Klass::secondary_super_cache_offset()))
need_slow_path = false;
// perform the fast part of the checking logic
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
@@ -2731,7 +2728,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ load_klass(value, klass_RInfo);
// get instance klass
- __ ld_ptr(Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)), k_RInfo);
+ __ ld_ptr(Address(k_RInfo, objArrayKlass::element_klass_offset()), k_RInfo);
// perform the fast part of the checking logic
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, success_target, failure_target, NULL);
diff --git a/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp b/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp
index 437222548..47f82cf87 100644
--- a/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp
+++ b/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp
@@ -181,7 +181,7 @@ void C1_MacroAssembler::try_allocate(
void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) {
assert_different_registers(obj, klass, len, t1, t2);
if (UseBiasedLocking && !len->is_valid()) {
- ld_ptr(klass, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes(), t1);
+ ld_ptr(klass, in_bytes(Klass::prototype_header_offset()), t1);
} else {
set((intx)markOopDesc::prototype(), t1);
}
@@ -252,7 +252,7 @@ void C1_MacroAssembler::initialize_object(
#ifdef ASSERT
{
Label ok;
- ld(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), t1);
+ ld(klass, in_bytes(Klass::layout_helper_offset()), t1);
if (var_size_in_bytes != noreg) {
cmp_and_brx_short(t1, var_size_in_bytes, Assembler::equal, Assembler::pt, ok);
} else {
diff --git a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp
index 7f053b046..d0ddc37c8 100644
--- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp
+++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp
@@ -398,14 +398,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
if (id == fast_new_instance_init_check_id) {
// make sure the klass is initialized
- __ ldub(G5_klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_t1);
+ __ ldub(G5_klass, in_bytes(instanceKlass::init_state_offset()), G3_t1);
__ cmp_and_br_short(G3_t1, instanceKlass::fully_initialized, Assembler::notEqual, Assembler::pn, slow_path);
}
#ifdef ASSERT
// assert object can be fast path allocated
{
Label ok, not_ok;
- __ ld(G5_klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), G1_obj_size);
+ __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);
// make sure it's an instance (LH > 0)
__ cmp_and_br_short(G1_obj_size, 0, Assembler::lessEqual, Assembler::pn, not_ok);
__ btst(Klass::_lh_instance_slow_path_bit, G1_obj_size);
@@ -425,7 +425,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ bind(retry_tlab);
// get the instance size
- __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size);
+ __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);
__ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path);
@@ -437,7 +437,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ bind(try_eden);
// get the instance size
- __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size);
+ __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);
__ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path);
__ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2);
@@ -471,8 +471,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
Register G4_length = G4; // Incoming
Register O0_obj = O0; // Outgoing
- Address klass_lh(G5_klass, ((klassOopDesc::header_size() * HeapWordSize)
- + Klass::layout_helper_offset_in_bytes()));
+ Address klass_lh(G5_klass, Klass::layout_helper_offset());
assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
assert(Klass::_lh_header_size_mask == 0xFF, "bytewise");
// Use this offset to pick out an individual byte of the layout_helper:
@@ -592,7 +591,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
Label register_finalizer;
Register t = O1;
__ load_klass(O0, t);
- __ ld(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc), t);
+ __ ld(t, in_bytes(Klass::access_flags_offset()), t);
__ set(JVM_ACC_HAS_FINALIZER, G3);
__ andcc(G3, t, G0);
__ br(Assembler::notZero, false, Assembler::pt, register_finalizer);
diff --git a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp
index 2c2a93f99..f402d622f 100644
--- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp
+++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp
@@ -766,7 +766,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
// get native function entry point(O0 is a good temp until the very end)
ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc::native_function_offset())), O0);
// for static methods insert the mirror argument
- const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc:: constants_offset())), O1);
__ ld_ptr(Address(O1, 0, constantPoolOopDesc::pool_holder_offset_in_bytes()), O1);
@@ -1173,7 +1173,7 @@ void CppInterpreterGenerator::generate_compute_interpreter_state(const Register
__ btst(JVM_ACC_SYNCHRONIZED, O1);
__ br( Assembler::zero, false, Assembler::pt, done);
- const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ delayed()->btst(JVM_ACC_STATIC, O1);
__ ld_ptr(XXX_STATE(_locals), O1);
__ br( Assembler::zero, true, Assembler::pt, got_obj);
diff --git a/src/cpu/sparc/vm/methodHandles_sparc.cpp b/src/cpu/sparc/vm/methodHandles_sparc.cpp
index d2a94d17e..5bdf88e53 100644
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp
@@ -1098,7 +1098,7 @@ void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHan
Address G3_amh_argument ( G3_method_handle, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes());
Address G3_amh_conversion(G3_method_handle, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes());
- const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+ const int java_mirror_offset = in_bytes(Klass::java_mirror_offset());
if (have_entry(ek)) {
__ nop(); // empty stubs make SG sick
diff --git a/src/cpu/sparc/vm/sparc.ad b/src/cpu/sparc/vm/sparc.ad
index fe5f992e8..2660c4306 100644
--- a/src/cpu/sparc/vm/sparc.ad
+++ b/src/cpu/sparc/vm/sparc.ad
@@ -6773,6 +6773,16 @@ instruct unnecessary_membar_volatile() %{
ins_pipe(empty);
%}
+instruct membar_storestore() %{
+ match(MemBarStoreStore);
+ ins_cost(0);
+
+ size(0);
+ format %{ "!MEMBAR-storestore (empty encoding)" %}
+ ins_encode( );
+ ins_pipe(empty);
+%}
+
//----------Register Move Instructions-----------------------------------------
instruct roundDouble_nop(regD dst) %{
match(Set dst (RoundDouble dst));
diff --git a/src/cpu/sparc/vm/stubGenerator_sparc.cpp b/src/cpu/sparc/vm/stubGenerator_sparc.cpp
index 589986272..0e076dbbd 100644
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp
@@ -3046,8 +3046,7 @@ class StubGenerator: public StubCodeGenerator {
// array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
//
- int lh_offset = klassOopDesc::header_size() * HeapWordSize +
- Klass::layout_helper_offset_in_bytes();
+ int lh_offset = in_bytes(Klass::layout_helper_offset());
// Load 32-bits signed value. Use br() instruction with it to check icc.
__ lduw(G3_src_klass, lh_offset, G5_lh);
@@ -3194,15 +3193,13 @@ class StubGenerator: public StubCodeGenerator {
G4_dst_klass, G3_src_klass);
// Generate the type check.
- int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::super_check_offset_offset_in_bytes());
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
__ lduw(G4_dst_klass, sco_offset, sco_temp);
generate_type_check(G3_src_klass, sco_temp, G4_dst_klass,
O5_temp, L_plain_copy);
// Fetch destination element klass from the objArrayKlass header.
- int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
- objArrayKlass::element_klass_offset_in_bytes());
+ int ek_offset = in_bytes(objArrayKlass::element_klass_offset());
// the checkcast_copy loop needs two extra arguments:
__ ld_ptr(G4_dst_klass, ek_offset, O4); // dest elem klass
diff --git a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp
index f72af5409..ceb6a5994 100644
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp
@@ -366,7 +366,7 @@ void InterpreterGenerator::lock_method(void) {
// get synchronization object to O0
{ Label done;
- const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ btst(JVM_ACC_STATIC, O0);
__ br( Assembler::zero, true, Assembler::pt, done);
__ delayed()->ld_ptr(Llocals, Interpreter::local_offset_in_bytes(0), O0); // get receiver for not-static case
@@ -991,7 +991,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
// get native function entry point(O0 is a good temp until the very end)
__ delayed()->ld_ptr(Lmethod, in_bytes(methodOopDesc::native_function_offset()), O0);
// for static methods insert the mirror argument
- const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ld_ptr(Lmethod, methodOopDesc:: constants_offset(), O1);
__ ld_ptr(O1, constantPoolOopDesc::pool_holder_offset_in_bytes(), O1);
diff --git a/src/cpu/sparc/vm/templateTable_sparc.cpp b/src/cpu/sparc/vm/templateTable_sparc.cpp
index 01dc89212..af6829b3d 100644
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp
@@ -888,7 +888,7 @@ void TemplateTable::aastore() {
// do fast instanceof cache test
- __ ld_ptr(O4, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes(), O4);
+ __ ld_ptr(O4, in_bytes(objArrayKlass::element_klass_offset()), O4);
assert(Otos_i == O0, "just checking");
@@ -2031,7 +2031,7 @@ void TemplateTable::_return(TosState state) {
__ access_local_ptr(G3_scratch, Otos_i);
__ load_klass(Otos_i, O2);
__ set(JVM_ACC_HAS_FINALIZER, G3);
- __ ld(O2, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc), O2);
+ __ ld(O2, in_bytes(Klass::access_flags_offset()), O2);
__ andcc(G3, O2, G0);
Label skip_register_finalizer;
__ br(Assembler::zero, false, Assembler::pn, skip_register_finalizer);
@@ -3350,13 +3350,13 @@ void TemplateTable::_new() {
__ ld_ptr(Rscratch, Roffset, RinstanceKlass);
// make sure klass is fully initialized:
- __ ldub(RinstanceKlass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_scratch);
+ __ ldub(RinstanceKlass, in_bytes(instanceKlass::init_state_offset()), G3_scratch);
__ cmp(G3_scratch, instanceKlass::fully_initialized);
__ br(Assembler::notEqual, false, Assembler::pn, slow_case);
- __ delayed()->ld(RinstanceKlass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), Roffset);
+ __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
// get instance_size in instanceKlass (already aligned)
- //__ ld(RinstanceKlass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), Roffset);
+ //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
// make sure klass does not have has_finalizer, or is abstract, or interface or java/lang/Class
__ btst(Klass::_lh_instance_slow_path_bit, Roffset);
@@ -3483,7 +3483,7 @@ void TemplateTable::_new() {
__ bind(initialize_header);
if (UseBiasedLocking) {
- __ ld_ptr(RinstanceKlass, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), G4_scratch);
+ __ ld_ptr(RinstanceKlass, in_bytes(Klass::prototype_header_offset()), G4_scratch);
} else {
__ set((intptr_t)markOopDesc::prototype(), G4_scratch);
}
diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp
index a58455cbf..aa5cd1854 100644
--- a/src/cpu/x86/vm/assembler_x86.cpp
+++ b/src/cpu/x86/vm/assembler_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -533,6 +533,19 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
case 0x0F: // movx..., etc.
switch (0xFF & *ip++) {
+ case 0x3A: // pcmpestri
+ tail_size = 1;
+ case 0x38: // ptest, pmovzxbw
+ ip++; // skip opcode
+ debug_only(has_disp32 = true); // has both kinds of operands!
+ break;
+
+ case 0x70: // pshufd r, r/a, #8
+ debug_only(has_disp32 = true); // has both kinds of operands!
+ case 0x73: // psrldq r, #8
+ tail_size = 1;
+ break;
+
case 0x12: // movlps
case 0x28: // movaps
case 0x2E: // ucomiss
@@ -543,9 +556,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
case 0x57: // xorps
case 0x6E: // movd
case 0x7E: // movd
- case 0xAE: // ldmxcsr a
- // 64bit side says it these have both operands but that doesn't
- // appear to be true
+ case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
debug_only(has_disp32 = true);
break;
@@ -565,6 +576,12 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
// fall out of the switch to decode the address
break;
+ case 0xC4: // pinsrw r, a, #8
+ debug_only(has_disp32 = true);
+ case 0xC5: // pextrw r, r, #8
+ tail_size = 1; // the imm8
+ break;
+
case 0xAC: // shrd r, a, #8
debug_only(has_disp32 = true);
tail_size = 1; // the imm8
@@ -625,11 +642,44 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
tail_size = 1; // the imm8
break;
- case 0xE8: // call rdisp32
- case 0xE9: // jmp rdisp32
- if (which == end_pc_operand) return ip + 4;
- assert(which == call32_operand, "call has no disp32 or imm");
- return ip;
+ case 0xC4: // VEX_3bytes
+ case 0xC5: // VEX_2bytes
+ assert((UseAVX > 0), "shouldn't have VEX prefix");
+ assert(ip == inst+1, "no prefixes allowed");
+ // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
+ // but they have prefix 0x0F and processed when 0x0F processed above.
+ //
+ // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
+ // instructions (these instructions are not supported in 64-bit mode).
+ // To distinguish them bits [7:6] are set in the VEX second byte since
+ // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
+ // those VEX bits REX and vvvv bits are inverted.
+ //
+ // Fortunately C2 doesn't generate these instructions so we don't need
+ // to check for them in product version.
+
+ // Check second byte
+ NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
+
+ // First byte
+ if ((0xFF & *inst) == VEX_3bytes) {
+ ip++; // third byte
+ is_64bit = ((VEX_W & *ip) == VEX_W);
+ }
+ ip++; // opcode
+ // To find the end of instruction (which == end_pc_operand).
+ switch (0xFF & *ip) {
+ case 0x61: // pcmpestri r, r/a, #8
+ case 0x70: // pshufd r, r/a, #8
+ case 0x73: // psrldq r, #8
+ tail_size = 1; // the imm8
+ break;
+ default:
+ break;
+ }
+ ip++; // skip opcode
+ debug_only(has_disp32 = true); // has both kinds of operands!
+ break;
case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
@@ -643,6 +693,12 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
debug_only(has_disp32 = true);
break;
+ case 0xE8: // call rdisp32
+ case 0xE9: // jmp rdisp32
+ if (which == end_pc_operand) return ip + 4;
+ assert(which == call32_operand, "call has no disp32 or imm");
+ return ip;
+
case 0xF0: // Lock
assert(os::is_MP(), "only on MP");
goto again_after_prefix;
@@ -918,9 +974,7 @@ void Assembler::addr_nop_8() {
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF2);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x58);
emit_byte(0xC0 | encode);
}
@@ -928,18 +982,14 @@ void Assembler::addsd(XMMRegister dst, XMMRegister src) {
void Assembler::addsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0xF2);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x58);
emit_operand(dst, src);
}
void Assembler::addss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_byte(0xF3);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x58);
emit_byte(0xC0 | encode);
}
@@ -947,13 +997,19 @@ void Assembler::addss(XMMRegister dst, XMMRegister src) {
void Assembler::addss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
- emit_byte(0xF3);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x58);
emit_operand(dst, src);
}
+void Assembler::andl(Address dst, int32_t imm32) {
+ InstructionMark im(this);
+ prefix(dst);
+ emit_byte(0x81);
+ emit_operand(rsp, dst, 4);
+ emit_long(imm32);
+}
+
void Assembler::andl(Register dst, int32_t imm32) {
prefix(dst);
emit_arith(0x81, 0xE0, dst, imm32);
@@ -974,13 +1030,33 @@ void Assembler::andl(Register dst, Register src) {
void Assembler::andpd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0x66);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_66);
emit_byte(0x54);
emit_operand(dst, src);
}
+void Assembler::andpd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+ emit_byte(0x54);
+ emit_byte(0xC0 | encode);
+}
+
+void Assembler::andps(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_NONE);
+ emit_byte(0x54);
+ emit_operand(dst, src);
+}
+
+void Assembler::andps(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
+ emit_byte(0x54);
+ emit_byte(0xC0 | encode);
+}
+
void Assembler::bsfl(Register dst, Register src) {
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
@@ -1025,19 +1101,7 @@ void Assembler::call(Label& L, relocInfo::relocType rtype) {
}
void Assembler::call(Register dst) {
- // This was originally using a 32bit register encoding
- // and surely we want 64bit!
- // this is a 32bit encoding but in 64bit mode the default
- // operand size is 64bit so there is no need for the
- // wide prefix. So prefix only happens if we use the
- // new registers. Much like push/pop.
- int x = offset();
- // this may be true but dbx disassembles it as if it
- // were 32bits...
- // int encode = prefix_and_encode(dst->encoding());
- // if (offset() != x) assert(dst->encoding() >= 8, "what?");
- int encode = prefixq_and_encode(dst->encoding());
-
+ int encode = prefix_and_encode(dst->encoding());
emit_byte(0xFF);
emit_byte(0xD0 | encode);
}
@@ -1157,87 +1221,119 @@ void Assembler::comisd(XMMRegister dst, Address src) {
// NOTE: dbx seems to decode this as comiss even though the
// 0x66 is there. Strangely ucomisd comes out correct
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0x66);
- comiss(dst, src);
+ InstructionMark im(this);
+ simd_prefix(dst, src, VEX_SIMD_66);
+ emit_byte(0x2F);
+ emit_operand(dst, src);
+}
+
+void Assembler::comisd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
+ emit_byte(0x2F);
+ emit_byte(0xC0 | encode);
}
void Assembler::comiss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
-
InstructionMark im(this);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, src, VEX_SIMD_NONE);
emit_byte(0x2F);
emit_operand(dst, src);
}
+void Assembler::comiss(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
+ emit_byte(0x2F);
+ emit_byte(0xC0 | encode);
+}
+
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF3);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
emit_byte(0xE6);
emit_byte(0xC0 | encode);
}
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
emit_byte(0x5B);
emit_byte(0xC0 | encode);
}
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF2);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x5A);
emit_byte(0xC0 | encode);
}
+void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_F2);
+ emit_byte(0x5A);
+ emit_operand(dst, src);
+}
+
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF2);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x2A);
emit_byte(0xC0 | encode);
}
+void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_F2);
+ emit_byte(0x2A);
+ emit_operand(dst, src);
+}
+
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_byte(0xF3);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x2A);
emit_byte(0xC0 | encode);
}
+void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_F3);
+ emit_byte(0x2A);
+ emit_operand(dst, src);
+}
+
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF3);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x5A);
emit_byte(0xC0 | encode);
}
+void Assembler::cvtss2sd(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_F3);
+ emit_byte(0x5A);
+ emit_operand(dst, src);
+}
+
+
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF2);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
emit_byte(0x2C);
emit_byte(0xC0 | encode);
}
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_byte(0xF3);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
emit_byte(0x2C);
emit_byte(0xC0 | encode);
}
@@ -1253,18 +1349,14 @@ void Assembler::decl(Address dst) {
void Assembler::divsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0xF2);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x5E);
emit_operand(dst, src);
}
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF2);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x5E);
emit_byte(0xC0 | encode);
}
@@ -1272,18 +1364,14 @@ void Assembler::divsd(XMMRegister dst, XMMRegister src) {
void Assembler::divss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
- emit_byte(0xF3);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x5E);
emit_operand(dst, src);
}
void Assembler::divss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_byte(0xF3);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x5E);
emit_byte(0xC0 | encode);
}
@@ -1377,8 +1465,14 @@ void Assembler::jccb(Condition cc, Label& L) {
if (L.is_bound()) {
const int short_size = 2;
address entry = target(L);
- assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
- "Dispacement too large for a short jmp");
+#ifdef ASSERT
+ intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
+ intptr_t delta = short_branch_delta();
+ if (delta != 0) {
+ dist += (dist < 0 ? (-delta) :delta);
+ }
+ assert(is8bit(dist), "Dispacement too large for a short jmp");
+#endif
intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
// 0111 tttn #8-bit disp
emit_byte(0x70 | cc);
@@ -1444,9 +1538,15 @@ void Assembler::jmpb(Label& L) {
if (L.is_bound()) {
const int short_size = 2;
address entry = target(L);
- assert(is8bit((entry - _code_pos) + short_size),
- "Dispacement too large for a short jmp");
assert(entry != NULL, "jmp most probably wrong");
+#ifdef ASSERT
+ intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
+ intptr_t delta = short_branch_delta();
+ if (delta != 0) {
+ dist += (dist < 0 ? (-delta) :delta);
+ }
+ assert(is8bit(dist), "Dispacement too large for a short jmp");
+#endif
intptr_t offs = entry - _code_pos;
emit_byte(0xEB);
emit_byte((offs - short_size) & 0xFF);
@@ -1509,49 +1609,16 @@ void Assembler::mov(Register dst, Register src) {
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int dstenc = dst->encoding();
- int srcenc = src->encoding();
- emit_byte(0x66);
- if (dstenc < 8) {
- if (srcenc >= 8) {
- prefix(REX_B);
- srcenc -= 8;
- }
- } else {
- if (srcenc < 8) {
- prefix(REX_R);
- } else {
- prefix(REX_RB);
- srcenc -= 8;
- }
- dstenc -= 8;
- }
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
emit_byte(0x28);
- emit_byte(0xC0 | dstenc << 3 | srcenc);
+ emit_byte(0xC0 | encode);
}
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int dstenc = dst->encoding();
- int srcenc = src->encoding();
- if (dstenc < 8) {
- if (srcenc >= 8) {
- prefix(REX_B);
- srcenc -= 8;
- }
- } else {
- if (srcenc < 8) {
- prefix(REX_R);
- } else {
- prefix(REX_RB);
- srcenc -= 8;
- }
- dstenc -= 8;
- }
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
emit_byte(0x28);
- emit_byte(0xC0 | dstenc << 3 | srcenc);
+ emit_byte(0xC0 | encode);
}
void Assembler::movb(Register dst, Address src) {
@@ -1582,19 +1649,15 @@ void Assembler::movb(Address dst, Register src) {
void Assembler::movdl(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0x66);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
emit_byte(0x6E);
emit_byte(0xC0 | encode);
}
void Assembler::movdl(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0x66);
// swap src/dst to get correct prefix
- int encode = prefix_and_encode(src->encoding(), dst->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
emit_byte(0x7E);
emit_byte(0xC0 | encode);
}
@@ -1602,58 +1665,29 @@ void Assembler::movdl(Register dst, XMMRegister src) {
void Assembler::movdl(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0x66);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, src, VEX_SIMD_66);
emit_byte(0x6E);
emit_operand(dst, src);
}
-
-void Assembler::movdqa(XMMRegister dst, Address src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- emit_byte(0x66);
- prefix(src, dst);
- emit_byte(0x0F);
- emit_byte(0x6F);
- emit_operand(dst, src);
-}
-
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0x66);
- int encode = prefixq_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
emit_byte(0x6F);
emit_byte(0xC0 | encode);
}
-void Assembler::movdqa(Address dst, XMMRegister src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- emit_byte(0x66);
- prefix(dst, src);
- emit_byte(0x0F);
- emit_byte(0x7F);
- emit_operand(src, dst);
-}
-
void Assembler::movdqu(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0xF3);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, src, VEX_SIMD_F3);
emit_byte(0x6F);
emit_operand(dst, src);
}
void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF3);
- int encode = prefixq_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
emit_byte(0x6F);
emit_byte(0xC0 | encode);
}
@@ -1661,9 +1695,7 @@ void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
void Assembler::movdqu(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0xF3);
- prefix(dst, src);
- emit_byte(0x0F);
+ simd_prefix(dst, src, VEX_SIMD_F3);
emit_byte(0x7F);
emit_operand(src, dst);
}
@@ -1710,9 +1742,7 @@ void Assembler::movl(Address dst, Register src) {
void Assembler::movlpd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0x66);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_66);
emit_byte(0x12);
emit_operand(dst, src);
}
@@ -1740,9 +1770,7 @@ void Assembler::movq( Address dst, MMXRegister src ) {
void Assembler::movq(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0xF3);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, src, VEX_SIMD_F3);
emit_byte(0x7E);
emit_operand(dst, src);
}
@@ -1750,9 +1778,7 @@ void Assembler::movq(XMMRegister dst, Address src) {
void Assembler::movq(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0x66);
- prefix(dst, src);
- emit_byte(0x0F);
+ simd_prefix(dst, src, VEX_SIMD_66);
emit_byte(0xD6);
emit_operand(src, dst);
}
@@ -1775,9 +1801,7 @@ void Assembler::movsbl(Register dst, Register src) { // movsxb
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF2);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x10);
emit_byte(0xC0 | encode);
}
@@ -1785,9 +1809,7 @@ void Assembler::movsd(XMMRegister dst, XMMRegister src) {
void Assembler::movsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0xF2);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, src, VEX_SIMD_F2);
emit_byte(0x10);
emit_operand(dst, src);
}
@@ -1795,18 +1817,14 @@ void Assembler::movsd(XMMRegister dst, Address src) {
void Assembler::movsd(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0xF2);
- prefix(dst, src);
- emit_byte(0x0F);
+ simd_prefix(dst, src, VEX_SIMD_F2);
emit_byte(0x11);
emit_operand(src, dst);
}
void Assembler::movss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_byte(0xF3);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x10);
emit_byte(0xC0 | encode);
}
@@ -1814,9 +1832,7 @@ void Assembler::movss(XMMRegister dst, XMMRegister src) {
void Assembler::movss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
- emit_byte(0xF3);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, src, VEX_SIMD_F3);
emit_byte(0x10);
emit_operand(dst, src);
}
@@ -1824,9 +1840,7 @@ void Assembler::movss(XMMRegister dst, Address src) {
void Assembler::movss(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
- emit_byte(0xF3);
- prefix(dst, src);
- emit_byte(0x0F);
+ simd_prefix(dst, src, VEX_SIMD_F3);
emit_byte(0x11);
emit_operand(src, dst);
}
@@ -1919,18 +1933,14 @@ void Assembler::mull(Register src) {
void Assembler::mulsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0xF2);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x59);
emit_operand(dst, src);
}
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF2);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x59);
emit_byte(0xC0 | encode);
}
@@ -1938,18 +1948,14 @@ void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
void Assembler::mulss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
- emit_byte(0xF3);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x59);
emit_operand(dst, src);
}
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_byte(0xF3);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x59);
emit_byte(0xC0 | encode);
}
@@ -2237,14 +2243,26 @@ void Assembler::orl(Register dst, Register src) {
emit_arith(0x0B, 0xC0, dst, src);
}
+void Assembler::packuswb(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66);
+ emit_byte(0x67);
+ emit_operand(dst, src);
+}
+
+void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+ emit_byte(0x67);
+ emit_byte(0xC0 | encode);
+}
+
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_2(), "");
-
InstructionMark im(this);
- emit_byte(0x66);
- prefix(src, dst);
- emit_byte(0x0F);
- emit_byte(0x3A);
+ simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
emit_byte(0x61);
emit_operand(dst, src);
emit_byte(imm8);
@@ -2252,16 +2270,27 @@ void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_2(), "");
-
- emit_byte(0x66);
- int encode = prefixq_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
- emit_byte(0x3A);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
emit_byte(0x61);
emit_byte(0xC0 | encode);
emit_byte(imm8);
}
+void Assembler::pmovzxbw(XMMRegister dst, Address src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionMark im(this);
+ simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ emit_byte(0x30);
+ emit_operand(dst, src);
+}
+
+void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ emit_byte(0x30);
+ emit_byte(0xC0 | encode);
+}
+
// generic
void Assembler::pop(Register dst) {
int encode = prefix_and_encode(dst->encoding());
@@ -2360,22 +2389,24 @@ void Assembler::prefix(Prefix p) {
void Assembler::por(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
- emit_byte(0x66);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
-
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
emit_byte(0xEB);
emit_byte(0xC0 | encode);
}
+void Assembler::por(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66);
+ emit_byte(0xEB);
+ emit_operand(dst, src);
+}
+
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
- emit_byte(0x66);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
emit_byte(0x70);
emit_byte(0xC0 | encode);
emit_byte(mode & 0xFF);
@@ -2385,11 +2416,9 @@ void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
+ assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
- emit_byte(0x66);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, src, VEX_SIMD_66);
emit_byte(0x70);
emit_operand(dst, src);
emit_byte(mode & 0xFF);
@@ -2398,10 +2427,7 @@ void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
- emit_byte(0xF2);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
emit_byte(0x70);
emit_byte(0xC0 | encode);
emit_byte(mode & 0xFF);
@@ -2410,11 +2436,9 @@ void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
+ assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
- emit_byte(0xF2);
- prefix(src, dst); // QQ new
- emit_byte(0x0F);
+ simd_prefix(dst, src, VEX_SIMD_F2);
emit_byte(0x70);
emit_operand(dst, src);
emit_byte(mode & 0xFF);
@@ -2425,11 +2449,8 @@ void Assembler::psrlq(XMMRegister dst, int shift) {
// HMM Table D-1 says sse2 or mmx.
// Do not confuse it with psrldq SSE2 instruction which
// shifts 128 bit value in xmm register by number of bytes.
- NOT_LP64(assert(VM_Version::supports_sse(), ""));
-
- int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
- emit_byte(0x66);
- emit_byte(0x0F);
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
emit_byte(0x73);
emit_byte(0xC0 | encode);
emit_byte(shift);
@@ -2438,10 +2459,7 @@ void Assembler::psrlq(XMMRegister dst, int shift) {
void Assembler::psrldq(XMMRegister dst, int shift) {
// Shift 128 bit value in xmm register by number of bytes.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
- int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding());
- emit_byte(0x66);
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
emit_byte(0x73);
emit_byte(0xC0 | encode);
emit_byte(shift);
@@ -2449,36 +2467,52 @@ void Assembler::psrldq(XMMRegister dst, int shift) {
void Assembler::ptest(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
-
+ assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
- emit_byte(0x66);
- prefix(src, dst);
- emit_byte(0x0F);
- emit_byte(0x38);
+ simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
emit_byte(0x17);
emit_operand(dst, src);
}
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse4_1(), "");
-
- emit_byte(0x66);
- int encode = prefixq_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
- emit_byte(0x38);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
emit_byte(0x17);
emit_byte(0xC0 | encode);
}
+void Assembler::punpcklbw(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66);
+ emit_byte(0x60);
+ emit_operand(dst, src);
+}
+
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0x66);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
emit_byte(0x60);
emit_byte(0xC0 | encode);
}
+void Assembler::punpckldq(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66);
+ emit_byte(0x62);
+ emit_operand(dst, src);
+}
+
+void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+ emit_byte(0x62);
+ emit_byte(0xC0 | encode);
+}
+
void Assembler::push(int32_t imm32) {
// in 64bits we push 64bits onto the stack but only
// take a 32bit immediate
@@ -2508,20 +2542,16 @@ void Assembler::pushl(Address src) {
void Assembler::pxor(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
- emit_byte(0x66);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_66);
emit_byte(0xEF);
emit_operand(dst, src);
}
void Assembler::pxor(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- emit_byte(0x66);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
emit_byte(0xEF);
emit_byte(0xC0 | encode);
}
@@ -2683,12 +2713,8 @@ void Assembler::smovl() {
}
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
- // HMM Table D-1 says sse2
- // NOT_LP64(assert(VM_Version::supports_sse(), ""));
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF2);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x51);
emit_byte(0xC0 | encode);
}
@@ -2696,30 +2722,22 @@ void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
void Assembler::sqrtsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0xF2);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x51);
emit_operand(dst, src);
}
void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
- // HMM Table D-1 says sse2
- // NOT_LP64(assert(VM_Version::supports_sse(), ""));
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF3);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x51);
emit_byte(0xC0 | encode);
}
void Assembler::sqrtss(XMMRegister dst, Address src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
- emit_byte(0xF3);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x51);
emit_operand(dst, src);
}
@@ -2765,9 +2783,7 @@ void Assembler::subl(Register dst, Register src) {
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF2);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x5C);
emit_byte(0xC0 | encode);
}
@@ -2775,18 +2791,14 @@ void Assembler::subsd(XMMRegister dst, XMMRegister src) {
void Assembler::subsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0xF2);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x5C);
emit_operand(dst, src);
}
void Assembler::subss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_byte(0xF3);
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x5C);
emit_byte(0xC0 | encode);
}
@@ -2794,9 +2806,7 @@ void Assembler::subss(XMMRegister dst, XMMRegister src) {
void Assembler::subss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
- emit_byte(0xF3);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x5C);
emit_operand(dst, src);
}
@@ -2836,30 +2846,30 @@ void Assembler::testl(Register dst, Address src) {
void Assembler::ucomisd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0x66);
- ucomiss(dst, src);
+ InstructionMark im(this);
+ simd_prefix(dst, src, VEX_SIMD_66);
+ emit_byte(0x2E);
+ emit_operand(dst, src);
}
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0x66);
- ucomiss(dst, src);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
+ emit_byte(0x2E);
+ emit_byte(0xC0 | encode);
}
void Assembler::ucomiss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
-
InstructionMark im(this);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, src, VEX_SIMD_NONE);
emit_byte(0x2E);
emit_operand(dst, src);
}
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
emit_byte(0x2E);
emit_byte(0xC0 | encode);
}
@@ -2905,16 +2915,15 @@ void Assembler::xorl(Register dst, Register src) {
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0x66);
- xorps(dst, src);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+ emit_byte(0x57);
+ emit_byte(0xC0 | encode);
}
void Assembler::xorpd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- emit_byte(0x66);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_66);
emit_byte(0x57);
emit_operand(dst, src);
}
@@ -2922,8 +2931,7 @@ void Assembler::xorpd(XMMRegister dst, Address src) {
void Assembler::xorps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = prefix_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
emit_byte(0x57);
emit_byte(0xC0 | encode);
}
@@ -2931,12 +2939,166 @@ void Assembler::xorps(XMMRegister dst, XMMRegister src) {
void Assembler::xorps(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
- prefix(src, dst);
- emit_byte(0x0F);
+ simd_prefix(dst, dst, src, VEX_SIMD_NONE);
+ emit_byte(0x57);
+ emit_operand(dst, src);
+}
+
+// AVX 3-operands non destructive source instructions (encoded with VEX prefix)
+
+void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, VEX_SIMD_F2);
+ emit_byte(0x58);
+ emit_operand(dst, src);
+}
+
+void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ assert(VM_Version::supports_avx(), "");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
+ emit_byte(0x58);
+ emit_byte(0xC0 | encode);
+}
+
+void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, VEX_SIMD_F3);
+ emit_byte(0x58);
+ emit_operand(dst, src);
+}
+
+void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ assert(VM_Version::supports_avx(), "");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
+ emit_byte(0x58);
+ emit_byte(0xC0 | encode);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
+ emit_byte(0x54);
+ emit_operand(dst, src);
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
+ emit_byte(0x54);
+ emit_operand(dst, src);
+}
+
+void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, VEX_SIMD_F2);
+ emit_byte(0x5E);
+ emit_operand(dst, src);
+}
+
+void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ assert(VM_Version::supports_avx(), "");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
+ emit_byte(0x5E);
+ emit_byte(0xC0 | encode);
+}
+
+void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, VEX_SIMD_F3);
+ emit_byte(0x5E);
+ emit_operand(dst, src);
+}
+
+void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ assert(VM_Version::supports_avx(), "");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
+ emit_byte(0x5E);
+ emit_byte(0xC0 | encode);
+}
+
+void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, VEX_SIMD_F2);
+ emit_byte(0x59);
+ emit_operand(dst, src);
+}
+
+void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ assert(VM_Version::supports_avx(), "");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
+ emit_byte(0x59);
+ emit_byte(0xC0 | encode);
+}
+
+void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, VEX_SIMD_F3);
+ emit_byte(0x59);
+ emit_operand(dst, src);
+}
+
+void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ assert(VM_Version::supports_avx(), "");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
+ emit_byte(0x59);
+ emit_byte(0xC0 | encode);
+}
+
+
+void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, VEX_SIMD_F2);
+ emit_byte(0x5C);
+ emit_operand(dst, src);
+}
+
+void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ assert(VM_Version::supports_avx(), "");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
+ emit_byte(0x5C);
+ emit_byte(0xC0 | encode);
+}
+
+void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, VEX_SIMD_F3);
+ emit_byte(0x5C);
+ emit_operand(dst, src);
+}
+
+void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ assert(VM_Version::supports_avx(), "");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
+ emit_byte(0x5C);
+ emit_byte(0xC0 | encode);
+}
+
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
emit_byte(0x57);
emit_operand(dst, src);
}
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
+ emit_byte(0x57);
+ emit_operand(dst, src);
+}
+
+
#ifndef _LP64
// 32bit only pieces of the assembler
@@ -3394,12 +3556,114 @@ void Assembler::fyl2x() {
emit_byte(0xF1);
}
+// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
+static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
+// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
+static int simd_opc[4] = { 0, 0, 0x38, 0x3A };
+
+// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
+void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
+ if (pre > 0) {
+ emit_byte(simd_pre[pre]);
+ }
+ if (rex_w) {
+ prefixq(adr, xreg);
+ } else {
+ prefix(adr, xreg);
+ }
+ if (opc > 0) {
+ emit_byte(0x0F);
+ int opc2 = simd_opc[opc];
+ if (opc2 > 0) {
+ emit_byte(opc2);
+ }
+ }
+}
+
+int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
+ if (pre > 0) {
+ emit_byte(simd_pre[pre]);
+ }
+ int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
+ prefix_and_encode(dst_enc, src_enc);
+ if (opc > 0) {
+ emit_byte(0x0F);
+ int opc2 = simd_opc[opc];
+ if (opc2 > 0) {
+ emit_byte(opc2);
+ }
+ }
+ return encode;
+}
+
+
+void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
+ if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
+ prefix(VEX_3bytes);
+
+ int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
+ byte1 = (~byte1) & 0xE0;
+ byte1 |= opc;
+ a_byte(byte1);
+
+ int byte2 = ((~nds_enc) & 0xf) << 3;
+ byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
+ emit_byte(byte2);
+ } else {
+ prefix(VEX_2bytes);
+
+ int byte1 = vex_r ? VEX_R : 0;
+ byte1 = (~byte1) & 0x80;
+ byte1 |= ((~nds_enc) & 0xf) << 3;
+ byte1 |= (vector256 ? 4 : 0) | pre;
+ emit_byte(byte1);
+ }
+}
+
+void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
+ bool vex_r = (xreg_enc >= 8);
+ bool vex_b = adr.base_needs_rex();
+ bool vex_x = adr.index_needs_rex();
+ vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
+}
+
+int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
+ bool vex_r = (dst_enc >= 8);
+ bool vex_b = (src_enc >= 8);
+ bool vex_x = false;
+ vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
+ return (((dst_enc & 7) << 3) | (src_enc & 7));
+}
+
+
+void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
+ if (UseAVX > 0) {
+ int xreg_enc = xreg->encoding();
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
+ } else {
+ assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
+ rex_prefix(adr, xreg, pre, opc, rex_w);
+ }
+}
+
+int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if (UseAVX > 0) {
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
+ } else {
+ assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
+ return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
+ }
+}
#ifndef _LP64
void Assembler::incl(Register dst) {
// Don't use it directly. Use MacroAssembler::incrementl() instead.
- emit_byte(0x40 | dst->encoding());
+ emit_byte(0x40 | dst->encoding());
}
void Assembler::lea(Register dst, Address src) {
@@ -3756,6 +4020,38 @@ void Assembler::prefix(Address adr, XMMRegister reg) {
}
}
+void Assembler::prefixq(Address adr, XMMRegister src) {
+ if (src->encoding() < 8) {
+ if (adr.base_needs_rex()) {
+ if (adr.index_needs_rex()) {
+ prefix(REX_WXB);
+ } else {
+ prefix(REX_WB);
+ }
+ } else {
+ if (adr.index_needs_rex()) {
+ prefix(REX_WX);
+ } else {
+ prefix(REX_W);
+ }
+ }
+ } else {
+ if (adr.base_needs_rex()) {
+ if (adr.index_needs_rex()) {
+ prefix(REX_WRXB);
+ } else {
+ prefix(REX_WRB);
+ }
+ } else {
+ if (adr.index_needs_rex()) {
+ prefix(REX_WRX);
+ } else {
+ prefix(REX_WR);
+ }
+ }
+ }
+}
+
void Assembler::adcq(Register dst, int32_t imm32) {
(void) prefixq_and_encode(dst->encoding());
emit_arith(0x81, 0xD0, dst, imm32);
@@ -3918,36 +4214,44 @@ void Assembler::cmpxchgq(Register reg, Address adr) {
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF2);
- int encode = prefixq_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
emit_byte(0x2A);
emit_byte(0xC0 | encode);
}
+void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionMark im(this);
+ simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
+ emit_byte(0x2A);
+ emit_operand(dst, src);
+}
+
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_byte(0xF3);
- int encode = prefixq_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
emit_byte(0x2A);
emit_byte(0xC0 | encode);
}
+void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ InstructionMark im(this);
+ simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
+ emit_byte(0x2A);
+ emit_operand(dst, src);
+}
+
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_byte(0xF2);
- int encode = prefixq_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
emit_byte(0x2C);
emit_byte(0xC0 | encode);
}
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_byte(0xF3);
- int encode = prefixq_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
emit_byte(0x2C);
emit_byte(0xC0 | encode);
}
@@ -4107,21 +4411,17 @@ void Assembler::lzcntq(Register dst, Register src) {
void Assembler::movdq(XMMRegister dst, Register src) {
// table D-1 says MMX/SSE2
- NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
- emit_byte(0x66);
- int encode = prefixq_and_encode(dst->encoding(), src->encoding());
- emit_byte(0x0F);
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
emit_byte(0x6E);
emit_byte(0xC0 | encode);
}
void Assembler::movdq(Register dst, XMMRegister src) {
// table D-1 says MMX/SSE2
- NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
- emit_byte(0x66);
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
// swap src/dst to get correct prefix
- int encode = prefixq_and_encode(src->encoding(), dst->encoding());
- emit_byte(0x0F);
+ int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
emit_byte(0x7E);
emit_byte(0xC0 | encode);
}
@@ -4632,7 +4932,7 @@ int MacroAssembler::biased_locking_enter(Register lock_reg,
null_check_offset = offset();
}
movl(tmp_reg, klass_addr);
- xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+ xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
if (need_tmp_reg) {
pop(tmp_reg);
@@ -4719,7 +5019,7 @@ int MacroAssembler::biased_locking_enter(Register lock_reg,
}
get_thread(tmp_reg);
movl(swap_reg, klass_addr);
- orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+ orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
movl(swap_reg, saved_mark_addr);
if (os::is_MP()) {
lock();
@@ -4757,7 +5057,7 @@ int MacroAssembler::biased_locking_enter(Register lock_reg,
push(tmp_reg);
}
movl(tmp_reg, klass_addr);
- movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+ movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
if (os::is_MP()) {
lock();
}
@@ -5680,6 +5980,24 @@ void MacroAssembler::addptr(Address dst, Register src) {
LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}
+void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
+ if (reachable(src)) {
+ Assembler::addsd(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::addsd(dst, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
+ if (reachable(src)) {
+ addss(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ addss(dst, Address(rscratch1, 0));
+ }
+}
+
void MacroAssembler::align(int modulus) {
if (offset() % modulus != 0) {
nop(modulus - (offset() % modulus));
@@ -5687,11 +6005,24 @@ void MacroAssembler::align(int modulus) {
}
void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
+ // Used in sign-masking with aligned address.
+ assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
if (reachable(src)) {
- andpd(dst, as_Address(src));
+ Assembler::andpd(dst, as_Address(src));
} else {
lea(rscratch1, src);
- andpd(dst, Address(rscratch1, 0));
+ Assembler::andpd(dst, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
+ // Used in sign-masking with aligned address.
+ assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+ if (reachable(src)) {
+ Assembler::andps(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::andps(dst, Address(rscratch1, 0));
}
}
@@ -6270,19 +6601,19 @@ void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
- comisd(dst, as_Address(src));
+ Assembler::comisd(dst, as_Address(src));
} else {
lea(rscratch1, src);
- comisd(dst, Address(rscratch1, 0));
+ Assembler::comisd(dst, Address(rscratch1, 0));
}
}
void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
- comiss(dst, as_Address(src));
+ Assembler::comiss(dst, as_Address(src));
} else {
lea(rscratch1, src);
- comiss(dst, Address(rscratch1, 0));
+ Assembler::comiss(dst, Address(rscratch1, 0));
}
}
@@ -6366,6 +6697,24 @@ void MacroAssembler::division_with_shift (Register reg, int shift_value) {
sarl(reg, shift_value);
}
+void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
+ if (reachable(src)) {
+ Assembler::divsd(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::divsd(dst, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
+ if (reachable(src)) {
+ Assembler::divss(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::divss(dst, Address(rscratch1, 0));
+ }
+}
+
// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
void MacroAssembler::empty_FPU_stack() {
@@ -6805,12 +7154,39 @@ void MacroAssembler::movptr(Address dst, Register src) {
LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
+void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
+ if (reachable(src)) {
+ Assembler::movsd(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::movsd(dst, Address(rscratch1, 0));
+ }
+}
+
void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
- movss(dst, as_Address(src));
+ Assembler::movss(dst, as_Address(src));
} else {
lea(rscratch1, src);
- movss(dst, Address(rscratch1, 0));
+ Assembler::movss(dst, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
+ if (reachable(src)) {
+ Assembler::mulsd(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::mulsd(dst, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
+ if (reachable(src)) {
+ Assembler::mulss(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::mulss(dst, Address(rscratch1, 0));
}
}
@@ -6992,6 +7368,193 @@ void MacroAssembler::testl(Register dst, AddressLiteral src) {
testl(dst, as_Address(src));
}
+void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
+ if (reachable(src)) {
+ Assembler::sqrtsd(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::sqrtsd(dst, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
+ if (reachable(src)) {
+ Assembler::sqrtss(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::sqrtss(dst, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
+ if (reachable(src)) {
+ Assembler::subsd(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::subsd(dst, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
+ if (reachable(src)) {
+ Assembler::subss(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::subss(dst, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
+ if (reachable(src)) {
+ Assembler::ucomisd(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::ucomisd(dst, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
+ if (reachable(src)) {
+ Assembler::ucomiss(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::ucomiss(dst, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
+ // Used in sign-bit flipping with aligned address.
+ assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+ if (reachable(src)) {
+ Assembler::xorpd(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::xorpd(dst, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
+ // Used in sign-bit flipping with aligned address.
+ assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+ if (reachable(src)) {
+ Assembler::xorps(dst, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ Assembler::xorps(dst, Address(rscratch1, 0));
+ }
+}
+
+// AVX 3-operands instructions
+
+void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+ if (reachable(src)) {
+ vaddsd(dst, nds, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ vaddsd(dst, nds, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+ if (reachable(src)) {
+ vaddss(dst, nds, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ vaddss(dst, nds, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+ if (reachable(src)) {
+ vandpd(dst, nds, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ vandpd(dst, nds, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+ if (reachable(src)) {
+ vandps(dst, nds, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ vandps(dst, nds, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+ if (reachable(src)) {
+ vdivsd(dst, nds, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ vdivsd(dst, nds, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+ if (reachable(src)) {
+ vdivss(dst, nds, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ vdivss(dst, nds, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+ if (reachable(src)) {
+ vmulsd(dst, nds, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ vmulsd(dst, nds, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+ if (reachable(src)) {
+ vmulss(dst, nds, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ vmulss(dst, nds, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+ if (reachable(src)) {
+ vsubsd(dst, nds, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ vsubsd(dst, nds, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+ if (reachable(src)) {
+ vsubss(dst, nds, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ vsubss(dst, nds, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+ if (reachable(src)) {
+ vxorpd(dst, nds, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ vxorpd(dst, nds, Address(rscratch1, 0));
+ }
+}
+
+void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+ if (reachable(src)) {
+ vxorps(dst, nds, as_Address(src));
+ } else {
+ lea(rscratch1, src);
+ vxorps(dst, nds, Address(rscratch1, 0));
+ }
+}
+
+
//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
@@ -7430,19 +7993,23 @@ void MacroAssembler::incr_allocated_bytes(Register thread,
Register var_size_in_bytes,
int con_size_in_bytes,
Register t1) {
+ if (!thread->is_valid()) {
#ifdef _LP64
- if (var_size_in_bytes->is_valid()) {
- addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
- } else {
- addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
- }
+ thread = r15_thread;
#else
- if (!thread->is_valid()) {
assert(t1->is_valid(), "need temp reg");
thread = t1;
get_thread(thread);
+#endif
}
+#ifdef _LP64
+ if (var_size_in_bytes->is_valid()) {
+ addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
+ } else {
+ addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
+ }
+#else
if (var_size_in_bytes->is_valid()) {
addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
} else {
@@ -7685,10 +8252,8 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1, "at most one NULL in the batch");
- int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::secondary_super_cache_offset_in_bytes());
- int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::super_check_offset_offset_in_bytes());
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
Address super_check_offset_addr(super_klass, sco_offset);
// Hacked jcc, which "knows" that L_fallthrough, at least, is in
@@ -7786,10 +8351,8 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
assert(label_nulls <= 1, "at most one NULL in the batch");
// a couple of useful fields in sub_klass:
- int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::secondary_supers_offset_in_bytes());
- int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::secondary_super_cache_offset_in_bytes());
+ int ss_offset = in_bytes(Klass::secondary_supers_offset());
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
Address secondary_supers_addr(sub_klass, ss_offset);
Address super_cache_addr( sub_klass, sc_offset);
@@ -7876,32 +8439,6 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
}
-void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
- ucomisd(dst, as_Address(src));
-}
-
-void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
- ucomiss(dst, as_Address(src));
-}
-
-void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- xorpd(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- xorpd(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
- if (reachable(src)) {
- xorps(dst, as_Address(src));
- } else {
- lea(rscratch1, src);
- xorps(dst, Address(rscratch1, 0));
- }
-}
-
void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
if (VM_Version::supports_cmov()) {
cmovl(cc, dst, src);
@@ -8487,20 +9024,20 @@ void MacroAssembler::load_prototype_header(Register dst, Register src) {
if (Universe::narrow_oop_shift() != 0) {
assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
if (LogMinObjAlignmentInBytes == Address::times_8) {
- movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+ movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
} else {
// OK to use shift since we don't need to preserve flags.
shlq(dst, LogMinObjAlignmentInBytes);
- movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+ movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset()));
}
} else {
- movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+ movq(dst, Address(dst, Klass::prototype_header_offset()));
}
} else
#endif
{
movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
- movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+ movptr(dst, Address(dst, Klass::prototype_header_offset()));
}
}
@@ -8761,6 +9298,7 @@ void MacroAssembler::string_indexofC8(Register str1, Register str2,
Register cnt1, Register cnt2,
int int_cnt2, Register result,
XMMRegister vec, Register tmp) {
+ ShortBranchVerifier sbv(this);
assert(UseSSE42Intrinsics, "SSE4.2 is required");
// This method uses pcmpestri inxtruction with bound registers
@@ -8890,9 +9428,9 @@ void MacroAssembler::string_indexofC8(Register str1, Register str2,
pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
}
// Need to reload strings pointers if not matched whole vector
- jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
+ jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
addptr(cnt2, 8);
- jccb(Assembler::negative, SCAN_SUBSTR);
+ jcc(Assembler::negative, SCAN_SUBSTR);
// Fall through if found full substring
} // (int_cnt2 > 8)
@@ -8911,6 +9449,7 @@ void MacroAssembler::string_indexof(Register str1, Register str2,
Register cnt1, Register cnt2,
int int_cnt2, Register result,
XMMRegister vec, Register tmp) {
+ ShortBranchVerifier sbv(this);
assert(UseSSE42Intrinsics, "SSE4.2 is required");
//
// int_cnt2 is length of small (< 8 chars) constant substring
@@ -9172,6 +9711,7 @@ void MacroAssembler::string_indexof(Register str1, Register str2,
void MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
XMMRegister vec1) {
+ ShortBranchVerifier sbv(this);
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
// Compute the minimum of the string lengths and the
@@ -9308,6 +9848,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2) {
+ ShortBranchVerifier sbv(this);
Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
int length_offset = arrayOopDesc::length_offset_in_bytes();
@@ -9427,6 +9968,7 @@ void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Regist
void MacroAssembler::generate_fill(BasicType t, bool aligned,
Register to, Register value, Register count,
Register rtmp, XMMRegister xtmp) {
+ ShortBranchVerifier sbv(this);
assert_different_registers(to, value, count, rtmp);
Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
Label L_fill_2_bytes, L_fill_4_bytes;
diff --git a/src/cpu/x86/vm/assembler_x86.hpp b/src/cpu/x86/vm/assembler_x86.hpp
index d5c35dfdc..1a2d4afa6 100644
--- a/src/cpu/x86/vm/assembler_x86.hpp
+++ b/src/cpu/x86/vm/assembler_x86.hpp
@@ -503,7 +503,31 @@ class Assembler : public AbstractAssembler {
REX_WR = 0x4C,
REX_WRB = 0x4D,
REX_WRX = 0x4E,
- REX_WRXB = 0x4F
+ REX_WRXB = 0x4F,
+
+ VEX_3bytes = 0xC4,
+ VEX_2bytes = 0xC5
+ };
+
+ enum VexPrefix {
+ VEX_B = 0x20,
+ VEX_X = 0x40,
+ VEX_R = 0x80,
+ VEX_W = 0x80
+ };
+
+ enum VexSimdPrefix {
+ VEX_SIMD_NONE = 0x0,
+ VEX_SIMD_66 = 0x1,
+ VEX_SIMD_F3 = 0x2,
+ VEX_SIMD_F2 = 0x3
+ };
+
+ enum VexOpcode {
+ VEX_OPCODE_NONE = 0x0,
+ VEX_OPCODE_0F = 0x1,
+ VEX_OPCODE_0F_38 = 0x2,
+ VEX_OPCODE_0F_3A = 0x3
};
enum WhichOperand {
@@ -546,12 +570,99 @@ private:
void prefixq(Address adr);
void prefix(Address adr, Register reg, bool byteinst = false);
- void prefixq(Address adr, Register reg);
-
void prefix(Address adr, XMMRegister reg);
+ void prefixq(Address adr, Register reg);
+ void prefixq(Address adr, XMMRegister reg);
void prefetch_prefix(Address src);
+ void rex_prefix(Address adr, XMMRegister xreg,
+ VexSimdPrefix pre, VexOpcode opc, bool rex_w);
+ int rex_prefix_and_encode(int dst_enc, int src_enc,
+ VexSimdPrefix pre, VexOpcode opc, bool rex_w);
+
+ void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
+ int nds_enc, VexSimdPrefix pre, VexOpcode opc,
+ bool vector256);
+
+ void vex_prefix(Address adr, int nds_enc, int xreg_enc,
+ VexSimdPrefix pre, VexOpcode opc,
+ bool vex_w, bool vector256);
+
+ void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
+ VexSimdPrefix pre, bool vector256 = false) {
+ vex_prefix(src, nds->encoding(), dst->encoding(),
+ pre, VEX_OPCODE_0F, false, vector256);
+ }
+
+ int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
+ VexSimdPrefix pre, VexOpcode opc,
+ bool vex_w, bool vector256);
+
+ int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
+ VexSimdPrefix pre, bool vector256 = false) {
+ return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
+ pre, VEX_OPCODE_0F, false, vector256);
+ }
+
+ void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
+ VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
+ bool rex_w = false, bool vector256 = false);
+
+ void simd_prefix(XMMRegister dst, Address src,
+ VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
+ simd_prefix(dst, xnoreg, src, pre, opc);
+ }
+ void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
+ simd_prefix(src, dst, pre);
+ }
+ void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
+ VexSimdPrefix pre) {
+ bool rex_w = true;
+ simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
+ }
+
+
+ int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
+ VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
+ bool rex_w = false, bool vector256 = false);
+
+ int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
+ VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
+ return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
+ }
+
+ // Move/convert 32-bit integer value.
+ int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
+ VexSimdPrefix pre) {
+ // It is OK to cast from Register to XMMRegister to pass the argument here
+ // since only the encoding is used in simd_prefix_and_encode() and the number
+ // of general-purpose and XMM registers is the same.
+ return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
+ }
+ int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
+ return simd_prefix_and_encode(dst, xnoreg, src, pre);
+ }
+ int simd_prefix_and_encode(Register dst, XMMRegister src,
+ VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
+ return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
+ }
+
+ // Move/convert 64-bit integer value.
+ int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
+ VexSimdPrefix pre) {
+ bool rex_w = true;
+ return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w);
+ }
+ int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
+ return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
+ }
+ int simd_prefix_and_encode_q(Register dst, XMMRegister src,
+ VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
+ bool rex_w = true;
+ return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
+ }
+
// Helper functions for groups of instructions
void emit_arith_b(int op1, int op2, Register dst, int imm8);
@@ -764,6 +875,7 @@ private:
void addss(XMMRegister dst, Address src);
void addss(XMMRegister dst, XMMRegister src);
+ void andl(Address dst, int32_t imm32);
void andl(Register dst, int32_t imm32);
void andl(Register dst, Address src);
void andl(Register dst, Register src);
@@ -774,9 +886,11 @@ private:
void andq(Register dst, Register src);
// Bitwise Logical AND of Packed Double-Precision Floating-Point Values
- void andpd(XMMRegister dst, Address src);
void andpd(XMMRegister dst, XMMRegister src);
+ // Bitwise Logical AND of Packed Single-Precision Floating-Point Values
+ void andps(XMMRegister dst, XMMRegister src);
+
void bsfl(Register dst, Register src);
void bsrl(Register dst, Register src);
@@ -837,9 +951,11 @@ private:
// Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
void comisd(XMMRegister dst, Address src);
+ void comisd(XMMRegister dst, XMMRegister src);
// Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
void comiss(XMMRegister dst, Address src);
+ void comiss(XMMRegister dst, XMMRegister src);
// Identify processor type and features
void cpuid() {
@@ -849,14 +965,19 @@ private:
// Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
void cvtsd2ss(XMMRegister dst, XMMRegister src);
+ void cvtsd2ss(XMMRegister dst, Address src);
// Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
void cvtsi2sdl(XMMRegister dst, Register src);
+ void cvtsi2sdl(XMMRegister dst, Address src);
void cvtsi2sdq(XMMRegister dst, Register src);
+ void cvtsi2sdq(XMMRegister dst, Address src);
// Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
void cvtsi2ssl(XMMRegister dst, Register src);
+ void cvtsi2ssl(XMMRegister dst, Address src);
void cvtsi2ssq(XMMRegister dst, Register src);
+ void cvtsi2ssq(XMMRegister dst, Address src);
// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
void cvtdq2pd(XMMRegister dst, XMMRegister src);
@@ -866,6 +987,7 @@ private:
// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
void cvtss2sd(XMMRegister dst, XMMRegister src);
+ void cvtss2sd(XMMRegister dst, Address src);
// Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
void cvttsd2sil(Register dst, Address src);
@@ -1140,8 +1262,6 @@ private:
void movdq(Register dst, XMMRegister src);
// Move Aligned Double Quadword
- void movdqa(Address dst, XMMRegister src);
- void movdqa(XMMRegister dst, Address src);
void movdqa(XMMRegister dst, XMMRegister src);
// Move Unaligned Double Quadword
@@ -1261,10 +1381,18 @@ private:
void orq(Register dst, Address src);
void orq(Register dst, Register src);
+ // Pack with unsigned saturation
+ void packuswb(XMMRegister dst, XMMRegister src);
+ void packuswb(XMMRegister dst, Address src);
+
// SSE4.2 string instructions
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
void pcmpestri(XMMRegister xmm1, Address src, int imm8);
+ // SSE4.1 packed move
+ void pmovzxbw(XMMRegister dst, XMMRegister src);
+ void pmovzxbw(XMMRegister dst, Address src);
+
#ifndef _LP64 // no 32bit push/pop on amd64
void popl(Address dst);
#endif
@@ -1292,6 +1420,7 @@ private:
// POR - Bitwise logical OR
void por(XMMRegister dst, XMMRegister src);
+ void por(XMMRegister dst, Address src);
// Shuffle Packed Doublewords
void pshufd(XMMRegister dst, XMMRegister src, int mode);
@@ -1313,6 +1442,11 @@ private:
// Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src);
+ void punpcklbw(XMMRegister dst, Address src);
+
+ // Interleave Low Doublewords
+ void punpckldq(XMMRegister dst, XMMRegister src);
+ void punpckldq(XMMRegister dst, Address src);
#ifndef _LP64 // no 32bit push/pop on amd64
void pushl(Address src);
@@ -1429,6 +1563,13 @@ private:
void xchgq(Register reg, Address adr);
void xchgq(Register dst, Register src);
+ // Get Value of Extended Control Register
+ void xgetbv() {
+ emit_byte(0x0F);
+ emit_byte(0x01);
+ emit_byte(0xD0);
+ }
+
void xorl(Register dst, int32_t imm32);
void xorl(Register dst, Address src);
void xorl(Register dst, Register src);
@@ -1437,14 +1578,44 @@ private:
void xorq(Register dst, Register src);
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
- void xorpd(XMMRegister dst, Address src);
void xorpd(XMMRegister dst, XMMRegister src);
// Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
- void xorps(XMMRegister dst, Address src);
void xorps(XMMRegister dst, XMMRegister src);
void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
+
+ // AVX 3-operand instructions (encoded with a VEX prefix)
+ void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
+ void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+ void vaddss(XMMRegister dst, XMMRegister nds, Address src);
+ void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+ void vandpd(XMMRegister dst, XMMRegister nds, Address src);
+ void vandps(XMMRegister dst, XMMRegister nds, Address src);
+ void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
+ void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+ void vdivss(XMMRegister dst, XMMRegister nds, Address src);
+ void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+ void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
+ void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+ void vmulss(XMMRegister dst, XMMRegister nds, Address src);
+ void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+ void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
+ void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+ void vsubss(XMMRegister dst, XMMRegister nds, Address src);
+ void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+ void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
+ void vxorps(XMMRegister dst, XMMRegister nds, Address src);
+
+
+ protected:
+ // The following instructions require 16-byte address alignment in SSE mode.
+ // They should be called only from the corresponding MacroAssembler instructions.
+ void andpd(XMMRegister dst, Address src);
+ void andps(XMMRegister dst, Address src);
+ void xorpd(XMMRegister dst, Address src);
+ void xorps(XMMRegister dst, Address src);
+
};
@@ -2175,9 +2346,15 @@ class MacroAssembler: public Assembler {
void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
void andpd(XMMRegister dst, AddressLiteral src);
+ void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
+ void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
+ void andps(XMMRegister dst, AddressLiteral src);
+
+ void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
void comiss(XMMRegister dst, AddressLiteral src);
+ void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
void comisd(XMMRegister dst, AddressLiteral src);
@@ -2211,62 +2388,62 @@ private:
void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); }
void movss(XMMRegister dst, AddressLiteral src);
- void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); }
+ void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); }
void movlpd(XMMRegister dst, AddressLiteral src);
public:
void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); }
- void addsd(XMMRegister dst, AddressLiteral src) { Assembler::addsd(dst, as_Address(src)); }
+ void addsd(XMMRegister dst, AddressLiteral src);
void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); }
- void addss(XMMRegister dst, AddressLiteral src) { Assembler::addss(dst, as_Address(src)); }
+ void addss(XMMRegister dst, AddressLiteral src);
void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
- void divsd(XMMRegister dst, AddressLiteral src) { Assembler::divsd(dst, as_Address(src)); }
+ void divsd(XMMRegister dst, AddressLiteral src);
void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); }
void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
- void divss(XMMRegister dst, AddressLiteral src) { Assembler::divss(dst, as_Address(src)); }
+ void divss(XMMRegister dst, AddressLiteral src);
void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
- void movsd(XMMRegister dst, AddressLiteral src) { Assembler::movsd(dst, as_Address(src)); }
+ void movsd(XMMRegister dst, AddressLiteral src);
void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
- void mulsd(XMMRegister dst, AddressLiteral src) { Assembler::mulsd(dst, as_Address(src)); }
+ void mulsd(XMMRegister dst, AddressLiteral src);
void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
- void mulss(XMMRegister dst, AddressLiteral src) { Assembler::mulss(dst, as_Address(src)); }
+ void mulss(XMMRegister dst, AddressLiteral src);
void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); }
void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); }
- void sqrtsd(XMMRegister dst, AddressLiteral src) { Assembler::sqrtsd(dst, as_Address(src)); }
+ void sqrtsd(XMMRegister dst, AddressLiteral src);
void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); }
void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); }
- void sqrtss(XMMRegister dst, AddressLiteral src) { Assembler::sqrtss(dst, as_Address(src)); }
+ void sqrtss(XMMRegister dst, AddressLiteral src);
void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); }
void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); }
- void subsd(XMMRegister dst, AddressLiteral src) { Assembler::subsd(dst, as_Address(src)); }
+ void subsd(XMMRegister dst, AddressLiteral src);
void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); }
void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); }
- void subss(XMMRegister dst, AddressLiteral src) { Assembler::subss(dst, as_Address(src)); }
+ void subss(XMMRegister dst, AddressLiteral src);
void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
- void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
+ void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
void ucomiss(XMMRegister dst, AddressLiteral src);
void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
- void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
+ void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
void ucomisd(XMMRegister dst, AddressLiteral src);
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
@@ -2279,6 +2456,53 @@ public:
void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
void xorps(XMMRegister dst, AddressLiteral src);
+ // AVX 3-operand instructions
+
+ void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
+ void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); }
+ void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
+ void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
+ void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vandpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandpd(dst, nds, src); }
+ void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vandps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandps(dst, nds, src); }
+ void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
+ void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
+ void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
+ void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); }
+ void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
+ void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); }
+ void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
+ void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); }
+ void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
+ void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); }
+ void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
+ void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
+ void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); }
+ void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+ void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
+ void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+
// Data
void cmov32( Condition cc, Register dst, Address src);
diff --git a/src/cpu/x86/vm/assembler_x86.inline.hpp b/src/cpu/x86/vm/assembler_x86.inline.hpp
index 125bf3fff..bf299c6da 100644
--- a/src/cpu/x86/vm/assembler_x86.inline.hpp
+++ b/src/cpu/x86/vm/assembler_x86.inline.hpp
@@ -86,6 +86,7 @@ inline void Assembler::prefix(Address adr, Register reg, bool byteinst) {}
inline void Assembler::prefixq(Address adr, Register reg) {}
inline void Assembler::prefix(Address adr, XMMRegister reg) {}
+inline void Assembler::prefixq(Address adr, XMMRegister reg) {}
#else
inline void Assembler::emit_long64(jlong x) {
*(jlong*) _code_pos = x;
diff --git a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp
index f276df9e5..fe5495dda 100644
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp
@@ -320,7 +320,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) {
// begin_initialized_entry_offset has to fit in a byte. Also, we know it's not null.
__ load_heap_oop_not_null(tmp2, Address(_obj, java_lang_Class::klass_offset_in_bytes()));
__ get_thread(tmp);
- __ cmpptr(tmp, Address(tmp2, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc)));
+ __ cmpptr(tmp, Address(tmp2, instanceKlass::init_thread_offset()));
__ pop(tmp2);
__ pop(tmp);
__ jcc(Assembler::notEqual, call_patch);
@@ -519,7 +519,7 @@ void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
__ load_klass(tmp_reg, src_reg);
- Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc));
+ Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
__ cmpl(ref_type_adr, REF_NONE);
__ jcc(Assembler::equal, _continuation);
diff --git a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
index b6035c291..3c24feb72 100644
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
@@ -1558,7 +1558,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
if (op->init_check()) {
__ cmpb(Address(op->klass()->as_register(),
- instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)),
+ instanceKlass::init_state_offset()),
instanceKlass::fully_initialized);
add_debug_info_for_null_check_here(op->stub()->info());
__ jcc(Assembler::notEqual, *op->stub()->entry());
@@ -1730,7 +1730,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
#else
__ cmpoop(Address(klass_RInfo, k->super_check_offset()), k->constant_encoding());
#endif // _LP64
- if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
+ if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) {
__ jcc(Assembler::notEqual, *failure_target);
// successful cast, fall through to profile or jump
} else {
@@ -1842,7 +1842,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ load_klass(klass_RInfo, value);
// get instance klass (it's already uncompressed)
- __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
+ __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset()));
// perform the fast part of the checking logic
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
// call out-of-line instance of __ check_klass_subtype_slow_path(...):
@@ -3289,8 +3289,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
} else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
__ load_klass(tmp, dst);
}
- int lh_offset = klassOopDesc::header_size() * HeapWordSize +
- Klass::layout_helper_offset_in_bytes();
+ int lh_offset = in_bytes(Klass::layout_helper_offset());
Address klass_lh_addr(tmp, lh_offset);
jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
__ cmpl(klass_lh_addr, objArray_lh);
@@ -3307,9 +3306,9 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
#ifndef _LP64
__ movptr(tmp, dst_klass_addr);
- __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
+ __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset()));
__ push(tmp);
- __ movl(tmp, Address(tmp, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
+ __ movl(tmp, Address(tmp, Klass::super_check_offset_offset()));
__ push(tmp);
__ push(length);
__ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
@@ -3333,15 +3332,15 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
// Allocate abi space for args but be sure to keep stack aligned
__ subptr(rsp, 6*wordSize);
__ load_klass(c_rarg3, dst);
- __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
+ __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset()));
store_parameter(c_rarg3, 4);
- __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
+ __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset()));
__ call(RuntimeAddress(copyfunc_addr));
__ addptr(rsp, 6*wordSize);
#else
__ load_klass(c_rarg4, dst);
- __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
- __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
+ __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset()));
+ __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset()));
__ call(RuntimeAddress(copyfunc_addr));
#endif
diff --git a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp
index d386a99a0..ad5075714 100644
--- a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp
+++ b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp
@@ -150,7 +150,7 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
assert_different_registers(obj, klass, len);
if (UseBiasedLocking && !len->is_valid()) {
assert_different_registers(obj, klass, len, t1, t2);
- movptr(t1, Address(klass, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+ movptr(t1, Address(klass, Klass::prototype_header_offset()));
movptr(Address(obj, oopDesc::mark_offset_in_bytes()), t1);
} else {
// This assumes that all prototype bits fit in an int32_t
diff --git a/src/cpu/x86/vm/c1_Runtime1_x86.cpp b/src/cpu/x86/vm/c1_Runtime1_x86.cpp
index cc71ccee3..5f2cf3886 100644
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp
@@ -1011,7 +1011,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
if (id == fast_new_instance_init_check_id) {
// make sure the klass is initialized
- __ cmpb(Address(klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized);
+ __ cmpb(Address(klass, instanceKlass::init_state_offset()), instanceKlass::fully_initialized);
__ jcc(Assembler::notEqual, slow_path);
}
@@ -1019,7 +1019,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
// assert object can be fast path allocated
{
Label ok, not_ok;
- __ movl(obj_size, Address(klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
+ __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
__ cmpl(obj_size, 0); // make sure it's an instance (LH > 0)
__ jcc(Assembler::lessEqual, not_ok);
__ testl(obj_size, Klass::_lh_instance_slow_path_bit);
@@ -1040,7 +1040,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ bind(retry_tlab);
// get the instance size (size is positive so movl is fine for 64bit)
- __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+ __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
__ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
@@ -1052,7 +1052,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ bind(try_eden);
// get the instance size (size is positive so movl is fine for 64bit)
- __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+ __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
__ eden_allocate(obj, obj_size, 0, t1, slow_path);
__ incr_allocated_bytes(thread, obj_size, 0);
@@ -1119,7 +1119,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
{
Label ok;
Register t0 = obj;
- __ movl(t0, Address(klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
+ __ movl(t0, Address(klass, Klass::layout_helper_offset()));
__ sarl(t0, Klass::_lh_array_tag_shift);
int tag = ((id == new_type_array_id)
? Klass::_lh_array_tag_type_value
@@ -1153,7 +1153,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
// get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
// since size is positive movl does right thing on 64bit
- __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+ __ movl(t1, Address(klass, Klass::layout_helper_offset()));
// since size is positive movl does right thing on 64bit
__ movl(arr_size, length);
assert(t1 == rcx, "fixed register usage");
@@ -1167,7 +1167,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path); // preserves arr_size
__ initialize_header(obj, klass, length, t1, t2);
- __ movb(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes() + (Klass::_lh_header_size_shift / BitsPerByte)));
+ __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
__ andptr(t1, Klass::_lh_header_size_mask);
@@ -1180,7 +1180,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ bind(try_eden);
// get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
// since size is positive movl does right thing on 64bit
- __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+ __ movl(t1, Address(klass, Klass::layout_helper_offset()));
// since size is positive movl does right thing on 64bit
__ movl(arr_size, length);
assert(t1 == rcx, "fixed register usage");
@@ -1195,7 +1195,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ incr_allocated_bytes(thread, arr_size, 0);
__ initialize_header(obj, klass, length, t1, t2);
- __ movb(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes() + (Klass::_lh_header_size_shift / BitsPerByte)));
+ __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
__ andptr(t1, Klass::_lh_header_size_mask);
@@ -1267,7 +1267,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
Label register_finalizer;
Register t = rsi;
__ load_klass(t, rax);
- __ movl(t, Address(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
+ __ movl(t, Address(t, Klass::access_flags_offset()));
__ testl(t, JVM_ACC_HAS_FINALIZER);
__ jcc(Assembler::notZero, register_finalizer);
__ ret(0);
diff --git a/src/cpu/x86/vm/cppInterpreter_x86.cpp b/src/cpu/x86/vm/cppInterpreter_x86.cpp
index 226c6cbc6..b9a5c2293 100644
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp
@@ -511,7 +511,7 @@ void CppInterpreterGenerator::generate_compute_interpreter_state(const Register
// get synchronization object
Label done;
- const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ movl(rax, access_flags);
__ testl(rax, JVM_ACC_STATIC);
__ movptr(rax, Address(locals, 0)); // get receiver (assume this is frequent case)
@@ -763,7 +763,7 @@ void InterpreterGenerator::lock_method(void) {
#endif // ASSERT
// get synchronization object
{ Label done;
- const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ movl(rax, access_flags);
__ movptr(rdi, STATE(_locals)); // prepare to get receiver (assume common case)
__ testl(rax, JVM_ACC_STATIC);
@@ -1180,7 +1180,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
// pass mirror handle if static call
{ Label L;
- const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ movl(t, Address(method, methodOopDesc::access_flags_offset()));
__ testl(t, JVM_ACC_STATIC);
__ jcc(Assembler::zero, L);
diff --git a/src/cpu/x86/vm/methodHandles_x86.cpp b/src/cpu/x86/vm/methodHandles_x86.cpp
index 254d087df..7d987e588 100644
--- a/src/cpu/x86/vm/methodHandles_x86.cpp
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp
@@ -1160,7 +1160,7 @@ void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHan
Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() );
Address vmarg; // __ argument_address(vmargslot)
- const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+ const int java_mirror_offset = in_bytes(Klass::java_mirror_offset());
if (have_entry(ek)) {
__ nop(); // empty stubs make SG sick
diff --git a/src/cpu/x86/vm/nativeInst_x86.cpp b/src/cpu/x86/vm/nativeInst_x86.cpp
index 7ec07737f..1cf509992 100644
--- a/src/cpu/x86/vm/nativeInst_x86.cpp
+++ b/src/cpu/x86/vm/nativeInst_x86.cpp
@@ -237,9 +237,21 @@ int NativeMovRegMem::instruction_start() const {
int off = 0;
u_char instr_0 = ubyte_at(off);
+ // See comment in Assembler::locate_operand() about VEX prefixes.
+ if (instr_0 == instruction_VEX_prefix_2bytes) {
+ assert((UseAVX > 0), "shouldn't have VEX prefix");
+ NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
+ return 2;
+ }
+ if (instr_0 == instruction_VEX_prefix_3bytes) {
+ assert((UseAVX > 0), "shouldn't have VEX prefix");
+ NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
+ return 3;
+ }
+
// First check to see if we have a (prefixed or not) xor
- if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
- instr_0 <= instruction_prefix_wide_hi) { // 0x4f
+ if (instr_0 >= instruction_prefix_wide_lo && // 0x40
+ instr_0 <= instruction_prefix_wide_hi) { // 0x4f
off++;
instr_0 = ubyte_at(off);
}
@@ -256,13 +268,13 @@ int NativeMovRegMem::instruction_start() const {
instr_0 = ubyte_at(off);
}
- if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3
+ if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3
instr_0 == instruction_code_xmm_sd_prefix) { // 0xf2
off++;
instr_0 = ubyte_at(off);
}
- if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
+ if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
instr_0 <= instruction_prefix_wide_hi) { // 0x4f
off++;
instr_0 = ubyte_at(off);
diff --git a/src/cpu/x86/vm/nativeInst_x86.hpp b/src/cpu/x86/vm/nativeInst_x86.hpp
index fc7a1ab07..470e971fe 100644
--- a/src/cpu/x86/vm/nativeInst_x86.hpp
+++ b/src/cpu/x86/vm/nativeInst_x86.hpp
@@ -287,6 +287,9 @@ class NativeMovRegMem: public NativeInstruction {
instruction_code_xmm_store = 0x11,
instruction_code_xmm_lpd = 0x12,
+ instruction_VEX_prefix_2bytes = Assembler::VEX_2bytes,
+ instruction_VEX_prefix_3bytes = Assembler::VEX_3bytes,
+
instruction_size = 4,
instruction_offset = 0,
data_offset = 2,
diff --git a/src/cpu/x86/vm/register_definitions_x86.cpp b/src/cpu/x86/vm/register_definitions_x86.cpp
index f1fd229b9..7165872c2 100644
--- a/src/cpu/x86/vm/register_definitions_x86.cpp
+++ b/src/cpu/x86/vm/register_definitions_x86.cpp
@@ -53,6 +53,7 @@ REGISTER_DEFINITION(Register, r14);
REGISTER_DEFINITION(Register, r15);
#endif // AMD64
+REGISTER_DEFINITION(XMMRegister, xnoreg);
REGISTER_DEFINITION(XMMRegister, xmm0 );
REGISTER_DEFINITION(XMMRegister, xmm1 );
REGISTER_DEFINITION(XMMRegister, xmm2 );
@@ -115,6 +116,7 @@ REGISTER_DEFINITION(Register, r12_heapbase);
REGISTER_DEFINITION(Register, r15_thread);
#endif // AMD64
+REGISTER_DEFINITION(MMXRegister, mnoreg );
REGISTER_DEFINITION(MMXRegister, mmx0 );
REGISTER_DEFINITION(MMXRegister, mmx1 );
REGISTER_DEFINITION(MMXRegister, mmx2 );
diff --git a/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
index 6d98c56ad..4d4e66f60 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -1374,8 +1374,7 @@ class StubGenerator: public StubCodeGenerator {
// L_success, L_failure, NULL);
assert_different_registers(sub_klass, temp);
- int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::secondary_super_cache_offset_in_bytes());
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
// if the pointers are equal, we are done (e.g., String[] elements)
__ cmpptr(sub_klass, super_klass_addr);
@@ -1787,8 +1786,7 @@ class StubGenerator: public StubCodeGenerator {
// array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
//
- int lh_offset = klassOopDesc::header_size() * HeapWordSize +
- Klass::layout_helper_offset_in_bytes();
+ int lh_offset = in_bytes(Klass::layout_helper_offset());
Address src_klass_lh_addr(rcx_src_klass, lh_offset);
// Handle objArrays completely differently...
@@ -1914,10 +1912,8 @@ class StubGenerator: public StubCodeGenerator {
// live at this point: rcx_src_klass, dst[_pos], src[_pos]
{
// Handy offsets:
- int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
- objArrayKlass::element_klass_offset_in_bytes());
- int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::super_check_offset_offset_in_bytes());
+ int ek_offset = in_bytes(objArrayKlass::element_klass_offset());
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
Register rsi_dst_klass = rsi;
Register rdi_temp = rdi;
diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
index e5ef3f4de..a6fcc7820 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -2261,8 +2261,7 @@ class StubGenerator: public StubCodeGenerator {
// The ckoff and ckval must be mutually consistent,
// even though caller generates both.
{ Label L;
- int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::super_check_offset_offset_in_bytes());
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
__ cmpl(ckoff, Address(ckval, sco_offset));
__ jcc(Assembler::equal, L);
__ stop("super_check_offset inconsistent");
@@ -2572,8 +2571,7 @@ class StubGenerator: public StubCodeGenerator {
// array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
//
- const int lh_offset = klassOopDesc::header_size() * HeapWordSize +
- Klass::layout_helper_offset_in_bytes();
+ const int lh_offset = in_bytes(Klass::layout_helper_offset());
// Handle objArrays completely differently...
const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
@@ -2722,15 +2720,13 @@ class StubGenerator: public StubCodeGenerator {
assert_clean_int(count, sco_temp);
// Generate the type check.
- const int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
- Klass::super_check_offset_offset_in_bytes());
+ const int sco_offset = in_bytes(Klass::super_check_offset_offset());
__ movl(sco_temp, Address(r11_dst_klass, sco_offset));
assert_clean_int(sco_temp, rax);
generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);
// Fetch destination element klass from the objArrayKlass header.
- int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
- objArrayKlass::element_klass_offset_in_bytes());
+ int ek_offset = in_bytes(objArrayKlass::element_klass_offset());
__ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
__ movl( sco_temp, Address(r11_dst_klass, sco_offset));
assert_clean_int(sco_temp, rax);
diff --git a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
index 80396fd70..29533832e 100644
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
@@ -561,7 +561,7 @@ void InterpreterGenerator::lock_method(void) {
#endif // ASSERT
// get synchronization object
{ Label done;
- const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ movl(rax, access_flags);
__ testl(rax, JVM_ACC_STATIC);
__ movptr(rax, Address(rdi, Interpreter::local_offset_in_bytes(0))); // get receiver (assume this is frequent case)
@@ -1021,7 +1021,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
// pass mirror handle if static call
{ Label L;
- const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ movl(t, Address(method, methodOopDesc::access_flags_offset()));
__ testl(t, JVM_ACC_STATIC);
__ jcc(Assembler::zero, L);
diff --git a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
index b179b9e3d..110d8ebdf 100644
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
@@ -515,8 +515,7 @@ void InterpreterGenerator::lock_method(void) {
// get synchronization object
{
- const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() +
- Klass::java_mirror_offset_in_bytes();
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
Label done;
__ movl(rax, access_flags);
__ testl(rax, JVM_ACC_STATIC);
@@ -1016,8 +1015,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
// pass mirror handle if static call
{
Label L;
- const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() +
- Klass::java_mirror_offset_in_bytes();
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ movl(t, Address(method, methodOopDesc::access_flags_offset()));
__ testl(t, JVM_ACC_STATIC);
__ jcc(Assembler::zero, L);
diff --git a/src/cpu/x86/vm/templateTable_x86_32.cpp b/src/cpu/x86/vm/templateTable_x86_32.cpp
index 955101721..1cbc67e60 100644
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp
@@ -980,7 +980,7 @@ void TemplateTable::aastore() {
__ load_klass(rbx, rax);
// Move superklass into EAX
__ load_klass(rax, rdx);
- __ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
+ __ movptr(rax, Address(rax, objArrayKlass::element_klass_offset()));
// Compress array+index*wordSize+12 into a single register. Frees ECX.
__ lea(rdx, element_address);
@@ -2033,7 +2033,7 @@ void TemplateTable::_return(TosState state) {
assert(state == vtos, "only valid state");
__ movptr(rax, aaddress(0));
__ load_klass(rdi, rax);
- __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
+ __ movl(rdi, Address(rdi, Klass::access_flags_offset()));
__ testl(rdi, JVM_ACC_HAS_FINALIZER);
Label skip_register_finalizer;
__ jcc(Assembler::zero, skip_register_finalizer);
@@ -3188,11 +3188,11 @@ void TemplateTable::_new() {
// make sure klass is initialized & doesn't have finalizer
// make sure klass is fully initialized
- __ cmpb(Address(rcx, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized);
+ __ cmpb(Address(rcx, instanceKlass::init_state_offset()), instanceKlass::fully_initialized);
__ jcc(Assembler::notEqual, slow_case);
// get instance_size in instanceKlass (scaled to a count of bytes)
- __ movl(rdx, Address(rcx, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
+ __ movl(rdx, Address(rcx, Klass::layout_helper_offset()));
// test to see if it has a finalizer or is malformed in some way
__ testl(rdx, Klass::_lh_instance_slow_path_bit);
__ jcc(Assembler::notZero, slow_case);
@@ -3293,7 +3293,7 @@ void TemplateTable::_new() {
__ bind(initialize_header);
if (UseBiasedLocking) {
__ pop(rcx); // get saved klass back in the register.
- __ movptr(rbx, Address(rcx, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+ __ movptr(rbx, Address(rcx, Klass::prototype_header_offset()));
__ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx);
} else {
__ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()),
diff --git a/src/cpu/x86/vm/templateTable_x86_64.cpp b/src/cpu/x86/vm/templateTable_x86_64.cpp
index cc837c789..0e5ac274f 100644
--- a/src/cpu/x86/vm/templateTable_x86_64.cpp
+++ b/src/cpu/x86/vm/templateTable_x86_64.cpp
@@ -1004,8 +1004,7 @@ void TemplateTable::aastore() {
// Move superklass into rax
__ load_klass(rax, rdx);
__ movptr(rax, Address(rax,
- sizeof(oopDesc) +
- objArrayKlass::element_klass_offset_in_bytes()));
+ objArrayKlass::element_klass_offset()));
// Compress array + index*oopSize + 12 into a single register. Frees rcx.
__ lea(rdx, element_address);
@@ -2067,7 +2066,7 @@ void TemplateTable::_return(TosState state) {
assert(state == vtos, "only valid state");
__ movptr(c_rarg1, aaddress(0));
__ load_klass(rdi, c_rarg1);
- __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
+ __ movl(rdi, Address(rdi, Klass::access_flags_offset()));
__ testl(rdi, JVM_ACC_HAS_FINALIZER);
Label skip_register_finalizer;
__ jcc(Assembler::zero, skip_register_finalizer);
@@ -3236,15 +3235,14 @@ void TemplateTable::_new() {
// make sure klass is initialized & doesn't have finalizer
// make sure klass is fully initialized
__ cmpb(Address(rsi,
- instanceKlass::init_state_offset_in_bytes() +
- sizeof(oopDesc)),
+ instanceKlass::init_state_offset()),
instanceKlass::fully_initialized);
__ jcc(Assembler::notEqual, slow_case);
// get instance_size in instanceKlass (scaled to a count of bytes)
__ movl(rdx,
Address(rsi,
- Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
+ Klass::layout_helper_offset()));
// test to see if it has a finalizer or is malformed in some way
__ testl(rdx, Klass::_lh_instance_slow_path_bit);
__ jcc(Assembler::notZero, slow_case);
@@ -3337,7 +3335,7 @@ void TemplateTable::_new() {
// initialize object header only.
__ bind(initialize_header);
if (UseBiasedLocking) {
- __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+ __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset()));
__ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), rscratch1);
} else {
__ movptr(Address(rax, oopDesc::mark_offset_in_bytes()),
diff --git a/src/cpu/x86/vm/vm_version_x86.cpp b/src/cpu/x86/vm/vm_version_x86.cpp
index fe02223f7..2155d7679 100644
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -50,7 +50,7 @@ const char* VM_Version::_features_str = "";
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
static BufferBlob* stub_blob;
-static const int stub_size = 500;
+static const int stub_size = 550;
extern "C" {
typedef void (*getPsrInfo_stub_t)(void*);
@@ -73,7 +73,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
- Label ext_cpuid1, ext_cpuid5, ext_cpuid7, done;
+ Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done;
StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
# define __ _masm->
@@ -229,6 +229,41 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
+ //
+ // Check if OS has enabled XGETBV instruction to access XCR0
+ // (OSXSAVE feature flag) and CPU supports AVX
+ //
+ __ andl(rcx, 0x18000000);
+ __ cmpl(rcx, 0x18000000);
+ __ jccb(Assembler::notEqual, sef_cpuid);
+
+ //
+ // XCR0, XFEATURE_ENABLED_MASK register
+ //
+ __ xorl(rcx, rcx); // zero for XCR0 register
+ __ xgetbv();
+ __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
+ __ movl(Address(rsi, 0), rax);
+ __ movl(Address(rsi, 4), rdx);
+
+ //
+ // cpuid(0x7) Structured Extended Features
+ //
+ __ bind(sef_cpuid);
+ __ movl(rax, 7);
+ __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
+ __ jccb(Assembler::greater, ext_cpuid);
+
+ __ xorl(rcx, rcx);
+ __ cpuid();
+ __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
+ __ movl(Address(rsi, 0), rax);
+ __ movl(Address(rsi, 4), rbx);
+
+ //
+ // Extended cpuid(0x80000000)
+ //
+ __ bind(ext_cpuid);
__ movl(rax, 0x80000000);
__ cpuid();
__ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
@@ -373,13 +408,19 @@ void VM_Version::get_processor_features() {
if (UseSSE < 1)
_cpuFeatures &= ~CPU_SSE;
+ if (UseAVX < 2)
+ _cpuFeatures &= ~CPU_AVX2;
+
+ if (UseAVX < 1)
+ _cpuFeatures &= ~CPU_AVX;
+
if (logical_processors_per_package() == 1) {
// HT processor could be installed on a system which doesn't support HT.
_cpuFeatures &= ~CPU_HT;
}
char buf[256];
- jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping,
(supports_cmov() ? ", cmov" : ""),
@@ -393,6 +434,8 @@ void VM_Version::get_processor_features() {
(supports_sse4_1() ? ", sse4.1" : ""),
(supports_sse4_2() ? ", sse4.2" : ""),
(supports_popcnt() ? ", popcnt" : ""),
+ (supports_avx() ? ", avx" : ""),
+ (supports_avx2() ? ", avx2" : ""),
(supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
(supports_lzcnt() ? ", lzcnt": ""),
@@ -406,17 +449,24 @@ void VM_Version::get_processor_features() {
// UseSSE is set to the smaller of what hardware supports and what
// the command line requires. I.e., you cannot set UseSSE to 2 on
// older Pentiums which do not support it.
- if( UseSSE > 4 ) UseSSE=4;
- if( UseSSE < 0 ) UseSSE=0;
- if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
+ if (UseSSE > 4) UseSSE=4;
+ if (UseSSE < 0) UseSSE=0;
+ if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
UseSSE = MIN2((intx)3,UseSSE);
- if( !supports_sse3() ) // Drop to 2 if no SSE3 support
+ if (!supports_sse3()) // Drop to 2 if no SSE3 support
UseSSE = MIN2((intx)2,UseSSE);
- if( !supports_sse2() ) // Drop to 1 if no SSE2 support
+ if (!supports_sse2()) // Drop to 1 if no SSE2 support
UseSSE = MIN2((intx)1,UseSSE);
- if( !supports_sse () ) // Drop to 0 if no SSE support
+ if (!supports_sse ()) // Drop to 0 if no SSE support
UseSSE = 0;
+ if (UseAVX > 2) UseAVX=2;
+ if (UseAVX < 0) UseAVX=0;
+ if (!supports_avx2()) // Drop to 1 if no AVX2 support
+ UseAVX = MIN2((intx)1,UseAVX);
+ if (!supports_avx ()) // Drop to 0 if no AVX support
+ UseAVX = 0;
+
// On new cpus instructions which update whole XMM register should be used
// to prevent partial register stall due to dependencies on high half.
//
@@ -551,6 +601,9 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
UsePopCountInstruction = true;
}
+ } else if (UsePopCountInstruction) {
+ warning("POPCNT instruction is not available on this CPU");
+ FLAG_SET_DEFAULT(UsePopCountInstruction, false);
}
#ifdef COMPILER2
@@ -622,7 +675,11 @@ void VM_Version::get_processor_features() {
if (PrintMiscellaneous && Verbose) {
tty->print_cr("Logical CPUs per core: %u",
logical_processors_per_package());
- tty->print_cr("UseSSE=%d",UseSSE);
+ tty->print("UseSSE=%d",UseSSE);
+ if (UseAVX > 0) {
+ tty->print(" UseAVX=%d",UseAVX);
+ }
+ tty->cr();
tty->print("Allocation");
if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
tty->print_cr(": no prefetching");
diff --git a/src/cpu/x86/vm/vm_version_x86.hpp b/src/cpu/x86/vm/vm_version_x86.hpp
index 47c4a1c27..27f3bde57 100644
--- a/src/cpu/x86/vm/vm_version_x86.hpp
+++ b/src/cpu/x86/vm/vm_version_x86.hpp
@@ -78,7 +78,10 @@ public:
sse4_2 : 1,
: 2,
popcnt : 1,
- : 8;
+ : 3,
+ osxsave : 1,
+ avx : 1,
+ : 3;
} bits;
};
@@ -168,6 +171,15 @@ public:
} bits;
};
+ union ExtCpuid7Edx { // overlays one raw 32-bit word with named bit-fields
+ uint32_t value; // the whole word, for loading/storing in one piece
+ struct {
+ uint32_t : 8, // 8 unnamed bits are declared ahead of the flag
+ tsc_invariance : 1, // tested as ext_cpuid7_edx.bits.tsc_invariance when feature flags are assembled
+ : 23; // remaining unnamed bits; 8 + 1 + 23 = 32
+ } bits;
+ };
+
union ExtCpuid8Ecx {
uint32_t value;
struct {
@@ -176,12 +188,31 @@ public:
} bits;
};
- union ExtCpuid7Edx {
+ union SefCpuid7Eax {
+ uint32_t value;
+ };
+
+ union SefCpuid7Ebx {
uint32_t value;
struct {
- uint32_t : 8,
- tsc_invariance : 1,
- : 23;
+ uint32_t fsgsbase : 1,
+ : 2,
+ bmi1 : 1,
+ : 1,
+ avx2 : 1,
+ : 2,
+ bmi2 : 1,
+ : 23;
+ } bits;
+ };
+
+ union XemXcr0Eax {
+ uint32_t value;
+ struct {
+ uint32_t x87 : 1,
+ sse : 1,
+ ymm : 1,
+ : 29;
} bits;
};
@@ -211,7 +242,9 @@ protected:
CPU_POPCNT = (1 << 13),
CPU_LZCNT = (1 << 14),
CPU_TSC = (1 << 15),
- CPU_TSCINV = (1 << 16)
+ CPU_TSCINV = (1 << 16),
+ CPU_AVX = (1 << 17),
+ CPU_AVX2 = (1 << 18)
} cpuFeatureFlags;
enum {
@@ -250,6 +283,12 @@ protected:
uint32_t dcp_cpuid4_ecx; // unused currently
uint32_t dcp_cpuid4_edx; // unused currently
+ // cpuid function 7 (structured extended features)
+ SefCpuid7Eax sef_cpuid7_eax;
+ SefCpuid7Ebx sef_cpuid7_ebx;
+ uint32_t sef_cpuid7_ecx; // unused currently
+ uint32_t sef_cpuid7_edx; // unused currently
+
// cpuid function 0xB (processor topology)
// ecx = 0
uint32_t tpl_cpuidB0_eax;
@@ -303,6 +342,10 @@ protected:
uint32_t ext_cpuid8_ebx; // reserved
ExtCpuid8Ecx ext_cpuid8_ecx;
uint32_t ext_cpuid8_edx; // reserved
+
+ // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
+ XemXcr0Eax xem_xcr0_eax;
+ uint32_t xem_xcr0_edx; // reserved
};
// The actual cpuid info block
@@ -360,6 +403,14 @@ protected:
result |= CPU_SSE4_2;
if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
result |= CPU_POPCNT;
+ if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
+ _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
+ _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
+ _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
+ result |= CPU_AVX;
+ if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
+ result |= CPU_AVX2;
+ }
if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
result |= CPU_TSC;
if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
@@ -386,6 +437,7 @@ public:
static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
+ static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
@@ -393,6 +445,7 @@ public:
static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
+ static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
// Initialization
static void initialize();
@@ -483,6 +536,8 @@ public:
static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; }
+ static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; }
+ static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; }
static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; }
// Intel features
diff --git a/src/cpu/x86/vm/x86.ad b/src/cpu/x86/vm/x86.ad
new file mode 100644
index 000000000..5f165a9ff
--- /dev/null
+++ b/src/cpu/x86/vm/x86.ad
@@ -0,0 +1,777 @@
+//
+// Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// X86 Common Architecture Description File
+
+source %{
+ // Float masks come from different places depending on platform: each accessor below returns the address of one sign-mask or sign-flip constant.
+#ifdef _LP64 // _LP64 builds forward to the StubRoutines::x86 accessors
+ static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
+ static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
+ static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
+ static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
+#else // !_LP64: return the address of the matching *_pool symbol instead
+ static address float_signmask() { return (address)float_signmask_pool; }
+ static address float_signflip() { return (address)float_signflip_pool; }
+ static address double_signmask() { return (address)double_signmask_pool; }
+ static address double_signflip() { return (address)double_signflip_pool; }
+#endif // _LP64
+%}
+
+// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
+
+instruct addF_reg(regF dst, regF src) %{ // float add, register source, emitted as addss
+ predicate((UseSSE>=1) && (UseAVX == 0)); // SSE-only form; vaddF_reg is the rule for UseAVX > 0
+ match(Set dst (AddF dst src)); // two-operand pattern: dst is both the left input and the result
+
+ format %{ "addss $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ addss($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct addF_mem(regF dst, memory src) %{ // float add with a memory source, emitted as addss
+ predicate((UseSSE>=1) && (UseAVX == 0)); // SSE-only form; vaddF_mem is the rule for UseAVX > 0
+ match(Set dst (AddF dst (LoadF src))); // the LoadF of src is folded into the add; dst is input and result
+
+ format %{ "addss $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ addss($dst$$XMMRegister, $src$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct addF_imm(regF dst, immF con) %{ // float add of a constant, emitted as addss
+ predicate((UseSSE>=1) && (UseAVX == 0)); // SSE-only form; vaddF_imm is the rule for UseAVX > 0
+ match(Set dst (AddF dst con)); // con is addressed through $constantaddress in the encoding below
+ format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ addss($dst$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddF_reg(regF dst, regF src1, regF src2) %{ // float add, register sources, emitted as vaddss
+ predicate(UseAVX > 0); // AVX form; addF_reg is the rule when UseAVX == 0
+ match(Set dst (AddF src1 src2)); // three-operand pattern: dst need not be one of the inputs
+
+ format %{ "vaddss $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddF_mem(regF dst, regF src1, memory src2) %{ // float add with a memory right operand, emitted as vaddss
+ predicate(UseAVX > 0); // AVX form; addF_mem is the rule when UseAVX == 0
+ match(Set dst (AddF src1 (LoadF src2))); // the LoadF of src2 is folded into the add
+
+ format %{ "vaddss $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddF_imm(regF dst, regF src, immF con) %{ // float add of a constant, emitted as vaddss
+ predicate(UseAVX > 0); // AVX form; addF_imm is the rule when UseAVX == 0
+ match(Set dst (AddF src con)); // con is addressed through $constantaddress in the encoding below
+
+ format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct addD_reg(regD dst, regD src) %{ // double add, register source, emitted as addsd
+ predicate((UseSSE>=2) && (UseAVX == 0)); // needs UseSSE >= 2 (the float rules need only 1); vaddD_reg is the rule for UseAVX > 0
+ match(Set dst (AddD dst src)); // two-operand pattern: dst is both the left input and the result
+
+ format %{ "addsd $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ addsd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct addD_mem(regD dst, memory src) %{ // double add with a memory source, emitted as addsd
+ predicate((UseSSE>=2) && (UseAVX == 0)); // needs UseSSE >= 2; vaddD_mem is the rule for UseAVX > 0
+ match(Set dst (AddD dst (LoadD src))); // the LoadD of src is folded into the add; dst is input and result
+
+ format %{ "addsd $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ addsd($dst$$XMMRegister, $src$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct addD_imm(regD dst, immD con) %{ // double add of a constant, emitted as addsd
+ predicate((UseSSE>=2) && (UseAVX == 0)); // needs UseSSE >= 2; vaddD_imm is the rule for UseAVX > 0
+ match(Set dst (AddD dst con)); // con is addressed through $constantaddress in the encoding below
+ format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ addsd($dst$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddD_reg(regD dst, regD src1, regD src2) %{ // double add, register sources, emitted as vaddsd
+ predicate(UseAVX > 0); // AVX form; addD_reg is the rule when UseAVX == 0
+ match(Set dst (AddD src1 src2)); // three-operand pattern: dst need not be one of the inputs
+
+ format %{ "vaddsd $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddD_mem(regD dst, regD src1, memory src2) %{ // double add with a memory right operand, emitted as vaddsd
+ predicate(UseAVX > 0); // AVX form; addD_mem is the rule when UseAVX == 0
+ match(Set dst (AddD src1 (LoadD src2))); // the LoadD of src2 is folded into the add
+
+ format %{ "vaddsd $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddD_imm(regD dst, regD src, immD con) %{ // double add of a constant, emitted as vaddsd
+ predicate(UseAVX > 0); // AVX form; addD_imm is the rule when UseAVX == 0
+ match(Set dst (AddD src con)); // con is addressed through $constantaddress in the encoding below
+
+ format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct subF_reg(regF dst, regF src) %{ // float subtract, register source, emitted as subss
+ predicate((UseSSE>=1) && (UseAVX == 0)); // SSE-only form; vsubF_reg is the rule for UseAVX > 0
+ match(Set dst (SubF dst src)); // matches dst - src with the result written back to dst
+
+ format %{ "subss $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ subss($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct subF_mem(regF dst, memory src) %{ // float subtract with a memory subtrahend, emitted as subss
+ predicate((UseSSE>=1) && (UseAVX == 0)); // SSE-only form; vsubF_mem is the rule for UseAVX > 0
+ match(Set dst (SubF dst (LoadF src))); // matches dst - load(src); the LoadF is folded into the instruction
+
+ format %{ "subss $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ subss($dst$$XMMRegister, $src$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct subF_imm(regF dst, immF con) %{ // float subtract of a constant, emitted as subss
+ predicate((UseSSE>=1) && (UseAVX == 0)); // SSE-only form; vsubF_imm is the rule for UseAVX > 0
+ match(Set dst (SubF dst con)); // matches dst - con; con is addressed through $constantaddress below
+ format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ subss($dst$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubF_reg(regF dst, regF src1, regF src2) %{ // float subtract, register sources, emitted as vsubss
+ predicate(UseAVX > 0); // AVX form; subF_reg is the rule when UseAVX == 0
+ match(Set dst (SubF src1 src2)); // matches src1 - src2; dst need not be one of the inputs
+
+ format %{ "vsubss $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubF_mem(regF dst, regF src1, memory src2) %{ // float subtract with a memory subtrahend, emitted as vsubss
+ predicate(UseAVX > 0); // AVX form; subF_mem is the rule when UseAVX == 0
+ match(Set dst (SubF src1 (LoadF src2))); // matches src1 - load(src2); the LoadF is folded into the instruction
+
+ format %{ "vsubss $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubF_imm(regF dst, regF src, immF con) %{ // float subtract of a constant, emitted as vsubss
+ predicate(UseAVX > 0); // AVX form; subF_imm is the rule when UseAVX == 0
+ match(Set dst (SubF src con)); // matches src - con; con is addressed through $constantaddress below
+
+ format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct subD_reg(regD dst, regD src) %{ // double subtract, register source, emitted as subsd
+ predicate((UseSSE>=2) && (UseAVX == 0)); // needs UseSSE >= 2; vsubD_reg is the rule for UseAVX > 0
+ match(Set dst (SubD dst src)); // matches dst - src with the result written back to dst
+
+ format %{ "subsd $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ subsd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct subD_mem(regD dst, memory src) %{ // double subtract with a memory subtrahend, emitted as subsd
+ predicate((UseSSE>=2) && (UseAVX == 0)); // needs UseSSE >= 2; vsubD_mem is the rule for UseAVX > 0
+ match(Set dst (SubD dst (LoadD src))); // matches dst - load(src); the LoadD is folded into the instruction
+
+ format %{ "subsd $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ subsd($dst$$XMMRegister, $src$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct subD_imm(regD dst, immD con) %{ // double subtract of a constant, emitted as subsd
+ predicate((UseSSE>=2) && (UseAVX == 0)); // needs UseSSE >= 2; vsubD_imm is the rule for UseAVX > 0
+ match(Set dst (SubD dst con)); // matches dst - con; con is addressed through $constantaddress below
+ format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ subsd($dst$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubD_reg(regD dst, regD src1, regD src2) %{ // double subtract, register sources, emitted as vsubsd
+ predicate(UseAVX > 0); // AVX form; subD_reg is the rule when UseAVX == 0
+ match(Set dst (SubD src1 src2)); // matches src1 - src2; dst need not be one of the inputs
+
+ format %{ "vsubsd $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubD_mem(regD dst, regD src1, memory src2) %{ // double subtract with a memory subtrahend, emitted as vsubsd
+ predicate(UseAVX > 0); // AVX form; subD_mem is the rule when UseAVX == 0
+ match(Set dst (SubD src1 (LoadD src2))); // matches src1 - load(src2); the LoadD is folded into the instruction
+
+ format %{ "vsubsd $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubD_imm(regD dst, regD src, immD con) %{ // AVX double subtract with a constant: dst = src - con
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (SubD src con)); // con is an immD; the encoding reads it via $constantaddress
+
+ format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct mulF_reg(regF dst, regF src) %{ // Non-AVX float multiply, register operand: dst = dst * src
+ predicate((UseSSE>=1) && (UseAVX == 0)); // requires UseSSE >= 1 and AVX disabled
+ match(Set dst (MulF dst src)); // dst is both the left input and the result
+
+ format %{ "mulss $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ mulss($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct mulF_mem(regF dst, memory src) %{ // Non-AVX float multiply, memory operand: dst = dst * [src]
+ predicate((UseSSE>=1) && (UseAVX == 0)); // requires UseSSE >= 1 and AVX disabled
+ match(Set dst (MulF dst (LoadF src))); // the LoadF is matched together with the multiply
+
+ format %{ "mulss $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ mulss($dst$$XMMRegister, $src$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct mulF_imm(regF dst, immF con) %{ // Non-AVX float multiply with a constant: dst = dst * con
+ predicate((UseSSE>=1) && (UseAVX == 0)); // requires UseSSE >= 1 and AVX disabled
+ match(Set dst (MulF dst con)); // con is an immF; the encoding reads it via $constantaddress
+ format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ mulss($dst$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulF_reg(regF dst, regF src1, regF src2) %{ // AVX float multiply, three-operand: dst = src1 * src2
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (MulF src1 src2)); // dst is independent of both inputs
+
+ format %{ "vmulss $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulF_mem(regF dst, regF src1, memory src2) %{ // AVX float multiply, memory operand: dst = src1 * [src2]
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (MulF src1 (LoadF src2))); // the LoadF is matched together with the multiply
+
+ format %{ "vmulss $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulF_imm(regF dst, regF src, immF con) %{ // AVX float multiply with a constant: dst = src * con
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (MulF src con)); // con is an immF; the encoding reads it via $constantaddress
+
+ format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct mulD_reg(regD dst, regD src) %{ // Non-AVX double multiply, register operand: dst = dst * src
+ predicate((UseSSE>=2) && (UseAVX == 0)); // requires UseSSE >= 2 and AVX disabled
+ match(Set dst (MulD dst src)); // dst is both the left input and the result
+
+ format %{ "mulsd $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct mulD_mem(regD dst, memory src) %{ // Non-AVX double multiply, memory operand: dst = dst * [src]
+ predicate((UseSSE>=2) && (UseAVX == 0)); // requires UseSSE >= 2 and AVX disabled
+ match(Set dst (MulD dst (LoadD src))); // the LoadD is matched together with the multiply
+
+ format %{ "mulsd $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ mulsd($dst$$XMMRegister, $src$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct mulD_imm(regD dst, immD con) %{ // Non-AVX double multiply with a constant: dst = dst * con
+ predicate((UseSSE>=2) && (UseAVX == 0)); // requires UseSSE >= 2 and AVX disabled
+ match(Set dst (MulD dst con)); // con is an immD; the encoding reads it via $constantaddress
+ format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ mulsd($dst$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulD_reg(regD dst, regD src1, regD src2) %{ // AVX double multiply, three-operand: dst = src1 * src2
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (MulD src1 src2)); // dst is independent of both inputs
+
+ format %{ "vmulsd $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulD_mem(regD dst, regD src1, memory src2) %{ // AVX double multiply, memory operand: dst = src1 * [src2]
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (MulD src1 (LoadD src2))); // the LoadD is matched together with the multiply
+
+ format %{ "vmulsd $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulD_imm(regD dst, regD src, immD con) %{ // AVX double multiply with a constant: dst = src * con
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (MulD src con)); // con is an immD; the encoding reads it via $constantaddress
+
+ format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct divF_reg(regF dst, regF src) %{ // Non-AVX float divide, register operand: dst = dst / src
+ predicate((UseSSE>=1) && (UseAVX == 0)); // requires UseSSE >= 1 and AVX disabled
+ match(Set dst (DivF dst src)); // dst is both the dividend and the result
+
+ format %{ "divss $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ divss($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct divF_mem(regF dst, memory src) %{ // Non-AVX float divide, memory operand: dst = dst / [src]
+ predicate((UseSSE>=1) && (UseAVX == 0)); // requires UseSSE >= 1 and AVX disabled
+ match(Set dst (DivF dst (LoadF src))); // the LoadF is matched together with the divide
+
+ format %{ "divss $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ divss($dst$$XMMRegister, $src$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct divF_imm(regF dst, immF con) %{ // Non-AVX float divide by a constant: dst = dst / con
+ predicate((UseSSE>=1) && (UseAVX == 0)); // requires UseSSE >= 1 and AVX disabled
+ match(Set dst (DivF dst con)); // con is an immF; the encoding reads it via $constantaddress
+ format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ divss($dst$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vdivF_reg(regF dst, regF src1, regF src2) %{ // AVX float divide, three-operand: dst = src1 / src2
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (DivF src1 src2)); // dst is independent of both inputs
+
+ format %{ "vdivss $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vdivF_mem(regF dst, regF src1, memory src2) %{ // AVX float divide, memory operand: dst = src1 / [src2]
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (DivF src1 (LoadF src2))); // the LoadF is matched together with the divide
+
+ format %{ "vdivss $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vdivF_imm(regF dst, regF src, immF con) %{ // AVX float divide by a constant: dst = src / con
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (DivF src con)); // con is an immF; the encoding reads it via $constantaddress
+
+ format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct divD_reg(regD dst, regD src) %{ // Non-AVX double divide, register operand: dst = dst / src
+ predicate((UseSSE>=2) && (UseAVX == 0)); // requires UseSSE >= 2 and AVX disabled
+ match(Set dst (DivD dst src)); // dst is both the dividend and the result
+
+ format %{ "divsd $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ divsd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct divD_mem(regD dst, memory src) %{ // Non-AVX double divide, memory operand: dst = dst / [src]
+ predicate((UseSSE>=2) && (UseAVX == 0)); // requires UseSSE >= 2 and AVX disabled
+ match(Set dst (DivD dst (LoadD src))); // the LoadD is matched together with the divide
+
+ format %{ "divsd $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ divsd($dst$$XMMRegister, $src$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct divD_imm(regD dst, immD con) %{ // Non-AVX double divide by a constant: dst = dst / con
+ predicate((UseSSE>=2) && (UseAVX == 0)); // requires UseSSE >= 2 and AVX disabled
+ match(Set dst (DivD dst con)); // con is an immD; the encoding reads it via $constantaddress
+ format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ divsd($dst$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vdivD_reg(regD dst, regD src1, regD src2) %{ // AVX double divide, three-operand: dst = src1 / src2
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (DivD src1 src2)); // dst is independent of both inputs
+
+ format %{ "vdivsd $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vdivD_mem(regD dst, regD src1, memory src2) %{ // AVX double divide, memory operand: dst = src1 / [src2]
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (DivD src1 (LoadD src2))); // the LoadD is matched together with the divide
+
+ format %{ "vdivsd $dst, $src1, $src2" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vdivD_imm(regD dst, regD src, immD con) %{ // AVX double divide by a constant: dst = src / con
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (DivD src con)); // con is an immD; the encoding reads it via $constantaddress
+
+ format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct absF_reg(regF dst) %{ // Non-AVX float abs, in place: ANDs dst with the float_signmask() constant
+ predicate((UseSSE>=1) && (UseAVX == 0)); // requires UseSSE >= 1 and AVX disabled
+ match(Set dst (AbsF dst)); // input and result share dst
+ ins_cost(150);
+ format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
+ ins_encode %{
+ __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vabsF_reg(regF dst, regF src) %{ // AVX float abs: dst = src AND the float_signmask() constant
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (AbsF src)); // separate source and destination registers
+ ins_cost(150);
+ format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
+ ins_encode %{
+ __ vandps($dst$$XMMRegister, $src$$XMMRegister,
+ ExternalAddress(float_signmask()));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct absD_reg(regD dst) %{ // Non-AVX double abs, in place: ANDs dst with the double_signmask() constant
+ predicate((UseSSE>=2) && (UseAVX == 0)); // requires UseSSE >= 2 and AVX disabled
+ match(Set dst (AbsD dst)); // input and result share dst
+ ins_cost(150);
+ format %{ "andpd $dst, [0x7fffffffffffffff]\t"
+ "# abs double by sign masking" %}
+ ins_encode %{
+ __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vabsD_reg(regD dst, regD src) %{ // AVX double abs: dst = src AND the double_signmask() constant
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (AbsD src)); // separate source and destination registers
+ ins_cost(150);
+ format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
+ "# abs double by sign masking" %}
+ ins_encode %{
+ __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
+ ExternalAddress(double_signmask()));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct negF_reg(regF dst) %{ // Non-AVX float negate, in place: XORs dst with the float_signflip() constant
+ predicate((UseSSE>=1) && (UseAVX == 0)); // requires UseSSE >= 1 and AVX disabled
+ match(Set dst (NegF dst)); // input and result share dst
+ ins_cost(150);
+ format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
+ ins_encode %{
+ __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vnegF_reg(regF dst, regF src) %{ // AVX float negate: dst = src XOR the float_signflip() constant
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (NegF src)); // separate source and destination registers
+ ins_cost(150);
+ format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
+ ins_encode %{
+ __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
+ ExternalAddress(float_signflip()));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct negD_reg(regD dst) %{ // Non-AVX double negate, in place: XORs dst with the double_signflip() constant
+ predicate((UseSSE>=2) && (UseAVX == 0)); // requires UseSSE >= 2 and AVX disabled
+ match(Set dst (NegD dst)); // input and result share dst
+ ins_cost(150);
+ format %{ "xorpd $dst, [0x8000000000000000]\t"
+ "# neg double by sign flipping" %}
+ ins_encode %{
+ __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vnegD_reg(regD dst, regD src) %{ // AVX double negate: dst = src XOR the double_signflip() constant
+ predicate(UseAVX > 0); // selected only when AVX is enabled
+ match(Set dst (NegD src)); // separate source and destination registers
+ ins_cost(150);
+ format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
+ "# neg double by sign flipping" %}
+ ins_encode %{
+ __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
+ ExternalAddress(double_signflip()));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct sqrtF_reg(regF dst, regF src) %{ // Float square root, register operand, emitted as a single sqrtss
+ predicate(UseSSE>=1); // only UseSSE is tested; the predicate does not look at UseAVX
+ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); // matches the whole float->double, SqrtD, double->float chain
+
+ format %{ "sqrtss $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct sqrtF_mem(regF dst, memory src) %{ // Float square root of a memory operand, emitted as a single sqrtss
+ predicate(UseSSE>=1); // only UseSSE is tested; the predicate does not look at UseAVX
+ match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); // conversion chain plus the LoadF are matched as one unit
+
+ format %{ "sqrtss $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ sqrtss($dst$$XMMRegister, $src$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct sqrtF_imm(regF dst, immF con) %{ // Float square root of a constant, emitted as a single sqrtss
+ predicate(UseSSE>=1); // only UseSSE is tested; the predicate does not look at UseAVX
+ match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); // con is an immF; the encoding reads it via $constantaddress
+ format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ sqrtss($dst$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct sqrtD_reg(regD dst, regD src) %{ // Double square root, register operand: dst = sqrt(src)
+ predicate(UseSSE>=2); // only UseSSE is tested; the predicate does not look at UseAVX
+ match(Set dst (SqrtD src)); // separate source and destination registers
+
+ format %{ "sqrtsd $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct sqrtD_mem(regD dst, memory src) %{ // Double square root of a memory operand: dst = sqrt([src])
+ predicate(UseSSE>=2); // only UseSSE is tested; the predicate does not look at UseAVX
+ match(Set dst (SqrtD (LoadD src))); // the LoadD is matched together with the square root
+
+ format %{ "sqrtsd $dst, $src" %}
+ ins_cost(150);
+ ins_encode %{
+ __ sqrtsd($dst$$XMMRegister, $src$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct sqrtD_imm(regD dst, immD con) %{ // Double square root of a constant: dst = sqrt(con)
+ predicate(UseSSE>=2); // only UseSSE is tested; the predicate does not look at UseAVX
+ match(Set dst (SqrtD con)); // con is an immD; the encoding reads it via $constantaddress
+ format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+ ins_cost(150);
+ ins_encode %{
+ __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
diff --git a/src/cpu/x86/vm/x86_32.ad b/src/cpu/x86/vm/x86_32.ad
index 84d6bbac7..076cf3a6b 100644
--- a/src/cpu/x86/vm/x86_32.ad
+++ b/src/cpu/x86/vm/x86_32.ad
@@ -281,7 +281,7 @@ static int pre_call_FPU_size() {
}
static int preserve_SP_size() {
- return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
+ return 2; // op, rm(reg/reg): two bytes; the LP64_ONLY rex byte of the removed line is no longer counted
}
// !!!!! Special hack to get all type of calls to specify the byte offset
@@ -495,14 +495,34 @@ void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
}
}
-void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
- if( dst_encoding == src_encoding ) {
- // reg-reg copy, use an empty encoding
- } else {
- MacroAssembler _masm(&cbuf);
+void emit_cmpfp_fixup(MacroAssembler& _masm) { // Emits a flags fixup so that a NaN compare result reads as 'less than'
+ Label exit;
+ __ jccb(Assembler::noParity, exit); // PF clear: not the NaN case described below, nothing to fix
+ __ pushf(); // put the flags on the stack so they can be edited in memory
+ //
+ // comiss/ucomiss instructions set ZF,PF,CF flags and
+ // zero OF,AF,SF for NaN values.
+ // Fixup flags by zeroing ZF,PF so that compare of NaN
+ // values returns 'less than' result (CF is set).
+ // Leave the rest of flags unchanged.
+ //
+ // 7 6 5 4 3 2 1 0
+ // |S|Z|r|A|r|P|r|C| (r - reserved bit)
+ // 0 0 1 0 1 0 1 1 (0x2B)
+ //
+ __ andl(Address(rsp, 0), 0xffffff2b); // low byte 0x2B keeps C and the reserved bits; the S, Z, A and P positions are cleared
+ __ popf(); // reload the edited flags
+ __ bind(exit);
+}
- __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
- }
+void emit_cmpfp3(MacroAssembler& _masm, Register dst) { // Emits code that turns the current compare flags into -1, 0 or 1 in dst
+ Label done;
+ __ movl(dst, -1); // start with -1; both early exits below keep it
+ __ jcc(Assembler::parity, done); // parity set: result stays -1
+ __ jcc(Assembler::below, done); // below: result stays -1
+ __ setb(Assembler::notEqual, dst); // byte = 1 when not equal, 0 when equal
+ __ movzbl(dst, dst); // zero-extend that byte to the full register
+ __ bind(done);
}
@@ -792,92 +812,88 @@ static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset
// Helper for XMM registers. Extra opcode bits, limited syntax.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
- if( cbuf ) {
- if( reg_lo+1 == reg_hi ) { // double move?
- if( is_load && !UseXmmLoadAndClearUpper )
- emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
- else
- emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
+ if (cbuf) { // a code buffer was supplied: emit the move through MacroAssembler
+ MacroAssembler _masm(cbuf);
+ if (reg_lo+1 == reg_hi) { // double move?
+ if (is_load) {
+ __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); // double load: XMM <- [rsp+offset]
+ } else {
+ __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); // double store: [rsp+offset] <- XMM
+ }
} else {
- emit_opcode(*cbuf, 0xF3 );
+ if (is_load) {
+ __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); // float load: XMM <- [rsp+offset]
+ } else {
+ __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); // float store: [rsp+offset] <- XMM
+ }
}
- emit_opcode(*cbuf, 0x0F );
- if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
- emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load
- else
- emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
- encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
- } else if( !do_size ) {
- if( size != 0 ) st->print("\n\t");
- if( reg_lo+1 == reg_hi ) { // double move?
- if( is_load ) st->print("%s %s,[ESP + #%d]",
- UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
- Matcher::regName[reg_lo], offset);
- else st->print("MOVSD [ESP + #%d],%s",
- offset, Matcher::regName[reg_lo]);
+ } else if (!do_size) { // no buffer and not sizing: print the instruction text to st instead
+ if (size != 0) st->print("\n\t"); // separate from earlier output when size is already non-zero
+ if (reg_lo+1 == reg_hi) { // double move?
+ if (is_load) st->print("%s %s,[ESP + #%d]",
+ UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
+ Matcher::regName[reg_lo], offset);
+ else st->print("MOVSD [ESP + #%d],%s",
+ offset, Matcher::regName[reg_lo]);
} else {
- if( is_load ) st->print("MOVSS %s,[ESP + #%d]",
- Matcher::regName[reg_lo], offset);
- else st->print("MOVSS [ESP + #%d],%s",
- offset, Matcher::regName[reg_lo]);
+ if (is_load) st->print("MOVSS %s,[ESP + #%d]",
+ Matcher::regName[reg_lo], offset);
+ else st->print("MOVSS [ESP + #%d],%s",
+ offset, Matcher::regName[reg_lo]);
}
#endif
}
int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
+ // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes.
return size+5+offset_size;
}
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
int src_hi, int dst_hi, int size, outputStream* st ) {
- if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
- if( cbuf ) {
- if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
- emit_opcode(*cbuf, 0x66 );
- }
- emit_opcode(*cbuf, 0x0F );
- emit_opcode(*cbuf, 0x28 );
- emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
+ if (cbuf) { // a code buffer was supplied: emit the register move through MacroAssembler
+ MacroAssembler _masm(cbuf);
+ if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
+ __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
+ as_XMMRegister(Matcher::_regEncode[src_lo])); // double: dst XMM <- src XMM
+ } else {
+ __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
+ as_XMMRegister(Matcher::_regEncode[src_lo])); // float: dst XMM <- src XMM
+ }
#ifndef PRODUCT
- } else if( !do_size ) {
- if( size != 0 ) st->print("\n\t");
- if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
+ } else if (!do_size) { // no buffer and not sizing: print the instruction text to st instead
+ if (size != 0) st->print("\n\t"); // separate from earlier output when size is already non-zero
+ if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
+ if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
} else {
st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
}
-#endif
- }
- return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
- } else {
- if( cbuf ) {
- emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
- emit_opcode(*cbuf, 0x0F );
- emit_opcode(*cbuf, 0x10 );
- emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
-#ifndef PRODUCT
- } else if( !do_size ) {
- if( size != 0 ) st->print("\n\t");
+ } else {
if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
} else {
st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
}
-#endif
}
- return size+4;
+#endif
}
+ // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes.
+ // Only MOVAPS SSE prefix uses 1 byte.
+ int sz = 4; // byte count added for every case except the one below
+ if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
+ UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3; // float move with UseXmmRegToRegMoveAll and no AVX: one byte less
+ return size + sz;
}
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
int src_hi, int dst_hi, int size, outputStream* st ) {
// 32-bit
if (cbuf) {
- emit_opcode(*cbuf, 0x66);
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x6E);
- emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
+ as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
@@ -891,10 +907,9 @@ static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int
int src_hi, int dst_hi, int size, outputStream* st ) {
// 32-bit
if (cbuf) {
- emit_opcode(*cbuf, 0x66);
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x7E);
- emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
+ as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
@@ -1760,7 +1775,7 @@ encode %{
emit_cc(cbuf, $secondary, $cop$$cmpcode);
%}
- enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
+ enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
emit_d8(cbuf, op >> 8 );
emit_d8(cbuf, op & 255);
@@ -1931,11 +1946,6 @@ encode %{
%}
- enc_class Xor_Reg (eRegI dst) %{
- emit_opcode(cbuf, 0x33);
- emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
- %}
-
// Following encoding is no longer used, but may be restored if calling
// convention changes significantly.
// Became: Xor_Reg(EBP), Java_To_Runtime( labl )
@@ -2013,64 +2023,6 @@ encode %{
%}
- enc_class MovI2X_reg(regX dst, eRegI src) %{
- emit_opcode(cbuf, 0x66 ); // MOVD dst,src
- emit_opcode(cbuf, 0x0F );
- emit_opcode(cbuf, 0x6E );
- emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
- %}
-
- enc_class MovX2I_reg(eRegI dst, regX src) %{
- emit_opcode(cbuf, 0x66 ); // MOVD dst,src
- emit_opcode(cbuf, 0x0F );
- emit_opcode(cbuf, 0x7E );
- emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
- %}
-
- enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
- { // MOVD $dst,$src.lo
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x6E);
- emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
- }
- { // MOVD $tmp,$src.hi
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x6E);
- emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
- }
- { // PUNPCKLDQ $dst,$tmp
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x62);
- emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
- }
- %}
-
- enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
- { // MOVD $dst.lo,$src
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x7E);
- emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
- }
- { // PSHUFLW $tmp,$src,0x4E (01001110b)
- emit_opcode(cbuf,0xF2);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x70);
- emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
- emit_d8(cbuf, 0x4E);
- }
- { // MOVD $dst.hi,$tmp
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x7E);
- emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
- }
- %}
-
-
// Encode a reg-reg copy. If it is useless, then empty encoding.
enc_class enc_Copy( eRegI dst, eRegI src ) %{
encode_Copy( cbuf, $dst$$reg, $src$$reg );
@@ -2080,11 +2032,6 @@ encode %{
encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}
- // Encode xmm reg-reg copy. If it is useless, then empty encoding.
- enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
- encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
- %}
-
enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}
@@ -2116,14 +2063,14 @@ encode %{
$$$emit32$src$$constant;
%}
- enc_class Con32F_as_bits(immF src) %{ // storeF_imm
+ enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm; emits the float constant as its 32-bit pattern via jint_cast
// Output Float immediate bits
jfloat jf = $src$$constant;
int jf_as_bits = jint_cast( jf );
emit_d32(cbuf, jf_as_bits);
%}
- enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm
+ enc_class Con32F_as_bits(immF src) %{ // storeX_imm
// Output Float immediate bits
jfloat jf = $src$$constant;
int jf_as_bits = jint_cast( jf );
@@ -2336,7 +2283,7 @@ encode %{
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}
- enc_class enc_FP_store(memory mem, regD src) %{
+ enc_class enc_FPR_store(memory mem, regDPR src) %{
// If src is FPR1, we can just FST to store it.
// Else we need to FLD it to FPR1, then FSTP to store/pop it.
int reg_encoding = 0x2; // Just store
@@ -2485,7 +2432,7 @@ encode %{
// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
- enc_class OpcReg_F (regF src) %{ // FMUL, FDIV
+ enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV
$$$emit8$primary;
emit_rm(cbuf, 0x3, $secondary, $src$$reg );
%}
@@ -2497,17 +2444,17 @@ encode %{
%}
// !!!!! equivalent to Pop_Reg_F
- enc_class Pop_Reg_D( regD dst ) %{
+ enc_class Pop_Reg_DPR( regDPR dst ) %{ // emits the two bytes 0xDD, 0xD8+dst (FSTP ST(i))
emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
emit_d8( cbuf, 0xD8+$dst$$reg );
%}
- enc_class Push_Reg_D( regD dst ) %{
+ enc_class Push_Reg_DPR( regDPR dst ) %{ // emits the two bytes 0xD9, 0xC0-1+dst (FLD ST(i-1))
emit_opcode( cbuf, 0xD9 );
emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
%}
- enc_class strictfp_bias1( regD dst ) %{
+ enc_class strictfp_bias1( regDPR dst ) %{
emit_opcode( cbuf, 0xDB ); // FLD m80real
emit_opcode( cbuf, 0x2D );
emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
@@ -2515,7 +2462,7 @@ encode %{
emit_opcode( cbuf, 0xC8+$dst$$reg );
%}
- enc_class strictfp_bias2( regD dst ) %{
+ enc_class strictfp_bias2( regDPR dst ) %{
emit_opcode( cbuf, 0xDB ); // FLD m80real
emit_opcode( cbuf, 0x2D );
emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
@@ -2541,39 +2488,29 @@ encode %{
store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
%}
- // Push the float in stackSlot 'src' onto FP-stack
- enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src]
- store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
- %}
-
- // Push the double in stackSlot 'src' onto FP-stack
- enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src]
- store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
- %}
-
// Push FPU's TOS float to a stack-slot, and pop FPU-stack
- enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
+ enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]; float counterpart of Pop_Mem_DPR (opcode 0xD9 instead of 0xDD)
store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
%}
// Same as Pop_Mem_F except for opcode
// Push FPU's TOS double to a stack-slot, and pop FPU-stack
- enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
+ enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]; double counterpart of Pop_Mem_FPR (opcode 0xDD instead of 0xD9)
store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
%}
- enc_class Pop_Reg_F( regF dst ) %{
+ enc_class Pop_Reg_FPR( regFPR dst ) %{ // emits the two bytes 0xDD, 0xD8+dst (FSTP ST(i)); same bytes as Pop_Reg_DPR
emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
emit_d8( cbuf, 0xD8+$dst$$reg );
%}
- enc_class Push_Reg_F( regF dst ) %{
+ enc_class Push_Reg_FPR( regFPR dst ) %{ // emits the two bytes 0xD9, 0xC0-1+dst (FLD ST(i-1)); same bytes as Push_Reg_DPR
emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
emit_d8( cbuf, 0xC0-1+$dst$$reg );
%}
// Push FPU's float to a stack-slot, and pop FPU-stack
- enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
+ enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
int pop = 0x02;
if ($src$$reg != FPR1L_enc) {
emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
@@ -2584,7 +2521,7 @@ encode %{
%}
// Push FPU's double to a stack-slot, and pop FPU-stack
- enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
+ enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
int pop = 0x02;
if ($src$$reg != FPR1L_enc) {
emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
@@ -2595,7 +2532,7 @@ encode %{
%}
// Push FPU's double to a FPU-stack-slot, and pop FPU-stack
- enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
+ enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
int pop = 0xD0 - 1; // -1 since we skip FLD
if ($src$$reg != FPR1L_enc) {
emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1)
@@ -2607,16 +2544,7 @@ encode %{
%}
- enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
- MacroAssembler masm(&cbuf);
- masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg
- masm.fmul( $src2$$reg+0); // value at TOS
- masm.fadd( $src$$reg+0); // value at TOS
- masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store
- %}
-
-
- enc_class Push_Reg_Mod_D( regD dst, regD src) %{
+ enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
// load dst in FPR0
emit_opcode( cbuf, 0xD9 );
emit_d8( cbuf, 0xC0-1+$dst$$reg );
@@ -2634,116 +2562,59 @@ encode %{
}
%}
- enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
- // Allocate a word
- emit_opcode(cbuf,0x83); // SUB ESP,8
- emit_opcode(cbuf,0xEC);
- emit_d8(cbuf,0x08);
-
- emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
+ enc_class Push_ModD_encoding(regD src0, regD src1) %{ // Loads src1 then src0 onto the FPU stack through an 8-byte stack temp; the temp is not released here
+ MacroAssembler _masm(&cbuf);
+ __ subptr(rsp, 8); // reserve 8 bytes of stack as a transfer slot
+ __ movdbl(Address(rsp, 0), $src1$$XMMRegister); // store src1 to the slot ...
+ __ fld_d(Address(rsp, 0)); // ... and load it as a double with fld_d
+ __ movdbl(Address(rsp, 0), $src0$$XMMRegister); // reuse the slot for src0
+ __ fld_d(Address(rsp, 0)); // src0 is loaded last
%}
- enc_class Push_ModX_encoding( regX src0, regX src1) %{
- // Allocate a word
- emit_opcode(cbuf,0x83); // SUB ESP,4
- emit_opcode(cbuf,0xEC);
- emit_d8(cbuf,0x04);
-
- emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xD9 ); // FLD [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xD9 ); // FLD [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
+ enc_class Push_ModF_encoding(regF src0, regF src1) %{ // Loads src1 then src0 onto the FPU stack through a 4-byte stack temp; the temp is not released here
+ MacroAssembler _masm(&cbuf);
+ __ subptr(rsp, 4); // reserve 4 bytes of stack as a transfer slot
+ __ movflt(Address(rsp, 0), $src1$$XMMRegister); // store src1 to the slot ...
+ __ fld_s(Address(rsp, 0)); // ... and load it as a float with fld_s
+ __ movflt(Address(rsp, 0), $src0$$XMMRegister); // reuse the slot for src0
+ __ fld_s(Address(rsp, 0)); // src0 is loaded last
%}
- enc_class Push_ResultXD(regXD dst) %{
- store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]
-
- // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
- emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
- encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0x83); // ADD ESP,8
- emit_opcode(cbuf,0xC4);
- emit_d8(cbuf,0x08);
+ enc_class Push_ResultD(regD dst) %{ // Moves a double from the FPU stack into dst via [rsp], then releases 8 bytes of stack
+ MacroAssembler _masm(&cbuf);
+ __ fstp_d(Address(rsp, 0)); // store-and-pop the FPU value to the slot at [rsp]
+ __ movdbl($dst$$XMMRegister, Address(rsp, 0)); // reload it into the XMM destination
+ __ addptr(rsp, 8); // free the 8-byte slot
%}
- enc_class Push_ResultX(regX dst, immI d8) %{
- store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]
-
- emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x10 );
- encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8)
- emit_opcode(cbuf,0xC4);
- emit_d8(cbuf,$d8$$constant);
+ enc_class Push_ResultF(regF dst, immI d8) %{ // Moves a float from the FPU stack into dst via [rsp], then releases d8 bytes of stack
+ MacroAssembler _masm(&cbuf);
+ __ fstp_s(Address(rsp, 0)); // store-and-pop the FPU value to the slot at [rsp]
+ __ movflt($dst$$XMMRegister, Address(rsp, 0)); // reload it into the XMM destination
+ __ addptr(rsp, $d8$$constant); // release the amount chosen by the user of this encoding (the replaced code noted 4 or 8)
%}
- enc_class Push_SrcXD(regXD src) %{
- // Allocate a word
- emit_opcode(cbuf,0x83); // SUB ESP,8
- emit_opcode(cbuf,0xEC);
- emit_d8(cbuf,0x08);
-
- emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
+ enc_class Push_SrcD(regD src) %{ // Loads src onto the FPU stack through an 8-byte stack temp; the temp is not released here
+ MacroAssembler _masm(&cbuf);
+ __ subptr(rsp, 8); // reserve 8 bytes of stack as a transfer slot
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister); // store src to the slot
+ __ fld_d(Address(rsp, 0)); // load it as a double with fld_d
%}
enc_class push_stack_temp_qword() %{
- emit_opcode(cbuf,0x83); // SUB ESP,8
- emit_opcode(cbuf,0xEC);
- emit_d8 (cbuf,0x08);
+ MacroAssembler _masm(&cbuf);
+ __ subptr(rsp, 8); // reserve an 8-byte stack temp (replaces the hand-encoded SUB ESP,8)
%}
enc_class pop_stack_temp_qword() %{
- emit_opcode(cbuf,0x83); // ADD ESP,8
- emit_opcode(cbuf,0xC4);
- emit_d8 (cbuf,0x08);
+ MacroAssembler _masm(&cbuf);
+ __ addptr(rsp, 8); // release the 8-byte stack temp (replaces the hand-encoded ADD ESP,8)
%}
- enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
- emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
+ enc_class push_xmm_to_fpr1(regD src) %{ // Loads src onto the FPU stack via [rsp]; rsp itself is not adjusted here
+ MacroAssembler _masm(&cbuf);
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister); // overwrites the 8 bytes at [rsp]; presumably reserved beforehand (e.g. push_stack_temp_qword) - confirm at use sites
+ __ fld_d(Address(rsp, 0)); // load that double with fld_d
%}
// Compute X^Y using Intel's fast hardware instructions, if possible.
@@ -2785,10 +2656,7 @@ encode %{
encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
%}
-// enc_class Pop_Reg_Mod_D( regD dst, regD src)
-// was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
-
- enc_class Push_Result_Mod_D( regD src) %{
+ enc_class Push_Result_Mod_DPR( regDPR src) %{
if ($src$$reg != FPR1L_enc) {
// fincstp
emit_opcode (cbuf, 0xD9);
@@ -2817,7 +2685,7 @@ encode %{
emit_opcode( cbuf, 0x05 );
%}
- enc_class emitModD() %{
+ enc_class emitModDPR() %{
// fprem must be iterative
// :: loop
// fprem
@@ -2922,24 +2790,6 @@ encode %{
%}
- // XMM version of CmpF_Result. Because the XMM compare
- // instructions set the EFLAGS directly. It becomes simpler than
- // the float version above.
- enc_class CmpX_Result(eRegI dst) %{
- MacroAssembler _masm(&cbuf);
- Label nan, inc, done;
-
- __ jccb(Assembler::parity, nan);
- __ jccb(Assembler::equal, done);
- __ jccb(Assembler::above, inc);
- __ bind(nan);
- __ decrement(as_Register($dst$$reg)); // NO L qqq
- __ jmpb(done);
- __ bind(inc);
- __ increment(as_Register($dst$$reg)); // NO L qqq
- __ bind(done);
- %}
-
// Compare the longs and set flags
// BROKEN! Do Not use as-is
enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
@@ -3162,48 +3012,6 @@ encode %{
emit_d8 (cbuf,0 );
%}
- enc_class movq_ld(regXD dst, memory mem) %{
- MacroAssembler _masm(&cbuf);
- __ movq($dst$$XMMRegister, $mem$$Address);
- %}
-
- enc_class movq_st(memory mem, regXD src) %{
- MacroAssembler _masm(&cbuf);
- __ movq($mem$$Address, $src$$XMMRegister);
- %}
-
- enc_class pshufd_8x8(regX dst, regX src) %{
- MacroAssembler _masm(&cbuf);
-
- encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
- __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
- __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
- %}
-
- enc_class pshufd_4x16(regX dst, regX src) %{
- MacroAssembler _masm(&cbuf);
-
- __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
- %}
-
- enc_class pshufd(regXD dst, regXD src, int mode) %{
- MacroAssembler _masm(&cbuf);
-
- __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
- %}
-
- enc_class pxor(regXD dst, regXD src) %{
- MacroAssembler _masm(&cbuf);
-
- __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
- %}
-
- enc_class mov_i2x(regXD dst, eRegI src) %{
- MacroAssembler _masm(&cbuf);
-
- __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
- %}
-
// Because the transitions from emitted code to the runtime
// monitorenter/exit helper stubs are so slow it's critical that
@@ -3757,7 +3565,7 @@ encode %{
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware throws an exception which
// patches up the correct value directly to the stack.
- enc_class D2I_encoding( regD src ) %{
+ enc_class DPR2I_encoding( regDPR src ) %{
// Flip to round-to-zero mode. We attempted to allow invalid-op
// exceptions here, so that a NAN or other corner-case value will
// throw an exception (but normal values get converted at full speed).
@@ -3800,7 +3608,7 @@ encode %{
// Carry on here...
%}
- enc_class D2L_encoding( regD src ) %{
+ enc_class DPR2L_encoding( regDPR src ) %{
emit_opcode(cbuf,0xD9); // FLDCW trunc
emit_opcode(cbuf,0x2D);
emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
@@ -3842,294 +3650,27 @@ encode %{
// Carry on here...
%}
- enc_class X2L_encoding( regX src ) %{
- // Allocate a word
- emit_opcode(cbuf,0x83); // SUB ESP,8
- emit_opcode(cbuf,0xEC);
- emit_d8(cbuf,0x08);
-
- emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xD9); // FLDCW trunc
- emit_opcode(cbuf,0x2D);
- emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
-
- // Encoding assumes a double has been pushed into FPR0.
- // Store down the double as a long, popping the FPU stack
- emit_opcode(cbuf,0xDF); // FISTP [ESP]
- emit_opcode(cbuf,0x3C);
- emit_d8(cbuf,0x24);
-
- // Restore the rounding mode; mask the exception
- emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
- emit_opcode(cbuf,0x2D);
- emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
- ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
- : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
-
- // Load the converted int; adjust CPU stack
- emit_opcode(cbuf,0x58); // POP EAX
-
- emit_opcode(cbuf,0x5A); // POP EDX
-
- emit_opcode(cbuf,0x81); // CMP EDX,imm
- emit_d8 (cbuf,0xFA); // rdx
- emit_d32 (cbuf,0x80000000);// 0x80000000
-
- emit_opcode(cbuf,0x75); // JNE around_slow_call
- emit_d8 (cbuf,0x13+4); // Size of slow_call
-
- emit_opcode(cbuf,0x85); // TEST EAX,EAX
- emit_opcode(cbuf,0xC0); // 2/rax,/rax,
-
- emit_opcode(cbuf,0x75); // JNE around_slow_call
- emit_d8 (cbuf,0x13); // Size of slow_call
-
- // Allocate a word
- emit_opcode(cbuf,0x83); // SUB ESP,4
- emit_opcode(cbuf,0xEC);
- emit_d8(cbuf,0x04);
-
- emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0x83); // ADD ESP,4
- emit_opcode(cbuf,0xC4);
- emit_d8(cbuf,0x04);
-
- // CALL directly to the runtime
- cbuf.set_insts_mark();
- emit_opcode(cbuf,0xE8); // Call into runtime
- emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
- // Carry on here...
- %}
-
- enc_class XD2L_encoding( regXD src ) %{
- // Allocate a word
- emit_opcode(cbuf,0x83); // SUB ESP,8
- emit_opcode(cbuf,0xEC);
- emit_d8(cbuf,0x08);
-
- emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xD9); // FLDCW trunc
- emit_opcode(cbuf,0x2D);
- emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
-
- // Encoding assumes a double has been pushed into FPR0.
- // Store down the double as a long, popping the FPU stack
- emit_opcode(cbuf,0xDF); // FISTP [ESP]
- emit_opcode(cbuf,0x3C);
- emit_d8(cbuf,0x24);
-
- // Restore the rounding mode; mask the exception
- emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
- emit_opcode(cbuf,0x2D);
- emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
- ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
- : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
-
- // Load the converted int; adjust CPU stack
- emit_opcode(cbuf,0x58); // POP EAX
-
- emit_opcode(cbuf,0x5A); // POP EDX
-
- emit_opcode(cbuf,0x81); // CMP EDX,imm
- emit_d8 (cbuf,0xFA); // rdx
- emit_d32 (cbuf,0x80000000); // 0x80000000
-
- emit_opcode(cbuf,0x75); // JNE around_slow_call
- emit_d8 (cbuf,0x13+4); // Size of slow_call
-
- emit_opcode(cbuf,0x85); // TEST EAX,EAX
- emit_opcode(cbuf,0xC0); // 2/rax,/rax,
-
- emit_opcode(cbuf,0x75); // JNE around_slow_call
- emit_d8 (cbuf,0x13); // Size of slow_call
-
- // Push src onto stack slow-path
- // Allocate a word
- emit_opcode(cbuf,0x83); // SUB ESP,8
- emit_opcode(cbuf,0xEC);
- emit_d8(cbuf,0x08);
-
- emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0x83); // ADD ESP,8
- emit_opcode(cbuf,0xC4);
- emit_d8(cbuf,0x08);
-
- // CALL directly to the runtime
- cbuf.set_insts_mark();
- emit_opcode(cbuf,0xE8); // Call into runtime
- emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
- // Carry on here...
- %}
-
- enc_class D2X_encoding( regX dst, regD src ) %{
- // Allocate a word
- emit_opcode(cbuf,0x83); // SUB ESP,4
- emit_opcode(cbuf,0xEC);
- emit_d8(cbuf,0x04);
- int pop = 0x02;
- if ($src$$reg != FPR1L_enc) {
- emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
- emit_d8( cbuf, 0xC0-1+$src$$reg );
- pop = 0x03;
- }
- store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP]
-
- emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x10 );
- encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0x83); // ADD ESP,4
- emit_opcode(cbuf,0xC4);
- emit_d8(cbuf,0x04);
- // Carry on here...
- %}
-
- enc_class FX2I_encoding( regX src, eRegI dst ) %{
- emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
-
- // Compare the result to see if we need to go to the slow path
- emit_opcode(cbuf,0x81); // CMP dst,imm
- emit_rm (cbuf,0x3,0x7,$dst$$reg);
- emit_d32 (cbuf,0x80000000); // 0x80000000
-
- emit_opcode(cbuf,0x75); // JNE around_slow_call
- emit_d8 (cbuf,0x13); // Size of slow_call
- // Store xmm to a temp memory
- // location and push it onto stack.
-
- emit_opcode(cbuf,0x83); // SUB ESP,4
- emit_opcode(cbuf,0xEC);
- emit_d8(cbuf, $primary ? 0x8 : 0x4);
-
- emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0x83); // ADD ESP,4
- emit_opcode(cbuf,0xC4);
- emit_d8(cbuf, $primary ? 0x8 : 0x4);
-
- // CALL directly to the runtime
- cbuf.set_insts_mark();
- emit_opcode(cbuf,0xE8); // Call into runtime
- emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
-
- // Carry on here...
- %}
-
- enc_class X2D_encoding( regD dst, regX src ) %{
- // Allocate a word
- emit_opcode(cbuf,0x83); // SUB ESP,4
- emit_opcode(cbuf,0xEC);
- emit_d8(cbuf,0x04);
-
- emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0x83); // ADD ESP,4
- emit_opcode(cbuf,0xC4);
- emit_d8(cbuf,0x04);
-
- // Carry on here...
- %}
-
- enc_class AbsXF_encoding(regX dst) %{
- address signmask_address=(address)float_signmask_pool;
- // andpd:\tANDPS $dst,[signconst]
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x54);
- emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
- emit_d32(cbuf, (int)signmask_address);
- %}
-
- enc_class AbsXD_encoding(regXD dst) %{
- address signmask_address=(address)double_signmask_pool;
- // andpd:\tANDPD $dst,[signconst]
- emit_opcode(cbuf, 0x66);
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x54);
- emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
- emit_d32(cbuf, (int)signmask_address);
- %}
-
- enc_class NegXF_encoding(regX dst) %{
- address signmask_address=(address)float_signflip_pool;
- // andpd:\tXORPS $dst,[signconst]
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x57);
- emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
- emit_d32(cbuf, (int)signmask_address);
- %}
-
- enc_class NegXD_encoding(regXD dst) %{
- address signmask_address=(address)double_signflip_pool;
- // andpd:\tXORPD $dst,[signconst]
- emit_opcode(cbuf, 0x66);
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x57);
- emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
- emit_d32(cbuf, (int)signmask_address);
- %}
-
- enc_class FMul_ST_reg( eRegF src1 ) %{
+ enc_class FMul_ST_reg( eRegFPR src1 ) %{
// Operand was loaded from memory into fp ST (stack top)
// FMUL ST,$src /* D8 C8+i */
emit_opcode(cbuf, 0xD8);
emit_opcode(cbuf, 0xC8 + $src1$$reg);
%}
- enc_class FAdd_ST_reg( eRegF src2 ) %{
+ enc_class FAdd_ST_reg( eRegFPR src2 ) %{
// FADD ST,src2 /* D8 C0+i */
emit_opcode(cbuf, 0xD8);
emit_opcode(cbuf, 0xC0 + $src2$$reg);
//could use FADDP src2,fpST /* DE C0+i */
%}
- enc_class FAddP_reg_ST( eRegF src2 ) %{
+ enc_class FAddP_reg_ST( eRegFPR src2 ) %{
// FADDP src2,ST /* DE C0+i */
emit_opcode(cbuf, 0xDE);
emit_opcode(cbuf, 0xC0 + $src2$$reg);
%}
- enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
+ enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
// Operand has been loaded into fp ST (stack top)
// FSUB ST,$src1
emit_opcode(cbuf, 0xD8);
@@ -4140,7 +3681,7 @@ encode %{
emit_opcode(cbuf, 0xF0 + $src2$$reg);
%}
- enc_class MulFAddF (eRegF src1, eRegF src2) %{
+ enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
// Operand was loaded from memory into fp ST (stack top)
// FADD ST,$src /* D8 C0+i */
emit_opcode(cbuf, 0xD8);
@@ -4152,7 +3693,7 @@ encode %{
%}
- enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
+ enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
// Operand was loaded from memory into fp ST (stack top)
// FADD ST,$src /* D8 C0+i */
emit_opcode(cbuf, 0xD8);
@@ -4176,66 +3717,6 @@ encode %{
store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
%}
- enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
- { // Atomic long load
- // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
- emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
- int base = $mem$$base;
- int index = $mem$$index;
- int scale = $mem$$scale;
- int displace = $mem$$disp;
- bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
- encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
- }
- { // MOVSD $dst,$tmp ! atomic long store
- emit_opcode(cbuf,0xF2);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x11);
- int base = $dst$$base;
- int index = $dst$$index;
- int scale = $dst$$scale;
- int displace = $dst$$disp;
- bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
- encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
- }
- %}
-
- enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
- { // Atomic long load
- // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
- emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
- int base = $mem$$base;
- int index = $mem$$index;
- int scale = $mem$$scale;
- int displace = $mem$$disp;
- bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
- encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
- }
- { // MOVD $dst.lo,$tmp
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x7E);
- emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
- }
- { // PSRLQ $tmp,32
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x73);
- emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
- emit_d8(cbuf, 0x20);
- }
- { // MOVD $dst.hi,$tmp
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x7E);
- emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
- }
- %}
-
// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
@@ -4253,66 +3734,6 @@ encode %{
encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
%}
- enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
- { // Atomic long load
- // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
- emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
- int base = $src$$base;
- int index = $src$$index;
- int scale = $src$$scale;
- int displace = $src$$disp;
- bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
- encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
- }
- cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
- { // MOVSD $mem,$tmp ! atomic long store
- emit_opcode(cbuf,0xF2);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x11);
- int base = $mem$$base;
- int index = $mem$$index;
- int scale = $mem$$scale;
- int displace = $mem$$disp;
- bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
- encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
- }
- %}
-
- enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
- { // MOVD $tmp,$src.lo
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x6E);
- emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
- }
- { // MOVD $tmp2,$src.hi
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x6E);
- emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
- }
- { // PUNPCKLDQ $tmp,$tmp2
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x62);
- emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
- }
- cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
- { // MOVSD $mem,$tmp ! atomic long store
- emit_opcode(cbuf,0xF2);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x11);
- int base = $mem$$base;
- int index = $mem$$index;
- int scale = $mem$$scale;
- int displace = $mem$$disp;
- bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
- encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
- }
- %}
-
// Safepoint Poll. This polls the safepoint page, and causes an
// exception if it is not readable. Unfortunately, it kills the condition code
// in the process
@@ -4705,7 +4126,7 @@ operand immL32() %{
%}
//Double Immediate zero
-operand immD0() %{
+operand immDPR0() %{
// Do additional (and counter-intuitive) test against NaN to work around VC++
// bug that generates code such that NaNs compare equal to 0.0
predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
@@ -4717,7 +4138,7 @@ operand immD0() %{
%}
// Double Immediate one
-operand immD1() %{
+operand immDPR1() %{
predicate( UseSSE<=1 && n->getd() == 1.0 );
match(ConD);
@@ -4727,7 +4148,7 @@ operand immD1() %{
%}
// Double Immediate
-operand immD() %{
+operand immDPR() %{
predicate(UseSSE<=1);
match(ConD);
@@ -4736,7 +4157,7 @@ operand immD() %{
interface(CONST_INTER);
%}
-operand immXD() %{
+operand immD() %{
predicate(UseSSE>=2);
match(ConD);
@@ -4746,7 +4167,7 @@ operand immXD() %{
%}
// Double Immediate zero
-operand immXD0() %{
+operand immD0() %{
// Do additional (and counter-intuitive) test against NaN to work around VC++
// bug that generates code such that NaNs compare equal to 0.0 AND do not
// compare equal to -0.0.
@@ -4758,7 +4179,7 @@ operand immXD0() %{
%}
// Float Immediate zero
-operand immF0() %{
+operand immFPR0() %{
predicate(UseSSE == 0 && n->getf() == 0.0F);
match(ConF);
@@ -4768,7 +4189,7 @@ operand immF0() %{
%}
// Float Immediate one
-operand immF1() %{
+operand immFPR1() %{
predicate(UseSSE == 0 && n->getf() == 1.0F);
match(ConF);
@@ -4778,7 +4199,7 @@ operand immF1() %{
%}
// Float Immediate
-operand immF() %{
+operand immFPR() %{
predicate( UseSSE == 0 );
match(ConF);
@@ -4788,7 +4209,7 @@ operand immF() %{
%}
// Float Immediate
-operand immXF() %{
+operand immF() %{
predicate(UseSSE >= 1);
match(ConF);
@@ -4798,7 +4219,7 @@ operand immXF() %{
%}
// Float Immediate zero. Zero and not -0.0
-operand immXF0() %{
+operand immF0() %{
predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
match(ConF);
@@ -5174,7 +4595,7 @@ operand flagsReg_long_LEGT() %{
%}
// Float register operands
-operand regD() %{
+operand regDPR() %{
predicate( UseSSE < 2 );
constraint(ALLOC_IN_RC(dbl_reg));
match(RegD);
@@ -5184,7 +4605,7 @@ operand regD() %{
interface(REG_INTER);
%}
-operand regDPR1(regD reg) %{
+operand regDPR1(regDPR reg) %{
predicate( UseSSE < 2 );
constraint(ALLOC_IN_RC(dbl_reg0));
match(reg);
@@ -5192,7 +4613,7 @@ operand regDPR1(regD reg) %{
interface(REG_INTER);
%}
-operand regDPR2(regD reg) %{
+operand regDPR2(regDPR reg) %{
predicate( UseSSE < 2 );
constraint(ALLOC_IN_RC(dbl_reg1));
match(reg);
@@ -5200,7 +4621,7 @@ operand regDPR2(regD reg) %{
interface(REG_INTER);
%}
-operand regnotDPR1(regD reg) %{
+operand regnotDPR1(regDPR reg) %{
predicate( UseSSE < 2 );
constraint(ALLOC_IN_RC(dbl_notreg0));
match(reg);
@@ -5209,18 +4630,18 @@ operand regnotDPR1(regD reg) %{
%}
// XMM Double register operands
-operand regXD() %{
+operand regD() %{
predicate( UseSSE>=2 );
constraint(ALLOC_IN_RC(xdb_reg));
match(RegD);
- match(regXD6);
- match(regXD7);
+ match(regD6);
+ match(regD7);
format %{ %}
interface(REG_INTER);
%}
// XMM6 double register operands
-operand regXD6(regXD reg) %{
+operand regD6(regD reg) %{
predicate( UseSSE>=2 );
constraint(ALLOC_IN_RC(xdb_reg6));
match(reg);
@@ -5229,7 +4650,7 @@ operand regXD6(regXD reg) %{
%}
// XMM7 double register operands
-operand regXD7(regXD reg) %{
+operand regD7(regD reg) %{
predicate( UseSSE>=2 );
constraint(ALLOC_IN_RC(xdb_reg7));
match(reg);
@@ -5238,7 +4659,7 @@ operand regXD7(regXD reg) %{
%}
// Float register operands
-operand regF() %{
+operand regFPR() %{
predicate( UseSSE < 2 );
constraint(ALLOC_IN_RC(flt_reg));
match(RegF);
@@ -5248,7 +4669,7 @@ operand regF() %{
%}
// Float register operands
-operand regFPR1(regF reg) %{
+operand regFPR1(regFPR reg) %{
predicate( UseSSE < 2 );
constraint(ALLOC_IN_RC(flt_reg0));
match(reg);
@@ -5257,7 +4678,7 @@ operand regFPR1(regF reg) %{
%}
// XMM register operands
-operand regX() %{
+operand regF() %{
predicate( UseSSE>=1 );
constraint(ALLOC_IN_RC(xmm_reg));
match(RegF);
@@ -6001,7 +5422,7 @@ pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
%}
// Conditional move double reg-reg
-pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
+pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
single_instruction;
dst : S4(write);
src : S3(read);
@@ -6010,7 +5431,7 @@ pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
%}
// Float reg-reg operation
-pipe_class fpu_reg(regD dst) %{
+pipe_class fpu_reg(regDPR dst) %{
instruction_count(2);
dst : S3(read);
DECODE : S0(2); // any 2 decoders
@@ -6018,7 +5439,7 @@ pipe_class fpu_reg(regD dst) %{
%}
// Float reg-reg operation
-pipe_class fpu_reg_reg(regD dst, regD src) %{
+pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
instruction_count(2);
dst : S4(write);
src : S3(read);
@@ -6027,7 +5448,7 @@ pipe_class fpu_reg_reg(regD dst, regD src) %{
%}
// Float reg-reg operation
-pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
+pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
instruction_count(3);
dst : S4(write);
src1 : S3(read);
@@ -6037,7 +5458,7 @@ pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
%}
// Float reg-reg operation
-pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
+pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
instruction_count(4);
dst : S4(write);
src1 : S3(read);
@@ -6048,7 +5469,7 @@ pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
%}
// Float reg-reg operation
-pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
+pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
instruction_count(4);
dst : S4(write);
src1 : S3(read);
@@ -6061,7 +5482,7 @@ pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
%}
// Float reg-mem operation
-pipe_class fpu_reg_mem(regD dst, memory mem) %{
+pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
instruction_count(2);
dst : S5(write);
mem : S3(read);
@@ -6072,7 +5493,7 @@ pipe_class fpu_reg_mem(regD dst, memory mem) %{
%}
// Float reg-mem operation
-pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
+pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
instruction_count(3);
dst : S5(write);
src1 : S3(read);
@@ -6084,7 +5505,7 @@ pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
%}
// Float mem-reg operation
-pipe_class fpu_mem_reg(memory mem, regD src) %{
+pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2);
src : S5(read);
mem : S3(read);
@@ -6094,7 +5515,7 @@ pipe_class fpu_mem_reg(memory mem, regD src) %{
MEM : S3; // any mem
%}
-pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
+pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
instruction_count(3);
src1 : S3(read);
src2 : S3(read);
@@ -6105,7 +5526,7 @@ pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
MEM : S3; // any mem
%}
-pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
+pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
instruction_count(3);
src1 : S3(read);
src2 : S3(read);
@@ -6134,7 +5555,7 @@ pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
MEM : S3(3); // any mem
%}
-pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
+pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
instruction_count(3);
src1 : S4(read);
mem : S4(read);
@@ -6145,7 +5566,7 @@ pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
%}
// Float load constant
-pipe_class fpu_reg_con(regD dst) %{
+pipe_class fpu_reg_con(regDPR dst) %{
instruction_count(2);
dst : S5(write);
D0 : S0; // big decoder only for the load
@@ -6155,7 +5576,7 @@ pipe_class fpu_reg_con(regD dst) %{
%}
// Float load constant
-pipe_class fpu_reg_reg_con(regD dst, regD src) %{
+pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
instruction_count(3);
dst : S5(write);
src : S3(read);
@@ -6870,18 +6291,21 @@ instruct loadL_volatile(stackSlotL dst, memory mem) %{
ins_pipe( fpu_reg_mem );
%}
-instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
+instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
match(Set dst (LoadL mem));
effect(TEMP tmp);
ins_cost(180);
format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
"MOVSD $dst,$tmp" %}
- ins_encode(enc_loadLX_volatile(mem, dst, tmp));
+ ins_encode %{
+ __ movdbl($tmp$$XMMRegister, $mem$$Address);
+ __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
-instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
+instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
match(Set dst (LoadL mem));
effect(TEMP tmp);
@@ -6890,7 +6314,12 @@ instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
"MOVD $dst.lo,$tmp\n\t"
"PSRLQ $tmp,32\n\t"
"MOVD $dst.hi,$tmp" %}
- ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
+ ins_encode %{
+ __ movdbl($tmp$$XMMRegister, $mem$$Address);
+ __ movdl($dst$$Register, $tmp$$XMMRegister);
+ __ psrlq($tmp$$XMMRegister, 32);
+ __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6929,7 +6358,7 @@ instruct loadKlass(eRegP dst, memory mem) %{
%}
// Load Double
-instruct loadD(regD dst, memory mem) %{
+instruct loadDPR(regDPR dst, memory mem) %{
predicate(UseSSE<=1);
match(Set dst (LoadD mem));
@@ -6938,42 +6367,48 @@ instruct loadD(regD dst, memory mem) %{
"FSTP $dst" %}
opcode(0xDD); /* DD /0 */
ins_encode( OpcP, RMopc_Mem(0x00,mem),
- Pop_Reg_D(dst) );
+ Pop_Reg_DPR(dst) );
ins_pipe( fpu_reg_mem );
%}
// Load Double to XMM
-instruct loadXD(regXD dst, memory mem) %{
+instruct loadD(regD dst, memory mem) %{
predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
match(Set dst (LoadD mem));
ins_cost(145);
format %{ "MOVSD $dst,$mem" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
+ ins_encode %{
+ __ movdbl ($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
-instruct loadXD_partial(regXD dst, memory mem) %{
+instruct loadD_partial(regD dst, memory mem) %{
predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
match(Set dst (LoadD mem));
ins_cost(145);
format %{ "MOVLPD $dst,$mem" %}
- ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
+ ins_encode %{
+ __ movdbl ($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load to XMM register (single-precision floating point)
// MOVSS instruction
-instruct loadX(regX dst, memory mem) %{
+instruct loadF(regF dst, memory mem) %{
predicate(UseSSE>=1);
match(Set dst (LoadF mem));
ins_cost(145);
format %{ "MOVSS $dst,$mem" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
+ ins_encode %{
+ __ movflt ($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load Float
-instruct loadF(regF dst, memory mem) %{
+instruct loadFPR(regFPR dst, memory mem) %{
predicate(UseSSE==0);
match(Set dst (LoadF mem));
@@ -6982,57 +6417,67 @@ instruct loadF(regF dst, memory mem) %{
"FSTP $dst" %}
opcode(0xD9); /* D9 /0 */
ins_encode( OpcP, RMopc_Mem(0x00,mem),
- Pop_Reg_F(dst) );
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_mem );
%}
// Load Aligned Packed Byte to XMM register
-instruct loadA8B(regXD dst, memory mem) %{
+instruct loadA8B(regD dst, memory mem) %{
predicate(UseSSE>=1);
match(Set dst (Load8B mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed8B" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load Aligned Packed Short to XMM register
-instruct loadA4S(regXD dst, memory mem) %{
+instruct loadA4S(regD dst, memory mem) %{
predicate(UseSSE>=1);
match(Set dst (Load4S mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed4S" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load Aligned Packed Char to XMM register
-instruct loadA4C(regXD dst, memory mem) %{
+instruct loadA4C(regD dst, memory mem) %{
predicate(UseSSE>=1);
match(Set dst (Load4C mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed4C" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load Aligned Packed Integer to XMM register
-instruct load2IU(regXD dst, memory mem) %{
+instruct load2IU(regD dst, memory mem) %{
predicate(UseSSE>=1);
match(Set dst (Load2I mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed2I" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load Aligned Packed Single to XMM
-instruct loadA2F(regXD dst, memory mem) %{
+instruct loadA2F(regD dst, memory mem) %{
predicate(UseSSE>=1);
match(Set dst (Load2F mem));
ins_cost(145);
format %{ "MOVQ $dst,$mem\t! packed2F" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
@@ -7139,8 +6584,8 @@ instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
ins_pipe( ialu_reg_long );
%}
-// The instruction usage is guarded by predicate in operand immF().
-instruct loadConF(regF dst, immF con) %{
+// The instruction usage is guarded by predicate in operand immFPR().
+instruct loadConFPR(regFPR dst, immFPR con) %{
match(Set dst con);
ins_cost(125);
format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
@@ -7152,8 +6597,8 @@ instruct loadConF(regF dst, immF con) %{
ins_pipe(fpu_reg_con);
%}
-// The instruction usage is guarded by predicate in operand immF0().
-instruct loadConF0(regF dst, immF0 con) %{
+// The instruction usage is guarded by predicate in operand immFPR0().
+instruct loadConFPR0(regFPR dst, immFPR0 con) %{
match(Set dst con);
ins_cost(125);
format %{ "FLDZ ST\n\t"
@@ -7165,8 +6610,8 @@ instruct loadConF0(regF dst, immF0 con) %{
ins_pipe(fpu_reg_con);
%}
-// The instruction usage is guarded by predicate in operand immF1().
-instruct loadConF1(regF dst, immF1 con) %{
+// The instruction usage is guarded by predicate in operand immFPR1().
+instruct loadConFPR1(regFPR dst, immFPR1 con) %{
match(Set dst con);
ins_cost(125);
format %{ "FLD1 ST\n\t"
@@ -7178,8 +6623,8 @@ instruct loadConF1(regF dst, immF1 con) %{
ins_pipe(fpu_reg_con);
%}
-// The instruction usage is guarded by predicate in operand immXF().
-instruct loadConX(regX dst, immXF con) %{
+// The instruction usage is guarded by predicate in operand immF().
+instruct loadConF(regF dst, immF con) %{
match(Set dst con);
ins_cost(125);
format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
@@ -7189,8 +6634,8 @@ instruct loadConX(regX dst, immXF con) %{
ins_pipe(pipe_slow);
%}
-// The instruction usage is guarded by predicate in operand immXF0().
-instruct loadConX0(regX dst, immXF0 src) %{
+// The instruction usage is guarded by predicate in operand immF0().
+instruct loadConF0(regF dst, immF0 src) %{
match(Set dst src);
ins_cost(100);
format %{ "XORPS $dst,$dst\t# float 0.0" %}
@@ -7200,8 +6645,8 @@ instruct loadConX0(regX dst, immXF0 src) %{
ins_pipe(pipe_slow);
%}
-// The instruction usage is guarded by predicate in operand immD().
-instruct loadConD(regD dst, immD con) %{
+// The instruction usage is guarded by predicate in operand immDPR().
+instruct loadConDPR(regDPR dst, immDPR con) %{
match(Set dst con);
ins_cost(125);
@@ -7214,8 +6659,8 @@ instruct loadConD(regD dst, immD con) %{
ins_pipe(fpu_reg_con);
%}
-// The instruction usage is guarded by predicate in operand immD0().
-instruct loadConD0(regD dst, immD0 con) %{
+// The instruction usage is guarded by predicate in operand immDPR0().
+instruct loadConDPR0(regDPR dst, immDPR0 con) %{
match(Set dst con);
ins_cost(125);
@@ -7228,8 +6673,8 @@ instruct loadConD0(regD dst, immD0 con) %{
ins_pipe(fpu_reg_con);
%}
-// The instruction usage is guarded by predicate in operand immD1().
-instruct loadConD1(regD dst, immD1 con) %{
+// The instruction usage is guarded by predicate in operand immDPR1().
+instruct loadConDPR1(regDPR dst, immDPR1 con) %{
match(Set dst con);
ins_cost(125);
@@ -7242,8 +6687,8 @@ instruct loadConD1(regD dst, immD1 con) %{
ins_pipe(fpu_reg_con);
%}
-// The instruction usage is guarded by predicate in operand immXD().
-instruct loadConXD(regXD dst, immXD con) %{
+// The instruction usage is guarded by predicate in operand immD().
+instruct loadConD(regD dst, immD con) %{
match(Set dst con);
ins_cost(125);
format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
@@ -7253,12 +6698,14 @@ instruct loadConXD(regXD dst, immXD con) %{
ins_pipe(pipe_slow);
%}
-// The instruction usage is guarded by predicate in operand immXD0().
-instruct loadConXD0(regXD dst, immXD0 src) %{
+// The instruction usage is guarded by predicate in operand immD0().
+instruct loadConD0(regD dst, immD0 src) %{
match(Set dst src);
ins_cost(100);
format %{ "XORPD $dst,$dst\t# double 0.0" %}
- ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
+ ins_encode %{
+ __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -7296,7 +6743,7 @@ instruct loadSSP(eRegP dst, stackSlotP src) %{
%}
// Load Stack Slot
-instruct loadSSF(regF dst, stackSlotF src) %{
+instruct loadSSF(regFPR dst, stackSlotF src) %{
match(Set dst src);
ins_cost(125);
@@ -7304,12 +6751,12 @@ instruct loadSSF(regF dst, stackSlotF src) %{
"FSTP $dst" %}
opcode(0xD9); /* D9 /0, FLD m32real */
ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
- Pop_Reg_F(dst) );
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_mem );
%}
// Load Stack Slot
-instruct loadSSD(regD dst, stackSlotD src) %{
+instruct loadSSD(regDPR dst, stackSlotD src) %{
match(Set dst src);
ins_cost(125);
@@ -7317,7 +6764,7 @@ instruct loadSSD(regD dst, stackSlotD src) %{
"FSTP $dst" %}
opcode(0xDD); /* DD /0, FLD m64real */
ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
- Pop_Reg_D(dst) );
+ Pop_Reg_DPR(dst) );
ins_pipe( fpu_reg_mem );
%}
@@ -7552,7 +6999,7 @@ instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
ins_pipe( fpu_reg_mem );
%}
-instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
+instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
match(Set mem (StoreL mem src));
effect( TEMP tmp, KILL cr );
@@ -7560,12 +7007,15 @@ instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %
format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
"MOVSD $tmp,$src\n\t"
"MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
- opcode(0x3B);
- ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
+ ins_encode %{
+ __ cmpl(rax, $mem$$Address);
+ __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
+ __ movdbl($mem$$Address, $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
-instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
+instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
match(Set mem (StoreL mem src));
effect( TEMP tmp2 , TEMP tmp, KILL cr );
@@ -7575,8 +7025,13 @@ instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFla
"MOVD $tmp2,$src.hi\n\t"
"PUNPCKLDQ $tmp,$tmp2\n\t"
"MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
- opcode(0x3B);
- ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
+ ins_encode %{
+ __ cmpl(rax, $mem$$Address);
+ __ movdl($tmp$$XMMRegister, $src$$Register);
+ __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+ __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ movdbl($mem$$Address, $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -7638,32 +7093,38 @@ instruct storeImmB(memory mem, immI8 src) %{
%}
// Store Aligned Packed Byte XMM register to memory
-instruct storeA8B(memory mem, regXD src) %{
+instruct storeA8B(memory mem, regD src) %{
predicate(UseSSE>=1);
match(Set mem (Store8B mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed8B" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Store Aligned Packed Char/Short XMM register to memory
-instruct storeA4C(memory mem, regXD src) %{
+instruct storeA4C(memory mem, regD src) %{
predicate(UseSSE>=1);
match(Set mem (Store4C mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed4C" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Store Aligned Packed Integer XMM register to memory
-instruct storeA2I(memory mem, regXD src) %{
+instruct storeA2I(memory mem, regD src) %{
predicate(UseSSE>=1);
match(Set mem (Store2I mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed2I" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -7679,118 +7140,124 @@ instruct storeImmCM(memory mem, immI8 src) %{
%}
// Store Double
-instruct storeD( memory mem, regDPR1 src) %{
+instruct storeDPR( memory mem, regDPR1 src) %{
predicate(UseSSE<=1);
match(Set mem (StoreD mem src));
ins_cost(100);
format %{ "FST_D $mem,$src" %}
opcode(0xDD); /* DD /2 */
- ins_encode( enc_FP_store(mem,src) );
+ ins_encode( enc_FPR_store(mem,src) );
ins_pipe( fpu_mem_reg );
%}
// Store double does rounding on x86
-instruct storeD_rounded( memory mem, regDPR1 src) %{
+instruct storeDPR_rounded( memory mem, regDPR1 src) %{
predicate(UseSSE<=1);
match(Set mem (StoreD mem (RoundDouble src)));
ins_cost(100);
format %{ "FST_D $mem,$src\t# round" %}
opcode(0xDD); /* DD /2 */
- ins_encode( enc_FP_store(mem,src) );
+ ins_encode( enc_FPR_store(mem,src) );
ins_pipe( fpu_mem_reg );
%}
// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
-instruct storeXD(memory mem, regXD src) %{
+instruct storeD(memory mem, regD src) %{
predicate(UseSSE>=2);
match(Set mem (StoreD mem src));
ins_cost(95);
format %{ "MOVSD $mem,$src" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
+ ins_encode %{
+ __ movdbl($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
-instruct storeX(memory mem, regX src) %{
+instruct storeF(memory mem, regF src) %{
predicate(UseSSE>=1);
match(Set mem (StoreF mem src));
ins_cost(95);
format %{ "MOVSS $mem,$src" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
+ ins_encode %{
+ __ movflt($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Store Aligned Packed Single Float XMM register to memory
-instruct storeA2F(memory mem, regXD src) %{
+instruct storeA2F(memory mem, regD src) %{
predicate(UseSSE>=1);
match(Set mem (Store2F mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed2F" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Store Float
-instruct storeF( memory mem, regFPR1 src) %{
+instruct storeFPR( memory mem, regFPR1 src) %{
predicate(UseSSE==0);
match(Set mem (StoreF mem src));
ins_cost(100);
format %{ "FST_S $mem,$src" %}
opcode(0xD9); /* D9 /2 */
- ins_encode( enc_FP_store(mem,src) );
+ ins_encode( enc_FPR_store(mem,src) );
ins_pipe( fpu_mem_reg );
%}
// Store Float does rounding on x86
-instruct storeF_rounded( memory mem, regFPR1 src) %{
+instruct storeFPR_rounded( memory mem, regFPR1 src) %{
predicate(UseSSE==0);
match(Set mem (StoreF mem (RoundFloat src)));
ins_cost(100);
format %{ "FST_S $mem,$src\t# round" %}
opcode(0xD9); /* D9 /2 */
- ins_encode( enc_FP_store(mem,src) );
+ ins_encode( enc_FPR_store(mem,src) );
ins_pipe( fpu_mem_reg );
%}
// Store Float does rounding on x86
-instruct storeF_Drounded( memory mem, regDPR1 src) %{
+instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
predicate(UseSSE<=1);
match(Set mem (StoreF mem (ConvD2F src)));
ins_cost(100);
format %{ "FST_S $mem,$src\t# D-round" %}
opcode(0xD9); /* D9 /2 */
- ins_encode( enc_FP_store(mem,src) );
+ ins_encode( enc_FPR_store(mem,src) );
ins_pipe( fpu_mem_reg );
%}
// Store immediate Float value (it is faster than store from FPU register)
-// The instruction usage is guarded by predicate in operand immF().
-instruct storeF_imm( memory mem, immF src) %{
+// The instruction usage is guarded by predicate in operand immFPR().
+instruct storeFPR_imm( memory mem, immFPR src) %{
match(Set mem (StoreF mem src));
ins_cost(50);
format %{ "MOV $mem,$src\t# store float" %}
opcode(0xC7); /* C7 /0 */
- ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
+ ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
ins_pipe( ialu_mem_imm );
%}
// Store immediate Float value (it is faster than store from XMM register)
-// The instruction usage is guarded by predicate in operand immXF().
-instruct storeX_imm( memory mem, immXF src) %{
+// The instruction usage is guarded by predicate in operand immF().
+instruct storeF_imm( memory mem, immF src) %{
match(Set mem (StoreF mem src));
ins_cost(50);
format %{ "MOV $mem,$src\t# store float" %}
opcode(0xC7); /* C7 /0 */
- ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src ));
+ ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
ins_pipe( ialu_mem_imm );
%}
@@ -7901,6 +7368,16 @@ instruct unnecessary_membar_volatile() %{
ins_pipe(empty);
%}
+instruct membar_storestore() %{
+ match(MemBarStoreStore);
+ ins_cost(0);
+
+ size(0);
+ format %{ "MEMBAR-storestore (empty encoding)" %}
+ ins_encode( );
+ ins_pipe(empty);
+%}
+
//----------Move Instructions--------------------------------------------------
instruct castX2P(eAXRegP dst, eAXRegI src) %{
match(Set dst (CastX2P src));
@@ -8088,29 +7565,29 @@ instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
//%}
// Conditional move
-instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
+instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
ins_cost(200);
format %{ "FCMOV$cop $dst,$src\t# double" %}
opcode(0xDA);
- ins_encode( enc_cmov_d(cop,src) );
- ins_pipe( pipe_cmovD_reg );
+ ins_encode( enc_cmov_dpr(cop,src) );
+ ins_pipe( pipe_cmovDPR_reg );
%}
// Conditional move
-instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
+instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
predicate(UseSSE==0);
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
ins_cost(200);
format %{ "FCMOV$cop $dst,$src\t# float" %}
opcode(0xDA);
- ins_encode( enc_cmov_d(cop,src) );
- ins_pipe( pipe_cmovD_reg );
+ ins_encode( enc_cmov_dpr(cop,src) );
+ ins_pipe( pipe_cmovDPR_reg );
%}
// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
-instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
+instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -8118,12 +7595,12 @@ instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
"MOV $dst,$src\t# double\n"
"skip:" %}
opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
- ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
- ins_pipe( pipe_cmovD_reg );
+ ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
+ ins_pipe( pipe_cmovDPR_reg );
%}
// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
-instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
+instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
predicate(UseSSE==0);
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -8131,12 +7608,12 @@ instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
"MOV $dst,$src\t# float\n"
"skip:" %}
opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
- ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
- ins_pipe( pipe_cmovD_reg );
+ ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
+ ins_pipe( pipe_cmovDPR_reg );
%}
// No CMOVE with SSE/SSE2
-instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
+instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
predicate (UseSSE>=1);
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -8154,7 +7631,7 @@ instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
%}
// No CMOVE with SSE/SSE2
-instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
+instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
predicate (UseSSE>=2);
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -8172,7 +7649,7 @@ instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
%}
// unsigned version
-instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
+instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
predicate (UseSSE>=1);
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -8189,17 +7666,17 @@ instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
ins_pipe( pipe_slow );
%}
-instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
+instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
predicate (UseSSE>=1);
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovX_regU(cop, cr, dst, src);
+ fcmovF_regU(cop, cr, dst, src);
%}
%}
// unsigned version
-instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
+instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
predicate (UseSSE>=2);
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -8216,12 +7693,12 @@ instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
ins_pipe( pipe_slow );
%}
-instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
+instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
predicate (UseSSE>=2);
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovXD_regU(cop, cr, dst, src);
+ fcmovD_regU(cop, cr, dst, src);
%}
%}
@@ -8440,7 +7917,7 @@ instruct loadPLocked(eRegP dst, memory mem) %{
%}
// LoadLong-locked - same as a volatile long load when used with compare-swap
-instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
+instruct loadLLocked(stackSlotL dst, memory mem) %{
predicate(UseSSE<=1);
match(Set dst (LoadLLocked mem));
@@ -8451,18 +7928,21 @@ instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
ins_pipe( fpu_reg_mem );
%}
-instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
+instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{
predicate(UseSSE>=2);
match(Set dst (LoadLLocked mem));
effect(TEMP tmp);
ins_cost(180);
format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
"MOVSD $dst,$tmp" %}
- ins_encode(enc_loadLX_volatile(mem, dst, tmp));
+ ins_encode %{
+ __ movdbl($tmp$$XMMRegister, $mem$$Address);
+ __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
-instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
+instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{
predicate(UseSSE>=2);
match(Set dst (LoadLLocked mem));
effect(TEMP tmp);
@@ -8471,7 +7951,12 @@ instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
"MOVD $dst.lo,$tmp\n\t"
"PSRLQ $tmp,32\n\t"
"MOVD $dst.hi,$tmp" %}
- ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
+ ins_encode %{
+ __ movdbl($tmp$$XMMRegister, $mem$$Address);
+ __ movdl($dst$$Register, $tmp$$XMMRegister);
+ __ psrlq($tmp$$XMMRegister, 32);
+ __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -10054,7 +9539,7 @@ instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
// Compare & branch
// P6 version of float compare, sets condition codes in EFLAGS
-instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
+instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
predicate(VM_Version::supports_cmov() && UseSSE <=1);
match(Set cr (CmpD src1 src2));
effect(KILL rax);
@@ -10066,26 +9551,26 @@ instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
"SAHF\n"
"exit:\tNOP // avoid branch to branch" %}
opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2),
cmpF_P6_fixup );
ins_pipe( pipe_slow );
%}
-instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
+instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
predicate(VM_Version::supports_cmov() && UseSSE <=1);
match(Set cr (CmpD src1 src2));
ins_cost(150);
format %{ "FLD $src1\n\t"
"FUCOMIP ST,$src2 // P6 instruction" %}
opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2));
ins_pipe( pipe_slow );
%}
// Compare & branch
-instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
+instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
predicate(UseSSE<=1);
match(Set cr (CmpD src1 src2));
effect(KILL rax);
@@ -10098,138 +9583,140 @@ instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
"MOV AH,1\t# unordered treat as LT\n"
"flags:\tSAHF" %}
opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2),
fpu_flags);
ins_pipe( pipe_slow );
%}
// Compare vs zero into -1,0,1
-instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE<=1);
match(Set dst (CmpD3 src1 zero));
effect(KILL cr, KILL rax);
ins_cost(280);
format %{ "FTSTD $dst,$src1" %}
opcode(0xE4, 0xD9);
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcS, OpcP, PopFPU,
CmpF_Result(dst));
ins_pipe( pipe_slow );
%}
// Compare into -1,0,1
-instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE<=1);
match(Set dst (CmpD3 src1 src2));
effect(KILL cr, KILL rax);
ins_cost(300);
format %{ "FCMPD $dst,$src1,$src2" %}
opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2),
CmpF_Result(dst));
ins_pipe( pipe_slow );
%}
// float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
+instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
predicate(UseSSE>=2);
- match(Set cr (CmpD dst src));
- effect(KILL rax);
- ins_cost(125);
- format %{ "COMISD $dst,$src\n"
- "\tJNP exit\n"
- "\tMOV ah,1 // saw a NaN, set CF\n"
- "\tSAHF\n"
- "exit:\tNOP // avoid branch to branch" %}
- opcode(0x66, 0x0F, 0x2F);
- ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
+ match(Set cr (CmpD src1 src2));
+ ins_cost(145);
+ format %{ "UCOMISD $src1,$src2\n\t"
+ "JNP,s exit\n\t"
+ "PUSHF\t# saw NaN, set CF\n\t"
+ "AND [rsp], #0xffffff2b\n\t"
+ "POPF\n"
+ "exit:" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe( pipe_slow );
%}
-instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
+instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
predicate(UseSSE>=2);
- match(Set cr (CmpD dst src));
+ match(Set cr (CmpD src1 src2));
ins_cost(100);
- format %{ "COMISD $dst,$src" %}
- opcode(0x66, 0x0F, 0x2F);
- ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+ format %{ "UCOMISD $src1,$src2" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
+instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
predicate(UseSSE>=2);
- match(Set cr (CmpD dst (LoadD src)));
- effect(KILL rax);
+ match(Set cr (CmpD src1 (LoadD src2)));
ins_cost(145);
- format %{ "COMISD $dst,$src\n"
- "\tJNP exit\n"
- "\tMOV ah,1 // saw a NaN, set CF\n"
- "\tSAHF\n"
- "exit:\tNOP // avoid branch to branch" %}
- opcode(0x66, 0x0F, 0x2F);
- ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
+ format %{ "UCOMISD $src1,$src2\n\t"
+ "JNP,s exit\n\t"
+ "PUSHF\t# saw NaN, set CF\n\t"
+ "AND [rsp], #0xffffff2b\n\t"
+ "POPF\n"
+ "exit:" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe( pipe_slow );
%}
-instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
+instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
predicate(UseSSE>=2);
- match(Set cr (CmpD dst (LoadD src)));
+ match(Set cr (CmpD src1 (LoadD src2)));
ins_cost(100);
- format %{ "COMISD $dst,$src" %}
- opcode(0x66, 0x0F, 0x2F);
- ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
+ format %{ "UCOMISD $src1,$src2" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Compare into -1,0,1 in XMM
-instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
+instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
predicate(UseSSE>=2);
match(Set dst (CmpD3 src1 src2));
effect(KILL cr);
ins_cost(255);
- format %{ "XOR $dst,$dst\n"
- "\tCOMISD $src1,$src2\n"
- "\tJP,s nan\n"
- "\tJEQ,s exit\n"
- "\tJA,s inc\n"
- "nan:\tDEC $dst\n"
- "\tJMP,s exit\n"
- "inc:\tINC $dst\n"
- "exit:"
- %}
- opcode(0x66, 0x0F, 0x2F);
- ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
- CmpX_Result(dst));
+ format %{ "UCOMISD $src1, $src2\n\t"
+ "MOV $dst, #-1\n\t"
+ "JP,s done\n\t"
+ "JB,s done\n\t"
+ "SETNE $dst\n\t"
+ "MOVZB $dst, $dst\n"
+ "done:" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe( pipe_slow );
%}
// Compare into -1,0,1 in XMM and memory
-instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
+instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
predicate(UseSSE>=2);
- match(Set dst (CmpD3 src1 (LoadD mem)));
+ match(Set dst (CmpD3 src1 (LoadD src2)));
effect(KILL cr);
ins_cost(275);
- format %{ "COMISD $src1,$mem\n"
- "\tMOV $dst,0\t\t# do not blow flags\n"
- "\tJP,s nan\n"
- "\tJEQ,s exit\n"
- "\tJA,s inc\n"
- "nan:\tDEC $dst\n"
- "\tJMP,s exit\n"
- "inc:\tINC $dst\n"
- "exit:"
- %}
- opcode(0x66, 0x0F, 0x2F);
- ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
- LdImmI(dst,0x0), CmpX_Result(dst));
+ format %{ "UCOMISD $src1, $src2\n\t"
+ "MOV $dst, #-1\n\t"
+ "JP,s done\n\t"
+ "JB,s done\n\t"
+ "SETNE $dst\n\t"
+ "MOVZB $dst, $dst\n"
+ "done:" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe( pipe_slow );
%}
-instruct subD_reg(regD dst, regD src) %{
+instruct subDPR_reg(regDPR dst, regDPR src) %{
predicate (UseSSE <=1);
match(Set dst (SubD dst src));
@@ -10237,12 +9724,12 @@ instruct subD_reg(regD dst, regD src) %{
"DSUBp $dst,ST" %}
opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
ins_cost(150);
- ins_encode( Push_Reg_D(src),
+ ins_encode( Push_Reg_DPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
-instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
predicate (UseSSE <=1);
match(Set dst (RoundDouble (SubD src1 src2)));
ins_cost(250);
@@ -10251,13 +9738,13 @@ instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
"DSUB ST,$src1\n\t"
"FSTP_D $dst\t# D-round" %}
opcode(0xD8, 0x5);
- ins_encode( Push_Reg_D(src2),
- OpcP, RegOpc(src1), Pop_Mem_D(dst) );
+ ins_encode( Push_Reg_DPR(src2),
+ OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
-instruct subD_reg_mem(regD dst, memory src) %{
+instruct subDPR_reg_mem(regDPR dst, memory src) %{
predicate (UseSSE <=1);
match(Set dst (SubD dst (LoadD src)));
ins_cost(150);
@@ -10270,7 +9757,7 @@ instruct subD_reg_mem(regD dst, memory src) %{
ins_pipe( fpu_reg_mem );
%}
-instruct absD_reg(regDPR1 dst, regDPR1 src) %{
+instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst (AbsD src));
ins_cost(100);
@@ -10280,15 +9767,7 @@ instruct absD_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( fpu_reg_reg );
%}
-instruct absXD_reg( regXD dst ) %{
- predicate(UseSSE>=2);
- match(Set dst (AbsD dst));
- format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
- ins_encode( AbsXD_encoding(dst));
- ins_pipe( pipe_slow );
-%}
-
-instruct negD_reg(regDPR1 dst, regDPR1 src) %{
+instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate(UseSSE<=1);
match(Set dst (NegD src));
ins_cost(100);
@@ -10298,18 +9777,7 @@ instruct negD_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( fpu_reg_reg );
%}
-instruct negXD_reg( regXD dst ) %{
- predicate(UseSSE>=2);
- match(Set dst (NegD dst));
- format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
- ins_encode %{
- __ xorpd($dst$$XMMRegister,
- ExternalAddress((address)double_signflip_pool));
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct addD_reg(regD dst, regD src) %{
+instruct addDPR_reg(regDPR dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (AddD dst src));
format %{ "FLD $src\n\t"
@@ -10317,13 +9785,13 @@ instruct addD_reg(regD dst, regD src) %{
size(4);
ins_cost(150);
opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
- ins_encode( Push_Reg_D(src),
+ ins_encode( Push_Reg_DPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
-instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
predicate(UseSSE<=1);
match(Set dst (RoundDouble (AddD src1 src2)));
ins_cost(250);
@@ -10332,13 +9800,13 @@ instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
"DADD ST,$src1\n\t"
"FSTP_D $dst\t# D-round" %}
opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
- ins_encode( Push_Reg_D(src2),
- OpcP, RegOpc(src1), Pop_Mem_D(dst) );
+ ins_encode( Push_Reg_DPR(src2),
+ OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
-instruct addD_reg_mem(regD dst, memory src) %{
+instruct addDPR_reg_mem(regDPR dst, memory src) %{
predicate(UseSSE<=1);
match(Set dst (AddD dst (LoadD src)));
ins_cost(150);
@@ -10352,7 +9820,7 @@ instruct addD_reg_mem(regD dst, memory src) %{
%}
// add-to-memory
-instruct addD_mem_reg(memory dst, regD src) %{
+instruct addDPR_mem_reg(memory dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
ins_cost(150);
@@ -10368,7 +9836,7 @@ instruct addD_mem_reg(memory dst, regD src) %{
ins_pipe( fpu_reg_mem );
%}
-instruct addD_reg_imm1(regD dst, immD1 con) %{
+instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
predicate(UseSSE<=1);
match(Set dst (AddD dst con));
ins_cost(125);
@@ -10381,7 +9849,7 @@ instruct addD_reg_imm1(regD dst, immD1 con) %{
ins_pipe(fpu_reg);
%}
-instruct addD_reg_imm(regD dst, immD con) %{
+instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
match(Set dst (AddD dst con));
ins_cost(200);
@@ -10394,7 +9862,7 @@ instruct addD_reg_imm(regD dst, immD con) %{
ins_pipe(fpu_reg_mem);
%}
-instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
+instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
match(Set dst (RoundDouble (AddD src con)));
ins_cost(200);
@@ -10409,124 +9877,14 @@ instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
ins_pipe(fpu_mem_reg_con);
%}
-// Add two double precision floating point values in xmm
-instruct addXD_reg(regXD dst, regXD src) %{
- predicate(UseSSE>=2);
- match(Set dst (AddD dst src));
- format %{ "ADDSD $dst,$src" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct addXD_imm(regXD dst, immXD con) %{
- predicate(UseSSE>=2);
- match(Set dst (AddD dst con));
- format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
- ins_encode %{
- __ addsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct addXD_mem(regXD dst, memory mem) %{
- predicate(UseSSE>=2);
- match(Set dst (AddD dst (LoadD mem)));
- format %{ "ADDSD $dst,$mem" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-// Sub two double precision floating point values in xmm
-instruct subXD_reg(regXD dst, regXD src) %{
- predicate(UseSSE>=2);
- match(Set dst (SubD dst src));
- format %{ "SUBSD $dst,$src" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct subXD_imm(regXD dst, immXD con) %{
- predicate(UseSSE>=2);
- match(Set dst (SubD dst con));
- format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
- ins_encode %{
- __ subsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct subXD_mem(regXD dst, memory mem) %{
- predicate(UseSSE>=2);
- match(Set dst (SubD dst (LoadD mem)));
- format %{ "SUBSD $dst,$mem" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-// Mul two double precision floating point values in xmm
-instruct mulXD_reg(regXD dst, regXD src) %{
- predicate(UseSSE>=2);
- match(Set dst (MulD dst src));
- format %{ "MULSD $dst,$src" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct mulXD_imm(regXD dst, immXD con) %{
- predicate(UseSSE>=2);
- match(Set dst (MulD dst con));
- format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
- ins_encode %{
- __ mulsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct mulXD_mem(regXD dst, memory mem) %{
- predicate(UseSSE>=2);
- match(Set dst (MulD dst (LoadD mem)));
- format %{ "MULSD $dst,$mem" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-// Div two double precision floating point values in xmm
-instruct divXD_reg(regXD dst, regXD src) %{
- predicate(UseSSE>=2);
- match(Set dst (DivD dst src));
- format %{ "DIVSD $dst,$src" %}
- opcode(0xF2, 0x0F, 0x5E);
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct divXD_imm(regXD dst, immXD con) %{
- predicate(UseSSE>=2);
- match(Set dst (DivD dst con));
- format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
- ins_encode %{
- __ divsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct divXD_mem(regXD dst, memory mem) %{
- predicate(UseSSE>=2);
- match(Set dst (DivD dst (LoadD mem)));
- format %{ "DIVSD $dst,$mem" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-
-instruct mulD_reg(regD dst, regD src) %{
+instruct mulDPR_reg(regDPR dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (MulD dst src));
format %{ "FLD $src\n\t"
"DMULp $dst,ST" %}
opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
ins_cost(150);
- ins_encode( Push_Reg_D(src),
+ ins_encode( Push_Reg_DPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
@@ -10539,7 +9897,7 @@ instruct mulD_reg(regD dst, regD src) %{
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
-instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
+instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
match(Set dst (MulD dst src));
ins_cost(1); // Select this instruction for all strict FP double multiplies
@@ -10552,13 +9910,13 @@ instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
"DMULp $dst,ST\n\t" %}
opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
ins_encode( strictfp_bias1(dst),
- Push_Reg_D(src),
+ Push_Reg_DPR(src),
OpcP, RegOpc(dst),
strictfp_bias2(dst) );
ins_pipe( fpu_reg_reg );
%}
-instruct mulD_reg_imm(regD dst, immD con) %{
+instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
match(Set dst (MulD dst con));
ins_cost(200);
@@ -10572,7 +9930,7 @@ instruct mulD_reg_imm(regD dst, immD con) %{
%}
-instruct mulD_reg_mem(regD dst, memory src) %{
+instruct mulDPR_reg_mem(regDPR dst, memory src) %{
predicate( UseSSE<=1 );
match(Set dst (MulD dst (LoadD src)));
ins_cost(200);
@@ -10586,7 +9944,7 @@ instruct mulD_reg_mem(regD dst, memory src) %{
//
// Cisc-alternate to reg-reg multiply
-instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
+instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
predicate( UseSSE<=1 );
match(Set dst (MulD src (LoadD mem)));
ins_cost(250);
@@ -10595,17 +9953,17 @@ instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
"FSTP_D $dst" %}
opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
- OpcReg_F(src),
- Pop_Reg_D(dst) );
+ OpcReg_FPR(src),
+ Pop_Reg_DPR(dst) );
ins_pipe( fpu_reg_reg_mem );
%}
-// MACRO3 -- addD a mulD
+// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
-instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
+instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
predicate( UseSSE<=1 );
match(Set src2 (AddD (MulD src0 src1) src2));
format %{ "FLD $src0\t# ===MACRO3d===\n\t"
@@ -10613,29 +9971,29 @@ instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
"DADDp $src2,ST" %}
ins_cost(250);
opcode(0xDD); /* LoadD DD /0 */
- ins_encode( Push_Reg_F(src0),
+ ins_encode( Push_Reg_FPR(src0),
FMul_ST_reg(src1),
FAddP_reg_ST(src2) );
ins_pipe( fpu_reg_reg_reg );
%}
-// MACRO3 -- subD a mulD
-instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
+// MACRO3 -- subDPR a mulDPR
+instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
predicate( UseSSE<=1 );
match(Set src2 (SubD (MulD src0 src1) src2));
format %{ "FLD $src0\t# ===MACRO3d===\n\t"
"DMUL ST,$src1\n\t"
"DSUBRp $src2,ST" %}
ins_cost(250);
- ins_encode( Push_Reg_F(src0),
+ ins_encode( Push_Reg_FPR(src0),
FMul_ST_reg(src1),
Opcode(0xDE), Opc_plus(0xE0,src2));
ins_pipe( fpu_reg_reg_reg );
%}
-instruct divD_reg(regD dst, regD src) %{
+instruct divDPR_reg(regDPR dst, regDPR src) %{
predicate( UseSSE<=1 );
match(Set dst (DivD dst src));
@@ -10643,7 +10001,7 @@ instruct divD_reg(regD dst, regD src) %{
"FDIVp $dst,ST" %}
opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
ins_cost(150);
- ins_encode( Push_Reg_D(src),
+ ins_encode( Push_Reg_DPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
@@ -10656,7 +10014,7 @@ instruct divD_reg(regD dst, regD src) %{
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
-instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
+instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst (DivD dst src));
predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
@@ -10670,13 +10028,13 @@ instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
"DMULp $dst,ST\n\t" %}
opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
ins_encode( strictfp_bias1(dst),
- Push_Reg_D(src),
+ Push_Reg_DPR(src),
OpcP, RegOpc(dst),
strictfp_bias2(dst) );
ins_pipe( fpu_reg_reg );
%}
-instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
match(Set dst (RoundDouble (DivD src1 src2)));
@@ -10684,27 +10042,27 @@ instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
"FDIV ST,$src2\n\t"
"FSTP_D $dst\t# D-round" %}
opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
- ins_encode( Push_Reg_D(src1),
- OpcP, RegOpc(src2), Pop_Mem_D(dst) );
+ ins_encode( Push_Reg_DPR(src1),
+ OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
-instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
+instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE<=1);
match(Set dst (ModD dst src));
- effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
+ effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
format %{ "DMOD $dst,$src" %}
ins_cost(250);
- ins_encode(Push_Reg_Mod_D(dst, src),
- emitModD(),
- Push_Result_Mod_D(src),
- Pop_Reg_D(dst));
+ ins_encode(Push_Reg_Mod_DPR(dst, src),
+ emitModDPR(),
+ Push_Result_Mod_DPR(src),
+ Pop_Reg_DPR(dst));
ins_pipe( pipe_slow );
%}
-instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
+instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE>=2);
match(Set dst (ModD src0 src1));
effect(KILL rax, KILL cr);
@@ -10725,11 +10083,11 @@ instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr)
"\tFSTP ST0\t # Restore FPU Stack"
%}
ins_cost(250);
- ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
+ ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
ins_pipe( pipe_slow );
%}
-instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
+instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst (SinD src));
ins_cost(1800);
@@ -10739,18 +10097,18 @@ instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( pipe_slow );
%}
-instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
+instruct sinD_reg(regD dst, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst (SinD dst));
- effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+ effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
ins_cost(1800);
format %{ "DSIN $dst" %}
opcode(0xD9, 0xFE);
- ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
+ ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
-instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
+instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst (CosD src));
ins_cost(1800);
@@ -10760,18 +10118,18 @@ instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( pipe_slow );
%}
-instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
+instruct cosD_reg(regD dst, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst (CosD dst));
- effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+ effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
ins_cost(1800);
format %{ "DCOS $dst" %}
opcode(0xD9, 0xFF);
- ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
+ ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
-instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
+instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst(TanD src));
format %{ "DTAN $dst" %}
@@ -10780,50 +10138,50 @@ instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( pipe_slow );
%}
-instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
+instruct tanD_reg(regD dst, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst(TanD dst));
- effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+ effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
format %{ "DTAN $dst" %}
- ins_encode( Push_SrcXD(dst),
+ ins_encode( Push_SrcD(dst),
Opcode(0xD9), Opcode(0xF2), // fptan
Opcode(0xDD), Opcode(0xD8), // fstp st
- Push_ResultXD(dst) );
+ Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
-instruct atanD_reg(regD dst, regD src) %{
+instruct atanDPR_reg(regDPR dst, regDPR src) %{
predicate (UseSSE<=1);
match(Set dst(AtanD dst src));
format %{ "DATA $dst,$src" %}
opcode(0xD9, 0xF3);
- ins_encode( Push_Reg_D(src),
+ ins_encode( Push_Reg_DPR(src),
OpcP, OpcS, RegOpc(dst) );
ins_pipe( pipe_slow );
%}
-instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst(AtanD dst src));
- effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+ effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
format %{ "DATA $dst,$src" %}
opcode(0xD9, 0xF3);
- ins_encode( Push_SrcXD(src),
- OpcP, OpcS, Push_ResultXD(dst) );
+ ins_encode( Push_SrcD(src),
+ OpcP, OpcS, Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
-instruct sqrtD_reg(regD dst, regD src) %{
+instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
predicate (UseSSE<=1);
match(Set dst (SqrtD src));
format %{ "DSQRT $dst,$src" %}
opcode(0xFA, 0xD9);
- ins_encode( Push_Reg_D(src),
- OpcS, OpcP, Pop_Reg_D(dst) );
+ ins_encode( Push_Reg_DPR(src),
+ OpcS, OpcP, Pop_Reg_DPR(dst) );
ins_pipe( pipe_slow );
%}
-instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
predicate (UseSSE<=1);
match(Set Y (PowD X Y)); // Raise X to the Yth power
effect(KILL rax, KILL rbx, KILL rcx);
@@ -10852,14 +10210,14 @@ instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
"ADD ESP,8"
%}
ins_encode( push_stack_temp_qword,
- Push_Reg_D(X),
+ Push_Reg_DPR(X),
Opcode(0xD9), Opcode(0xF1), // fyl2x
pow_exp_core_encoding,
pop_stack_temp_qword);
ins_pipe( pipe_slow );
%}
-instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
+instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
predicate (UseSSE>=2);
match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
@@ -10897,12 +10255,12 @@ instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax,
push_xmm_to_fpr1(src0),
Opcode(0xD9), Opcode(0xF1), // fyl2x
pow_exp_core_encoding,
- Push_ResultXD(dst) );
+ Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
-instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
predicate (UseSSE<=1);
match(Set dpr1 (ExpD dpr1));
effect(KILL rax, KILL rbx, KILL rcx);
@@ -10938,7 +10296,7 @@ instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
ins_pipe( pipe_slow );
%}
-instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
predicate (UseSSE>=2);
match(Set dst (ExpD src));
effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
@@ -10969,17 +10327,17 @@ instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx,
"MOVSD $dst,[ESP]\n\t"
"ADD ESP,8"
%}
- ins_encode( Push_SrcXD(src),
+ ins_encode( Push_SrcD(src),
Opcode(0xD9), Opcode(0xEA), // fldl2e
Opcode(0xDE), Opcode(0xC9), // fmulp
pow_exp_core_encoding,
- Push_ResultXD(dst) );
+ Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
-instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
+instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
// The source Double operand on FPU stack
match(Set dst (Log10D src));
@@ -10997,7 +10355,7 @@ instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( pipe_slow );
%}
-instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
predicate (UseSSE>=2);
effect(KILL cr);
match(Set dst (Log10D src));
@@ -11007,14 +10365,14 @@ instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
%}
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
- Push_SrcXD(src),
+ Push_SrcD(src),
Opcode(0xD9), Opcode(0xF1), // fyl2x
- Push_ResultXD(dst));
+ Push_ResultD(dst));
ins_pipe( pipe_slow );
%}
-instruct logD_reg(regDPR1 dst, regDPR1 src) %{
+instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
// The source Double operand on FPU stack
match(Set dst (LogD src));
@@ -11032,7 +10390,7 @@ instruct logD_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( pipe_slow );
%}
-instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
predicate (UseSSE>=2);
effect(KILL cr);
// The source and result Double operands in XMM registers
@@ -11043,9 +10401,9 @@ instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
"FYL2X \t\t\t# Q=Log_e*Log_2(x)"
%}
ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
- Push_SrcXD(src),
+ Push_SrcD(src),
Opcode(0xD9), Opcode(0xF1), // fyl2x
- Push_ResultXD(dst));
+ Push_ResultD(dst));
ins_pipe( pipe_slow );
%}
@@ -11066,7 +10424,7 @@ instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
// exit:
// P6 version of float compare, sets condition codes in EFLAGS
-instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
+instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
predicate(VM_Version::supports_cmov() && UseSSE == 0);
match(Set cr (CmpF src1 src2));
effect(KILL rax);
@@ -11078,27 +10436,27 @@ instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
"SAHF\n"
"exit:\tNOP // avoid branch to branch" %}
opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2),
cmpF_P6_fixup );
ins_pipe( pipe_slow );
%}
-instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
+instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
predicate(VM_Version::supports_cmov() && UseSSE == 0);
match(Set cr (CmpF src1 src2));
ins_cost(100);
format %{ "FLD $src1\n\t"
"FUCOMIP ST,$src2 // P6 instruction" %}
opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2));
ins_pipe( pipe_slow );
%}
// Compare & branch
-instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
+instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
predicate(UseSSE == 0);
match(Set cr (CmpF src1 src2));
effect(KILL rax);
@@ -11111,328 +10469,190 @@ instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
"MOV AH,1\t# unordered treat as LT\n"
"flags:\tSAHF" %}
opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2),
fpu_flags);
ins_pipe( pipe_slow );
%}
// Compare vs zero into -1,0,1
-instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE == 0);
match(Set dst (CmpF3 src1 zero));
effect(KILL cr, KILL rax);
ins_cost(280);
format %{ "FTSTF $dst,$src1" %}
opcode(0xE4, 0xD9);
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcS, OpcP, PopFPU,
CmpF_Result(dst));
ins_pipe( pipe_slow );
%}
// Compare into -1,0,1
-instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE == 0);
match(Set dst (CmpF3 src1 src2));
effect(KILL cr, KILL rax);
ins_cost(300);
format %{ "FCMPF $dst,$src1,$src2" %}
opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2),
CmpF_Result(dst));
ins_pipe( pipe_slow );
%}
// float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
+instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
predicate(UseSSE>=1);
- match(Set cr (CmpF dst src));
- effect(KILL rax);
+ match(Set cr (CmpF src1 src2));
ins_cost(145);
- format %{ "COMISS $dst,$src\n"
- "\tJNP exit\n"
- "\tMOV ah,1 // saw a NaN, set CF\n"
- "\tSAHF\n"
- "exit:\tNOP // avoid branch to branch" %}
- opcode(0x0F, 0x2F);
- ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
+ format %{ "UCOMISS $src1,$src2\n\t"
+ "JNP,s exit\n\t"
+ "PUSHF\t# saw NaN, set CF\n\t"
+ "AND [rsp], #0xffffff2b\n\t"
+ "POPF\n"
+ "exit:" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe( pipe_slow );
%}
-instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
+instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
predicate(UseSSE>=1);
- match(Set cr (CmpF dst src));
+ match(Set cr (CmpF src1 src2));
ins_cost(100);
- format %{ "COMISS $dst,$src" %}
- opcode(0x0F, 0x2F);
- ins_encode(OpcP, OpcS, RegReg(dst, src));
+ format %{ "UCOMISS $src1,$src2" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
+instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
predicate(UseSSE>=1);
- match(Set cr (CmpF dst (LoadF src)));
- effect(KILL rax);
+ match(Set cr (CmpF src1 (LoadF src2)));
ins_cost(165);
- format %{ "COMISS $dst,$src\n"
- "\tJNP exit\n"
- "\tMOV ah,1 // saw a NaN, set CF\n"
- "\tSAHF\n"
- "exit:\tNOP // avoid branch to branch" %}
- opcode(0x0F, 0x2F);
- ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
+ format %{ "UCOMISS $src1,$src2\n\t"
+ "JNP,s exit\n\t"
+ "PUSHF\t# saw NaN, set CF\n\t"
+ "AND [rsp], #0xffffff2b\n\t"
+ "POPF\n"
+ "exit:" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe( pipe_slow );
%}
-instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
+instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
predicate(UseSSE>=1);
- match(Set cr (CmpF dst (LoadF src)));
+ match(Set cr (CmpF src1 (LoadF src2)));
ins_cost(100);
- format %{ "COMISS $dst,$src" %}
- opcode(0x0F, 0x2F);
- ins_encode(OpcP, OpcS, RegMem(dst, src));
+ format %{ "UCOMISS $src1,$src2" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Compare into -1,0,1 in XMM
-instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
+instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
predicate(UseSSE>=1);
match(Set dst (CmpF3 src1 src2));
effect(KILL cr);
ins_cost(255);
- format %{ "XOR $dst,$dst\n"
- "\tCOMISS $src1,$src2\n"
- "\tJP,s nan\n"
- "\tJEQ,s exit\n"
- "\tJA,s inc\n"
- "nan:\tDEC $dst\n"
- "\tJMP,s exit\n"
- "inc:\tINC $dst\n"
- "exit:"
- %}
- opcode(0x0F, 0x2F);
- ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
+ format %{ "UCOMISS $src1, $src2\n\t"
+ "MOV $dst, #-1\n\t"
+ "JP,s done\n\t"
+ "JB,s done\n\t"
+ "SETNE $dst\n\t"
+ "MOVZB $dst, $dst\n"
+ "done:" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe( pipe_slow );
%}
// Compare into -1,0,1 in XMM and memory
-instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
+instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
predicate(UseSSE>=1);
- match(Set dst (CmpF3 src1 (LoadF mem)));
+ match(Set dst (CmpF3 src1 (LoadF src2)));
effect(KILL cr);
ins_cost(275);
- format %{ "COMISS $src1,$mem\n"
- "\tMOV $dst,0\t\t# do not blow flags\n"
- "\tJP,s nan\n"
- "\tJEQ,s exit\n"
- "\tJA,s inc\n"
- "nan:\tDEC $dst\n"
- "\tJMP,s exit\n"
- "inc:\tINC $dst\n"
- "exit:"
- %}
- opcode(0x0F, 0x2F);
- ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
+ format %{ "UCOMISS $src1, $src2\n\t"
+ "MOV $dst, #-1\n\t"
+ "JP,s done\n\t"
+ "JB,s done\n\t"
+ "SETNE $dst\n\t"
+ "MOVZB $dst, $dst\n"
+ "done:" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe( pipe_slow );
%}
// Spill to obtain 24-bit precision
-instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (SubF src1 src2));
format %{ "FSUB $dst,$src1 - $src2" %}
opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
- ins_encode( Push_Reg_F(src1),
- OpcReg_F(src2),
- Pop_Mem_F(dst) );
+ ins_encode( Push_Reg_FPR(src1),
+ OpcReg_FPR(src2),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
-instruct subF_reg(regF dst, regF src) %{
+instruct subFPR_reg(regFPR dst, regFPR src) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (SubF dst src));
format %{ "FSUB $dst,$src" %}
opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
- ins_encode( Push_Reg_F(src),
+ ins_encode( Push_Reg_FPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
// Spill to obtain 24-bit precision
-instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (AddF src1 src2));
format %{ "FADD $dst,$src1,$src2" %}
opcode(0xD8, 0x0); /* D8 C0+i */
- ins_encode( Push_Reg_F(src2),
- OpcReg_F(src1),
- Pop_Mem_F(dst) );
+ ins_encode( Push_Reg_FPR(src2),
+ OpcReg_FPR(src1),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
-instruct addF_reg(regF dst, regF src) %{
+instruct addFPR_reg(regFPR dst, regFPR src) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (AddF dst src));
format %{ "FLD $src\n\t"
"FADDp $dst,ST" %}
opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
- ins_encode( Push_Reg_F(src),
+ ins_encode( Push_Reg_FPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
-// Add two single precision floating point values in xmm
-instruct addX_reg(regX dst, regX src) %{
- predicate(UseSSE>=1);
- match(Set dst (AddF dst src));
- format %{ "ADDSS $dst,$src" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct addX_imm(regX dst, immXF con) %{
- predicate(UseSSE>=1);
- match(Set dst (AddF dst con));
- format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
- ins_encode %{
- __ addss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct addX_mem(regX dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (AddF dst (LoadF mem)));
- format %{ "ADDSS $dst,$mem" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
- ins_pipe( pipe_slow );
-%}
-
-// Subtract two single precision floating point values in xmm
-instruct subX_reg(regX dst, regX src) %{
- predicate(UseSSE>=1);
- match(Set dst (SubF dst src));
- format %{ "SUBSS $dst,$src" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct subX_imm(regX dst, immXF con) %{
- predicate(UseSSE>=1);
- match(Set dst (SubF dst con));
- format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
- ins_encode %{
- __ subss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct subX_mem(regX dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (SubF dst (LoadF mem)));
- format %{ "SUBSS $dst,$mem" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-// Multiply two single precision floating point values in xmm
-instruct mulX_reg(regX dst, regX src) %{
- predicate(UseSSE>=1);
- match(Set dst (MulF dst src));
- format %{ "MULSS $dst,$src" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct mulX_imm(regX dst, immXF con) %{
- predicate(UseSSE>=1);
- match(Set dst (MulF dst con));
- format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
- ins_encode %{
- __ mulss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct mulX_mem(regX dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (MulF dst (LoadF mem)));
- format %{ "MULSS $dst,$mem" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-// Divide two single precision floating point values in xmm
-instruct divX_reg(regX dst, regX src) %{
- predicate(UseSSE>=1);
- match(Set dst (DivF dst src));
- format %{ "DIVSS $dst,$src" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct divX_imm(regX dst, immXF con) %{
- predicate(UseSSE>=1);
- match(Set dst (DivF dst con));
- format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
- ins_encode %{
- __ divss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct divX_mem(regX dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (DivF dst (LoadF mem)));
- format %{ "DIVSS $dst,$mem" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-// Get the square root of a single precision floating point values in xmm
-instruct sqrtX_reg(regX dst, regX src) %{
- predicate(UseSSE>=1);
- match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
- format %{ "SQRTSS $dst,$src" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct sqrtX_mem(regX dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
- format %{ "SQRTSS $dst,$mem" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
- ins_pipe( pipe_slow );
-%}
-
-// Get the square root of a double precision floating point values in xmm
-instruct sqrtXD_reg(regXD dst, regXD src) %{
- predicate(UseSSE>=2);
- match(Set dst (SqrtD src));
- format %{ "SQRTSD $dst,$src" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct sqrtXD_mem(regXD dst, memory mem) %{
- predicate(UseSSE>=2);
- match(Set dst (SqrtD (LoadD mem)));
- format %{ "SQRTSD $dst,$mem" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
- ins_pipe( pipe_slow );
-%}
-
-instruct absF_reg(regFPR1 dst, regFPR1 src) %{
+instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
predicate(UseSSE==0);
match(Set dst (AbsF src));
ins_cost(100);
@@ -11442,15 +10662,7 @@ instruct absF_reg(regFPR1 dst, regFPR1 src) %{
ins_pipe( fpu_reg_reg );
%}
-instruct absX_reg(regX dst ) %{
- predicate(UseSSE>=1);
- match(Set dst (AbsF dst));
- format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
- ins_encode( AbsXF_encoding(dst));
- ins_pipe( pipe_slow );
-%}
-
-instruct negF_reg(regFPR1 dst, regFPR1 src) %{
+instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
predicate(UseSSE==0);
match(Set dst (NegF src));
ins_cost(100);
@@ -11460,17 +10672,9 @@ instruct negF_reg(regFPR1 dst, regFPR1 src) %{
ins_pipe( fpu_reg_reg );
%}
-instruct negX_reg( regX dst ) %{
- predicate(UseSSE>=1);
- match(Set dst (NegF dst));
- format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
- ins_encode( NegXF_encoding(dst));
- ins_pipe( pipe_slow );
-%}
-
-// Cisc-alternate to addF_reg
+// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
-instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
+instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (AddF src1 (LoadF src2)));
@@ -11479,14 +10683,14 @@ instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
"FSTP_S $dst" %}
opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
- OpcReg_F(src1),
- Pop_Mem_F(dst) );
+ OpcReg_FPR(src1),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_mem );
%}
//
-// Cisc-alternate to addF_reg
+// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
-instruct addF_reg_mem(regF dst, memory src) %{
+instruct addFPR_reg_mem(regFPR dst, memory src) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (AddF dst (LoadF src)));
@@ -11499,21 +10703,21 @@ instruct addF_reg_mem(regF dst, memory src) %{
// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
-instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
+instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (AddF src1 src2));
format %{ "FADD $dst,$src1,$src2" %}
opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
- OpcReg_F(src2),
- Pop_Mem_F(dst) );
+ OpcReg_FPR(src2),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_mem );
%}
// Cisc-spill variant
// Spill to obtain 24-bit precision
-instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
+instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (AddF src1 (LoadF src2)));
@@ -11522,12 +10726,12 @@ instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
set_instruction_start,
OpcP, RMopc_Mem(secondary,src1),
- Pop_Mem_F(dst) );
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_mem_mem );
%}
// Spill to obtain 24-bit precision
-instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
+instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (AddF src1 src2));
@@ -11536,13 +10740,13 @@ instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
set_instruction_start,
OpcP, RMopc_Mem(secondary,src1),
- Pop_Mem_F(dst) );
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_mem_mem );
%}
// Spill to obtain 24-bit precision
-instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
+instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (AddF src con));
format %{ "FLD $src\n\t"
@@ -11557,7 +10761,7 @@ instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
%}
//
// This instruction does not round to 24-bits
-instruct addF_reg_imm(regF dst, regF src, immF con) %{
+instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (AddF src con));
format %{ "FLD $src\n\t"
@@ -11572,7 +10776,7 @@ instruct addF_reg_imm(regF dst, regF src, immF con) %{
%}
// Spill to obtain 24-bit precision
-instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (MulF src1 src2));
@@ -11580,14 +10784,14 @@ instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
"FMUL $src2\n\t"
"FSTP_S $dst" %}
opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
- ins_encode( Push_Reg_F(src1),
- OpcReg_F(src2),
- Pop_Mem_F(dst) );
+ ins_encode( Push_Reg_FPR(src1),
+ OpcReg_FPR(src2),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
-instruct mulF_reg(regF dst, regF src1, regF src2) %{
+instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (MulF src1 src2));
@@ -11595,16 +10799,16 @@ instruct mulF_reg(regF dst, regF src1, regF src2) %{
"FMUL $src2\n\t"
"FSTP_S $dst" %}
opcode(0xD8, 0x1); /* D8 C8+i */
- ins_encode( Push_Reg_F(src2),
- OpcReg_F(src1),
- Pop_Reg_F(dst) );
+ ins_encode( Push_Reg_FPR(src2),
+ OpcReg_FPR(src1),
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_reg_reg );
%}
// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
-instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
+instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (MulF src1 (LoadF src2)));
@@ -11613,27 +10817,27 @@ instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
"FSTP_S $dst" %}
opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
- OpcReg_F(src1),
- Pop_Mem_F(dst) );
+ OpcReg_FPR(src1),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
-instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
+instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (MulF src1 (LoadF src2)));
format %{ "FMUL $dst,$src1,$src2" %}
opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
- OpcReg_F(src1),
- Pop_Reg_F(dst) );
+ OpcReg_FPR(src1),
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_reg_mem );
%}
// Spill to obtain 24-bit precision
-instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
+instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (MulF src1 src2));
@@ -11642,12 +10846,12 @@ instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
set_instruction_start,
OpcP, RMopc_Mem(secondary,src1),
- Pop_Mem_F(dst) );
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_mem_mem );
%}
// Spill to obtain 24-bit precision
-instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
+instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (MulF src con));
@@ -11663,7 +10867,7 @@ instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
%}
//
// This instruction does not round to 24-bits
-instruct mulF_reg_imm(regF dst, regF src, immF con) %{
+instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (MulF src con));
@@ -11680,9 +10884,9 @@ instruct mulF_reg_imm(regF dst, regF src, immF con) %{
//
-// MACRO1 -- subsume unshared load into mulF
+// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
-instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
+instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (MulF (LoadF mem1) src));
@@ -11691,36 +10895,36 @@ instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
"FSTP $dst" %}
opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
- OpcReg_F(src),
- Pop_Reg_F(dst) );
+ OpcReg_FPR(src),
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_reg_mem );
%}
//
-// MACRO2 -- addF a mulF which subsumed an unshared load
+// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
-instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
+instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
ins_cost(95);
format %{ "FLD $mem1 ===MACRO2===\n\t"
- "FMUL ST,$src1 subsume mulF left load\n\t"
+ "FMUL ST,$src1 subsume mulFPR left load\n\t"
"FADD ST,$src2\n\t"
"FSTP $dst" %}
opcode(0xD9); /* LoadF D9 /0 */
ins_encode( OpcP, RMopc_Mem(0x00,mem1),
FMul_ST_reg(src1),
FAdd_ST_reg(src2),
- Pop_Reg_F(dst) );
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_mem_reg_reg );
%}
-// MACRO3 -- addF a mulF
+// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
-instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
+instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set src2 (AddF (MulF src0 src1) src2));
@@ -11728,15 +10932,15 @@ instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
"FMUL ST,$src1\n\t"
"FADDP $src2,ST" %}
opcode(0xD9); /* LoadF D9 /0 */
- ins_encode( Push_Reg_F(src0),
+ ins_encode( Push_Reg_FPR(src0),
FMul_ST_reg(src1),
FAddP_reg_ST(src2) );
ins_pipe( fpu_reg_reg_reg );
%}
-// MACRO4 -- divF subF
+// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
-instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
+instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (DivF (SubF src2 src1) src3));
@@ -11745,67 +10949,67 @@ instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
"FDIV ST,$src3\n\t"
"FSTP $dst" %}
opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
- ins_encode( Push_Reg_F(src2),
- subF_divF_encode(src1,src3),
- Pop_Reg_F(dst) );
+ ins_encode( Push_Reg_FPR(src2),
+ subFPR_divFPR_encode(src1,src3),
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_reg_reg_reg );
%}
// Spill to obtain 24-bit precision
-instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (DivF src1 src2));
format %{ "FDIV $dst,$src1,$src2" %}
opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
- ins_encode( Push_Reg_F(src1),
- OpcReg_F(src2),
- Pop_Mem_F(dst) );
+ ins_encode( Push_Reg_FPR(src1),
+ OpcReg_FPR(src2),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
-instruct divF_reg(regF dst, regF src) %{
+instruct divFPR_reg(regFPR dst, regFPR src) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (DivF dst src));
format %{ "FDIV $dst,$src" %}
opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
- ins_encode( Push_Reg_F(src),
+ ins_encode( Push_Reg_FPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
// Spill to obtain 24-bit precision
-instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
+instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (ModF src1 src2));
- effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
+ effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
format %{ "FMOD $dst,$src1,$src2" %}
- ins_encode( Push_Reg_Mod_D(src1, src2),
- emitModD(),
- Push_Result_Mod_D(src2),
- Pop_Mem_F(dst));
+ ins_encode( Push_Reg_Mod_DPR(src1, src2),
+ emitModDPR(),
+ Push_Result_Mod_DPR(src2),
+ Pop_Mem_FPR(dst));
ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
-instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
+instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (ModF dst src));
- effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
+ effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
format %{ "FMOD $dst,$src" %}
- ins_encode(Push_Reg_Mod_D(dst, src),
- emitModD(),
- Push_Result_Mod_D(src),
- Pop_Reg_F(dst));
+ ins_encode(Push_Reg_Mod_DPR(dst, src),
+ emitModDPR(),
+ Push_Result_Mod_DPR(src),
+ Pop_Reg_FPR(dst));
ins_pipe( pipe_slow );
%}
-instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
+instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE>=1);
match(Set dst (ModF src0 src1));
effect(KILL rax, KILL cr);
@@ -11825,7 +11029,7 @@ instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
"\tFSTP ST0\t # Restore FPU Stack"
%}
ins_cost(250);
- ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
+ ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
ins_pipe( pipe_slow );
%}
@@ -11833,26 +11037,26 @@ instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!
-instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
+instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
predicate(UseSSE==0);
match(Set dst (RoundFloat src));
ins_cost(125);
format %{ "FST_S $dst,$src\t# F-round" %}
- ins_encode( Pop_Mem_Reg_F(dst, src) );
+ ins_encode( Pop_Mem_Reg_FPR(dst, src) );
ins_pipe( fpu_mem_reg );
%}
-instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
+instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (RoundDouble src));
ins_cost(125);
format %{ "FST_D $dst,$src\t# D-round" %}
- ins_encode( Pop_Mem_Reg_D(dst, src) );
+ ins_encode( Pop_Mem_Reg_DPR(dst, src) );
ins_pipe( fpu_mem_reg );
%}
// Force rounding to 24-bit precision and 8-bit exponent
-instruct convD2F_reg(stackSlotF dst, regD src) %{
+instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
predicate(UseSSE==0);
match(Set dst (ConvD2F src));
format %{ "FST_S $dst,$src\t# F-round" %}
@@ -11862,7 +11066,7 @@ instruct convD2F_reg(stackSlotF dst, regD src) %{
%}
// Force rounding to 24-bit precision and 8-bit exponent
-instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
+instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
predicate(UseSSE==1);
match(Set dst (ConvD2F src));
effect( KILL cr );
@@ -11870,29 +11074,40 @@ instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
"FST_S [ESP],$src\t# F-round\n\t"
"MOVSS $dst,[ESP]\n\t"
"ADD ESP,4" %}
- ins_encode( D2X_encoding(dst, src) );
+ ins_encode %{
+ __ subptr(rsp, 4);
+ if ($src$$reg != FPR1L_enc) {
+ __ fld_s($src$$reg-1);
+ __ fstp_s(Address(rsp, 0));
+ } else {
+ __ fst_s(Address(rsp, 0));
+ }
+ __ movflt($dst$$XMMRegister, Address(rsp, 0));
+ __ addptr(rsp, 4);
+ %}
ins_pipe( pipe_slow );
%}
// Force rounding double precision to single precision
-instruct convXD2X_reg(regX dst, regXD src) %{
+instruct convD2F_reg(regF dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (ConvD2F src));
format %{ "CVTSD2SS $dst,$src\t# F-round" %}
- opcode(0xF2, 0x0F, 0x5A);
- ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+ ins_encode %{
+ __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
-instruct convF2D_reg_reg(regD dst, regF src) %{
+instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
predicate(UseSSE==0);
match(Set dst (ConvF2D src));
format %{ "FST_S $dst,$src\t# D-round" %}
- ins_encode( Pop_Reg_Reg_D(dst, src));
+ ins_encode( Pop_Reg_Reg_DPR(dst, src));
ins_pipe( fpu_reg_reg );
%}
-instruct convF2D_reg(stackSlotD dst, regF src) %{
+instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
predicate(UseSSE==1);
match(Set dst (ConvF2D src));
format %{ "FST_D $dst,$src\t# D-round" %}
@@ -11901,7 +11116,7 @@ instruct convF2D_reg(stackSlotD dst, regF src) %{
%}
%}
-instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
+instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
predicate(UseSSE==1);
match(Set dst (ConvF2D src));
effect( KILL cr );
@@ -11910,21 +11125,28 @@ instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
"FLD_S [ESP]\n\t"
"ADD ESP,4\n\t"
"FSTP $dst\t# D-round" %}
- ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
+ ins_encode %{
+ __ subptr(rsp, 4);
+ __ movflt(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_s(Address(rsp, 0));
+ __ addptr(rsp, 4);
+ __ fstp_d($dst$$reg);
+ %}
ins_pipe( pipe_slow );
%}
-instruct convX2XD_reg(regXD dst, regX src) %{
+instruct convF2D_reg(regD dst, regF src) %{
predicate(UseSSE>=2);
match(Set dst (ConvF2D src));
format %{ "CVTSS2SD $dst,$src\t# D-round" %}
- opcode(0xF3, 0x0F, 0x5A);
- ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+ ins_encode %{
+ __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
-instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
+instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
predicate(UseSSE<=1);
match(Set dst (ConvD2I src));
effect( KILL tmp, KILL cr );
@@ -11939,12 +11161,12 @@ instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
"FLD_D $src\n\t"
"CALL d2i_wrapper\n"
"fast:" %}
- ins_encode( Push_Reg_D(src), D2I_encoding(src) );
+ ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
ins_pipe( pipe_slow );
%}
// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
-instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
+instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
predicate(UseSSE>=2);
match(Set dst (ConvD2I src));
effect( KILL tmp, KILL cr );
@@ -11957,12 +11179,22 @@ instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %
"ADD ESP, 8\n\t"
"CALL d2i_wrapper\n"
"fast:" %}
- opcode(0x1); // double-precision conversion
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
+ ins_encode %{
+ Label fast;
+ __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
+ __ cmpl($dst$$Register, 0x80000000);
+ __ jccb(Assembler::notEqual, fast);
+ __ subptr(rsp, 8);
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_d(Address(rsp, 0));
+ __ addptr(rsp, 8);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
+ __ bind(fast);
+ %}
ins_pipe( pipe_slow );
%}
-instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
+instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
predicate(UseSSE<=1);
match(Set dst (ConvD2L src));
effect( KILL cr );
@@ -11980,12 +11212,12 @@ instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
"FLD $src\n\t"
"CALL d2l_wrapper\n"
"fast:" %}
- ins_encode( Push_Reg_D(src), D2L_encoding(src) );
+ ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
ins_pipe( pipe_slow );
%}
// XMM lacks a float/double->long conversion, so use the old FPU stack.
-instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
+instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
predicate (UseSSE>=2);
match(Set dst (ConvD2L src));
effect( KILL cr );
@@ -12004,9 +11236,36 @@ instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
"SUB ESP,8\n\t"
"MOVSD [ESP],$src\n\t"
"FLD_D [ESP]\n\t"
+ "ADD ESP,8\n\t"
"CALL d2l_wrapper\n"
"fast:" %}
- ins_encode( XD2L_encoding(src) );
+ ins_encode %{
+ Label fast;
+ __ subptr(rsp, 8);
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_d(Address(rsp, 0));
+ __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
+ __ fistp_d(Address(rsp, 0));
+ // Restore the rounding mode, mask the exception
+ if (Compile::current()->in_24_bit_fp_mode()) {
+ __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
+ } else {
+ __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
+ }
+ // Load the converted long, adjust CPU stack
+ __ pop(rax);
+ __ pop(rdx);
+ __ cmpl(rdx, 0x80000000);
+ __ jccb(Assembler::notEqual, fast);
+ __ testl(rax, rax);
+ __ jccb(Assembler::notEqual, fast);
+ __ subptr(rsp, 8);
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_d(Address(rsp, 0));
+ __ addptr(rsp, 8);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
+ __ bind(fast);
+ %}
ins_pipe( pipe_slow );
%}
@@ -12016,7 +11275,7 @@ instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or convert a NaN; we check for this
// and go the slow path if needed.
-instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
+instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
predicate(UseSSE==0);
match(Set dst (ConvF2I src));
effect( KILL tmp, KILL cr );
@@ -12031,13 +11290,13 @@ instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
"FLD $src\n\t"
"CALL d2i_wrapper\n"
"fast:" %}
- // D2I_encoding works for F2I
- ins_encode( Push_Reg_F(src), D2I_encoding(src) );
+ // DPR2I_encoding works for FPR2I
+ ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
ins_pipe( pipe_slow );
%}
// Convert a float in xmm to an int reg.
-instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
+instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
predicate(UseSSE>=1);
match(Set dst (ConvF2I src));
effect( KILL tmp, KILL cr );
@@ -12050,12 +11309,22 @@ instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
"ADD ESP, 4\n\t"
"CALL d2i_wrapper\n"
"fast:" %}
- opcode(0x0); // single-precision conversion
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
+ ins_encode %{
+ Label fast;
+ __ cvttss2sil($dst$$Register, $src$$XMMRegister);
+ __ cmpl($dst$$Register, 0x80000000);
+ __ jccb(Assembler::notEqual, fast);
+ __ subptr(rsp, 4);
+ __ movflt(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_s(Address(rsp, 0));
+ __ addptr(rsp, 4);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
+ __ bind(fast);
+ %}
ins_pipe( pipe_slow );
%}
-instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
+instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
predicate(UseSSE==0);
match(Set dst (ConvF2L src));
effect( KILL cr );
@@ -12073,13 +11342,13 @@ instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
"FLD $src\n\t"
"CALL d2l_wrapper\n"
"fast:" %}
- // D2L_encoding works for F2L
- ins_encode( Push_Reg_F(src), D2L_encoding(src) );
+ // DPR2L_encoding works for FPR2L
+ ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
ins_pipe( pipe_slow );
%}
// XMM lacks a float/double->long conversion, so use the old FPU stack.
-instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
+instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
predicate (UseSSE>=1);
match(Set dst (ConvF2L src));
effect( KILL cr );
@@ -12101,39 +11370,67 @@ instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
"ADD ESP,4\n\t"
"CALL d2l_wrapper\n"
"fast:" %}
- ins_encode( X2L_encoding(src) );
+ ins_encode %{
+ Label fast;
+ __ subptr(rsp, 8);
+ __ movflt(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_s(Address(rsp, 0));
+ __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
+ __ fistp_d(Address(rsp, 0));
+ // Restore the rounding mode, mask the exception
+ if (Compile::current()->in_24_bit_fp_mode()) {
+ __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
+ } else {
+ __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
+ }
+ // Load the converted long, adjust CPU stack
+ __ pop(rax);
+ __ pop(rdx);
+ __ cmpl(rdx, 0x80000000);
+ __ jccb(Assembler::notEqual, fast);
+ __ testl(rax, rax);
+ __ jccb(Assembler::notEqual, fast);
+ __ subptr(rsp, 4);
+ __ movflt(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_s(Address(rsp, 0));
+ __ addptr(rsp, 4);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
+ __ bind(fast);
+ %}
ins_pipe( pipe_slow );
%}
-instruct convI2D_reg(regD dst, stackSlotI src) %{
+instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
predicate( UseSSE<=1 );
match(Set dst (ConvI2D src));
format %{ "FILD $src\n\t"
"FSTP $dst" %}
opcode(0xDB, 0x0); /* DB /0 */
- ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
+ ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
ins_pipe( fpu_reg_mem );
%}
-instruct convI2XD_reg(regXD dst, eRegI src) %{
+instruct convI2D_reg(regD dst, eRegI src) %{
predicate( UseSSE>=2 && !UseXmmI2D );
match(Set dst (ConvI2D src));
format %{ "CVTSI2SD $dst,$src" %}
- opcode(0xF2, 0x0F, 0x2A);
- ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+ ins_encode %{
+ __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe( pipe_slow );
%}
-instruct convI2XD_mem(regXD dst, memory mem) %{
+instruct convI2D_mem(regD dst, memory mem) %{
predicate( UseSSE>=2 );
match(Set dst (ConvI2D (LoadI mem)));
format %{ "CVTSI2SD $dst,$mem" %}
- opcode(0xF2, 0x0F, 0x2A);
- ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
+ ins_encode %{
+ __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
-instruct convXI2XD_reg(regXD dst, eRegI src)
+instruct convXI2D_reg(regD dst, eRegI src)
%{
predicate( UseSSE>=2 && UseXmmI2D );
match(Set dst (ConvI2D src));
@@ -12147,31 +11444,31 @@ instruct convXI2XD_reg(regXD dst, eRegI src)
ins_pipe(pipe_slow); // XXX
%}
-instruct convI2D_mem(regD dst, memory mem) %{
+instruct convI2DPR_mem(regDPR dst, memory mem) %{
predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
match(Set dst (ConvI2D (LoadI mem)));
format %{ "FILD $mem\n\t"
"FSTP $dst" %}
opcode(0xDB); /* DB /0 */
ins_encode( OpcP, RMopc_Mem(0x00,mem),
- Pop_Reg_D(dst));
+ Pop_Reg_DPR(dst));
ins_pipe( fpu_reg_mem );
%}
// Convert a byte to a float; no rounding step needed.
-instruct conv24I2F_reg(regF dst, stackSlotI src) %{
+instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
match(Set dst (ConvI2F src));
format %{ "FILD $src\n\t"
"FSTP $dst" %}
opcode(0xDB, 0x0); /* DB /0 */
- ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
+ ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
ins_pipe( fpu_reg_mem );
%}
// In 24-bit mode, force exponent rounding by storing back out
-instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
+instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (ConvI2F src));
ins_cost(200);
@@ -12179,12 +11476,12 @@ instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
"FSTP_S $dst" %}
opcode(0xDB, 0x0); /* DB /0 */
ins_encode( Push_Mem_I(src),
- Pop_Mem_F(dst));
+ Pop_Mem_FPR(dst));
ins_pipe( fpu_mem_mem );
%}
// In 24-bit mode, force exponent rounding by storing back out
-instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
+instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (ConvI2F (LoadI mem)));
ins_cost(200);
@@ -12192,46 +11489,46 @@ instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
"FSTP_S $dst" %}
opcode(0xDB); /* DB /0 */
ins_encode( OpcP, RMopc_Mem(0x00,mem),
- Pop_Mem_F(dst));
+ Pop_Mem_FPR(dst));
ins_pipe( fpu_mem_mem );
%}
// This instruction does not round to 24-bits
-instruct convI2F_reg(regF dst, stackSlotI src) %{
+instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (ConvI2F src));
format %{ "FILD $src\n\t"
"FSTP $dst" %}
opcode(0xDB, 0x0); /* DB /0 */
ins_encode( Push_Mem_I(src),
- Pop_Reg_F(dst));
+ Pop_Reg_FPR(dst));
ins_pipe( fpu_reg_mem );
%}
// This instruction does not round to 24-bits
-instruct convI2F_mem(regF dst, memory mem) %{
+instruct convI2FPR_mem(regFPR dst, memory mem) %{
predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (ConvI2F (LoadI mem)));
format %{ "FILD $mem\n\t"
"FSTP $dst" %}
opcode(0xDB); /* DB /0 */
ins_encode( OpcP, RMopc_Mem(0x00,mem),
- Pop_Reg_F(dst));
+ Pop_Reg_FPR(dst));
ins_pipe( fpu_reg_mem );
%}
// Convert an int to a float in xmm; no rounding step needed.
-instruct convI2X_reg(regX dst, eRegI src) %{
+instruct convI2F_reg(regF dst, eRegI src) %{
predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
match(Set dst (ConvI2F src));
format %{ "CVTSI2SS $dst, $src" %}
-
- opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */
- ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+ ins_encode %{
+ __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe( pipe_slow );
%}
- instruct convXI2X_reg(regX dst, eRegI src)
+ instruct convXI2F_reg(regF dst, eRegI src)
%{
predicate( UseSSE>=2 && UseXmmI2F );
match(Set dst (ConvI2F src));
@@ -12280,7 +11577,7 @@ instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
ins_pipe( ialu_reg_reg_long );
%}
-instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
+instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
predicate (UseSSE<=1);
match(Set dst (ConvL2D src));
effect( KILL cr );
@@ -12290,11 +11587,11 @@ instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
"ADD ESP,8\n\t"
"FSTP_D $dst\t# D-round" %}
opcode(0xDF, 0x5); /* DF /5 */
- ins_encode(convert_long_double(src), Pop_Mem_D(dst));
+ ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
ins_pipe( pipe_slow );
%}
-instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
+instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst (ConvL2D src));
effect( KILL cr );
@@ -12305,11 +11602,11 @@ instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
"MOVSD $dst,[ESP]\n\t"
"ADD ESP,8" %}
opcode(0xDF, 0x5); /* DF /5 */
- ins_encode(convert_long_double2(src), Push_ResultXD(dst));
+ ins_encode(convert_long_double2(src), Push_ResultD(dst));
ins_pipe( pipe_slow );
%}
-instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
+instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
predicate (UseSSE>=1);
match(Set dst (ConvL2F src));
effect( KILL cr );
@@ -12320,11 +11617,11 @@ instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
"MOVSS $dst,[ESP]\n\t"
"ADD ESP,8" %}
opcode(0xDF, 0x5); /* DF /5 */
- ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
+ ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
ins_pipe( pipe_slow );
%}
-instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
+instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
match(Set dst (ConvL2F src));
effect( KILL cr );
format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
@@ -12333,7 +11630,7 @@ instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
"ADD ESP,8\n\t"
"FSTP_S $dst\t# F-round" %}
opcode(0xDF, 0x5); /* DF /5 */
- ins_encode(convert_long_double(src), Pop_Mem_F(dst));
+ ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
ins_pipe( pipe_slow );
%}
@@ -12351,40 +11648,45 @@ instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
effect( DEF dst, USE src );
ins_cost(100);
format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
- opcode(0x8B);
- ins_encode( OpcP, RegMem(dst,src));
+ ins_encode %{
+ __ movl($dst$$Register, Address(rsp, $src$$disp));
+ %}
ins_pipe( ialu_reg_mem );
%}
-instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
+instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
predicate(UseSSE==0);
match(Set dst (MoveF2I src));
effect( DEF dst, USE src );
ins_cost(125);
format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
- ins_encode( Pop_Mem_Reg_F(dst, src) );
+ ins_encode( Pop_Mem_Reg_FPR(dst, src) );
ins_pipe( fpu_mem_reg );
%}
-instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
+instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
predicate(UseSSE>=1);
match(Set dst (MoveF2I src));
effect( DEF dst, USE src );
ins_cost(95);
format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
+ ins_encode %{
+ __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
-instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
+instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{
predicate(UseSSE>=2);
match(Set dst (MoveF2I src));
effect( DEF dst, USE src );
ins_cost(85);
format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
- ins_encode( MovX2I_reg(dst, src));
+ ins_encode %{
+ __ movdl($dst$$Register, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -12394,13 +11696,14 @@ instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
ins_cost(100);
format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
- opcode(0x89);
- ins_encode( OpcPRegSS( dst, src ) );
+ ins_encode %{
+ __ movl(Address(rsp, $dst$$disp), $src$$Register);
+ %}
ins_pipe( ialu_mem_reg );
%}
-instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
+instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
predicate(UseSSE==0);
match(Set dst (MoveI2F src));
effect(DEF dst, USE src);
@@ -12410,29 +11713,33 @@ instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
"FSTP $dst\t# MoveI2F_stack_reg" %}
opcode(0xD9); /* D9 /0, FLD m32real */
ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
- Pop_Reg_F(dst) );
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_mem );
%}
-instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
+instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
predicate(UseSSE>=1);
match(Set dst (MoveI2F src));
effect( DEF dst, USE src );
ins_cost(95);
format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+ %}
ins_pipe( pipe_slow );
%}
-instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
+instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{
predicate(UseSSE>=2);
match(Set dst (MoveI2F src));
effect( DEF dst, USE src );
ins_cost(85);
format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
- ins_encode( MovI2X_reg(dst, src) );
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe( pipe_slow );
%}
@@ -12448,29 +11755,30 @@ instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
ins_pipe( ialu_mem_long_reg );
%}
-instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
+instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (MoveD2L src));
effect(DEF dst, USE src);
ins_cost(125);
format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
- ins_encode( Pop_Mem_Reg_D(dst, src) );
+ ins_encode( Pop_Mem_Reg_DPR(dst, src) );
ins_pipe( fpu_mem_reg );
%}
-instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
+instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (MoveD2L src));
effect(DEF dst, USE src);
ins_cost(95);
-
format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
+ ins_encode %{
+ __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
-instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
+instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
predicate(UseSSE>=2);
match(Set dst (MoveD2L src));
effect(DEF dst, USE src, TEMP tmp);
@@ -12478,7 +11786,11 @@ instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
format %{ "MOVD $dst.lo,$src\n\t"
"PSHUFLW $tmp,$src,0x4E\n\t"
"MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
- ins_encode( MovXD2L_reg(dst, src, tmp) );
+ ins_encode %{
+ __ movdl($dst$$Register, $src$$XMMRegister);
+ __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
+ __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -12495,7 +11807,7 @@ instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
%}
-instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
+instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
predicate(UseSSE<=1);
match(Set dst (MoveL2D src));
effect(DEF dst, USE src);
@@ -12505,34 +11817,38 @@ instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
"FSTP $dst\t# MoveL2D_stack_reg" %}
opcode(0xDD); /* DD /0, FLD m64real */
ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
- Pop_Reg_D(dst) );
+ Pop_Reg_DPR(dst) );
ins_pipe( fpu_reg_mem );
%}
-instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
+instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
match(Set dst (MoveL2D src));
effect(DEF dst, USE src);
ins_cost(95);
format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+ %}
ins_pipe( pipe_slow );
%}
-instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
+instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
match(Set dst (MoveL2D src));
effect(DEF dst, USE src);
ins_cost(95);
format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
- ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+ %}
ins_pipe( pipe_slow );
%}
-instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
+instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
predicate(UseSSE>=2);
match(Set dst (MoveL2D src));
effect(TEMP dst, USE src, TEMP tmp);
@@ -12540,149 +11856,192 @@ instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
format %{ "MOVD $dst,$src.lo\n\t"
"MOVD $tmp,$src.hi\n\t"
"PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
- ins_encode( MovL2XD_reg(dst, src, tmp) );
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+ __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_reg(regXD dst, regXD src) %{
+instruct Repl8B_reg(regD dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate8B src));
format %{ "MOVDQA $dst,$src\n\t"
"PUNPCKLBW $dst,$dst\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
- ins_encode( pshufd_8x8(dst, src));
+ ins_encode %{
+ if ($dst$$reg != $src$$reg) {
+ __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
+ }
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( pipe_slow );
%}
// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_eRegI(regXD dst, eRegI src) %{
+instruct Repl8B_eRegI(regD dst, eRegI src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate8B src));
format %{ "MOVD $dst,$src\n\t"
"PUNPCKLBW $dst,$dst\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
- ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( pipe_slow );
%}
// Replicate scalar zero to packed byte (1 byte) values in xmm
-instruct Repl8B_immI0(regXD dst, immI0 zero) %{
+instruct Repl8B_immI0(regD dst, immI0 zero) %{
predicate(UseSSE>=2);
match(Set dst (Replicate8B zero));
format %{ "PXOR $dst,$dst\t! replicate8B" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed short (2 byte) values in xmm
-instruct Repl4S_reg(regXD dst, regXD src) %{
+instruct Repl4S_reg(regD dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate4S src));
format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
- ins_encode( pshufd_4x16(dst, src));
+ ins_encode %{
+ __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed short (2 byte) values in xmm
-instruct Repl4S_eRegI(regXD dst, eRegI src) %{
+instruct Repl4S_eRegI(regD dst, eRegI src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate4S src));
format %{ "MOVD $dst,$src\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
- ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar zero to packed short (2 byte) values in xmm
-instruct Repl4S_immI0(regXD dst, immI0 zero) %{
+instruct Repl4S_immI0(regD dst, immI0 zero) %{
predicate(UseSSE>=2);
match(Set dst (Replicate4S zero));
format %{ "PXOR $dst,$dst\t! replicate4S" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_reg(regXD dst, regXD src) %{
+instruct Repl4C_reg(regD dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate4C src));
format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
- ins_encode( pshufd_4x16(dst, src));
+ ins_encode %{
+ __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_eRegI(regXD dst, eRegI src) %{
+instruct Repl4C_eRegI(regD dst, eRegI src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate4C src));
format %{ "MOVD $dst,$src\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
- ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar zero to packed char (2 byte) values in xmm
-instruct Repl4C_immI0(regXD dst, immI0 zero) %{
+instruct Repl4C_immI0(regD dst, immI0 zero) %{
predicate(UseSSE>=2);
match(Set dst (Replicate4C zero));
format %{ "PXOR $dst,$dst\t! replicate4C" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_reg(regXD dst, regXD src) %{
+instruct Repl2I_reg(regD dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate2I src));
format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
- ins_encode( pshufd(dst, src, 0x00));
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_eRegI(regXD dst, eRegI src) %{
+instruct Repl2I_eRegI(regD dst, eRegI src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate2I src));
format %{ "MOVD $dst,$src\n\t"
"PSHUFD $dst,$dst,0x00\t! replicate2I" %}
- ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar zero to packed integer (4 byte) values in xmm
-instruct Repl2I_immI0(regXD dst, immI0 zero) %{
+instruct Repl2I_immI0(regD dst, immI0 zero) %{
predicate(UseSSE>=2);
match(Set dst (Replicate2I zero));
format %{ "PXOR $dst,$dst\t! replicate2I" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_reg(regXD dst, regXD src) %{
+instruct Repl2F_reg(regD dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate2F src));
format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
- ins_encode( pshufd(dst, src, 0xe0));
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_regX(regXD dst, regX src) %{
+instruct Repl2F_regF(regD dst, regF src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate2F src));
format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
- ins_encode( pshufd(dst, src, 0xe0));
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar zero to packed single precision floating point values in xmm
-instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
+instruct Repl2F_immF0(regD dst, immF0 zero) %{
predicate(UseSSE>=2);
match(Set dst (Replicate2F zero));
format %{ "PXOR $dst,$dst\t! replicate2F" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -12702,7 +12061,7 @@ instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlag
%}
instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
- eAXRegI result, regXD tmp1, eFlagsReg cr) %{
+ eAXRegI result, regD tmp1, eFlagsReg cr) %{
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@@ -12717,7 +12076,7 @@ instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
- regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
+ regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
match(Set result (StrEquals (Binary str1 str2) cnt));
effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
@@ -12732,7 +12091,7 @@ instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
// fast search of substring with known size.
instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
- eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
+ eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
predicate(UseSSE42Intrinsics);
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
@@ -12759,7 +12118,7 @@ instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_c
%}
instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
- eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{
+ eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
predicate(UseSSE42Intrinsics);
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
@@ -12776,7 +12135,7 @@ instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
// fast array equals
instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
- regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
+ regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
match(Set result (AryEq ary1 ary2));
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
@@ -13602,40 +12961,40 @@ instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP s
%}
// Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
+instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovD_regS(cmp,flags,dst,src);
+ fcmovDPR_regS(cmp,flags,dst,src);
%}
%}
// Compare 2 longs and CMOVE doubles
-instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
+instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovXD_regS(cmp,flags,dst,src);
+ fcmovD_regS(cmp,flags,dst,src);
%}
%}
-instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
+instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovF_regS(cmp,flags,dst,src);
+ fcmovFPR_regS(cmp,flags,dst,src);
%}
%}
-instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
+instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovX_regS(cmp,flags,dst,src);
+ fcmovF_regS(cmp,flags,dst,src);
%}
%}
@@ -13730,40 +13089,40 @@ instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP s
%}
// Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
+instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovD_regS(cmp,flags,dst,src);
+ fcmovDPR_regS(cmp,flags,dst,src);
%}
%}
// Compare 2 longs and CMOVE doubles
-instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
+instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovXD_regS(cmp,flags,dst,src);
+ fcmovD_regS(cmp,flags,dst,src);
%}
%}
-instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
+instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovF_regS(cmp,flags,dst,src);
+ fcmovFPR_regS(cmp,flags,dst,src);
%}
%}
-instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
+instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovX_regS(cmp,flags,dst,src);
+ fcmovF_regS(cmp,flags,dst,src);
%}
%}
@@ -13863,41 +13222,41 @@ instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst,
%}
// Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
+instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovD_regS(cmp,flags,dst,src);
+ fcmovDPR_regS(cmp,flags,dst,src);
%}
%}
// Compare 2 longs and CMOVE doubles
-instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
+instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovXD_regS(cmp,flags,dst,src);
+ fcmovD_regS(cmp,flags,dst,src);
%}
%}
-instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
+instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovF_regS(cmp,flags,dst,src);
+ fcmovFPR_regS(cmp,flags,dst,src);
%}
%}
-instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
+instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovX_regS(cmp,flags,dst,src);
+ fcmovF_regS(cmp,flags,dst,src);
%}
%}
diff --git a/src/cpu/x86/vm/x86_64.ad b/src/cpu/x86/vm/x86_64.ad
index 57e82bd43..beed25877 100644
--- a/src/cpu/x86/vm/x86_64.ad
+++ b/src/cpu/x86/vm/x86_64.ad
@@ -552,7 +552,7 @@ source %{
#define __ _masm.
static int preserve_SP_size() {
- return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
+ return 3; // rex.w, op, rm(reg/reg)
}
// !!!!! Special hack to get all types of calls to specify the byte offset
@@ -797,48 +797,35 @@ void encode_RegMem(CodeBuffer &cbuf,
}
}
-void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
-{
- if (dstenc != srcenc) {
- if (dstenc < 8) {
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- srcenc -= 8;
- }
- } else {
- if (srcenc < 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- } else {
- emit_opcode(cbuf, Assembler::REX_RB);
- srcenc -= 8;
- }
- dstenc -= 8;
- }
-
- emit_opcode(cbuf, 0x8B);
- emit_rm(cbuf, 0x3, dstenc, srcenc);
- }
-}
-
-void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
- if( dst_encoding == src_encoding ) {
- // reg-reg copy, use an empty encoding
- } else {
- MacroAssembler _masm(&cbuf);
-
- __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
- }
-}
-
// This could be in MacroAssembler but it's fairly C2 specific
void emit_cmpfp_fixup(MacroAssembler& _masm) {
Label exit;
__ jccb(Assembler::noParity, exit);
__ pushf();
+ //
+ // comiss/ucomiss instructions set ZF,PF,CF flags and
+ // zero OF,AF,SF for NaN values.
+ // Fixup flags by zeroing ZF,PF so that compare of NaN
+ // values returns 'less than' result (CF is set).
+ // Leave the rest of flags unchanged.
+ //
+ // 7 6 5 4 3 2 1 0
+ // |S|Z|r|A|r|P|r|C| (r - reserved bit)
+ // 0 0 1 0 1 0 1 1 (0x2B)
+ //
__ andq(Address(rsp, 0), 0xffffff2b);
__ popf();
__ bind(exit);
- __ nop(); // (target for branch to avoid branch to branch)
+}
+
+void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
+ Label done;
+ __ movl(dst, -1);
+ __ jcc(Assembler::parity, done);
+ __ jcc(Assembler::below, done);
+ __ setb(Assembler::notEqual, dst);
+ __ movzbl(dst, dst);
+ __ bind(done);
}
@@ -1274,16 +1261,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
// 64-bit
int offset = ra_->reg2offset(src_first);
if (cbuf) {
- emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
- if (Matcher::_regEncode[dst_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
- encode_RegMem(*cbuf,
- Matcher::_regEncode[dst_first],
- RSP_enc, 0x4, 0, offset,
- false);
+ MacroAssembler _masm(cbuf);
+ __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
} else if (!do_size) {
st->print("%s %s, [rsp + #%d]\t# spill",
@@ -1294,25 +1273,17 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
}
return
((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[dst_first] < 8)
- ? 5
- : 6); // REX
+ ((Matcher::_regEncode[dst_first] >= 8)
+ ? 6
+ : (5 + ((UseAVX>0)?1:0))); // REX
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
int offset = ra_->reg2offset(src_first);
if (cbuf) {
- emit_opcode(*cbuf, 0xF3);
- if (Matcher::_regEncode[dst_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x10);
- encode_RegMem(*cbuf,
- Matcher::_regEncode[dst_first],
- RSP_enc, 0x4, 0, offset,
- false);
+ MacroAssembler _masm(cbuf);
+ __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movss %s, [rsp + #%d]\t# spill",
@@ -1322,9 +1293,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
}
return
((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[dst_first] < 8)
- ? 5
- : 6); // REX
+ ((Matcher::_regEncode[dst_first] >= 8)
+ ? 6
+ : (5 + ((UseAVX>0)?1:0))); // REX
}
}
} else if (src_first_rc == rc_int) {
@@ -1450,25 +1421,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit
if (cbuf) {
- emit_opcode(*cbuf, 0x66);
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_W);
- } else {
- emit_opcode(*cbuf, Assembler::REX_WB);
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_WR);
- } else {
- emit_opcode(*cbuf, Assembler::REX_WRB);
- }
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x6E);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[dst_first] & 7,
- Matcher::_regEncode[src_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdq %s, %s\t# spill",
@@ -1482,23 +1436,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
if (cbuf) {
- emit_opcode(*cbuf, 0x66);
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_B);
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- } else {
- emit_opcode(*cbuf, Assembler::REX_RB);
- }
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x6E);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[dst_first] & 7,
- Matcher::_regEncode[src_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdl %s, %s\t# spill",
@@ -1507,9 +1446,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
#endif
}
return
- (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
- ? 4
- : 5; // REX
+ (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+ ? 5
+ : (4 + ((UseAVX>0)?1:0)); // REX
}
}
} else if (src_first_rc == rc_float) {
@@ -1521,16 +1460,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
// 64-bit
int offset = ra_->reg2offset(dst_first);
if (cbuf) {
- emit_opcode(*cbuf, 0xF2);
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x11);
- encode_RegMem(*cbuf,
- Matcher::_regEncode[src_first],
- RSP_enc, 0x4, 0, offset,
- false);
+ MacroAssembler _masm(cbuf);
+ __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movsd [rsp + #%d], %s\t# spill",
@@ -1540,25 +1471,17 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
}
return
((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[src_first] < 8)
- ? 5
- : 6); // REX
+ ((Matcher::_regEncode[src_first] >= 8)
+ ? 6
+ : (5 + ((UseAVX>0)?1:0))); // REX
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
int offset = ra_->reg2offset(dst_first);
if (cbuf) {
- emit_opcode(*cbuf, 0xF3);
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x11);
- encode_RegMem(*cbuf,
- Matcher::_regEncode[src_first],
- RSP_enc, 0x4, 0, offset,
- false);
+ MacroAssembler _masm(cbuf);
+ __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movss [rsp + #%d], %s\t# spill",
@@ -1568,9 +1491,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
}
return
((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[src_first] < 8)
- ? 5
- : 6); // REX
+ ((Matcher::_regEncode[src_first] >=8)
+ ? 6
+ : (5 + ((UseAVX>0)?1:0))); // REX
}
} else if (dst_first_rc == rc_int) {
// xmm -> gpr
@@ -1578,25 +1501,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit
if (cbuf) {
- emit_opcode(*cbuf, 0x66);
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_W);
- } else {
- emit_opcode(*cbuf, Assembler::REX_WR); // attention!
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_WB); // attention!
- } else {
- emit_opcode(*cbuf, Assembler::REX_WRB);
- }
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x7E);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[src_first] & 7,
- Matcher::_regEncode[dst_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdq %s, %s\t# spill",
@@ -1610,23 +1516,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
if (cbuf) {
- emit_opcode(*cbuf, 0x66);
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_R); // attention!
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_B); // attention!
- } else {
- emit_opcode(*cbuf, Assembler::REX_RB);
- }
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x7E);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[src_first] & 7,
- Matcher::_regEncode[dst_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdl %s, %s\t# spill",
@@ -1635,9 +1526,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
#endif
}
return
- (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
- ? 4
- : 5; // REX
+ (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+ ? 5
+ : (4 + ((UseAVX>0)?1:0)); // REX
}
} else if (dst_first_rc == rc_float) {
// xmm -> xmm
@@ -1645,23 +1536,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit
if (cbuf) {
- emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_B);
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- } else {
- emit_opcode(*cbuf, Assembler::REX_RB);
- }
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[dst_first] & 7,
- Matcher::_regEncode[src_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("%s %s, %s\t# spill",
@@ -1671,32 +1547,16 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
#endif
}
return
- (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
- ? 4
- : 5; // REX
+ (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+ ? 5
+ : (4 + ((UseAVX>0)?1:0)); // REX
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
if (cbuf) {
- if (!UseXmmRegToRegMoveAll)
- emit_opcode(*cbuf, 0xF3);
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_B);
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- } else {
- emit_opcode(*cbuf, Assembler::REX_RB);
- }
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[dst_first] & 7,
- Matcher::_regEncode[src_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("%s %s, %s\t# spill",
@@ -1705,10 +1565,10 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
Matcher::regName[src_first]);
#endif
}
- return
- (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
- ? (UseXmmRegToRegMoveAll ? 3 : 4)
- : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
+ return ((UseAVX>0) ? 5:
+ ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+ ? (UseXmmRegToRegMoveAll ? 4 : 5)
+ : (UseXmmRegToRegMoveAll ? 3 : 4))); // REX
}
}
}
@@ -2205,47 +2065,6 @@ encode %{
emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
%}
- enc_class cmpfp_fixup() %{
- MacroAssembler _masm(&cbuf);
- emit_cmpfp_fixup(_masm);
- %}
-
- enc_class cmpfp3(rRegI dst)
- %{
- int dstenc = $dst$$reg;
-
- // movl $dst, -1
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0xB8 | (dstenc & 7));
- emit_d32(cbuf, -1);
-
- // jp,s done
- emit_opcode(cbuf, 0x7A);
- emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
-
- // jb,s done
- emit_opcode(cbuf, 0x72);
- emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
-
- // setne $dst
- if (dstenc >= 4) {
- emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x95);
- emit_opcode(cbuf, 0xC0 | (dstenc & 7));
-
- // movzbl $dst, $dst
- if (dstenc >= 4) {
- emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0xB6);
- emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
- %}
-
enc_class cdql_enc(no_rax_rdx_RegI div)
%{
// Full implementation of Java idiv and irem; checks for
@@ -2472,55 +2291,6 @@ encode %{
emit_cc(cbuf, $secondary, $cop$$cmpcode);
%}
- enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
- %{
- // Invert sense of branch from sense of cmov
- emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
- emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
- ? (UseXmmRegToRegMoveAll ? 3 : 4)
- : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
- // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
- if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
- if ($dst$$reg < 8) {
- if ($src$$reg >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- } else {
- if ($src$$reg < 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- } else {
- emit_opcode(cbuf, Assembler::REX_RB);
- }
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
- emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
- %}
-
- enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
- %{
- // Invert sense of branch from sense of cmov
- emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
- emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
-
- // UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
- emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
- if ($dst$$reg < 8) {
- if ($src$$reg >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- } else {
- if ($src$$reg < 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- } else {
- emit_opcode(cbuf, Assembler::REX_RB);
- }
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
- emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
- %}
-
enc_class enc_PartialSubtypeCheck()
%{
Register Rrdi = as_Register(RDI_enc); // result register
@@ -2751,68 +2521,6 @@ encode %{
}
%}
- // Encode a reg-reg copy. If it is useless, then empty encoding.
- enc_class enc_copy(rRegI dst, rRegI src)
- %{
- encode_copy(cbuf, $dst$$reg, $src$$reg);
- %}
-
- // Encode xmm reg-reg copy. If it is useless, then empty encoding.
- enc_class enc_CopyXD( RegD dst, RegD src ) %{
- encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
- %}
-
- enc_class enc_copy_always(rRegI dst, rRegI src)
- %{
- int srcenc = $src$$reg;
- int dstenc = $dst$$reg;
-
- if (dstenc < 8) {
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- srcenc -= 8;
- }
- } else {
- if (srcenc < 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- } else {
- emit_opcode(cbuf, Assembler::REX_RB);
- srcenc -= 8;
- }
- dstenc -= 8;
- }
-
- emit_opcode(cbuf, 0x8B);
- emit_rm(cbuf, 0x3, dstenc, srcenc);
- %}
-
- enc_class enc_copy_wide(rRegL dst, rRegL src)
- %{
- int srcenc = $src$$reg;
- int dstenc = $dst$$reg;
-
- if (dstenc != srcenc) {
- if (dstenc < 8) {
- if (srcenc < 8) {
- emit_opcode(cbuf, Assembler::REX_W);
- } else {
- emit_opcode(cbuf, Assembler::REX_WB);
- srcenc -= 8;
- }
- } else {
- if (srcenc < 8) {
- emit_opcode(cbuf, Assembler::REX_WR);
- } else {
- emit_opcode(cbuf, Assembler::REX_WRB);
- srcenc -= 8;
- }
- dstenc -= 8;
- }
- emit_opcode(cbuf, 0x8B);
- emit_rm(cbuf, 0x3, dstenc, srcenc);
- }
- %}
-
enc_class Con32(immI src)
%{
// Output immediate
@@ -3212,92 +2920,19 @@ encode %{
%}
enc_class Push_ResultXD(regD dst) %{
- int dstenc = $dst$$reg;
-
- store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
-
- // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
- emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- }
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
- encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
-
- // add rsp,8
- emit_opcode(cbuf, Assembler::REX_W);
- emit_opcode(cbuf,0x83);
- emit_rm(cbuf,0x3, 0x0, RSP_enc);
- emit_d8(cbuf,0x08);
- %}
-
- enc_class Push_SrcXD(regD src) %{
- int srcenc = $src$$reg;
-
- // subq rsp,#8
- emit_opcode(cbuf, Assembler::REX_W);
- emit_opcode(cbuf, 0x83);
- emit_rm(cbuf, 0x3, 0x5, RSP_enc);
- emit_d8(cbuf, 0x8);
-
- // movsd [rsp],src
- emit_opcode(cbuf, 0xF2);
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x11);
- encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
-
- // fldd [rsp]
- emit_opcode(cbuf, 0x66);
- emit_opcode(cbuf, 0xDD);
- encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
- %}
-
-
- enc_class movq_ld(regD dst, memory mem) %{
MacroAssembler _masm(&cbuf);
- __ movq($dst$$XMMRegister, $mem$$Address);
+ __ fstp_d(Address(rsp, 0));
+ __ movdbl($dst$$XMMRegister, Address(rsp, 0));
+ __ addptr(rsp, 8);
%}
- enc_class movq_st(memory mem, regD src) %{
- MacroAssembler _masm(&cbuf);
- __ movq($mem$$Address, $src$$XMMRegister);
- %}
-
- enc_class pshufd_8x8(regF dst, regF src) %{
- MacroAssembler _masm(&cbuf);
-
- encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
- __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
- __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
- %}
-
- enc_class pshufd_4x16(regF dst, regF src) %{
- MacroAssembler _masm(&cbuf);
-
- __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
- %}
-
- enc_class pshufd(regD dst, regD src, int mode) %{
- MacroAssembler _masm(&cbuf);
-
- __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
- %}
-
- enc_class pxor(regD dst, regD src) %{
+ enc_class Push_SrcXD(regD src) %{
MacroAssembler _masm(&cbuf);
-
- __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
+ __ subptr(rsp, 8);
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_d(Address(rsp, 0));
%}
- enc_class mov_i2x(regD dst, rRegI src) %{
- MacroAssembler _masm(&cbuf);
-
- __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
- %}
// obj: object to lock
// box: box address (header location) -- killed
@@ -3534,303 +3169,6 @@ encode %{
RELOC_DISP32);
%}
- enc_class absF_encoding(regF dst)
- %{
- int dstenc = $dst$$reg;
- address signmask_address = (address) StubRoutines::x86::float_sign_mask();
-
- cbuf.set_insts_mark();
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- dstenc -= 8;
- }
- // XXX reg_mem doesn't support RIP-relative addressing yet
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x54);
- emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101
- emit_d32_reloc(cbuf, signmask_address);
- %}
-
- enc_class absD_encoding(regD dst)
- %{
- int dstenc = $dst$$reg;
- address signmask_address = (address) StubRoutines::x86::double_sign_mask();
-
- cbuf.set_insts_mark();
- emit_opcode(cbuf, 0x66);
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- dstenc -= 8;
- }
- // XXX reg_mem doesn't support RIP-relative addressing yet
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x54);
- emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101
- emit_d32_reloc(cbuf, signmask_address);
- %}
-
- enc_class negF_encoding(regF dst)
- %{
- int dstenc = $dst$$reg;
- address signflip_address = (address) StubRoutines::x86::float_sign_flip();
-
- cbuf.set_insts_mark();
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- dstenc -= 8;
- }
- // XXX reg_mem doesn't support RIP-relative addressing yet
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x57);
- emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101
- emit_d32_reloc(cbuf, signflip_address);
- %}
-
- enc_class negD_encoding(regD dst)
- %{
- int dstenc = $dst$$reg;
- address signflip_address = (address) StubRoutines::x86::double_sign_flip();
-
- cbuf.set_insts_mark();
- emit_opcode(cbuf, 0x66);
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- dstenc -= 8;
- }
- // XXX reg_mem doesn't support RIP-relative addressing yet
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x57);
- emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101
- emit_d32_reloc(cbuf, signflip_address);
- %}
-
- enc_class f2i_fixup(rRegI dst, regF src)
- %{
- int dstenc = $dst$$reg;
- int srcenc = $src$$reg;
-
- // cmpl $dst, #0x80000000
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x81);
- emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
- emit_d32(cbuf, 0x80000000);
-
- // jne,s done
- emit_opcode(cbuf, 0x75);
- if (srcenc < 8 && dstenc < 8) {
- emit_d8(cbuf, 0xF);
- } else if (srcenc >= 8 && dstenc >= 8) {
- emit_d8(cbuf, 0x11);
- } else {
- emit_d8(cbuf, 0x10);
- }
-
- // subq rsp, #8
- emit_opcode(cbuf, Assembler::REX_W);
- emit_opcode(cbuf, 0x83);
- emit_rm(cbuf, 0x3, 0x5, RSP_enc);
- emit_d8(cbuf, 8);
-
- // movss [rsp], $src
- emit_opcode(cbuf, 0xF3);
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x11);
- encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
- // call f2i_fixup
- cbuf.set_insts_mark();
- emit_opcode(cbuf, 0xE8);
- emit_d32_reloc(cbuf,
- (int)
- (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
- runtime_call_Relocation::spec(),
- RELOC_DISP32);
-
- // popq $dst
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
- // done:
- %}
-
- enc_class f2l_fixup(rRegL dst, regF src)
- %{
- int dstenc = $dst$$reg;
- int srcenc = $src$$reg;
- address const_address = (address) StubRoutines::x86::double_sign_flip();
-
- // cmpq $dst, [0x8000000000000000]
- cbuf.set_insts_mark();
- emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
- emit_opcode(cbuf, 0x39);
- // XXX reg_mem doesn't support RIP-relative addressing yet
- emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
- emit_d32_reloc(cbuf, const_address);
-
-
- // jne,s done
- emit_opcode(cbuf, 0x75);
- if (srcenc < 8 && dstenc < 8) {
- emit_d8(cbuf, 0xF);
- } else if (srcenc >= 8 && dstenc >= 8) {
- emit_d8(cbuf, 0x11);
- } else {
- emit_d8(cbuf, 0x10);
- }
-
- // subq rsp, #8
- emit_opcode(cbuf, Assembler::REX_W);
- emit_opcode(cbuf, 0x83);
- emit_rm(cbuf, 0x3, 0x5, RSP_enc);
- emit_d8(cbuf, 8);
-
- // movss [rsp], $src
- emit_opcode(cbuf, 0xF3);
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x11);
- encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
- // call f2l_fixup
- cbuf.set_insts_mark();
- emit_opcode(cbuf, 0xE8);
- emit_d32_reloc(cbuf,
- (int)
- (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
- runtime_call_Relocation::spec(),
- RELOC_DISP32);
-
- // popq $dst
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
- // done:
- %}
-
- enc_class d2i_fixup(rRegI dst, regD src)
- %{
- int dstenc = $dst$$reg;
- int srcenc = $src$$reg;
-
- // cmpl $dst, #0x80000000
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x81);
- emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
- emit_d32(cbuf, 0x80000000);
-
- // jne,s done
- emit_opcode(cbuf, 0x75);
- if (srcenc < 8 && dstenc < 8) {
- emit_d8(cbuf, 0xF);
- } else if (srcenc >= 8 && dstenc >= 8) {
- emit_d8(cbuf, 0x11);
- } else {
- emit_d8(cbuf, 0x10);
- }
-
- // subq rsp, #8
- emit_opcode(cbuf, Assembler::REX_W);
- emit_opcode(cbuf, 0x83);
- emit_rm(cbuf, 0x3, 0x5, RSP_enc);
- emit_d8(cbuf, 8);
-
- // movsd [rsp], $src
- emit_opcode(cbuf, 0xF2);
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x11);
- encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
- // call d2i_fixup
- cbuf.set_insts_mark();
- emit_opcode(cbuf, 0xE8);
- emit_d32_reloc(cbuf,
- (int)
- (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
- runtime_call_Relocation::spec(),
- RELOC_DISP32);
-
- // popq $dst
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
- // done:
- %}
-
- enc_class d2l_fixup(rRegL dst, regD src)
- %{
- int dstenc = $dst$$reg;
- int srcenc = $src$$reg;
- address const_address = (address) StubRoutines::x86::double_sign_flip();
-
- // cmpq $dst, [0x8000000000000000]
- cbuf.set_insts_mark();
- emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
- emit_opcode(cbuf, 0x39);
- // XXX reg_mem doesn't support RIP-relative addressing yet
- emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
- emit_d32_reloc(cbuf, const_address);
-
-
- // jne,s done
- emit_opcode(cbuf, 0x75);
- if (srcenc < 8 && dstenc < 8) {
- emit_d8(cbuf, 0xF);
- } else if (srcenc >= 8 && dstenc >= 8) {
- emit_d8(cbuf, 0x11);
- } else {
- emit_d8(cbuf, 0x10);
- }
-
- // subq rsp, #8
- emit_opcode(cbuf, Assembler::REX_W);
- emit_opcode(cbuf, 0x83);
- emit_rm(cbuf, 0x3, 0x5, RSP_enc);
- emit_d8(cbuf, 8);
-
- // movsd [rsp], $src
- emit_opcode(cbuf, 0xF2);
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x11);
- encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
- // call d2l_fixup
- cbuf.set_insts_mark();
- emit_opcode(cbuf, 0xE8);
- emit_d32_reloc(cbuf,
- (int)
- (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
- runtime_call_Relocation::spec(),
- RELOC_DISP32);
-
- // popq $dst
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
- // done:
- %}
%}
@@ -6156,8 +5494,9 @@ instruct loadF(regF dst, memory mem)
ins_cost(145); // XXX
format %{ "movss $dst, $mem\t# float" %}
- opcode(0xF3, 0x0F, 0x10);
- ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -6169,8 +5508,9 @@ instruct loadD_partial(regD dst, memory mem)
ins_cost(145); // XXX
format %{ "movlpd $dst, $mem\t# double" %}
- opcode(0x66, 0x0F, 0x12);
- ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -6181,8 +5521,9 @@ instruct loadD(regD dst, memory mem)
ins_cost(145); // XXX
format %{ "movsd $dst, $mem\t# double" %}
- opcode(0xF2, 0x0F, 0x10);
- ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -6191,7 +5532,9 @@ instruct loadA8B(regD dst, memory mem) %{
match(Set dst (Load8B mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed8B" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6200,7 +5543,9 @@ instruct loadA4S(regD dst, memory mem) %{
match(Set dst (Load4S mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed4S" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6209,7 +5554,9 @@ instruct loadA4C(regD dst, memory mem) %{
match(Set dst (Load4C mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed4C" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6218,16 +5565,20 @@ instruct load2IU(regD dst, memory mem) %{
match(Set dst (Load2I mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed2I" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load Aligned Packed Single to XMM
instruct loadA2F(regD dst, memory mem) %{
match(Set dst (Load2F mem));
- ins_cost(145);
+ ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed2F" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6540,8 +5891,9 @@ instruct loadConF0(regF dst, immF0 src)
ins_cost(100);
format %{ "xorps $dst, $dst\t# float 0.0" %}
- opcode(0x0F, 0x57);
- ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
+ ins_encode %{
+ __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe(pipe_slow);
%}
@@ -6562,8 +5914,9 @@ instruct loadConD0(regD dst, immD0 src)
ins_cost(100);
format %{ "xorpd $dst, $dst\t# double 0.0" %}
- opcode(0x66, 0x0F, 0x57);
- ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
+ ins_encode %{
+ __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe(pipe_slow);
%}
@@ -6606,8 +5959,9 @@ instruct loadSSF(regF dst, stackSlotF src)
ins_cost(125);
format %{ "movss $dst, $src\t# float stk" %}
- opcode(0xF3, 0x0F, 0x10);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -6972,7 +6326,9 @@ instruct storeA8B(memory mem, regD src) %{
match(Set mem (Store8B mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed8B" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6981,7 +6337,9 @@ instruct storeA4C(memory mem, regD src) %{
match(Set mem (Store4C mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed4C" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6990,7 +6348,9 @@ instruct storeA2I(memory mem, regD src) %{
match(Set mem (Store2I mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed2I" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -7024,7 +6384,9 @@ instruct storeA2F(memory mem, regD src) %{
match(Set mem (Store2F mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed2F" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -7035,8 +6397,9 @@ instruct storeF(memory mem, regF src)
ins_cost(95); // XXX
format %{ "movss $mem, $src\t# float" %}
- opcode(0xF3, 0x0F, 0x11);
- ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
+ ins_encode %{
+ __ movflt($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -7072,8 +6435,9 @@ instruct storeD(memory mem, regD src)
ins_cost(95); // XXX
format %{ "movsd $mem, $src\t# double" %}
- opcode(0xF2, 0x0F, 0x11);
- ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
+ ins_encode %{
+ __ movdbl($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -7142,8 +6506,9 @@ instruct storeSSF(stackSlotF dst, regF src)
ins_cost(95); // XXX
format %{ "movss $dst, $src\t# float stk" %}
- opcode(0xF3, 0x0F, 0x11);
- ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+ ins_encode %{
+ __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -7153,8 +6518,9 @@ instruct storeSSD(stackSlotD dst, regD src)
ins_cost(95); // XXX
format %{ "movsd $dst, $src\t# double stk" %}
- opcode(0xF2, 0x0F, 0x11);
- ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+ ins_encode %{
+ __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -7444,6 +6810,16 @@ instruct unnecessary_membar_volatile()
ins_pipe(empty);
%}
+instruct membar_storestore() %{
+ match(MemBarStoreStore);
+ ins_cost(0);
+
+ size(0);
+ format %{ "MEMBAR-storestore (empty encoding)" %}
+ ins_encode( );
+ ins_pipe(empty);
+%}
+
//----------Move Instructions--------------------------------------------------
instruct castX2P(rRegP dst, rRegL src)
@@ -7451,7 +6827,11 @@ instruct castX2P(rRegP dst, rRegL src)
match(Set dst (CastX2P src));
format %{ "movq $dst, $src\t# long->ptr" %}
- ins_encode(enc_copy_wide(dst, src));
+ ins_encode %{
+ if ($dst$$reg != $src$$reg) {
+ __ movptr($dst$$Register, $src$$Register);
+ }
+ %}
ins_pipe(ialu_reg_reg); // XXX
%}
@@ -7460,7 +6840,11 @@ instruct castP2X(rRegL dst, rRegP src)
match(Set dst (CastP2X src));
format %{ "movq $dst, $src\t# ptr -> long" %}
- ins_encode(enc_copy_wide(dst, src));
+ ins_encode %{
+ if ($dst$$reg != $src$$reg) {
+ __ movptr($dst$$Register, $src$$Register);
+ }
+ %}
ins_pipe(ialu_reg_reg); // XXX
%}
@@ -7813,7 +7197,13 @@ instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
format %{ "jn$cop skip\t# signed cmove float\n\t"
"movss $dst, $src\n"
"skip:" %}
- ins_encode(enc_cmovf_branch(cop, dst, src));
+ ins_encode %{
+ Label Lskip;
+ // Invert sense of branch from sense of CMOV
+ __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ __ bind(Lskip);
+ %}
ins_pipe(pipe_slow);
%}
@@ -7837,7 +7227,13 @@ instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
format %{ "jn$cop skip\t# unsigned cmove float\n\t"
"movss $dst, $src\n"
"skip:" %}
- ins_encode(enc_cmovf_branch(cop, dst, src));
+ ins_encode %{
+ Label Lskip;
+ // Invert sense of branch from sense of CMOV
+ __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ __ bind(Lskip);
+ %}
ins_pipe(pipe_slow);
%}
@@ -7857,7 +7253,13 @@ instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
format %{ "jn$cop skip\t# signed cmove double\n\t"
"movsd $dst, $src\n"
"skip:" %}
- ins_encode(enc_cmovd_branch(cop, dst, src));
+ ins_encode %{
+ Label Lskip;
+ // Invert sense of branch from sense of CMOV
+ __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ __ bind(Lskip);
+ %}
ins_pipe(pipe_slow);
%}
@@ -7869,7 +7271,13 @@ instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
format %{ "jn$cop skip\t# unsigned cmove double\n\t"
"movsd $dst, $src\n"
"skip:" %}
- ins_encode(enc_cmovd_branch(cop, dst, src));
+ ins_encode %{
+ Label Lskip;
+ // Invert sense of branch from sense of CMOV
+ __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ __ bind(Lskip);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10191,17 +9599,18 @@ instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
"pushfq\t# saw NaN, set CF\n\t"
"andq [rsp], #0xffffff2b\n\t"
"popfq\n"
- "exit: nop\t# avoid branch to branch" %}
- opcode(0x0F, 0x2E);
- ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
- cmpfp_fixup);
+ "exit:" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe(pipe_slow);
%}
instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
match(Set cr (CmpF src1 src2));
- ins_cost(145);
+ ins_cost(100);
format %{ "ucomiss $src1, $src2" %}
ins_encode %{
__ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
@@ -10219,10 +9628,11 @@ instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
"pushfq\t# saw NaN, set CF\n\t"
"andq [rsp], #0xffffff2b\n\t"
"popfq\n"
- "exit: nop\t# avoid branch to branch" %}
- opcode(0x0F, 0x2E);
- ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
- cmpfp_fixup);
+ "exit:" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10231,8 +9641,9 @@ instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
ins_cost(100);
format %{ "ucomiss $src1, $src2" %}
- opcode(0x0F, 0x2E);
- ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$Address);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10245,7 +9656,7 @@ instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
"pushfq\t# saw NaN, set CF\n\t"
"andq [rsp], #0xffffff2b\n\t"
"popfq\n"
- "exit: nop\t# avoid branch to branch" %}
+ "exit:" %}
ins_encode %{
__ ucomiss($src$$XMMRegister, $constantaddress($con));
emit_cmpfp_fixup(_masm);
@@ -10273,10 +9684,11 @@ instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
"pushfq\t# saw NaN, set CF\n\t"
"andq [rsp], #0xffffff2b\n\t"
"popfq\n"
- "exit: nop\t# avoid branch to branch" %}
- opcode(0x66, 0x0F, 0x2E);
- ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
- cmpfp_fixup);
+ "exit:" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10301,10 +9713,11 @@ instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
"pushfq\t# saw NaN, set CF\n\t"
"andq [rsp], #0xffffff2b\n\t"
"popfq\n"
- "exit: nop\t# avoid branch to branch" %}
- opcode(0x66, 0x0F, 0x2E);
- ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
- cmpfp_fixup);
+ "exit:" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10313,8 +9726,9 @@ instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
ins_cost(100);
format %{ "ucomisd $src1, $src2" %}
- opcode(0x66, 0x0F, 0x2E);
- ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$Address);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10327,7 +9741,7 @@ instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
"pushfq\t# saw NaN, set CF\n\t"
"andq [rsp], #0xffffff2b\n\t"
"popfq\n"
- "exit: nop\t# avoid branch to branch" %}
+ "exit:" %}
ins_encode %{
__ ucomisd($src$$XMMRegister, $constantaddress($con));
emit_cmpfp_fixup(_masm);
@@ -10359,10 +9773,10 @@ instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
"setne $dst\n\t"
"movzbl $dst, $dst\n"
"done:" %}
-
- opcode(0x0F, 0x2E);
- ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
- cmpfp3(dst));
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10380,10 +9794,10 @@ instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
"setne $dst\n\t"
"movzbl $dst, $dst\n"
"done:" %}
-
- opcode(0x0F, 0x2E);
- ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
- cmpfp3(dst));
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10401,15 +9815,8 @@ instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
"movzbl $dst, $dst\n"
"done:" %}
ins_encode %{
- Label L_done;
- Register Rdst = $dst$$Register;
__ ucomiss($src$$XMMRegister, $constantaddress($con));
- __ movl(Rdst, -1);
- __ jcc(Assembler::parity, L_done);
- __ jcc(Assembler::below, L_done);
- __ setb(Assembler::notEqual, Rdst);
- __ movzbl(Rdst, Rdst);
- __ bind(L_done);
+ emit_cmpfp3(_masm, $dst$$Register);
%}
ins_pipe(pipe_slow);
%}
@@ -10428,10 +9835,10 @@ instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
"setne $dst\n\t"
"movzbl $dst, $dst\n"
"done:" %}
-
- opcode(0x66, 0x0F, 0x2E);
- ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
- cmpfp3(dst));
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10449,10 +9856,10 @@ instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
"setne $dst\n\t"
"movzbl $dst, $dst\n"
"done:" %}
-
- opcode(0x66, 0x0F, 0x2E);
- ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
- cmpfp3(dst));
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10470,377 +9877,12 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
"movzbl $dst, $dst\n"
"done:" %}
ins_encode %{
- Register Rdst = $dst$$Register;
- Label L_done;
__ ucomisd($src$$XMMRegister, $constantaddress($con));
- __ movl(Rdst, -1);
- __ jcc(Assembler::parity, L_done);
- __ jcc(Assembler::below, L_done);
- __ setb(Assembler::notEqual, Rdst);
- __ movzbl(Rdst, Rdst);
- __ bind(L_done);
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct addF_reg(regF dst, regF src)
-%{
- match(Set dst (AddF dst src));
-
- format %{ "addss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x58);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct addF_mem(regF dst, memory src)
-%{
- match(Set dst (AddF dst (LoadF src)));
-
- format %{ "addss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x58);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct addF_imm(regF dst, immF con) %{
- match(Set dst (AddF dst con));
- format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ addss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct addD_reg(regD dst, regD src)
-%{
- match(Set dst (AddD dst src));
-
- format %{ "addsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x58);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct addD_mem(regD dst, memory src)
-%{
- match(Set dst (AddD dst (LoadD src)));
-
- format %{ "addsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x58);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct addD_imm(regD dst, immD con) %{
- match(Set dst (AddD dst con));
- format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ addsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct subF_reg(regF dst, regF src)
-%{
- match(Set dst (SubF dst src));
-
- format %{ "subss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x5C);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct subF_mem(regF dst, memory src)
-%{
- match(Set dst (SubF dst (LoadF src)));
-
- format %{ "subss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x5C);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct subF_imm(regF dst, immF con) %{
- match(Set dst (SubF dst con));
- format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ subss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct subD_reg(regD dst, regD src)
-%{
- match(Set dst (SubD dst src));
-
- format %{ "subsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x5C);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct subD_mem(regD dst, memory src)
-%{
- match(Set dst (SubD dst (LoadD src)));
-
- format %{ "subsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x5C);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct subD_imm(regD dst, immD con) %{
- match(Set dst (SubD dst con));
- format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ subsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct mulF_reg(regF dst, regF src)
-%{
- match(Set dst (MulF dst src));
-
- format %{ "mulss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x59);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct mulF_mem(regF dst, memory src)
-%{
- match(Set dst (MulF dst (LoadF src)));
-
- format %{ "mulss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x59);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct mulF_imm(regF dst, immF con) %{
- match(Set dst (MulF dst con));
- format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ mulss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct mulD_reg(regD dst, regD src)
-%{
- match(Set dst (MulD dst src));
-
- format %{ "mulsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x59);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct mulD_mem(regD dst, memory src)
-%{
- match(Set dst (MulD dst (LoadD src)));
-
- format %{ "mulsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x59);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct mulD_imm(regD dst, immD con) %{
- match(Set dst (MulD dst con));
- format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ mulsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct divF_reg(regF dst, regF src)
-%{
- match(Set dst (DivF dst src));
-
- format %{ "divss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x5E);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct divF_mem(regF dst, memory src)
-%{
- match(Set dst (DivF dst (LoadF src)));
-
- format %{ "divss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x5E);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct divF_imm(regF dst, immF con) %{
- match(Set dst (DivF dst con));
- format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ divss($dst$$XMMRegister, $constantaddress($con));
+ emit_cmpfp3(_masm, $dst$$Register);
%}
ins_pipe(pipe_slow);
%}
-instruct divD_reg(regD dst, regD src)
-%{
- match(Set dst (DivD dst src));
-
- format %{ "divsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x5E);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct divD_mem(regD dst, memory src)
-%{
- match(Set dst (DivD dst (LoadD src)));
-
- format %{ "divsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x5E);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct divD_imm(regD dst, immD con) %{
- match(Set dst (DivD dst con));
- format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ divsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct sqrtF_reg(regF dst, regF src)
-%{
- match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
-
- format %{ "sqrtss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x51);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct sqrtF_mem(regF dst, memory src)
-%{
- match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
-
- format %{ "sqrtss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x51);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct sqrtF_imm(regF dst, immF con) %{
- match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
- format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ sqrtss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct sqrtD_reg(regD dst, regD src)
-%{
- match(Set dst (SqrtD src));
-
- format %{ "sqrtsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x51);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct sqrtD_mem(regD dst, memory src)
-%{
- match(Set dst (SqrtD (LoadD src)));
-
- format %{ "sqrtsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x51);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct sqrtD_imm(regD dst, immD con) %{
- match(Set dst (SqrtD con));
- format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct absF_reg(regF dst)
-%{
- match(Set dst (AbsF dst));
-
- format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
- ins_encode(absF_encoding(dst));
- ins_pipe(pipe_slow);
-%}
-
-instruct absD_reg(regD dst)
-%{
- match(Set dst (AbsD dst));
-
- format %{ "andpd $dst, [0x7fffffffffffffff]\t"
- "# abs double by sign masking" %}
- ins_encode(absD_encoding(dst));
- ins_pipe(pipe_slow);
-%}
-
-instruct negF_reg(regF dst)
-%{
- match(Set dst (NegF dst));
-
- format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
- ins_encode(negF_encoding(dst));
- ins_pipe(pipe_slow);
-%}
-
-instruct negD_reg(regD dst)
-%{
- match(Set dst (NegD dst));
-
- format %{ "xorpd $dst, [0x8000000000000000]\t"
- "# neg double by sign flipping" %}
- ins_encode(negD_encoding(dst));
- ins_pipe(pipe_slow);
-%}
-
// -----------Trig and Trancendental Instructions------------------------------
instruct cosD_reg(regD dst) %{
match(Set dst (CosD dst));
@@ -10929,8 +9971,9 @@ instruct convF2D_reg_reg(regD dst, regF src)
match(Set dst (ConvF2D src));
format %{ "cvtss2sd $dst, $src" %}
- opcode(0xF3, 0x0F, 0x5A);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+ ins_encode %{
+ __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -10939,8 +9982,9 @@ instruct convF2D_reg_mem(regD dst, memory src)
match(Set dst (ConvF2D (LoadF src)));
format %{ "cvtss2sd $dst, $src" %}
- opcode(0xF3, 0x0F, 0x5A);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -10949,8 +9993,9 @@ instruct convD2F_reg_reg(regF dst, regD src)
match(Set dst (ConvD2F src));
format %{ "cvtsd2ss $dst, $src" %}
- opcode(0xF2, 0x0F, 0x5A);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+ ins_encode %{
+ __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -10959,8 +10004,9 @@ instruct convD2F_reg_mem(regF dst, memory src)
match(Set dst (ConvD2F (LoadD src)));
format %{ "cvtsd2ss $dst, $src" %}
- opcode(0xF2, 0x0F, 0x5A);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -10978,9 +10024,17 @@ instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
"call f2i_fixup\n\t"
"popq $dst\n"
"done: "%}
- opcode(0xF3, 0x0F, 0x2C);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
- f2i_fixup(dst, src));
+ ins_encode %{
+ Label done;
+ __ cvttss2sil($dst$$Register, $src$$XMMRegister);
+ __ cmpl($dst$$Register, 0x80000000);
+ __ jccb(Assembler::notEqual, done);
+ __ subptr(rsp, 8);
+ __ movflt(Address(rsp, 0), $src$$XMMRegister);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
+ __ pop($dst$$Register);
+ __ bind(done);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10997,9 +10051,18 @@ instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
"call f2l_fixup\n\t"
"popq $dst\n"
"done: "%}
- opcode(0xF3, 0x0F, 0x2C);
- ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
- f2l_fixup(dst, src));
+ ins_encode %{
+ Label done;
+ __ cvttss2siq($dst$$Register, $src$$XMMRegister);
+ __ cmp64($dst$$Register,
+ ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+ __ jccb(Assembler::notEqual, done);
+ __ subptr(rsp, 8);
+ __ movflt(Address(rsp, 0), $src$$XMMRegister);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
+ __ pop($dst$$Register);
+ __ bind(done);
+ %}
ins_pipe(pipe_slow);
%}
@@ -11016,9 +10079,17 @@ instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
"call d2i_fixup\n\t"
"popq $dst\n"
"done: "%}
- opcode(0xF2, 0x0F, 0x2C);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
- d2i_fixup(dst, src));
+ ins_encode %{
+ Label done;
+ __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
+ __ cmpl($dst$$Register, 0x80000000);
+ __ jccb(Assembler::notEqual, done);
+ __ subptr(rsp, 8);
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
+ __ pop($dst$$Register);
+ __ bind(done);
+ %}
ins_pipe(pipe_slow);
%}
@@ -11035,9 +10106,18 @@ instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
"call d2l_fixup\n\t"
"popq $dst\n"
"done: "%}
- opcode(0xF2, 0x0F, 0x2C);
- ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
- d2l_fixup(dst, src));
+ ins_encode %{
+ Label done;
+ __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
+ __ cmp64($dst$$Register,
+ ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+ __ jccb(Assembler::notEqual, done);
+ __ subptr(rsp, 8);
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
+ __ pop($dst$$Register);
+ __ bind(done);
+ %}
ins_pipe(pipe_slow);
%}
@@ -11047,8 +10127,9 @@ instruct convI2F_reg_reg(regF dst, rRegI src)
match(Set dst (ConvI2F src));
format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
- opcode(0xF3, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+ ins_encode %{
+ __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11057,8 +10138,9 @@ instruct convI2F_reg_mem(regF dst, memory src)
match(Set dst (ConvI2F (LoadI src)));
format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
- opcode(0xF3, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11068,8 +10150,9 @@ instruct convI2D_reg_reg(regD dst, rRegI src)
match(Set dst (ConvI2D src));
format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
- opcode(0xF2, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+ ins_encode %{
+ __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11078,8 +10161,9 @@ instruct convI2D_reg_mem(regD dst, memory src)
match(Set dst (ConvI2D (LoadI src)));
format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
- opcode(0xF2, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11116,8 +10200,9 @@ instruct convL2F_reg_reg(regF dst, rRegL src)
match(Set dst (ConvL2F src));
format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
- opcode(0xF3, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
+ ins_encode %{
+ __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11126,8 +10211,9 @@ instruct convL2F_reg_mem(regF dst, memory src)
match(Set dst (ConvL2F (LoadL src)));
format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
- opcode(0xF3, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11136,8 +10222,9 @@ instruct convL2D_reg_reg(regD dst, rRegL src)
match(Set dst (ConvL2D src));
format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
- opcode(0xF2, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
+ ins_encode %{
+ __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11146,8 +10233,9 @@ instruct convL2D_reg_mem(regD dst, memory src)
match(Set dst (ConvL2D (LoadL src)));
format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
- opcode(0xF2, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11186,7 +10274,11 @@ instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
match(Set dst (AndL (ConvI2L src) mask));
format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
- ins_encode(enc_copy(dst, src));
+ ins_encode %{
+ if ($dst$$reg != $src$$reg) {
+ __ movl($dst$$Register, $src$$Register);
+ }
+ %}
ins_pipe(ialu_reg_reg);
%}
@@ -11196,8 +10288,9 @@ instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
match(Set dst (AndL (ConvI2L (LoadI src)) mask));
format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
- opcode(0x8B);
- ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
+ ins_encode %{
+ __ movl($dst$$Register, $src$$Address);
+ %}
ins_pipe(ialu_reg_mem);
%}
@@ -11206,7 +10299,9 @@ instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
match(Set dst (AndL src mask));
format %{ "movl $dst, $src\t# zero-extend long" %}
- ins_encode(enc_copy_always(dst, src));
+ ins_encode %{
+ __ movl($dst$$Register, $src$$Register);
+ %}
ins_pipe(ialu_reg_reg);
%}
@@ -11215,7 +10310,9 @@ instruct convL2I_reg_reg(rRegI dst, rRegL src)
match(Set dst (ConvL2I src));
format %{ "movl $dst, $src\t# l2i" %}
- ins_encode(enc_copy_always(dst, src));
+ ins_encode %{
+ __ movl($dst$$Register, $src$$Register);
+ %}
ins_pipe(ialu_reg_reg);
%}
@@ -11226,8 +10323,9 @@ instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
ins_cost(125);
format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
- opcode(0x8B);
- ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
+ ins_encode %{
+ __ movl($dst$$Register, Address(rsp, $src$$disp));
+ %}
ins_pipe(ialu_reg_mem);
%}
@@ -11237,8 +10335,9 @@ instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
ins_cost(125);
format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
- opcode(0xF3, 0x0F, 0x10);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+ %}
ins_pipe(pipe_slow);
%}
@@ -11248,8 +10347,9 @@ instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
ins_cost(125);
format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
- opcode(0x8B);
- ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
+ ins_encode %{
+ __ movq($dst$$Register, Address(rsp, $src$$disp));
+ %}
ins_pipe(ialu_reg_mem);
%}
@@ -11260,8 +10360,9 @@ instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
ins_cost(125);
format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
- opcode(0x66, 0x0F, 0x12);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+ %}
ins_pipe(pipe_slow);
%}
@@ -11272,8 +10373,9 @@ instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
ins_cost(125);
format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
- opcode(0xF2, 0x0F, 0x10);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+ %}
ins_pipe(pipe_slow);
%}
@@ -11284,8 +10386,9 @@ instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
ins_cost(95); // XXX
format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
- opcode(0xF3, 0x0F, 0x11);
- ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+ ins_encode %{
+ __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow);
%}
@@ -11295,8 +10398,9 @@ instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
ins_cost(100);
format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
- opcode(0x89);
- ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
+ ins_encode %{
+ __ movl(Address(rsp, $dst$$disp), $src$$Register);
+ %}
ins_pipe( ialu_mem_reg );
%}
@@ -11306,8 +10410,9 @@ instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
ins_cost(95); // XXX
format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
- opcode(0xF2, 0x0F, 0x11);
- ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+ ins_encode %{
+ __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow);
%}
@@ -11317,8 +10422,9 @@ instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
ins_cost(100);
format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
- opcode(0x89);
- ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
+ ins_encode %{
+ __ movq(Address(rsp, $dst$$disp), $src$$Register);
+ %}
ins_pipe(ialu_mem_reg);
%}
@@ -11327,7 +10433,9 @@ instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
effect(DEF dst, USE src);
ins_cost(85);
format %{ "movd $dst,$src\t# MoveF2I" %}
- ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
+ ins_encode %{
+ __ movdl($dst$$Register, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -11336,7 +10444,9 @@ instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
effect(DEF dst, USE src);
ins_cost(85);
format %{ "movd $dst,$src\t# MoveD2L" %}
- ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
+ ins_encode %{
+ __ movdq($dst$$Register, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -11346,7 +10456,9 @@ instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
effect(DEF dst, USE src);
ins_cost(300);
format %{ "movd $dst,$src\t# MoveI2F" %}
- ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe( pipe_slow );
%}
@@ -11355,7 +10467,9 @@ instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
effect(DEF dst, USE src);
ins_cost(300);
format %{ "movd $dst,$src\t# MoveL2D" %}
- ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
+ ins_encode %{
+ __ movdq($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe( pipe_slow );
%}
@@ -11365,7 +10479,13 @@ instruct Repl8B_reg(regD dst, regD src) %{
format %{ "MOVDQA $dst,$src\n\t"
"PUNPCKLBW $dst,$dst\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
- ins_encode( pshufd_8x8(dst, src));
+ ins_encode %{
+ if ($dst$$reg != $src$$reg) {
+ __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
+ }
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( pipe_slow );
%}
@@ -11375,7 +10495,11 @@ instruct Repl8B_rRegI(regD dst, rRegI src) %{
format %{ "MOVD $dst,$src\n\t"
"PUNPCKLBW $dst,$dst\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
- ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( pipe_slow );
%}
@@ -11383,7 +10507,9 @@ instruct Repl8B_rRegI(regD dst, rRegI src) %{
instruct Repl8B_immI0(regD dst, immI0 zero) %{
match(Set dst (Replicate8B zero));
format %{ "PXOR $dst,$dst\t! replicate8B" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11391,7 +10517,9 @@ instruct Repl8B_immI0(regD dst, immI0 zero) %{
instruct Repl4S_reg(regD dst, regD src) %{
match(Set dst (Replicate4S src));
format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
- ins_encode( pshufd_4x16(dst, src));
+ ins_encode %{
+ __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11400,7 +10528,10 @@ instruct Repl4S_rRegI(regD dst, rRegI src) %{
match(Set dst (Replicate4S src));
format %{ "MOVD $dst,$src\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
- ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11408,7 +10539,9 @@ instruct Repl4S_rRegI(regD dst, rRegI src) %{
instruct Repl4S_immI0(regD dst, immI0 zero) %{
match(Set dst (Replicate4S zero));
format %{ "PXOR $dst,$dst\t! replicate4S" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11416,7 +10549,9 @@ instruct Repl4S_immI0(regD dst, immI0 zero) %{
instruct Repl4C_reg(regD dst, regD src) %{
match(Set dst (Replicate4C src));
format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
- ins_encode( pshufd_4x16(dst, src));
+ ins_encode %{
+ __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11425,7 +10560,10 @@ instruct Repl4C_rRegI(regD dst, rRegI src) %{
match(Set dst (Replicate4C src));
format %{ "MOVD $dst,$src\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
- ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11433,7 +10571,9 @@ instruct Repl4C_rRegI(regD dst, rRegI src) %{
instruct Repl4C_immI0(regD dst, immI0 zero) %{
match(Set dst (Replicate4C zero));
format %{ "PXOR $dst,$dst\t! replicate4C" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11441,7 +10581,9 @@ instruct Repl4C_immI0(regD dst, immI0 zero) %{
instruct Repl2I_reg(regD dst, regD src) %{
match(Set dst (Replicate2I src));
format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
- ins_encode( pshufd(dst, src, 0x00));
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11450,7 +10592,10 @@ instruct Repl2I_rRegI(regD dst, rRegI src) %{
match(Set dst (Replicate2I src));
format %{ "MOVD $dst,$src\n\t"
"PSHUFD $dst,$dst,0x00\t! replicate2I" %}
- ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11458,7 +10603,9 @@ instruct Repl2I_rRegI(regD dst, rRegI src) %{
instruct Repl2I_immI0(regD dst, immI0 zero) %{
match(Set dst (Replicate2I zero));
format %{ "PXOR $dst,$dst\t! replicate2I" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11466,7 +10613,9 @@ instruct Repl2I_immI0(regD dst, immI0 zero) %{
instruct Repl2F_reg(regD dst, regD src) %{
match(Set dst (Replicate2F src));
format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
- ins_encode( pshufd(dst, src, 0xe0));
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11474,7 +10623,9 @@ instruct Repl2F_reg(regD dst, regD src) %{
instruct Repl2F_regF(regD dst, regF src) %{
match(Set dst (Replicate2F src));
format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
- ins_encode( pshufd(dst, src, 0xe0));
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11482,7 +10633,9 @@ instruct Repl2F_regF(regD dst, regF src) %{
instruct Repl2F_immF0(regD dst, immF0 zero) %{
match(Set dst (Replicate2F zero));
format %{ "PXOR $dst,$dst\t! replicate2F" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -12162,12 +11315,12 @@ instruct partialSubtypeCheck(rdi_RegP result,
effect(KILL rcx, KILL cr);
ins_cost(1100); // slightly larger than the next version
- format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+ format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
"movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
"addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
"repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
"jne,s miss\t\t# Missed: rdi not-zero\n\t"
- "movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
+ "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
"xorq $result, $result\t\t Hit: rdi zero\n\t"
"miss:\t" %}
@@ -12185,12 +11338,12 @@ instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
effect(KILL rcx, KILL result);
ins_cost(1000);
- format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+ format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
"movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
"addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
"repne scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
"jne,s miss\t\t# Missed: flags nz\n\t"
- "movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
+ "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
"miss:\t" %}
opcode(0x0); // No need to XOR RDI
diff --git a/src/share/vm/adlc/formssel.cpp b/src/share/vm/adlc/formssel.cpp
index c1d39849b..3c69d6a0d 100644
--- a/src/share/vm/adlc/formssel.cpp
+++ b/src/share/vm/adlc/formssel.cpp
@@ -627,6 +627,7 @@ bool InstructForm::is_wide_memory_kill(FormDict &globals) const {
if( strcmp(_matrule->_opType,"MemBarAcquire") == 0 ) return true;
if( strcmp(_matrule->_opType,"MemBarReleaseLock") == 0 ) return true;
if( strcmp(_matrule->_opType,"MemBarAcquireLock") == 0 ) return true;
+ if( strcmp(_matrule->_opType,"MemBarStoreStore") == 0 ) return true;
return false;
}
@@ -3978,7 +3979,8 @@ bool MatchRule::is_ideal_membar() const {
!strcmp(_opType,"MemBarAcquireLock") ||
!strcmp(_opType,"MemBarReleaseLock") ||
!strcmp(_opType,"MemBarVolatile" ) ||
- !strcmp(_opType,"MemBarCPUOrder" ) ;
+ !strcmp(_opType,"MemBarCPUOrder" ) ||
+ !strcmp(_opType,"MemBarStoreStore" );
}
bool MatchRule::is_ideal_loadPC() const {
diff --git a/src/share/vm/asm/assembler.cpp b/src/share/vm/asm/assembler.cpp
index 86011e974..2bcdcbc88 100644
--- a/src/share/vm/asm/assembler.cpp
+++ b/src/share/vm/asm/assembler.cpp
@@ -61,6 +61,7 @@ AbstractAssembler::AbstractAssembler(CodeBuffer* code) {
_code_limit = cs->limit();
_code_pos = cs->end();
_oop_recorder= code->oop_recorder();
+ DEBUG_ONLY( _short_branch_delta = 0; )
if (_code_begin == NULL) {
vm_exit_out_of_memory(0, err_msg("CodeCache: no room for %s",
code->name()));
diff --git a/src/share/vm/asm/assembler.hpp b/src/share/vm/asm/assembler.hpp
index 8db7eef2e..c25aa3fca 100644
--- a/src/share/vm/asm/assembler.hpp
+++ b/src/share/vm/asm/assembler.hpp
@@ -241,6 +241,33 @@ class AbstractAssembler : public ResourceObj {
// Make it return true on platforms which need to verify
// instruction boundaries for some operations.
inline static bool pd_check_instruction_mark();
+
+ // Add delta to short branch distance to verify that it still fit into imm8.
+ int _short_branch_delta;
+
+ int short_branch_delta() const { return _short_branch_delta; }
+ void set_short_branch_delta() { _short_branch_delta = 32; }
+ void clear_short_branch_delta() { _short_branch_delta = 0; }
+
+ class ShortBranchVerifier: public StackObj {
+ private:
+ AbstractAssembler* _assm;
+
+ public:
+ ShortBranchVerifier(AbstractAssembler* assm) : _assm(assm) {
+ assert(assm->short_branch_delta() == 0, "overlapping instructions");
+ _assm->set_short_branch_delta();
+ }
+ ~ShortBranchVerifier() {
+ _assm->clear_short_branch_delta();
+ }
+ };
+ #else
+ // Dummy in product.
+ class ShortBranchVerifier: public StackObj {
+ public:
+ ShortBranchVerifier(AbstractAssembler* assm) {}
+ };
#endif
// Label functions
diff --git a/src/share/vm/c1/c1_LIR.cpp b/src/share/vm/c1/c1_LIR.cpp
index 267b96664..629d849bc 100644
--- a/src/share/vm/c1/c1_LIR.cpp
+++ b/src/share/vm/c1/c1_LIR.cpp
@@ -854,6 +854,9 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
if (opTypeCheck->_info_for_exception) do_info(opTypeCheck->_info_for_exception);
if (opTypeCheck->_info_for_patch) do_info(opTypeCheck->_info_for_patch);
if (opTypeCheck->_object->is_valid()) do_input(opTypeCheck->_object);
+ if (op->code() == lir_store_check && opTypeCheck->_object->is_valid()) {
+ do_temp(opTypeCheck->_object);
+ }
if (opTypeCheck->_array->is_valid()) do_input(opTypeCheck->_array);
if (opTypeCheck->_tmp1->is_valid()) do_temp(opTypeCheck->_tmp1);
if (opTypeCheck->_tmp2->is_valid()) do_temp(opTypeCheck->_tmp2);
diff --git a/src/share/vm/c1/c1_LIRGenerator.cpp b/src/share/vm/c1/c1_LIRGenerator.cpp
index 0491d7156..5f3155d41 100644
--- a/src/share/vm/c1/c1_LIRGenerator.cpp
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp
@@ -1256,8 +1256,7 @@ void LIRGenerator::do_getClass(Intrinsic* x) {
info = state_for(x);
}
__ move(new LIR_Address(rcvr.result(), oopDesc::klass_offset_in_bytes(), T_OBJECT), result, info);
- __ move_wide(new LIR_Address(result, Klass::java_mirror_offset_in_bytes() +
- klassOopDesc::klass_part_offset_in_bytes(), T_OBJECT), result);
+ __ move_wide(new LIR_Address(result, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result);
}
diff --git a/src/share/vm/c1/c1_Optimizer.cpp b/src/share/vm/c1/c1_Optimizer.cpp
index 5c3640b1a..e1a0ef7bf 100644
--- a/src/share/vm/c1/c1_Optimizer.cpp
+++ b/src/share/vm/c1/c1_Optimizer.cpp
@@ -122,18 +122,32 @@ void CE_Eliminator::block_do(BlockBegin* block) {
if (sux != f_goto->default_sux()) return;
// check if at least one word was pushed on sux_state
+ // inlining depths must match
+ ValueStack* if_state = if_->state();
ValueStack* sux_state = sux->state();
- if (sux_state->stack_size() <= if_->state()->stack_size()) return;
+ if (if_state->scope()->level() > sux_state->scope()->level()) {
+ while (sux_state->scope() != if_state->scope()) {
+ if_state = if_state->caller_state();
+ assert(if_state != NULL, "states do not match up");
+ }
+ } else if (if_state->scope()->level() < sux_state->scope()->level()) {
+ while (sux_state->scope() != if_state->scope()) {
+ sux_state = sux_state->caller_state();
+ assert(sux_state != NULL, "states do not match up");
+ }
+ }
+
+ if (sux_state->stack_size() <= if_state->stack_size()) return;
// check if phi function is present at end of successor stack and that
// only this phi was pushed on the stack
- Value sux_phi = sux_state->stack_at(if_->state()->stack_size());
+ Value sux_phi = sux_state->stack_at(if_state->stack_size());
if (sux_phi == NULL || sux_phi->as_Phi() == NULL || sux_phi->as_Phi()->block() != sux) return;
- if (sux_phi->type()->size() != sux_state->stack_size() - if_->state()->stack_size()) return;
+ if (sux_phi->type()->size() != sux_state->stack_size() - if_state->stack_size()) return;
// get the values that were pushed in the true- and false-branch
- Value t_value = t_goto->state()->stack_at(if_->state()->stack_size());
- Value f_value = f_goto->state()->stack_at(if_->state()->stack_size());
+ Value t_value = t_goto->state()->stack_at(if_state->stack_size());
+ Value f_value = f_goto->state()->stack_at(if_state->stack_size());
// backend does not support floats
assert(t_value->type()->base() == f_value->type()->base(), "incompatible types");
@@ -180,11 +194,7 @@ void CE_Eliminator::block_do(BlockBegin* block) {
Goto* goto_ = new Goto(sux, state_before, if_->is_safepoint() || t_goto->is_safepoint() || f_goto->is_safepoint());
// prepare state for Goto
- ValueStack* goto_state = if_->state();
- while (sux_state->scope() != goto_state->scope()) {
- goto_state = goto_state->caller_state();
- assert(goto_state != NULL, "states do not match up");
- }
+ ValueStack* goto_state = if_state;
goto_state = goto_state->copy(ValueStack::StateAfter, goto_state->bci());
goto_state->push(result->type(), result);
assert(goto_state->is_same(sux_state), "states must match now");
diff --git a/src/share/vm/oops/arrayKlass.hpp b/src/share/vm/oops/arrayKlass.hpp
index cfc06274c..13dbaec55 100644
--- a/src/share/vm/oops/arrayKlass.hpp
+++ b/src/share/vm/oops/arrayKlass.hpp
@@ -73,7 +73,7 @@ class arrayKlass: public Klass {
oop* adr_component_mirror() { return (oop*)&this->_component_mirror;}
// Compiler/Interpreter offset
- static ByteSize component_mirror_offset() { return byte_offset_of(arrayKlass, _component_mirror); }
+ static ByteSize component_mirror_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(arrayKlass, _component_mirror)); }
virtual klassOop java_super() const;//{ return SystemDictionary::Object_klass(); }
diff --git a/src/share/vm/oops/instanceKlass.hpp b/src/share/vm/oops/instanceKlass.hpp
index e78b7c0af..68849badd 100644
--- a/src/share/vm/oops/instanceKlass.hpp
+++ b/src/share/vm/oops/instanceKlass.hpp
@@ -433,7 +433,7 @@ class instanceKlass: public Klass {
ReferenceType reference_type() const { return _reference_type; }
void set_reference_type(ReferenceType t) { _reference_type = t; }
- static int reference_type_offset_in_bytes() { return offset_of(instanceKlass, _reference_type); }
+ static ByteSize reference_type_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _reference_type)); }
// find local field, returns true if found
bool find_local_field(Symbol* name, Symbol* sig, fieldDescriptor* fd) const;
@@ -644,8 +644,8 @@ class instanceKlass: public Klass {
void set_breakpoints(BreakpointInfo* bps) { _breakpoints = bps; };
// support for stub routines
- static int init_state_offset_in_bytes() { return offset_of(instanceKlass, _init_state); }
- static int init_thread_offset_in_bytes() { return offset_of(instanceKlass, _init_thread); }
+ static ByteSize init_state_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _init_state)); }
+ static ByteSize init_thread_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _init_thread)); }
// subclass/subinterface checks
bool implements_interface(klassOop k) const;
diff --git a/src/share/vm/oops/klass.cpp b/src/share/vm/oops/klass.cpp
index 8541bd5d2..39cb26ebb 100644
--- a/src/share/vm/oops/klass.cpp
+++ b/src/share/vm/oops/klass.cpp
@@ -144,7 +144,7 @@ klassOop Klass::base_create_klass_oop(KlassHandle& klass, int size,
}
kl->set_secondary_supers(NULL);
oop_store_without_check((oop*) &kl->_primary_supers[0], k);
- kl->set_super_check_offset(primary_supers_offset_in_bytes() + sizeof(oopDesc));
+ kl->set_super_check_offset(in_bytes(primary_supers_offset()));
}
kl->set_java_mirror(NULL);
diff --git a/src/share/vm/oops/klass.hpp b/src/share/vm/oops/klass.hpp
index 407b1ef29..ae727d9ec 100644
--- a/src/share/vm/oops/klass.hpp
+++ b/src/share/vm/oops/klass.hpp
@@ -313,7 +313,7 @@ class Klass : public Klass_vtbl {
// Can this klass be a primary super? False for interfaces and arrays of
// interfaces. False also for arrays or classes with long super chains.
bool can_be_primary_super() const {
- const juint secondary_offset = secondary_super_cache_offset_in_bytes() + sizeof(oopDesc);
+ const juint secondary_offset = in_bytes(secondary_super_cache_offset());
return super_check_offset() != secondary_offset;
}
virtual bool can_be_primary_super_slow() const;
@@ -323,7 +323,7 @@ class Klass : public Klass_vtbl {
if (!can_be_primary_super()) {
return primary_super_limit();
} else {
- juint d = (super_check_offset() - (primary_supers_offset_in_bytes() + sizeof(oopDesc))) / sizeof(klassOop);
+ juint d = (super_check_offset() - in_bytes(primary_supers_offset())) / sizeof(klassOop);
assert(d < primary_super_limit(), "oob");
assert(_primary_supers[d] == as_klassOop(), "proper init");
return d;
@@ -373,15 +373,15 @@ class Klass : public Klass_vtbl {
virtual void set_alloc_size(juint n) = 0;
// Compiler support
- static int super_offset_in_bytes() { return offset_of(Klass, _super); }
- static int super_check_offset_offset_in_bytes() { return offset_of(Klass, _super_check_offset); }
- static int primary_supers_offset_in_bytes(){ return offset_of(Klass, _primary_supers); }
- static int secondary_super_cache_offset_in_bytes() { return offset_of(Klass, _secondary_super_cache); }
- static int secondary_supers_offset_in_bytes() { return offset_of(Klass, _secondary_supers); }
- static int java_mirror_offset_in_bytes() { return offset_of(Klass, _java_mirror); }
- static int modifier_flags_offset_in_bytes(){ return offset_of(Klass, _modifier_flags); }
- static int layout_helper_offset_in_bytes() { return offset_of(Klass, _layout_helper); }
- static int access_flags_offset_in_bytes() { return offset_of(Klass, _access_flags); }
+ static ByteSize super_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _super)); }
+ static ByteSize super_check_offset_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _super_check_offset)); }
+ static ByteSize primary_supers_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _primary_supers)); }
+ static ByteSize secondary_super_cache_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _secondary_super_cache)); }
+ static ByteSize secondary_supers_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _secondary_supers)); }
+ static ByteSize java_mirror_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _java_mirror)); }
+ static ByteSize modifier_flags_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _modifier_flags)); }
+ static ByteSize layout_helper_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _layout_helper)); }
+ static ByteSize access_flags_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _access_flags)); }
// Unpacking layout_helper:
enum {
@@ -478,7 +478,7 @@ class Klass : public Klass_vtbl {
bool is_subtype_of(klassOop k) const {
juint off = k->klass_part()->super_check_offset();
klassOop sup = *(klassOop*)( (address)as_klassOop() + off );
- const juint secondary_offset = secondary_super_cache_offset_in_bytes() + sizeof(oopDesc);
+ const juint secondary_offset = in_bytes(secondary_super_cache_offset());
if (sup == k) {
return true;
} else if (off != secondary_offset) {
@@ -674,7 +674,7 @@ class Klass : public Klass_vtbl {
// are potential problems in setting the bias pattern for
// JVM-internal oops.
inline void set_prototype_header(markOop header);
- static int prototype_header_offset_in_bytes() { return offset_of(Klass, _prototype_header); }
+ static ByteSize prototype_header_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _prototype_header)); }
int biased_lock_revocation_count() const { return (int) _biased_lock_revocation_count; }
// Atomically increments biased_lock_revocation_count and returns updated value
diff --git a/src/share/vm/oops/klassOop.hpp b/src/share/vm/oops/klassOop.hpp
index 25dca1d6a..f212fc5ba 100644
--- a/src/share/vm/oops/klassOop.hpp
+++ b/src/share/vm/oops/klassOop.hpp
@@ -38,14 +38,8 @@
class klassOopDesc : public oopDesc {
public:
- // size operation
- static int header_size() { return sizeof(klassOopDesc)/HeapWordSize; }
-
- // support for code generation
- static int klass_part_offset_in_bytes() { return sizeof(klassOopDesc); }
-
// returns the Klass part containing dispatching behavior
- Klass* klass_part() const { return (Klass*)((address)this + klass_part_offset_in_bytes()); }
+ Klass* klass_part() const { return (Klass*)((address)this + sizeof(klassOopDesc)); }
// Convenience wrapper
inline oop java_mirror() const;
diff --git a/src/share/vm/oops/objArrayKlass.hpp b/src/share/vm/oops/objArrayKlass.hpp
index 023f221ab..44717ec69 100644
--- a/src/share/vm/oops/objArrayKlass.hpp
+++ b/src/share/vm/oops/objArrayKlass.hpp
@@ -47,7 +47,7 @@ class objArrayKlass : public arrayKlass {
oop* bottom_klass_addr() { return (oop*)&_bottom_klass; }
// Compiler/Interpreter offset
- static int element_klass_offset_in_bytes() { return offset_of(objArrayKlass, _element_klass); }
+ static ByteSize element_klass_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(objArrayKlass, _element_klass)); }
// Dispatched operation
bool can_be_primary_super_slow() const;
diff --git a/src/share/vm/opto/callnode.hpp b/src/share/vm/opto/callnode.hpp
index 9e9b34260..565d5d80c 100644
--- a/src/share/vm/opto/callnode.hpp
+++ b/src/share/vm/opto/callnode.hpp
@@ -791,6 +791,10 @@ public:
// are defined in graphKit.cpp, which sets up the bidirectional relation.)
InitializeNode* initialization();
+ // Return the corresponding storestore barrier (or null if none).
+ // Walks out edges to find it...
+ MemBarStoreStoreNode* storestore();
+
// Convenience for initialization->maybe_set_complete(phase)
bool maybe_set_complete(PhaseGVN* phase);
};
diff --git a/src/share/vm/opto/classes.hpp b/src/share/vm/opto/classes.hpp
index dbece7611..19ade9069 100644
--- a/src/share/vm/opto/classes.hpp
+++ b/src/share/vm/opto/classes.hpp
@@ -166,6 +166,7 @@ macro(MemBarCPUOrder)
macro(MemBarRelease)
macro(MemBarReleaseLock)
macro(MemBarVolatile)
+macro(MemBarStoreStore)
macro(MergeMem)
macro(MinI)
macro(ModD)
diff --git a/src/share/vm/opto/compile.cpp b/src/share/vm/opto/compile.cpp
index 721d1eedc..2b9051c90 100644
--- a/src/share/vm/opto/compile.cpp
+++ b/src/share/vm/opto/compile.cpp
@@ -1282,12 +1282,11 @@ const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const {
if( tk ) {
// If we are referencing a field within a Klass, we need
// to assume the worst case of an Object. Both exact and
- // inexact types must flatten to the same alias class.
- // Since the flattened result for a klass is defined to be
- // precisely java.lang.Object, use a constant ptr.
+ // inexact types must flatten to the same alias class so
+ // use NotNull as the PTR.
if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) {
- tj = tk = TypeKlassPtr::make(TypePtr::Constant,
+ tj = tk = TypeKlassPtr::make(TypePtr::NotNull,
TypeKlassPtr::OBJECT->klass(),
offset);
}
@@ -1307,10 +1306,12 @@ const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const {
// these 2 disparate memories into the same alias class. Since the
// primary supertype array is read-only, there's no chance of confusion
// where we bypass an array load and an array store.
- uint off2 = offset - Klass::primary_supers_offset_in_bytes();
- if( offset == Type::OffsetBot ||
- off2 < Klass::primary_super_limit()*wordSize ) {
- offset = sizeof(oopDesc) +Klass::secondary_super_cache_offset_in_bytes();
+ int primary_supers_offset = in_bytes(Klass::primary_supers_offset());
+ if (offset == Type::OffsetBot ||
+ (offset >= primary_supers_offset &&
+ offset < (int)(primary_supers_offset + Klass::primary_super_limit() * wordSize)) ||
+ offset == (int)in_bytes(Klass::secondary_super_cache_offset())) {
+ offset = in_bytes(Klass::secondary_super_cache_offset());
tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset );
}
}
@@ -1489,13 +1490,13 @@ Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_cr
alias_type(idx)->set_rewritable(false);
}
if (flat->isa_klassptr()) {
- if (flat->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc))
+ if (flat->offset() == in_bytes(Klass::super_check_offset_offset()))
alias_type(idx)->set_rewritable(false);
- if (flat->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc))
+ if (flat->offset() == in_bytes(Klass::modifier_flags_offset()))
alias_type(idx)->set_rewritable(false);
- if (flat->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc))
+ if (flat->offset() == in_bytes(Klass::access_flags_offset()))
alias_type(idx)->set_rewritable(false);
- if (flat->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc))
+ if (flat->offset() == in_bytes(Klass::java_mirror_offset()))
alias_type(idx)->set_rewritable(false);
}
// %%% (We would like to finalize JavaThread::threadObj_offset(),
@@ -2521,7 +2522,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {
break;
}
}
- assert(p != NULL, "must be found");
+ assert(proj != NULL, "must be found");
p->subsume_by(proj);
}
}
diff --git a/src/share/vm/opto/escape.cpp b/src/share/vm/opto/escape.cpp
index 22ac9a7ec..a874d80a6 100644
--- a/src/share/vm/opto/escape.cpp
+++ b/src/share/vm/opto/escape.cpp
@@ -1595,6 +1595,7 @@ bool ConnectionGraph::compute_escape() {
GrowableArray<Node*> alloc_worklist;
GrowableArray<Node*> addp_worklist;
GrowableArray<Node*> ptr_cmp_worklist;
+ GrowableArray<Node*> storestore_worklist;
PhaseGVN* igvn = _igvn;
// Push all useful nodes onto CG list and set their type.
@@ -1618,6 +1619,11 @@ bool ConnectionGraph::compute_escape() {
(n->Opcode() == Op_CmpP || n->Opcode() == Op_CmpN)) {
// Compare pointers nodes
ptr_cmp_worklist.append(n);
+ } else if (n->is_MemBarStoreStore()) {
+ // Collect all MemBarStoreStore nodes so that depending on the
+ // escape status of the associated Allocate node some of them
+ // may be eliminated.
+ storestore_worklist.append(n);
}
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* m = n->fast_out(i); // Get user
@@ -1724,11 +1730,20 @@ bool ConnectionGraph::compute_escape() {
uint alloc_length = alloc_worklist.length();
for (uint next = 0; next < alloc_length; ++next) {
Node* n = alloc_worklist.at(next);
- if (ptnode_adr(n->_idx)->escape_state() == PointsToNode::NoEscape) {
+ PointsToNode::EscapeState es = ptnode_adr(n->_idx)->escape_state();
+ if (es == PointsToNode::NoEscape) {
has_non_escaping_obj = true;
if (n->is_Allocate()) {
find_init_values(n, &visited, igvn);
+ // The object allocated by this Allocate node will never be
+ // seen by another thread. Mark it so that when it is
+ // expanded no MemBarStoreStore is added.
+ n->as_Allocate()->initialization()->set_does_not_escape();
}
+ } else if ((es == PointsToNode::ArgEscape) && n->is_Allocate()) {
+ // Same as above. Mark this Allocate node so that when it is
+ // expanded no MemBarStoreStore is added.
+ n->as_Allocate()->initialization()->set_does_not_escape();
}
}
@@ -1874,6 +1889,25 @@ bool ConnectionGraph::compute_escape() {
igvn->hash_delete(_pcmp_eq);
}
+ // For MemBarStoreStore nodes added in library_call.cpp, check
+ // escape status of associated AllocateNode and optimize out
+ // MemBarStoreStore node if the allocated object never escapes.
+ while (storestore_worklist.length() != 0) {
+ Node *n = storestore_worklist.pop();
+ MemBarStoreStoreNode *storestore = n ->as_MemBarStoreStore();
+ Node *alloc = storestore->in(MemBarNode::Precedent)->in(0);
+ assert (alloc->is_Allocate(), "storestore should point to AllocateNode");
+ PointsToNode::EscapeState es = ptnode_adr(alloc->_idx)->escape_state();
+ if (es == PointsToNode::NoEscape || es == PointsToNode::ArgEscape) {
+ MemBarNode* mb = MemBarNode::make(C, Op_MemBarCPUOrder, Compile::AliasIdxBot);
+ mb->init_req(TypeFunc::Memory, storestore->in(TypeFunc::Memory));
+ mb->init_req(TypeFunc::Control, storestore->in(TypeFunc::Control));
+
+ _igvn->register_new_node_with_optimizer(mb);
+ _igvn->replace_node(storestore, mb);
+ }
+ }
+
#ifndef PRODUCT
if (PrintEscapeAnalysis) {
dump(); // Dump ConnectionGraph
diff --git a/src/share/vm/opto/graphKit.cpp b/src/share/vm/opto/graphKit.cpp
index 49717298a..565896079 100644
--- a/src/share/vm/opto/graphKit.cpp
+++ b/src/share/vm/opto/graphKit.cpp
@@ -2304,9 +2304,9 @@ Node* GraphKit::gen_subtype_check(Node* subklass, Node* superklass) {
// will always succeed. We could leave a dependency behind to ensure this.
// First load the super-klass's check-offset
- Node *p1 = basic_plus_adr( superklass, superklass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() );
+ Node *p1 = basic_plus_adr( superklass, superklass, in_bytes(Klass::super_check_offset_offset()) );
Node *chk_off = _gvn.transform( new (C, 3) LoadINode( NULL, memory(p1), p1, _gvn.type(p1)->is_ptr() ) );
- int cacheoff_con = sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes();
+ int cacheoff_con = in_bytes(Klass::secondary_super_cache_offset());
bool might_be_cache = (find_int_con(chk_off, cacheoff_con) == cacheoff_con);
// Load from the sub-klass's super-class display list, or a 1-word cache of
@@ -2934,7 +2934,7 @@ Node* GraphKit::get_layout_helper(Node* klass_node, jint& constant_value) {
}
}
constant_value = Klass::_lh_neutral_value; // put in a known value
- Node* lhp = basic_plus_adr(klass_node, klass_node, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc));
+ Node* lhp = basic_plus_adr(klass_node, klass_node, in_bytes(Klass::layout_helper_offset()));
return make_load(NULL, lhp, TypeInt::INT, T_INT);
}
@@ -3337,6 +3337,19 @@ InitializeNode* AllocateNode::initialization() {
return NULL;
}
+// Trace Allocate -> Proj[Parm] -> MemBarStoreStore: return the first MemBarStoreStore found among the users of this node's RawAddress projection, or NULL if there is none.
+MemBarStoreStoreNode* AllocateNode::storestore() {
+ ProjNode* rawoop = proj_out(AllocateNode::RawAddress);
+ if (rawoop == NULL) return NULL; // no RawAddress projection to search from
+ for (DUIterator_Fast imax, i = rawoop->fast_outs(imax); i < imax; i++) { // scan every user of the projection
+ Node* storestore = rawoop->fast_out(i);
+ if (storestore->is_MemBarStoreStore()) {
+ return storestore->as_MemBarStoreStore(); // first match wins
+ }
+ }
+ return NULL; // no MemBarStoreStore uses the projection
+}
+
//----------------------------- loop predicates ---------------------------
//------------------------------add_predicate_impl----------------------------
diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp
index 7d4bb881a..084462709 100644
--- a/src/share/vm/opto/library_call.cpp
+++ b/src/share/vm/opto/library_call.cpp
@@ -2165,8 +2165,7 @@ void LibraryCallKit::insert_g1_pre_barrier(Node* base_oop, Node* offset, Node* p
IdealKit ideal(this);
#define __ ideal.
- const int reference_type_offset = instanceKlass::reference_type_offset_in_bytes() +
- sizeof(oopDesc);
+ const int reference_type_offset = in_bytes(instanceKlass::reference_type_offset());
Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);
@@ -2806,7 +2805,7 @@ bool LibraryCallKit::inline_unsafe_allocate() {
// Note: The argument might still be an illegal value like
// Serializable.class or Object[].class. The runtime will handle it.
// But we must make an explicit check for initialization.
- Node* insp = basic_plus_adr(kls, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc));
+ Node* insp = basic_plus_adr(kls, in_bytes(instanceKlass::init_state_offset()));
// Use T_BOOLEAN for instanceKlass::_init_state so the compiler
// can generate code to load it as unsigned byte.
Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN);
@@ -2956,7 +2955,7 @@ bool LibraryCallKit::inline_native_isInterrupted() {
//---------------------------load_mirror_from_klass----------------------------
// Given a klass oop, load its java mirror (a java.lang.Class oop).
Node* LibraryCallKit::load_mirror_from_klass(Node* klass) {
- Node* p = basic_plus_adr(klass, Klass::java_mirror_offset_in_bytes() + sizeof(oopDesc));
+ Node* p = basic_plus_adr(klass, in_bytes(Klass::java_mirror_offset()));
return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT);
}
@@ -2996,7 +2995,7 @@ Node* LibraryCallKit::load_klass_from_mirror_common(Node* mirror,
Node* LibraryCallKit::generate_access_flags_guard(Node* kls, int modifier_mask, int modifier_bits, RegionNode* region) {
// Branch around if the given klass has the given modifier bit set.
// Like generate_guard, adds a new path onto the region.
- Node* modp = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+ Node* modp = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset()));
Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT);
Node* mask = intcon(modifier_mask);
Node* bits = intcon(modifier_bits);
@@ -3117,7 +3116,7 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) {
break;
case vmIntrinsics::_getModifiers:
- p = basic_plus_adr(kls, Klass::modifier_flags_offset_in_bytes() + sizeof(oopDesc));
+ p = basic_plus_adr(kls, in_bytes(Klass::modifier_flags_offset()));
query_value = make_load(NULL, p, TypeInt::INT, T_INT);
break;
@@ -3157,7 +3156,7 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) {
// A guard was added. If the guard is taken, it was an array.
phi->add_req(makecon(TypeInstPtr::make(env()->Object_klass()->java_mirror())));
// If we fall through, it's a plain class. Get its _super.
- p = basic_plus_adr(kls, Klass::super_offset_in_bytes() + sizeof(oopDesc));
+ p = basic_plus_adr(kls, in_bytes(Klass::super_offset()));
kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL) );
null_ctl = top();
kls = null_check_oop(kls, &null_ctl);
@@ -3175,7 +3174,7 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) {
if (generate_array_guard(kls, region) != NULL) {
// Be sure to pin the oop load to the guard edge just created:
Node* is_array_ctrl = region->in(region->req()-1);
- Node* cma = basic_plus_adr(kls, in_bytes(arrayKlass::component_mirror_offset()) + sizeof(oopDesc));
+ Node* cma = basic_plus_adr(kls, in_bytes(arrayKlass::component_mirror_offset()));
Node* cmo = make_load(is_array_ctrl, cma, TypeInstPtr::MIRROR, T_OBJECT);
phi->add_req(cmo);
}
@@ -3183,7 +3182,7 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) {
break;
case vmIntrinsics::_getClassAccessFlags:
- p = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+ p = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset()));
query_value = make_load(NULL, p, TypeInt::INT, T_INT);
break;
@@ -4196,12 +4195,17 @@ void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, b
Node* raw_obj = alloc_obj->in(1);
assert(alloc_obj->is_CheckCastPP() && raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
+ AllocateNode* alloc = NULL;
if (ReduceBulkZeroing) {
// We will be completely responsible for initializing this object -
// mark Initialize node as complete.
- AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
+ alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
// The object was just allocated - there should be no any stores!
guarantee(alloc != NULL && alloc->maybe_set_complete(&_gvn), "");
+ // Mark as complete_with_arraycopy so that on AllocateNode
+ // expansion, we know this AllocateNode is initialized by an array
+ // copy and a StoreStore barrier exists after the array copy.
+ alloc->initialization()->set_complete_with_arraycopy();
}
// Copy the fastest available way.
@@ -4263,7 +4267,18 @@ void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, b
}
// Do not let reads from the cloned object float above the arraycopy.
- insert_mem_bar(Op_MemBarCPUOrder);
+ if (alloc != NULL) {
+ // Do not let stores that initialize this object be reordered with
+ // a subsequent store that would make this object accessible by
+ // other threads.
+ // Record what AllocateNode this StoreStore protects so that
+ // escape analysis can go from the MemBarStoreStoreNode to the
+ // AllocateNode and eliminate the MemBarStoreStoreNode if possible
+ // based on the escape status of the AllocateNode.
+ insert_mem_bar(Op_MemBarStoreStore, alloc->proj_out(AllocateNode::RawAddress));
+ } else {
+ insert_mem_bar(Op_MemBarCPUOrder);
+ }
}
//------------------------inline_native_clone----------------------------
@@ -4859,7 +4874,7 @@ LibraryCallKit::generate_arraycopy(const TypePtr* adr_type,
PreserveJVMState pjvms(this);
set_control(not_subtype_ctrl);
// (At this point we can assume disjoint_bases, since types differ.)
- int ek_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc);
+ int ek_offset = in_bytes(objArrayKlass::element_klass_offset());
Node* p1 = basic_plus_adr(dest_klass, ek_offset);
Node* n1 = LoadKlassNode::make(_gvn, immutable_memory(), p1, TypeRawPtr::BOTTOM);
Node* dest_elem_klass = _gvn.transform(n1);
@@ -5006,7 +5021,16 @@ LibraryCallKit::generate_arraycopy(const TypePtr* adr_type,
// the membar also.
//
// Do not let reads from the cloned object float above the arraycopy.
- if (InsertMemBarAfterArraycopy || alloc != NULL)
+ if (alloc != NULL) {
+ // Do not let stores that initialize this object be reordered with
+ // a subsequent store that would make this object accessible by
+ // other threads.
+ // Record what AllocateNode this StoreStore protects so that
+ // escape analysis can go from the MemBarStoreStoreNode to the
+ // AllocateNode and eliminate the MemBarStoreStoreNode if possible
+ // based on the escape status of the AllocateNode.
+ insert_mem_bar(Op_MemBarStoreStore, alloc->proj_out(AllocateNode::RawAddress));
+ } else if (InsertMemBarAfterArraycopy)
insert_mem_bar(Op_MemBarCPUOrder);
}
@@ -5310,7 +5334,7 @@ LibraryCallKit::generate_checkcast_arraycopy(const TypePtr* adr_type,
// for the target array. This is an optimistic check. It will
// look in each non-null element's class, at the desired klass's
// super_check_offset, for the desired klass.
- int sco_offset = Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc);
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
Node* p3 = basic_plus_adr(dest_elem_klass, sco_offset);
Node* n3 = new(C, 3) LoadINode(NULL, memory(p3), p3, _gvn.type(p3)->is_ptr());
Node* check_offset = ConvI2X(_gvn.transform(n3));
diff --git a/src/share/vm/opto/macro.cpp b/src/share/vm/opto/macro.cpp
index 343544586..97c5f1155 100644
--- a/src/share/vm/opto/macro.cpp
+++ b/src/share/vm/opto/macro.cpp
@@ -1088,6 +1088,12 @@ void PhaseMacroExpand::expand_allocate_common(
Node* klass_node = alloc->in(AllocateNode::KlassNode);
Node* initial_slow_test = alloc->in(AllocateNode::InitialTest);
+ Node* storestore = alloc->storestore();
+ if (storestore != NULL) {
+ // Break this link that is no longer useful and confuses register allocation
+ storestore->set_req(MemBarNode::Precedent, top());
+ }
+
assert(ctrl != NULL, "must have control");
// We need a Region and corresponding Phi's to merge the slow-path and fast-path results.
// they will not be used if "always_slow" is set
@@ -1289,10 +1295,66 @@ void PhaseMacroExpand::expand_allocate_common(
0, new_alloc_bytes, T_LONG);
}
+ InitializeNode* init = alloc->initialization();
fast_oop_rawmem = initialize_object(alloc,
fast_oop_ctrl, fast_oop_rawmem, fast_oop,
klass_node, length, size_in_bytes);
+ // If initialization is performed by an array copy, any required
+ // MemBarStoreStore was already added. If the object does not
+ // escape, no MemBarStoreStore is needed. Otherwise we need a
+ // MemBarStoreStore so that stores that initialize this object
+ // can't be reordered with a subsequent store that makes this
+ // object accessible by other threads.
+ if (init == NULL || (!init->is_complete_with_arraycopy() && !init->does_not_escape())) {
+ if (init == NULL || init->req() < InitializeNode::RawStores) {
+ // No InitializeNode or no stores captured by zeroing
+ // elimination. Simply add the MemBarStoreStore after object
+ // initialization.
+ MemBarNode* mb = MemBarNode::make(C, Op_MemBarStoreStore, Compile::AliasIdxBot, fast_oop_rawmem);
+ transform_later(mb);
+
+ mb->init_req(TypeFunc::Memory, fast_oop_rawmem);
+ mb->init_req(TypeFunc::Control, fast_oop_ctrl);
+ fast_oop_ctrl = new (C, 1) ProjNode(mb,TypeFunc::Control);
+ transform_later(fast_oop_ctrl);
+ fast_oop_rawmem = new (C, 1) ProjNode(mb,TypeFunc::Memory);
+ transform_later(fast_oop_rawmem);
+ } else {
+ // Add the MemBarStoreStore after the InitializeNode so that
+ // all stores performing the initialization that were moved
+ // before the InitializeNode happen before the storestore
+ // barrier.
+
+ Node* init_ctrl = init->proj_out(TypeFunc::Control);
+ Node* init_mem = init->proj_out(TypeFunc::Memory);
+
+ MemBarNode* mb = MemBarNode::make(C, Op_MemBarStoreStore, Compile::AliasIdxBot);
+ transform_later(mb);
+
+ Node* ctrl = new (C, 1) ProjNode(init,TypeFunc::Control);
+ transform_later(ctrl);
+ Node* mem = new (C, 1) ProjNode(init,TypeFunc::Memory);
+ transform_later(mem);
+
+ // The MemBarStoreStore depends on control and memory coming
+ // from the InitializeNode
+ mb->init_req(TypeFunc::Memory, mem);
+ mb->init_req(TypeFunc::Control, ctrl);
+
+ ctrl = new (C, 1) ProjNode(mb,TypeFunc::Control);
+ transform_later(ctrl);
+ mem = new (C, 1) ProjNode(mb,TypeFunc::Memory);
+ transform_later(mem);
+
+ // All nodes that depended on the InitializeNode for control
+ // and memory must now depend on the MemBarNode that itself
+ // depends on the InitializeNode
+ _igvn.replace_node(init_ctrl, ctrl);
+ _igvn.replace_node(init_mem, mem);
+ }
+ }
+
if (C->env()->dtrace_extended_probes()) {
// Slow-path call
int size = TypeFunc::Parms + 2;
@@ -1326,6 +1388,7 @@ void PhaseMacroExpand::expand_allocate_common(
result_phi_rawmem->init_req(fast_result_path, fast_oop_rawmem);
} else {
slow_region = ctrl;
+ result_phi_i_o = i_o; // Rename it to use in the following code.
}
// Generate slow-path call
@@ -1350,6 +1413,10 @@ void PhaseMacroExpand::expand_allocate_common(
copy_call_debug_info((CallNode *) alloc, call);
if (!always_slow) {
call->set_cnt(PROB_UNLIKELY_MAG(4)); // Same effect as RC_UNCOMMON.
+ } else {
+ // Hook i_o projection to avoid its elimination during allocation
+ // replacement (when only a slow call is generated).
+ call->set_req(TypeFunc::I_O, result_phi_i_o);
}
_igvn.replace_node(alloc, call);
transform_later(call);
@@ -1366,8 +1433,10 @@ void PhaseMacroExpand::expand_allocate_common(
//
extract_call_projections(call);
- // An allocate node has separate memory projections for the uses on the control and i_o paths
- // Replace uses of the control memory projection with result_phi_rawmem (unless we are only generating a slow call)
+ // An allocate node has separate memory projections for the uses on
+ // the control and i_o paths. Replace the control memory projection with
+ // result_phi_rawmem (unless we are only generating a slow call, in which
+ // case both memory projections are combined).
if (!always_slow && _memproj_fallthrough != NULL) {
for (DUIterator_Fast imax, i = _memproj_fallthrough->fast_outs(imax); i < imax; i++) {
Node *use = _memproj_fallthrough->fast_out(i);
@@ -1378,8 +1447,8 @@ void PhaseMacroExpand::expand_allocate_common(
--i;
}
}
- // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete _memproj_catchall so
- // we end up with a call that has only 1 memory projection
+ // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete
+ // _memproj_catchall so we end up with a call that has only 1 memory projection.
if (_memproj_catchall != NULL ) {
if (_memproj_fallthrough == NULL) {
_memproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::Memory);
@@ -1393,17 +1462,18 @@ void PhaseMacroExpand::expand_allocate_common(
// back up iterator
--i;
}
+ assert(_memproj_catchall->outcnt() == 0, "all uses must be deleted");
+ _igvn.remove_dead_node(_memproj_catchall);
}
- // An allocate node has separate i_o projections for the uses on the control and i_o paths
- // Replace uses of the control i_o projection with result_phi_i_o (unless we are only generating a slow call)
- if (_ioproj_fallthrough == NULL) {
- _ioproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::I_O);
- transform_later(_ioproj_fallthrough);
- } else if (!always_slow) {
+ // An allocate node has separate i_o projections for the uses on the control
+ // and i_o paths. Always replace the control i_o projection with result i_o;
+ // otherwise the incoming i_o becomes dead when only a slow call is generated
+ // (this differs from memory projections, where both projections are
+ // combined in such a case).
+ if (_ioproj_fallthrough != NULL) {
for (DUIterator_Fast imax, i = _ioproj_fallthrough->fast_outs(imax); i < imax; i++) {
Node *use = _ioproj_fallthrough->fast_out(i);
-
_igvn.hash_delete(use);
imax -= replace_input(use, _ioproj_fallthrough, result_phi_i_o);
_igvn._worklist.push(use);
@@ -1411,9 +1481,13 @@ void PhaseMacroExpand::expand_allocate_common(
--i;
}
}
- // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete _ioproj_catchall so
- // we end up with a call that has only 1 control projection
+ // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete
+ // _ioproj_catchall so we end up with a call that has only 1 i_o projection.
if (_ioproj_catchall != NULL ) {
+ if (_ioproj_fallthrough == NULL) {
+ _ioproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::I_O);
+ transform_later(_ioproj_fallthrough);
+ }
for (DUIterator_Fast imax, i = _ioproj_catchall->fast_outs(imax); i < imax; i++) {
Node *use = _ioproj_catchall->fast_out(i);
_igvn.hash_delete(use);
@@ -1422,11 +1496,25 @@ void PhaseMacroExpand::expand_allocate_common(
// back up iterator
--i;
}
+ assert(_ioproj_catchall->outcnt() == 0, "all uses must be deleted");
+ _igvn.remove_dead_node(_ioproj_catchall);
}
// if we generated only a slow call, we are done
- if (always_slow)
+ if (always_slow) {
+ // Now we can unhook i_o.
+ if (result_phi_i_o->outcnt() > 1) {
+ call->set_req(TypeFunc::I_O, top());
+ } else {
+ assert(result_phi_i_o->unique_ctrl_out() == call, "");
+ // Case of a new array with a negative size known during compilation.
+ // The AllocateArrayNode::Ideal() optimization disconnects the unreachable
+ // following code since the call to the runtime will throw an exception.
+ // As a result there will be no users of i_o after the call.
+ // Leave i_o attached to this call to avoid problems in the preceding graph.
+ }
return;
+ }
if (_fallthroughcatchproj != NULL) {
@@ -1470,7 +1558,7 @@ PhaseMacroExpand::initialize_object(AllocateNode* alloc,
Node* mark_node = NULL;
// For now only enable fast locking for non-array types
if (UseBiasedLocking && (length == NULL)) {
- mark_node = make_load(control, rawmem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeRawPtr::BOTTOM, T_ADDRESS);
+ mark_node = make_load(control, rawmem, klass_node, in_bytes(Klass::prototype_header_offset()), TypeRawPtr::BOTTOM, T_ADDRESS);
} else {
mark_node = makecon(TypeRawPtr::make((address)markOopDesc::prototype()));
}
@@ -1958,7 +2046,7 @@ void PhaseMacroExpand::expand_lock_node(LockNode *lock) {
#endif
klass_node->init_req(0, ctrl);
}
- Node *proto_node = make_load(ctrl, mem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeX_X, TypeX_X->basic_type());
+ Node *proto_node = make_load(ctrl, mem, klass_node, in_bytes(Klass::prototype_header_offset()), TypeX_X, TypeX_X->basic_type());
Node* thread = transform_later(new (C, 1) ThreadLocalNode());
Node* cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread));
diff --git a/src/share/vm/opto/matcher.cpp b/src/share/vm/opto/matcher.cpp
index 1200d2541..397385670 100644
--- a/src/share/vm/opto/matcher.cpp
+++ b/src/share/vm/opto/matcher.cpp
@@ -1365,31 +1365,36 @@ static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool s
const Type *t = m->bottom_type();
- if( t->singleton() ) {
+ if (t->singleton()) {
// Never force constants into registers. Allow them to match as
// constants or registers. Copies of the same value will share
// the same register. See find_shared_node.
return false;
} else { // Not a constant
// Stop recursion if they have different Controls.
- // Slot 0 of constants is not really a Control.
- if( control && m->in(0) && control != m->in(0) ) {
+ Node* m_control = m->in(0);
+ // Control of a load's memory can post-dominate the load's control.
+ // So use it, since a load can't float above its memory.
+ Node* mem_control = (m->is_Load()) ? m->in(MemNode::Memory)->in(0) : NULL;
+ if (control && m_control && control != m_control && control != mem_control) {
// Actually, we can live with the most conservative control we
// find, if it post-dominates the others. This allows us to
// pick up load/op/store trees where the load can float a little
// above the store.
Node *x = control;
- const uint max_scan = 6; // Arbitrary scan cutoff
+ const uint max_scan = 6; // Arbitrary scan cutoff
uint j;
- for( j=0; j<max_scan; j++ ) {
- if( x->is_Region() ) // Bail out at merge points
+ for (j=0; j<max_scan; j++) {
+ if (x->is_Region()) // Bail out at merge points
return true;
x = x->in(0);
- if( x == m->in(0) ) // Does 'control' post-dominate
+ if (x == m_control) // Does 'control' post-dominate
break; // m->in(0)? If so, we can use it
+ if (x == mem_control) // Does 'control' post-dominate
+ break; // mem_control? If so, we can use it
}
- if( j == max_scan ) // No post-domination before scan end?
+ if (j == max_scan) // No post-domination before scan end?
return true; // Then break the match tree up
}
if (m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) {
diff --git a/src/share/vm/opto/memnode.cpp b/src/share/vm/opto/memnode.cpp
index 722935a3f..cdccfa0c7 100644
--- a/src/share/vm/opto/memnode.cpp
+++ b/src/share/vm/opto/memnode.cpp
@@ -1473,19 +1473,19 @@ Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) {
const Type*
LoadNode::load_array_final_field(const TypeKlassPtr *tkls,
ciKlass* klass) const {
- if (tkls->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ if (tkls->offset() == in_bytes(Klass::modifier_flags_offset())) {
// The field is Klass::_modifier_flags. Return its (constant) value.
// (Folds up the 2nd indirection in aClassConstant.getModifiers().)
assert(this->Opcode() == Op_LoadI, "must load an int from _modifier_flags");
return TypeInt::make(klass->modifier_flags());
}
- if (tkls->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ if (tkls->offset() == in_bytes(Klass::access_flags_offset())) {
// The field is Klass::_access_flags. Return its (constant) value.
// (Folds up the 2nd indirection in Reflection.getClassAccessFlags(aClassConstant).)
assert(this->Opcode() == Op_LoadI, "must load an int from _access_flags");
return TypeInt::make(klass->access_flags());
}
- if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ if (tkls->offset() == in_bytes(Klass::layout_helper_offset())) {
// The field is Klass::_layout_helper. Return its constant value if known.
assert(this->Opcode() == Op_LoadI, "must load an int from _layout_helper");
return TypeInt::make(klass->layout_helper());
@@ -1636,14 +1636,14 @@ const Type *LoadNode::Value( PhaseTransform *phase ) const {
// We are loading a field from a Klass metaobject whose identity
// is known at compile time (the type is "exact" or "precise").
// Check for fields we know are maintained as constants by the VM.
- if (tkls->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ if (tkls->offset() == in_bytes(Klass::super_check_offset_offset())) {
// The field is Klass::_super_check_offset. Return its (constant) value.
// (Folds up type checking code.)
assert(Opcode() == Op_LoadI, "must load an int from _super_check_offset");
return TypeInt::make(klass->super_check_offset());
}
// Compute index into primary_supers array
- juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop);
+ juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(klassOop);
// Check for overflowing; use unsigned compare to handle the negative case.
if( depth < ciKlass::primary_super_limit() ) {
// The field is an element of Klass::_primary_supers. Return its (constant) value.
@@ -1654,14 +1654,14 @@ const Type *LoadNode::Value( PhaseTransform *phase ) const {
}
const Type* aift = load_array_final_field(tkls, klass);
if (aift != NULL) return aift;
- if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset()) + (int)sizeof(oopDesc)
+ if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset())
&& klass->is_array_klass()) {
// The field is arrayKlass::_component_mirror. Return its (constant) value.
// (Folds up aClassConstant.getComponentType, common in Arrays.copyOf.)
assert(Opcode() == Op_LoadP, "must load an oop from _component_mirror");
return TypeInstPtr::make(klass->as_array_klass()->component_mirror());
}
- if (tkls->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ if (tkls->offset() == in_bytes(Klass::java_mirror_offset())) {
// The field is Klass::_java_mirror. Return its (constant) value.
// (Folds up the 2nd indirection in anObjConstant.getClass().)
assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror");
@@ -1679,7 +1679,7 @@ const Type *LoadNode::Value( PhaseTransform *phase ) const {
if( inner->is_instance_klass() &&
!inner->as_instance_klass()->flags().is_interface() ) {
// Compute index into primary_supers array
- juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop);
+ juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(klassOop);
// Check for overflowing; use unsigned compare to handle the negative case.
if( depth < ciKlass::primary_super_limit() &&
depth <= klass->super_depth() ) { // allow self-depth checks to handle self-check case
@@ -1695,7 +1695,7 @@ const Type *LoadNode::Value( PhaseTransform *phase ) const {
// If the type is enough to determine that the thing is not an array,
// we can give the layout_helper a positive interval type.
// This will help short-circuit some reflective code.
- if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)
+ if (tkls->offset() == in_bytes(Klass::layout_helper_offset())
&& !klass->is_array_klass() // not directly typed as an array
&& !klass->is_interface() // specifically not Serializable & Cloneable
&& !klass->is_java_lang_Object() // not the supertype of all T[]
@@ -1938,7 +1938,7 @@ const Type *LoadNode::klass_value_common( PhaseTransform *phase ) const {
if( !klass->is_loaded() )
return _type; // Bail out if not loaded
if( klass->is_obj_array_klass() &&
- (uint)tkls->offset() == objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)) {
+ tkls->offset() == in_bytes(objArrayKlass::element_klass_offset())) {
ciKlass* elem = klass->as_obj_array_klass()->element_klass();
// // Always returning precise element type is incorrect,
// // e.g., element type could be object and array may contain strings
@@ -1949,7 +1949,7 @@ const Type *LoadNode::klass_value_common( PhaseTransform *phase ) const {
return TypeKlassPtr::make(tkls->ptr(), elem, 0/*offset*/);
}
if( klass->is_instance_klass() && tkls->klass_is_exact() &&
- (uint)tkls->offset() == Klass::super_offset_in_bytes() + sizeof(oopDesc)) {
+ tkls->offset() == in_bytes(Klass::super_offset())) {
ciKlass* sup = klass->as_instance_klass()->super();
// The field is Klass::_super. Return its (constant) value.
// (Folds up the 2nd indirection in aClassConstant.getSuperClass().)
@@ -2013,11 +2013,11 @@ Node* LoadNode::klass_identity_common(PhaseTransform *phase ) {
tkls->klass()->is_array_klass())
&& adr2->is_AddP()
) {
- int mirror_field = Klass::java_mirror_offset_in_bytes();
+ int mirror_field = in_bytes(Klass::java_mirror_offset());
if (offset == java_lang_Class::array_klass_offset_in_bytes()) {
mirror_field = in_bytes(arrayKlass::component_mirror_offset());
}
- if (tkls->offset() == mirror_field + (int)sizeof(oopDesc)) {
+ if (tkls->offset() == mirror_field) {
return adr2->in(AddPNode::Base);
}
}
@@ -2721,6 +2721,7 @@ MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) {
case Op_MemBarVolatile: return new(C, len) MemBarVolatileNode(C, atp, pn);
case Op_MemBarCPUOrder: return new(C, len) MemBarCPUOrderNode(C, atp, pn);
case Op_Initialize: return new(C, len) InitializeNode(C, atp, pn);
+ case Op_MemBarStoreStore: return new(C, len) MemBarStoreStoreNode(C, atp, pn);
default: ShouldNotReachHere(); return NULL;
}
}
@@ -2870,7 +2871,7 @@ Node *MemBarNode::match( const ProjNode *proj, const Matcher *m ) {
//---------------------------InitializeNode------------------------------------
InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop)
- : _is_complete(Incomplete),
+ : _is_complete(Incomplete), _does_not_escape(false),
MemBarNode(C, adr_type, rawoop)
{
init_class_id(Class_Initialize);
diff --git a/src/share/vm/opto/memnode.hpp b/src/share/vm/opto/memnode.hpp
index 01c149c7a..f15d4986b 100644
--- a/src/share/vm/opto/memnode.hpp
+++ b/src/share/vm/opto/memnode.hpp
@@ -918,6 +918,15 @@ public:
virtual int Opcode() const;
};
+class MemBarStoreStoreNode: public MemBarNode {
+public:
+ MemBarStoreStoreNode(Compile* C, int alias_idx, Node* precedent)
+ : MemBarNode(C, alias_idx, precedent) {
+ init_class_id(Class_MemBarStoreStore);
+ }
+ virtual int Opcode() const;
+};
+
// Ordering between a volatile store and a following volatile load.
// Requires multi-CPU visibility?
class MemBarVolatileNode: public MemBarNode {
@@ -950,6 +959,8 @@ class InitializeNode: public MemBarNode {
};
int _is_complete;
+ bool _does_not_escape;
+
public:
enum {
Control = TypeFunc::Control,
@@ -989,6 +1000,9 @@ public:
void set_complete(PhaseGVN* phase);
void set_complete_with_arraycopy() { _is_complete = Complete | WithArraycopy; }
+ bool does_not_escape() { return _does_not_escape; }
+ void set_does_not_escape() { _does_not_escape = true; }
+
#ifdef ASSERT
// ensure all non-degenerate stores are ordered and non-overlapping
bool stores_are_sane(PhaseTransform* phase);
diff --git a/src/share/vm/opto/node.hpp b/src/share/vm/opto/node.hpp
index e10cad472..1cbd830a9 100644
--- a/src/share/vm/opto/node.hpp
+++ b/src/share/vm/opto/node.hpp
@@ -97,6 +97,7 @@ class MachSpillCopyNode;
class MachTempNode;
class Matcher;
class MemBarNode;
+class MemBarStoreStoreNode;
class MemNode;
class MergeMemNode;
class MultiNode;
@@ -564,7 +565,8 @@ public:
DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2)
DEFINE_CLASS_ID(Start, Multi, 2)
DEFINE_CLASS_ID(MemBar, Multi, 3)
- DEFINE_CLASS_ID(Initialize, MemBar, 0)
+ DEFINE_CLASS_ID(Initialize, MemBar, 0)
+ DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1)
DEFINE_CLASS_ID(Mach, Node, 1)
DEFINE_CLASS_ID(MachReturn, Mach, 0)
@@ -744,6 +746,7 @@ public:
DEFINE_CLASS_QUERY(MachTemp)
DEFINE_CLASS_QUERY(Mem)
DEFINE_CLASS_QUERY(MemBar)
+ DEFINE_CLASS_QUERY(MemBarStoreStore)
DEFINE_CLASS_QUERY(MergeMem)
DEFINE_CLASS_QUERY(Multi)
DEFINE_CLASS_QUERY(MultiBranch)
diff --git a/src/share/vm/opto/parse1.cpp b/src/share/vm/opto/parse1.cpp
index 06ef91a49..30e3f5c49 100644
--- a/src/share/vm/opto/parse1.cpp
+++ b/src/share/vm/opto/parse1.cpp
@@ -1911,7 +1911,7 @@ void Parse::call_register_finalizer() {
Node* klass_addr = basic_plus_adr( receiver, receiver, oopDesc::klass_offset_in_bytes() );
Node* klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), klass_addr, TypeInstPtr::KLASS) );
- Node* access_flags_addr = basic_plus_adr(klass, klass, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+ Node* access_flags_addr = basic_plus_adr(klass, klass, in_bytes(Klass::access_flags_offset()));
Node* access_flags = make_load(NULL, access_flags_addr, TypeInt::INT, T_INT);
Node* mask = _gvn.transform(new (C, 3) AndINode(access_flags, intcon(JVM_ACC_HAS_FINALIZER)));
diff --git a/src/share/vm/opto/parseHelper.cpp b/src/share/vm/opto/parseHelper.cpp
index 1a5ce207d..9ebc716b5 100644
--- a/src/share/vm/opto/parseHelper.cpp
+++ b/src/share/vm/opto/parseHelper.cpp
@@ -200,7 +200,7 @@ void Parse::array_store_check() {
// Come here for polymorphic array klasses
// Extract the array element class
- int element_klass_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc);
+ int element_klass_offset = in_bytes(objArrayKlass::element_klass_offset());
Node *p2 = basic_plus_adr(array_klass, array_klass, element_klass_offset);
Node *a_e_klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p2, tak) );
@@ -220,7 +220,7 @@ void Parse::emit_guard_for_new(ciInstanceKlass* klass) {
_gvn.set_type(merge, Type::CONTROL);
Node* kls = makecon(TypeKlassPtr::make(klass));
- Node* init_thread_offset = _gvn.MakeConX(instanceKlass::init_thread_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes());
+ Node* init_thread_offset = _gvn.MakeConX(in_bytes(instanceKlass::init_thread_offset()));
Node* adr_node = basic_plus_adr(kls, kls, init_thread_offset);
Node* init_thread = make_load(NULL, adr_node, TypeRawPtr::BOTTOM, T_ADDRESS);
Node *tst = Bool( CmpP( init_thread, cur_thread), BoolTest::eq);
@@ -228,7 +228,7 @@ void Parse::emit_guard_for_new(ciInstanceKlass* klass) {
set_control(IfTrue(iff));
merge->set_req(1, IfFalse(iff));
- Node* init_state_offset = _gvn.MakeConX(instanceKlass::init_state_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes());
+ Node* init_state_offset = _gvn.MakeConX(in_bytes(instanceKlass::init_state_offset()));
adr_node = basic_plus_adr(kls, kls, init_state_offset);
// Use T_BOOLEAN for instanceKlass::_init_state so the compiler
// can generate code to load it as unsigned byte.
diff --git a/src/share/vm/runtime/advancedThresholdPolicy.cpp b/src/share/vm/runtime/advancedThresholdPolicy.cpp
index 758bf5ce5..a7dab0b65 100644
--- a/src/share/vm/runtime/advancedThresholdPolicy.cpp
+++ b/src/share/vm/runtime/advancedThresholdPolicy.cpp
@@ -156,20 +156,19 @@ bool AdvancedThresholdPolicy::is_method_profiled(methodOop method) {
// Called with the queue locked and with at least one element
CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) {
CompileTask *max_task = NULL;
- methodOop max_method;
+ methodHandle max_method;
jlong t = os::javaTimeMillis();
// Iterate through the queue and find a method with a maximum rate.
for (CompileTask* task = compile_queue->first(); task != NULL;) {
CompileTask* next_task = task->next();
- methodOop method = (methodOop)JNIHandles::resolve(task->method_handle());
- methodDataOop mdo = method->method_data();
- update_rate(t, method);
+ methodHandle method = (methodOop)JNIHandles::resolve(task->method_handle());
+ update_rate(t, method());
if (max_task == NULL) {
max_task = task;
max_method = method;
} else {
// If a method has been stale for some time, remove it from the queue.
- if (is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) {
+ if (is_stale(t, TieredCompileTaskTimeout, method()) && !is_old(method())) {
if (PrintTieredEvents) {
print_event(REMOVE_FROM_QUEUE, method, method, task->osr_bci(), (CompLevel)task->comp_level());
}
@@ -181,7 +180,7 @@ CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) {
}
// Select a method with a higher rate
- if (compare_methods(method, max_method)) {
+ if (compare_methods(method(), max_method())) {
max_task = task;
max_method = method;
}
@@ -190,7 +189,7 @@ CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) {
}
if (max_task->comp_level() == CompLevel_full_profile && TieredStopAtLevel > CompLevel_full_profile
- && is_method_profiled(max_method)) {
+ && is_method_profiled(max_method())) {
max_task->set_comp_level(CompLevel_limited_profile);
if (PrintTieredEvents) {
print_event(UPDATE_IN_QUEUE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level());
diff --git a/src/share/vm/runtime/arguments.cpp b/src/share/vm/runtime/arguments.cpp
index 89f891283..8a0dc6360 100644
--- a/src/share/vm/runtime/arguments.cpp
+++ b/src/share/vm/runtime/arguments.cpp
@@ -1000,6 +1000,13 @@ void Arguments::set_mode_flags(Mode mode) {
UseInterpreter = false;
BackgroundCompilation = false;
ClipInlining = false;
+ // Be much more aggressive in tiered mode with -Xcomp and exercise C2 more.
+ // We will first compile a level 3 version (C1 with full profiling), then do one invocation of it,
+ // compile a level 4 (C2) version, and then continue executing it.
+ if (TieredCompilation) {
+ Tier3InvokeNotifyFreqLog = 0;
+ Tier4InvocationThreshold = 0;
+ }
break;
}
}
diff --git a/src/share/vm/runtime/globals.hpp b/src/share/vm/runtime/globals.hpp
index 448ce30d7..22ee17157 100644
--- a/src/share/vm/runtime/globals.hpp
+++ b/src/share/vm/runtime/globals.hpp
@@ -527,6 +527,9 @@ class CommandLineFlags {
product(intx, UseSSE, 99, \
"Highest supported SSE instructions set on x86/x64") \
\
+ product(intx, UseAVX, 99, \
+ "Highest supported AVX instructions set on x86/x64") \
+ \
product(intx, UseVIS, 99, \
"Highest supported VIS instructions set on Sparc") \
\
diff --git a/src/share/vm/shark/sharkIntrinsics.cpp b/src/share/vm/shark/sharkIntrinsics.cpp
index a5d83cf1a..8efaa432e 100644
--- a/src/share/vm/shark/sharkIntrinsics.cpp
+++ b/src/share/vm/shark/sharkIntrinsics.cpp
@@ -213,17 +213,11 @@ void SharkIntrinsics::do_Object_getClass() {
SharkType::oop_type(),
"klass");
- Value *klass_part = builder()->CreateAddressOfStructEntry(
- klass,
- in_ByteSize(klassOopDesc::klass_part_offset_in_bytes()),
- SharkType::klass_type(),
- "klass_part");
-
state()->push(
SharkValue::create_jobject(
builder()->CreateValueOfStructEntry(
- klass_part,
- in_ByteSize(Klass::java_mirror_offset_in_bytes()),
+ klass,
+ Klass::java_mirror_offset(),
SharkType::oop_type(),
"java_mirror"),
true));
diff --git a/src/share/vm/shark/sharkTopLevelBlock.cpp b/src/share/vm/shark/sharkTopLevelBlock.cpp
index 1e236ff6f..70e30f9fa 100644
--- a/src/share/vm/shark/sharkTopLevelBlock.cpp
+++ b/src/share/vm/shark/sharkTopLevelBlock.cpp
@@ -745,15 +745,9 @@ void SharkTopLevelBlock::call_register_finalizer(Value *receiver) {
SharkType::oop_type(),
"klass");
- Value *klass_part = builder()->CreateAddressOfStructEntry(
- klass,
- in_ByteSize(klassOopDesc::klass_part_offset_in_bytes()),
- SharkType::klass_type(),
- "klass_part");
-
Value *access_flags = builder()->CreateValueOfStructEntry(
- klass_part,
- in_ByteSize(Klass::access_flags_offset_in_bytes()),
+ klass,
+ Klass::access_flags_offset(),
SharkType::jint_type(),
"access_flags");
diff --git a/test/compiler/7123108/Test7123108.java b/test/compiler/7123108/Test7123108.java
new file mode 100644
index 000000000..66d3a01db
--- /dev/null
+++ b/test/compiler/7123108/Test7123108.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7123108
+ * @summary C1 crashes with assert(if_state != NULL) failed: states do not match up
+ *
+ * @run main/othervm -Xcomp Test7123108
+ */
+
+public class Test7123108 {
+
+ static class Test_Class_0 {
+ final static byte var_2 = 67;
+ byte var_3;
+ }
+
+ Object var_25 = "kgfpyhcms";
+ static long var_27 = 6899666748616086528L;
+
+ static float func_1()
+ {
+ return 0.0F;
+ }
+
+ private void test()
+ {
+ "dlwq".charAt(((short)'x' > var_27 | func_1() <= (((Test_Class_0)var_25).var_3) ? true : true) ? Test_Class_0.var_2 & (short)-1.1173839E38F : 'Y');
+ }
+
+ public static void main(String[] args)
+ {
+ Test7123108 t = new Test7123108();
+ try {
+ t.test();
+ } catch (Throwable e) { }
+ }
+}
diff --git a/test/compiler/7125879/Test7125879.java b/test/compiler/7125879/Test7125879.java
new file mode 100644
index 000000000..729aac669
--- /dev/null
+++ b/test/compiler/7125879/Test7125879.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7125879
+ * @summary assert(proj != NULL) failed: must be found
+ *
+ * @run main/othervm -Xcomp Test7125879
+ */
+
+public class Test7125879 {
+ String var_1 = "abc";
+
+ public Test7125879() {
+ var_1 = var_1.replaceAll("d", "e") + var_1;
+ }
+
+ public static void main(String[] args) {
+ Test7125879 t = new Test7125879();
+ try {
+ t.test();
+ } catch(Throwable e) { }
+ }
+
+ private void test() {
+ new Test7125879().var_1 = ((Test7125879)(new Object[-1])[0]).var_1;
+ }
+}
+