author    twisti <none@none>  2012-08-24 11:48:32 -0700
committer twisti <none@none>  2012-08-24 11:48:32 -0700
commit    4d8f8fae1f754b66a85c8bbbc106edb21483a8af (patch)
tree      16c2de63f303fc16c193196e2f30603ed2433188 /src/cpu
parent    07ace2d00d734040aa3fbea9de1e814102ab0baa (diff)
parent    a21fb84c812ed5d3eda2e083ec1ed4dfee480cb3 (diff)
Merge
Diffstat (limited to 'src/cpu')
-rw-r--r--  src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp  |   79
-rw-r--r--  src/cpu/x86/vm/assembler_x86.cpp         | 1141
-rw-r--r--  src/cpu/x86/vm/assembler_x86.hpp         |  207
-rw-r--r--  src/cpu/x86/vm/c1_CodeStubs_x86.cpp      |   62
-rw-r--r--  src/cpu/x86/vm/x86.ad                    | 2443
-rw-r--r--  src/cpu/x86/vm/x86_32.ad                 |   16
-rw-r--r--  src/cpu/x86/vm/x86_64.ad                 |   46
7 files changed, 3370 insertions(+), 624 deletions(-)
diff --git a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp
index c38b82bc5..b284a3e93 100644
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp
@@ -435,85 +435,6 @@ void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
}
-void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
- // At this point we know that offset == referent_offset.
- //
- // So we might have to emit:
- // if (src == null) goto continuation.
- //
- // and we definitely have to emit:
- // if (klass(src).reference_type == REF_NONE) goto continuation
- // if (!marking_active) goto continuation
- // if (pre_val == null) goto continuation
- // call pre_barrier(pre_val)
- // goto continuation
- //
- __ bind(_entry);
-
- assert(src()->is_register(), "sanity");
- Register src_reg = src()->as_register();
-
- if (gen_src_check()) {
- // The original src operand was not a constant.
- // Generate src == null?
- if (__ is_in_wdisp16_range(_continuation)) {
- __ br_null(src_reg, /*annul*/false, Assembler::pt, _continuation);
- } else {
- __ cmp(src_reg, G0);
- __ brx(Assembler::equal, false, Assembler::pt, _continuation);
- }
- __ delayed()->nop();
- }
-
- // Generate src->_klass->_reference_type() == REF_NONE)?
- assert(tmp()->is_register(), "sanity");
- Register tmp_reg = tmp()->as_register();
-
- __ load_klass(src_reg, tmp_reg);
-
- Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
- __ ldub(ref_type_adr, tmp_reg);
-
- // _reference_type field is of type ReferenceType (enum)
- assert(REF_NONE == 0, "check this code");
- __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
- __ delayed()->nop();
-
- // Is marking active?
- assert(thread()->is_register(), "precondition");
- Register thread_reg = thread()->as_pointer_register();
-
- Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
- PtrQueue::byte_offset_of_active()));
-
- if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
- __ ld(in_progress, tmp_reg);
- } else {
- assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
- __ ldsb(in_progress, tmp_reg);
- }
-
- __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
- __ delayed()->nop();
-
- // val == null?
- assert(val()->is_register(), "Precondition.");
- Register val_reg = val()->as_register();
-
- if (__ is_in_wdisp16_range(_continuation)) {
- __ br_null(val_reg, /*annul*/false, Assembler::pt, _continuation);
- } else {
- __ cmp(val_reg, G0);
- __ brx(Assembler::equal, false, Assembler::pt, _continuation);
- }
- __ delayed()->nop();
-
- __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id));
- __ delayed()->mov(val_reg, G4);
- __ br(Assembler::always, false, Assembler::pt, _continuation);
- __ delayed()->nop();
-}
-
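The stub removed above implemented the G1 SATB pre-barrier for Unsafe-style reads at referent_offset; its header comment spells out the control flow. As a C-style sketch of that flow (helper names such as klass_of, reference_type_of, marking_active, and g1_pre_barrier are illustrative placeholders, not HotSpot APIs):

// Sketch of the removed stub's logic, following its own comment.
// All helper functions here are hypothetical.
void satb_unsafe_get_barrier(oop src, oop pre_val) {
  if (src == NULL) return;                          // emitted only if src was not a constant
  if (reference_type_of(klass_of(src)) == REF_NONE) return;  // not a Reference subclass
  if (!marking_active(current_thread())) return;    // SATB queue not active
  if (pre_val == NULL) return;
  g1_pre_barrier(pre_val);                          // enqueue the previous value
}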
jbyte* G1PostBarrierStub::_byte_map_base = NULL;
jbyte* G1PostBarrierStub::byte_map_base_slow() {
diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp
index f88f9c62b..abf837339 100644
--- a/src/cpu/x86/vm/assembler_x86.cpp
+++ b/src/cpu/x86/vm/assembler_x86.cpp
@@ -999,32 +999,22 @@ void Assembler::addr_nop_8() {
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x58);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}
void Assembler::addsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x58);
- emit_operand(dst, src);
+ emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}
void Assembler::addss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x58);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}
void Assembler::addss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x58);
- emit_operand(dst, src);
+ emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}
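This hunk establishes the pattern repeated throughout the rest of the file: each open-coded prefix/opcode/operand sequence collapses into a single emit_simd_arith call. Judging from the removed bodies, the helper (whose definition begins in the final hunk of this file) presumably reduces to:

// Sketch inferred from the removed call sites; not the literal patch text.
void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src,
                                VexSimdPrefix pre) {
  int encode = simd_prefix_and_encode(dst, dst, src, pre);
  emit_byte(opcode);
  emit_byte(0xC0 | encode);   // ModRM, register-direct form
}

void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src,
                                VexSimdPrefix pre) {
  InstructionMark im(this);
  simd_prefix(dst, dst, src, pre);
  emit_byte(opcode);
  emit_operand(dst, src);     // ModRM + SIB/displacement, memory form
}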
void Assembler::andl(Address dst, int32_t imm32) {
@@ -1052,36 +1042,6 @@ void Assembler::andl(Register dst, Register src) {
emit_arith(0x23, 0xC0, dst, src);
}
-void Assembler::andpd(XMMRegister dst, Address src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_66);
- emit_byte(0x54);
- emit_operand(dst, src);
-}
-
-void Assembler::andpd(XMMRegister dst, XMMRegister src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
- emit_byte(0x54);
- emit_byte(0xC0 | encode);
-}
-
-void Assembler::andps(XMMRegister dst, Address src) {
- NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_NONE);
- emit_byte(0x54);
- emit_operand(dst, src);
-}
-
-void Assembler::andps(XMMRegister dst, XMMRegister src) {
- NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
- emit_byte(0x54);
- emit_byte(0xC0 | encode);
-}
-
void Assembler::bsfl(Register dst, Register src) {
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
@@ -1246,61 +1206,42 @@ void Assembler::comisd(XMMRegister dst, Address src) {
// NOTE: dbx seems to decode this as comiss even though the
// 0x66 is there. Strangely ucomisd comes out correct
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_66);
- emit_byte(0x2F);
- emit_operand(dst, src);
+ emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}
void Assembler::comisd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
- emit_byte(0x2F);
- emit_byte(0xC0 | encode);
+ emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}
void Assembler::comiss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_NONE);
- emit_byte(0x2F);
- emit_operand(dst, src);
+ emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}
void Assembler::comiss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
- emit_byte(0x2F);
- emit_byte(0xC0 | encode);
+ emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}
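comisd/comiss only read their operands, so they route through emit_simd_arith_nonds, the non-destructive-source variant; by the same inference it omits the duplicated dst operand (using xnoreg for the absent NDS register is an assumption based on the pcmpestri/ptest call sites below):

// Sketch; signature and xnoreg usage are inferred, not quoted.
void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst,
                                      XMMRegister src, VexSimdPrefix pre) {
  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
  emit_byte(opcode);
  emit_byte(0xC0 | encode);
}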
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
- emit_byte(0xE6);
- emit_byte(0xC0 | encode);
+ emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
}
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
- emit_byte(0x5B);
- emit_byte(0xC0 | encode);
+ emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
}
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x5A);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}
void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x5A);
- emit_operand(dst, src);
+ emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
@@ -1312,10 +1253,7 @@ void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x2A);
- emit_operand(dst, src);
+ emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
}
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
@@ -1327,25 +1265,17 @@ void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x2A);
- emit_operand(dst, src);
+ emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
}
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x5A);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}
void Assembler::cvtss2sd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x5A);
- emit_operand(dst, src);
+ emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}
@@ -1373,32 +1303,22 @@ void Assembler::decl(Address dst) {
void Assembler::divsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x5E);
- emit_operand(dst, src);
+ emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x5E);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}
void Assembler::divss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x5E);
- emit_operand(dst, src);
+ emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}
void Assembler::divss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x5E);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}
void Assembler::emms() {
@@ -1634,16 +1554,12 @@ void Assembler::mov(Register dst, Register src) {
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
- emit_byte(0x28);
- emit_byte(0xC0 | encode);
+ emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
}
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
- emit_byte(0x28);
- emit_byte(0xC0 | encode);
+ emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
}
void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
@@ -1712,24 +1628,17 @@ void Assembler::movdl(Address dst, XMMRegister src) {
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
- emit_byte(0x6F);
- emit_byte(0xC0 | encode);
+ emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
}
void Assembler::movdqu(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_F3);
- emit_byte(0x6F);
- emit_operand(dst, src);
+ emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
}
void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
- emit_byte(0x6F);
- emit_byte(0xC0 | encode);
+ emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
}
void Assembler::movdqu(Address dst, XMMRegister src) {
@@ -1810,10 +1719,7 @@ void Assembler::movl(Address dst, Register src) {
// The selection is done in MacroAssembler::movdbl() and movflt().
void Assembler::movlpd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_66);
- emit_byte(0x12);
- emit_operand(dst, src);
+ emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
}
void Assembler::movq( MMXRegister dst, Address src ) {
@@ -1870,17 +1776,12 @@ void Assembler::movsbl(Register dst, Register src) { // movsxb
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x10);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
}
void Assembler::movsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_F2);
- emit_byte(0x10);
- emit_operand(dst, src);
+ emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
}
void Assembler::movsd(Address dst, XMMRegister src) {
@@ -1893,17 +1794,12 @@ void Assembler::movsd(Address dst, XMMRegister src) {
void Assembler::movss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x10);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
}
void Assembler::movss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_F3);
- emit_byte(0x10);
- emit_operand(dst, src);
+ emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
}
void Assembler::movss(Address dst, XMMRegister src) {
@@ -2001,32 +1897,22 @@ void Assembler::mull(Register src) {
void Assembler::mulsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x59);
- emit_operand(dst, src);
+ emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
}
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x59);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
}
void Assembler::mulss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x59);
- emit_operand(dst, src);
+ emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
}
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x59);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
}
void Assembler::negl(Register dst) {
@@ -2315,17 +2201,12 @@ void Assembler::orl(Register dst, Register src) {
void Assembler::packuswb(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_66);
- emit_byte(0x67);
- emit_operand(dst, src);
+ emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
}
void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
- emit_byte(0x67);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
}
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
@@ -2339,7 +2220,7 @@ void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_2(), "");
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
emit_byte(0x61);
emit_byte(0xC0 | encode);
emit_byte(imm8);
@@ -2355,7 +2236,7 @@ void Assembler::pmovzxbw(XMMRegister dst, Address src) {
void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse4_1(), "");
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
emit_byte(0x30);
emit_byte(0xC0 | encode);
}
@@ -2456,28 +2337,10 @@ void Assembler::prefix(Prefix p) {
a_byte(p);
}
-void Assembler::por(XMMRegister dst, XMMRegister src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
- emit_byte(0xEB);
- emit_byte(0xC0 | encode);
-}
-
-void Assembler::por(XMMRegister dst, Address src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_66);
- emit_byte(0xEB);
- emit_operand(dst, src);
-}
-
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
- emit_byte(0x70);
- emit_byte(0xC0 | encode);
+ emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
emit_byte(mode & 0xFF);
}
@@ -2496,9 +2359,7 @@ void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
- emit_byte(0x70);
- emit_byte(0xC0 | encode);
+ emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
emit_byte(mode & 0xFF);
}
@@ -2513,18 +2374,6 @@ void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
emit_byte(mode & 0xFF);
}
-void Assembler::psrlq(XMMRegister dst, int shift) {
- // Shift 64 bit value logically right by specified number of bits.
- // HMM Table D-1 says sse2 or mmx.
- // Do not confuse it with psrldq SSE2 instruction which
- // shifts 128 bit value in xmm register by number of bytes.
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
- emit_byte(0x73);
- emit_byte(0xC0 | encode);
- emit_byte(shift);
-}
-
void Assembler::psrldq(XMMRegister dst, int shift) {
// Shift 128 bit value in xmm register by number of bytes.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -2545,7 +2394,7 @@ void Assembler::ptest(XMMRegister dst, Address src) {
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse4_1(), "");
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
emit_byte(0x17);
emit_byte(0xC0 | encode);
}
@@ -2553,40 +2402,28 @@ void Assembler::ptest(XMMRegister dst, XMMRegister src) {
void Assembler::punpcklbw(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_66);
- emit_byte(0x60);
- emit_operand(dst, src);
+ emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
}
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
- emit_byte(0x60);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
}
void Assembler::punpckldq(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_66);
- emit_byte(0x62);
- emit_operand(dst, src);
+ emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
}
void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
- emit_byte(0x62);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
}
void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
- emit_byte(0x6C);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
}
void Assembler::push(int32_t imm32) {
@@ -2616,22 +2453,6 @@ void Assembler::pushl(Address src) {
}
#endif
-void Assembler::pxor(XMMRegister dst, Address src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_66);
- emit_byte(0xEF);
- emit_operand(dst, src);
-}
-
-void Assembler::pxor(XMMRegister dst, XMMRegister src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
- emit_byte(0xEF);
- emit_byte(0xC0 | encode);
-}
-
void Assembler::rcll(Register dst, int imm8) {
assert(isShiftCount(imm8), "illegal shift count");
int encode = prefix_and_encode(dst->encoding());
@@ -2790,32 +2611,22 @@ void Assembler::smovl() {
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x51);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
}
void Assembler::sqrtsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x51);
- emit_operand(dst, src);
+ emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
}
void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x51);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
}
void Assembler::sqrtss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x51);
- emit_operand(dst, src);
+ emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
}
void Assembler::stmxcsr( Address dst) {
@@ -2865,32 +2676,22 @@ void Assembler::subl(Register dst, Register src) {
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x5C);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
}
void Assembler::subsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F2);
- emit_byte(0x5C);
- emit_operand(dst, src);
+ emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
}
void Assembler::subss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x5C);
- emit_byte(0xC0 | encode);
+ emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
}
void Assembler::subss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_F3);
- emit_byte(0x5C);
- emit_operand(dst, src);
+ emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
}
void Assembler::testb(Register dst, int imm8) {
@@ -2928,32 +2729,22 @@ void Assembler::testl(Register dst, Address src) {
void Assembler::ucomisd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_66);
- emit_byte(0x2E);
- emit_operand(dst, src);
+ emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
}
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
- emit_byte(0x2E);
- emit_byte(0xC0 | encode);
+ emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
}
void Assembler::ucomiss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_NONE);
- emit_byte(0x2E);
- emit_operand(dst, src);
+ emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
}
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
- emit_byte(0x2E);
- emit_byte(0xC0 | encode);
+ emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
}
@@ -2995,212 +2786,715 @@ void Assembler::xorl(Register dst, Register src) {
emit_arith(0x33, 0xC0, dst, src);
}
-void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
- emit_byte(0x57);
- emit_byte(0xC0 | encode);
-}
-void Assembler::xorpd(XMMRegister dst, Address src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_66);
- emit_byte(0x57);
- emit_operand(dst, src);
-}
-
-
-void Assembler::xorps(XMMRegister dst, XMMRegister src) {
- NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
- emit_byte(0x57);
- emit_byte(0xC0 | encode);
-}
-
-void Assembler::xorps(XMMRegister dst, Address src) {
- NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_NONE);
- emit_byte(0x57);
- emit_operand(dst, src);
-}
-
-// AVX 3-operands non destructive source instructions (encoded with VEX prefix)
+// AVX 3-operand scalar floating-point arithmetic instructions
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- InstructionMark im(this);
- vex_prefix(dst, nds, src, VEX_SIMD_F2);
- emit_byte(0x58);
- emit_operand(dst, src);
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
- emit_byte(0x58);
- emit_byte(0xC0 | encode);
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- InstructionMark im(this);
- vex_prefix(dst, nds, src, VEX_SIMD_F3);
- emit_byte(0x58);
- emit_operand(dst, src);
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
- emit_byte(0x58);
- emit_byte(0xC0 | encode);
-}
-
-void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) {
- assert(VM_Version::supports_avx(), "");
- InstructionMark im(this);
- vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
- emit_byte(0x54);
- emit_operand(dst, src);
-}
-
-void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) {
- assert(VM_Version::supports_avx(), "");
- InstructionMark im(this);
- vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
- emit_byte(0x54);
- emit_operand(dst, src);
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- InstructionMark im(this);
- vex_prefix(dst, nds, src, VEX_SIMD_F2);
- emit_byte(0x5E);
- emit_operand(dst, src);
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
- emit_byte(0x5E);
- emit_byte(0xC0 | encode);
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- InstructionMark im(this);
- vex_prefix(dst, nds, src, VEX_SIMD_F3);
- emit_byte(0x5E);
- emit_operand(dst, src);
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
- emit_byte(0x5E);
- emit_byte(0xC0 | encode);
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- InstructionMark im(this);
- vex_prefix(dst, nds, src, VEX_SIMD_F2);
- emit_byte(0x59);
- emit_operand(dst, src);
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
- emit_byte(0x59);
- emit_byte(0xC0 | encode);
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
- InstructionMark im(this);
- vex_prefix(dst, nds, src, VEX_SIMD_F3);
- emit_byte(0x59);
- emit_operand(dst, src);
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
- emit_byte(0x59);
- emit_byte(0xC0 | encode);
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
-
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- InstructionMark im(this);
- vex_prefix(dst, nds, src, VEX_SIMD_F2);
- emit_byte(0x5C);
- emit_operand(dst, src);
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
- emit_byte(0x5C);
- emit_byte(0xC0 | encode);
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- InstructionMark im(this);
- vex_prefix(dst, nds, src, VEX_SIMD_F3);
- emit_byte(0x5C);
- emit_operand(dst, src);
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
- emit_byte(0x5C);
- emit_byte(0xC0 | encode);
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
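The AVX three-operand forms get the same consolidation through emit_vex_arith. From the bodies removed in this section, the helper presumably amounts to:

// Sketch inferred from the removed call sites; not the literal patch text.
void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                               XMMRegister src, VexSimdPrefix pre, bool vector256) {
  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
  emit_byte(opcode);
  emit_byte(0xC0 | encode);
}

void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                               Address src, VexSimdPrefix pre, bool vector256) {
  InstructionMark im(this);
  vex_prefix(dst, nds, src, pre, vector256);
  emit_byte(opcode);
  emit_operand(dst, src);
}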
+//====================VECTOR ARITHMETIC=====================================
+
+// Floating-point vector arithmetic
+
+void Assembler::addpd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::addps(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
}
-void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) {
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
assert(VM_Version::supports_avx(), "");
- InstructionMark im(this);
- vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
- emit_byte(0x57);
- emit_operand(dst, src);
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::subpd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::subps(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::mulps(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::divpd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::divps(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::andpd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::andps(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::andps(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::andpd(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::xorps(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::xorpd(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::xorps(XMMRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
}
void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
assert(VM_Version::supports_avx(), "");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256);
- emit_byte(0x57);
- emit_byte(0xC0 | encode);
+ emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
}
-void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) {
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+
+// Integer vector arithmetic
+void Assembler::paddb(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddw(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddq(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::psubb(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubw(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubq(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ emit_byte(0x40);
+ emit_byte(0xC0 | encode);
+}
+
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+ emit_byte(0x40);
+ emit_byte(0xC0 | encode);
+}
+
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
InstructionMark im(this);
- vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
- emit_byte(0x57);
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+ emit_byte(0x40);
emit_operand(dst, src);
}
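pmulld/vpmulld are the outliers in this group: they live in the 0F 38 opcode map, so the memory form above builds the VEX prefix by hand with an explicit VEX_OPCODE_0F_38 rather than going through emit_vex_arith, which, if the sketch earlier is right, handles only default-map (0F) opcodes:

// Assumption: emit_vex_arith carries no VexOpcode parameter, i.e. it is
// equivalent to vex_prefix_and_encode(dst, nds, src, pre, vector256) + 0F map.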
-void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx(), "");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, vector256);
- emit_byte(0x57);
+// Shift packed integers left by specified number of bits.
+void Assembler::psllw(XMMRegister dst, int shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+ int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+ emit_byte(0x71);
emit_byte(0xC0 | encode);
+ emit_byte(shift & 0xFF);
}
-void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx2() || (!vector256) && VM_Version::supports_avx(), "");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256);
- emit_byte(0xEF);
+void Assembler::pslld(XMMRegister dst, int shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+ int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+ emit_byte(0x72);
+ emit_byte(0xC0 | encode);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::psllq(XMMRegister dst, int shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ // XMM6 is for /6 encoding: 66 0F 73 /6 ib
+ int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+ emit_byte(0x73);
emit_byte(0xC0 | encode);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+ emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+ emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ // XMM6 is for /6 encoding: 66 0F 73 /6 ib
+ emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
}
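The immediate-count forms explain the odd-looking xmm6 first argument: in the 66 0F 71-73 /N group the ModRM reg field carries an opcode extension rather than a register, so passing xmmN makes the prefix helper plant N in that field. A worked example, assuming the standard SSE2 encoding and that the helper packs encode as (reg << 3) | rm:

// psllq(xmm1, 3) should emit: 66 0F 73 /6 ib
//   encode = (6 << 3) | 1 = 0x31        (xmm6 supplies the /6 extension)
//   ModRM  = 0xC0 | encode = 0xF1       (register-direct, r/m = xmm1)
//   bytes:   66 0F 73 F1 03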
+// Shift packed integers logically right by specified number of bits.
+void Assembler::psrlw(XMMRegister dst, int shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ // XMM2 is for /2 encoding: 66 0F 71 /2 ib
+ int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+ emit_byte(0x71);
+ emit_byte(0xC0 | encode);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrld(XMMRegister dst, int shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ // XMM2 is for /2 encoding: 66 0F 72 /2 ib
+ int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+ emit_byte(0x72);
+ emit_byte(0xC0 | encode);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrlq(XMMRegister dst, int shift) {
+ // Do not confuse it with psrldq SSE2 instruction which
+ // shifts 128 bit value in xmm register by number of bytes.
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ // XMM2 is for /2 encoding: 66 0F 73 /2 ib
+ int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+ emit_byte(0x73);
+ emit_byte(0xC0 | encode);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
+ emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
+ emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ // XMM2 is for /2 encoding: 66 0F 73 /2 ib
+ emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+// Shift packed integers arithmetically right by specified number of bits.
+void Assembler::psraw(XMMRegister dst, int shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ // XMM4 is for /4 encoding: 66 0F 71 /4 ib
+ int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+ emit_byte(0x71);
+ emit_byte(0xC0 | encode);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrad(XMMRegister dst, int shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ // XMM4 is for /4 encoding: 66 0F 72 /4 ib
+ int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+ emit_byte(0x72);
+ emit_byte(0xC0 | encode);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+ assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ // XMM4 is for /4 encoding: 66 0F 71 /4 ib
+ emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+ assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+ // XMM4 is for /4 encoding: 66 0F 72 /4 ib
+ emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
+ emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+ assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+ emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+ assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+ emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+
+// AND packed integers
+void Assembler::pand(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+ emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+ emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::por(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+ emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+ emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::pxor(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+ emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+ emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+
void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
bool vector256 = true;
@@ -3805,6 +4099,49 @@ int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegis
}
}
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, pre);
+ emit_byte(opcode);
+ emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
+ int encode = simd_prefix_and_encode(dst, dst, src, pre);
+ emit_byte(opcode);
+ emit_byte(0xC0 | encode);
+}
+
+// Versions without a second (non-destructive) source register.
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
+ InstructionMark im(this);
+ simd_prefix(dst, xnoreg, src, pre);
+ emit_byte(opcode);
+ emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
+ emit_byte(opcode);
+ emit_byte(0xC0 | encode);
+}
+
+// 3-operand AVX instructions
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+ Address src, VexSimdPrefix pre, bool vector256) {
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, pre, vector256);
+ emit_byte(opcode);
+ emit_operand(dst, src);
+}
+
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+ XMMRegister src, VexSimdPrefix pre, bool vector256) {
+ int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
+ emit_byte(opcode);
+ emit_byte(0xC0 | encode);
+}
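+
+// By way of example, por(dst, src) above now reduces to
+// emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
+// which emits the legacy 66-prefixed encoding (or a VEX prefix when AVX is
+// enabled), the opcode byte, and a register-direct ModRM byte (0xC0 | encode).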
+
#ifndef _LP64
void Assembler::incl(Register dst) {
@@ -7968,21 +8305,21 @@ void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src
}
}
-void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
if (reachable(src)) {
- vandpd(dst, nds, as_Address(src));
+ vandpd(dst, nds, as_Address(src), vector256);
} else {
lea(rscratch1, src);
- vandpd(dst, nds, Address(rscratch1, 0));
+ vandpd(dst, nds, Address(rscratch1, 0), vector256);
}
}
-void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
if (reachable(src)) {
- vandps(dst, nds, as_Address(src));
+ vandps(dst, nds, as_Address(src), vector256);
} else {
lea(rscratch1, src);
- vandps(dst, nds, Address(rscratch1, 0));
+ vandps(dst, nds, Address(rscratch1, 0), vector256);
}
}
@@ -8040,21 +8377,21 @@ void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src
}
}
-void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
if (reachable(src)) {
- vxorpd(dst, nds, as_Address(src));
+ vxorpd(dst, nds, as_Address(src), vector256);
} else {
lea(rscratch1, src);
- vxorpd(dst, nds, Address(rscratch1, 0));
+ vxorpd(dst, nds, Address(rscratch1, 0), vector256);
}
}
-void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
if (reachable(src)) {
- vxorps(dst, nds, as_Address(src));
+ vxorps(dst, nds, as_Address(src), vector256);
} else {
lea(rscratch1, src);
- vxorps(dst, nds, Address(rscratch1, 0));
+ vxorps(dst, nds, Address(rscratch1, 0), vector256);
}
}
diff --git a/src/cpu/x86/vm/assembler_x86.hpp b/src/cpu/x86/vm/assembler_x86.hpp
index 0d8746543..d06f499ca 100644
--- a/src/cpu/x86/vm/assembler_x86.hpp
+++ b/src/cpu/x86/vm/assembler_x86.hpp
@@ -617,6 +617,7 @@ private:
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
simd_prefix(dst, xnoreg, src, pre, opc);
}
+
void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
simd_prefix(src, dst, pre);
}
@@ -626,16 +627,10 @@ private:
simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
}
-
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
bool rex_w = false, bool vector256 = false);
- int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
- VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
- return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
- }
-
// Move/convert 32-bit integer value.
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
VexSimdPrefix pre) {
@@ -677,6 +672,15 @@ private:
void emit_arith(int op1, int op2, Register dst, jobject obj);
void emit_arith(int op1, int op2, Register dst, Register src);
+ void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
+ void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
+ void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
+ void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
+ void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+ Address src, VexSimdPrefix pre, bool vector256);
+ void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+ XMMRegister src, VexSimdPrefix pre, bool vector256);
+
void emit_operand(Register reg,
Register base, Register index, Address::ScaleFactor scale,
int disp,
@@ -891,12 +895,6 @@ private:
void andq(Register dst, Address src);
void andq(Register dst, Register src);
- // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
- void andpd(XMMRegister dst, XMMRegister src);
-
- // Bitwise Logical AND of Packed Single-Precision Floating-Point Values
- void andps(XMMRegister dst, XMMRegister src);
-
void bsfl(Register dst, Register src);
void bsrl(Register dst, Register src);
@@ -1436,10 +1434,6 @@ private:
void prefetcht2(Address src);
void prefetchw(Address src);
- // POR - Bitwise logical OR
- void por(XMMRegister dst, XMMRegister src);
- void por(XMMRegister dst, Address src);
-
// Shuffle Packed Doublewords
void pshufd(XMMRegister dst, XMMRegister src, int mode);
void pshufd(XMMRegister dst, Address src, int mode);
@@ -1448,9 +1442,6 @@ private:
void pshuflw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, Address src, int mode);
- // Shift Right by bits Logical Quadword Immediate
- void psrlq(XMMRegister dst, int shift);
-
// Shift Right by bytes Logical DoubleQuadword Immediate
void psrldq(XMMRegister dst, int shift);
@@ -1475,10 +1466,6 @@ private:
void pushq(Address src);
- // Xor Packed Byte Integer Values
- void pxor(XMMRegister dst, Address src);
- void pxor(XMMRegister dst, XMMRegister src);
-
void rcll(Register dst, int imm8);
void rclq(Register dst, int imm8);
@@ -1601,15 +1588,10 @@ private:
void xorq(Register dst, Address src);
void xorq(Register dst, Register src);
- // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
- void xorpd(XMMRegister dst, XMMRegister src);
-
- // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
- void xorps(XMMRegister dst, XMMRegister src);
-
void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
// AVX 3-operands scalar instructions (encoded with VEX prefix)
+
void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vaddss(XMMRegister dst, XMMRegister nds, Address src);
@@ -1627,14 +1609,147 @@ private:
void vsubss(XMMRegister dst, XMMRegister nds, Address src);
void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
- // AVX Vector instrucitons.
- void vandpd(XMMRegister dst, XMMRegister nds, Address src);
- void vandps(XMMRegister dst, XMMRegister nds, Address src);
- void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
- void vxorps(XMMRegister dst, XMMRegister nds, Address src);
+
+ //====================VECTOR ARITHMETIC=====================================
+
+ // Add Packed Floating-Point Values
+ void addpd(XMMRegister dst, XMMRegister src);
+ void addps(XMMRegister dst, XMMRegister src);
+ void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+ // Subtract Packed Floating-Point Values
+ void subpd(XMMRegister dst, XMMRegister src);
+ void subps(XMMRegister dst, XMMRegister src);
+ void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+ // Multiply Packed Floating-Point Values
+ void mulpd(XMMRegister dst, XMMRegister src);
+ void mulps(XMMRegister dst, XMMRegister src);
+ void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+ // Divide Packed Floating-Point Values
+ void divpd(XMMRegister dst, XMMRegister src);
+ void divps(XMMRegister dst, XMMRegister src);
+ void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+ // Bitwise Logical AND of Packed Floating-Point Values
+ void andpd(XMMRegister dst, XMMRegister src);
+ void andps(XMMRegister dst, XMMRegister src);
+ void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+ // Bitwise Logical XOR of Packed Floating-Point Values
+ void xorpd(XMMRegister dst, XMMRegister src);
+ void xorps(XMMRegister dst, XMMRegister src);
void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+ // Add packed integers
+ void paddb(XMMRegister dst, XMMRegister src);
+ void paddw(XMMRegister dst, XMMRegister src);
+ void paddd(XMMRegister dst, XMMRegister src);
+ void paddq(XMMRegister dst, XMMRegister src);
+ void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+ // Sub packed integers
+ void psubb(XMMRegister dst, XMMRegister src);
+ void psubw(XMMRegister dst, XMMRegister src);
+ void psubd(XMMRegister dst, XMMRegister src);
+ void psubq(XMMRegister dst, XMMRegister src);
+ void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+ // Multiply packed integers (only shorts and ints)
+ void pmullw(XMMRegister dst, XMMRegister src);
+ void pmulld(XMMRegister dst, XMMRegister src);
+ void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+ void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+ // Shift left packed integers
+ void psllw(XMMRegister dst, int shift);
+ void pslld(XMMRegister dst, int shift);
+ void psllq(XMMRegister dst, int shift);
+ void psllw(XMMRegister dst, XMMRegister shift);
+ void pslld(XMMRegister dst, XMMRegister shift);
+ void psllq(XMMRegister dst, XMMRegister shift);
+ void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+ void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+ void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+ void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+ void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+ void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+ // Logical shift right packed integers
+ void psrlw(XMMRegister dst, int shift);
+ void psrld(XMMRegister dst, int shift);
+ void psrlq(XMMRegister dst, int shift);
+ void psrlw(XMMRegister dst, XMMRegister shift);
+ void psrld(XMMRegister dst, XMMRegister shift);
+ void psrlq(XMMRegister dst, XMMRegister shift);
+ void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+ void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+ void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+ void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+ void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+ void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+ // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
+ void psraw(XMMRegister dst, int shift);
+ void psrad(XMMRegister dst, int shift);
+ void psraw(XMMRegister dst, XMMRegister shift);
+ void psrad(XMMRegister dst, XMMRegister shift);
+ void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+ void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+ void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+ void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+ // And packed integers
+ void pand(XMMRegister dst, XMMRegister src);
+ void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+ // Or packed integers
+ void por(XMMRegister dst, XMMRegister src);
+ void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+ // Xor packed integers
+ void pxor(XMMRegister dst, XMMRegister src);
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+ // Copy the low 128 bits into the high 128 bits of YMM registers.
void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
@@ -2532,11 +2647,13 @@ public:
void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
- void vandpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandpd(dst, nds, src); }
- void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+ void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
+ void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
+ void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
- void vandps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandps(dst, nds, src); }
- void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+ void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
+ void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
+ void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
@@ -2565,12 +2682,12 @@ public:
// AVX Vector instructions
void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
- void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); }
- void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+ void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
+ void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
- void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
- void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+ void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
+ void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
@@ -2578,6 +2695,12 @@ public:
else
Assembler::vxorpd(dst, nds, src, vector256);
}
+ void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
+ Assembler::vpxor(dst, nds, src, vector256);
+ else
+ Assembler::vxorpd(dst, nds, src, vector256);
+ }
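+ // The fall-back is safe: XOR is a pure bitwise operation, so vxorpd
+ // produces exactly the same result as vpxor; AVX2 merely adds the 256-bit
+ // integer encoding.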
// Move packed integer values from the low 128 bits to the high 128 bits of a 256-bit vector.
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
diff --git a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp
index 24b879aec..618a37c91 100644
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp
@@ -488,68 +488,6 @@ void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
}
-void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
- // At this point we know that offset == referent_offset.
- //
- // So we might have to emit:
- // if (src == null) goto continuation.
- //
- // and we definitely have to emit:
- // if (klass(src).reference_type == REF_NONE) goto continuation
- // if (!marking_active) goto continuation
- // if (pre_val == null) goto continuation
- // call pre_barrier(pre_val)
- // goto continuation
- //
- __ bind(_entry);
-
- assert(src()->is_register(), "sanity");
- Register src_reg = src()->as_register();
-
- if (gen_src_check()) {
- // The original src operand was not a constant.
- // Generate src == null?
- __ cmpptr(src_reg, (int32_t) NULL_WORD);
- __ jcc(Assembler::equal, _continuation);
- }
-
- // Generate src->_klass->_reference_type == REF_NONE)?
- assert(tmp()->is_register(), "sanity");
- Register tmp_reg = tmp()->as_register();
-
- __ load_klass(tmp_reg, src_reg);
-
- Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
- __ cmpb(ref_type_adr, REF_NONE);
- __ jcc(Assembler::equal, _continuation);
-
- // Is marking active?
- assert(thread()->is_register(), "precondition");
- Register thread_reg = thread()->as_pointer_register();
-
- Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
- PtrQueue::byte_offset_of_active()));
-
- if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
- __ cmpl(in_progress, 0);
- } else {
- assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
- __ cmpb(in_progress, 0);
- }
- __ jcc(Assembler::equal, _continuation);
-
- // val == null?
- assert(val()->is_register(), "Precondition.");
- Register val_reg = val()->as_register();
-
- __ cmpptr(val_reg, (int32_t) NULL_WORD);
- __ jcc(Assembler::equal, _continuation);
-
- ce->store_parameter(val()->as_register(), 0);
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
- __ jmp(_continuation);
-}
-
jbyte* G1PostBarrierStub::_byte_map_base = NULL;
jbyte* G1PostBarrierStub::byte_map_base_slow() {
diff --git a/src/cpu/x86/vm/x86.ad b/src/cpu/x86/vm/x86.ad
index de512c37b..6bb14ef16 100644
--- a/src/cpu/x86/vm/x86.ad
+++ b/src/cpu/x86/vm/x86.ad
@@ -500,6 +500,24 @@ const int Matcher::base2reg[Type::lastype] = {
0 /*bottom*/
};
+const bool Matcher::match_rule_supported(int opcode) {
+ if (!has_match_rule(opcode))
+ return false;
+
+ switch (opcode) {
+ case Op_PopCountI:
+ case Op_PopCountL:
+ if (!UsePopCountInstruction)
+ return false;
+ break;
+ case Op_MulVI:
+ if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
+ return false;
+ break;
+ }
+
+ return true; // By default, match rules are supported.
+}
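+
+// C2 consults match_rule_supported() before vectorizing, so an opcode
+// rejected here (e.g. MulVI without SSE4.1/AVX) is simply left scalar
+// rather than failing later during code emission.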
+
// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
assert(is_java_primitive(bt), "only primitive type vectors");
@@ -1439,8 +1457,9 @@ instruct absF_reg_reg(regF dst, regF src) %{
ins_cost(150);
format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
ins_encode %{
+ bool vector256 = false;
__ vandps($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(float_signmask()));
+ ExternalAddress(float_signmask()), vector256);
%}
ins_pipe(pipe_slow);
%}
@@ -1464,8 +1483,9 @@ instruct absD_reg_reg(regD dst, regD src) %{
format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
"# abs double by sign masking" %}
ins_encode %{
+ bool vector256 = false;
__ vandpd($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(double_signmask()));
+ ExternalAddress(double_signmask()), vector256);
%}
ins_pipe(pipe_slow);
%}
@@ -1487,8 +1507,9 @@ instruct negF_reg_reg(regF dst, regF src) %{
ins_cost(150);
format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
ins_encode %{
+ bool vector256 = false;
__ vxorps($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(float_signflip()));
+ ExternalAddress(float_signflip()), vector256);
%}
ins_pipe(pipe_slow);
%}
@@ -1512,8 +1533,9 @@ instruct negD_reg_reg(regD dst, regD src) %{
format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
"# neg double by sign flipping" %}
ins_encode %{
+ bool vector256 = false;
__ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(double_signflip()));
+ ExternalAddress(double_signflip()), vector256);
%}
ins_pipe(pipe_slow);
%}
@@ -2382,3 +2404,2416 @@ instruct Repl4D_zero(vecY dst, immD0 zero) %{
ins_pipe( fpu_reg_reg );
%}
+// ====================VECTOR ARITHMETIC=======================================
+
+// --------------------------------- ADD --------------------------------------
+
+// Bytes vector add
+instruct vadd4B(vecS dst, vecS src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (AddVB dst src));
+ format %{ "paddb $dst,$src\t! add packed4B" %}
+ ins_encode %{
+ __ paddb($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
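+
+// The same shape repeats for every size and type below: `predicate` gates
+// the rule on vector length (plus UseAVX for the three-operand and memory
+// forms), `match` names the ideal-graph node, and `ins_encode` calls the
+// assembler entry points added above.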
+
+instruct vadd8B(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (AddVB dst src));
+ format %{ "paddb $dst,$src\t! add packed8B" %}
+ ins_encode %{
+ __ paddb($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (AddVB dst src));
+ format %{ "paddb $dst,$src\t! add packed16B" %}
+ ins_encode %{
+ __ paddb($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+ match(Set dst (AddVB src (LoadVector mem)));
+ format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+ match(Set dst (AddVB src (LoadVector mem)));
+ format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
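+
+// Note: the 256-bit integer forms (packed 32B/16S/8I/4L) require AVX2
+// (UseAVX > 1), while the 256-bit float and double forms need only AVX1,
+// which already provides 256-bit FP arithmetic.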
+
+// Shorts/Chars vector add
+instruct vadd2S(vecS dst, vecS src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (AddVS dst src));
+ format %{ "paddw $dst,$src\t! add packed2S" %}
+ ins_encode %{
+ __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (AddVS dst src));
+ format %{ "paddw $dst,$src\t! add packed4S" %}
+ ins_encode %{
+ __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (AddVS dst src));
+ format %{ "paddw $dst,$src\t! add packed8S" %}
+ ins_encode %{
+ __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVS src (LoadVector mem)));
+ format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (AddVS src (LoadVector mem)));
+ format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Integers vector add
+instruct vadd2I(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (AddVI dst src));
+ format %{ "paddd $dst,$src\t! add packed2I" %}
+ ins_encode %{
+ __ paddd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (AddVI src1 src2));
+ format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4I(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (AddVI dst src));
+ format %{ "paddd $dst,$src\t! add packed4I" %}
+ ins_encode %{
+ __ paddd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (AddVI src1 src2));
+ format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (AddVI src (LoadVector mem)));
+ format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVI src1 src2));
+ format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVI src (LoadVector mem)));
+ format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Longs vector add
+instruct vadd2L(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (AddVL dst src));
+ format %{ "paddq $dst,$src\t! add packed2L" %}
+ ins_encode %{
+ __ paddq($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (AddVL src1 src2));
+ format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (AddVL src (LoadVector mem)));
+ format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+ match(Set dst (AddVL src1 src2));
+ format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+ match(Set dst (AddVL src (LoadVector mem)));
+ format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Floats vector add
+instruct vadd2F(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (AddVF dst src));
+ format %{ "addps $dst,$src\t! add packed2F" %}
+ ins_encode %{
+ __ addps($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (AddVF src1 src2));
+ format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (AddVF dst src));
+ format %{ "addps $dst,$src\t! add packed4F" %}
+ ins_encode %{
+ __ addps($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (AddVF src1 src2));
+ format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (AddVF src (LoadVector mem)));
+ format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVF src1 src2));
+ format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVF src (LoadVector mem)));
+ format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Doubles vector add
+instruct vadd2D(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (AddVD dst src));
+ format %{ "addpd $dst,$src\t! add packed2D" %}
+ ins_encode %{
+ __ addpd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (AddVD src1 src2));
+ format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (AddVD src (LoadVector mem)));
+ format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (AddVD src1 src2));
+ format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (AddVD src (LoadVector mem)));
+ format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- SUB --------------------------------------
+
+// Bytes vector sub
+instruct vsub4B(vecS dst, vecS src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (SubVB dst src));
+ format %{ "psubb $dst,$src\t! sub packed4B" %}
+ ins_encode %{
+ __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (SubVB src1 src2));
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (SubVB dst src));
+ format %{ "psubb $dst,$src\t! sub packed8B" %}
+ ins_encode %{
+ __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVB src1 src2));
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (SubVB dst src));
+ format %{ "psubb $dst,$src\t! sub packed16B" %}
+ ins_encode %{
+ __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+ match(Set dst (SubVB src1 src2));
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+ match(Set dst (SubVB src (LoadVector mem)));
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+ match(Set dst (SubVB src1 src2));
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+ match(Set dst (SubVB src (LoadVector mem)));
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Shorts/Chars vector sub
+instruct vsub2S(vecS dst, vecS src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (SubVS dst src));
+ format %{ "psubw $dst,$src\t! sub packed2S" %}
+ ins_encode %{
+ __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (SubVS src1 src2));
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (SubVS dst src));
+ format %{ "psubw $dst,$src\t! sub packed4S" %}
+ ins_encode %{
+ __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (SubVS src1 src2));
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (SubVS dst src));
+ format %{ "psubw $dst,$src\t! sub packed8S" %}
+ ins_encode %{
+ __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVS src1 src2));
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVS src (LoadVector mem)));
+ format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (SubVS src1 src2));
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (SubVS src (LoadVector mem)));
+ format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Integers vector sub
+instruct vsub2I(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (SubVI dst src));
+ format %{ "psubd $dst,$src\t! sub packed2I" %}
+ ins_encode %{
+ __ psubd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (SubVI src1 src2));
+ format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4I(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (SubVI dst src));
+ format %{ "psubd $dst,$src\t! sub packed4I" %}
+ ins_encode %{
+ __ psubd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (SubVI src1 src2));
+ format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (SubVI src (LoadVector mem)));
+ format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVI src1 src2));
+ format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVI src (LoadVector mem)));
+ format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Longs vector sub
+instruct vsub2L(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (SubVL dst src));
+ format %{ "psubq $dst,$src\t! sub packed2L" %}
+ ins_encode %{
+ __ psubq($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (SubVL src1 src2));
+ format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (SubVL src (LoadVector mem)));
+ format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+ match(Set dst (SubVL src1 src2));
+ format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+ match(Set dst (SubVL src (LoadVector mem)));
+ format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Floats vector sub
+instruct vsub2F(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (SubVF dst src));
+ format %{ "subps $dst,$src\t! sub packed2F" %}
+ ins_encode %{
+ __ subps($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (SubVF src1 src2));
+ format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4F(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (SubVF dst src));
+ format %{ "subps $dst,$src\t! sub packed4F" %}
+ ins_encode %{
+ __ subps($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (SubVF src1 src2));
+ format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (SubVF src (LoadVector mem)));
+ format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVF src1 src2));
+ format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVF src (LoadVector mem)));
+ format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Doubles vector sub
+instruct vsub2D(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (SubVD dst src));
+ format %{ "subpd $dst,$src\t! sub packed2D" %}
+ ins_encode %{
+ __ subpd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (SubVD src1 src2));
+ format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (SubVD src (LoadVector mem)));
+ format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (SubVD src1 src2));
+ format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (SubVD src (LoadVector mem)));
+ format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- MUL --------------------------------------
+
+// Shorts/Chars vector mul
+instruct vmul2S(vecS dst, vecS src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (MulVS dst src));
+ format %{ "pmullw $dst,$src\t! mul packed2S" %}
+ ins_encode %{
+ __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (MulVS src1 src2));
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (MulVS dst src));
+ format %{ "pmullw $dst,$src\t! mul packed4S" %}
+ ins_encode %{
+ __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (MulVS src1 src2));
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (MulVS dst src));
+ format %{ "pmullw $dst,$src\t! mul packed8S" %}
+ ins_encode %{
+ __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (MulVS src1 src2));
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (MulVS src (LoadVector mem)));
+ format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (MulVS src1 src2));
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (MulVS src (LoadVector mem)));
+ format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Integers vector mul (sse4_1)
+instruct vmul2I(vecD dst, vecD src) %{
+ predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
+ match(Set dst (MulVI dst src));
+ format %{ "pmulld $dst,$src\t! mul packed2I" %}
+ ins_encode %{
+ __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (MulVI src1 src2));
+ format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4I(vecX dst, vecX src) %{
+ predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
+ match(Set dst (MulVI dst src));
+ format %{ "pmulld $dst,$src\t! mul packed4I" %}
+ ins_encode %{
+ __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (MulVI src1 src2));
+ format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (MulVI src (LoadVector mem)));
+ format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (MulVI src1 src2));
+ format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (MulVI src (LoadVector mem)));
+ format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Floats vector mul
+instruct vmul2F(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (MulVF dst src));
+ format %{ "mulps $dst,$src\t! mul packed2F" %}
+ ins_encode %{
+ __ mulps($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (MulVF src1 src2));
+ format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4F(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (MulVF dst src));
+ format %{ "mulps $dst,$src\t! mul packed4F" %}
+ ins_encode %{
+ __ mulps($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (MulVF src1 src2));
+ format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (MulVF src (LoadVector mem)));
+ format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (MulVF src1 src2));
+ format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (MulVF src (LoadVector mem)));
+ format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Doubles vector mul
+instruct vmul2D(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (MulVD dst src));
+ format %{ "mulpd $dst,$src\t! mul packed2D" %}
+ ins_encode %{
+ __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (MulVD src1 src2));
+ format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (MulVD src (LoadVector mem)));
+ format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (MulVD src1 src2));
+ format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (MulVD src (LoadVector mem)));
+ format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- DIV --------------------------------------
+
+// Floats vector div
+instruct vdiv2F(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (DivVF dst src));
+ format %{ "divps $dst,$src\t! div packed2F" %}
+ ins_encode %{
+ __ divps($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (DivVF src1 src2));
+ format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4F(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (DivVF dst src));
+ format %{ "divps $dst,$src\t! div packed4F" %}
+ ins_encode %{
+ __ divps($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (DivVF src1 src2));
+ format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (DivVF src (LoadVector mem)));
+ format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (DivVF src1 src2));
+ format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (DivVF src (LoadVector mem)));
+ format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Doubles vector div
+instruct vdiv2D(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (DivVD dst src));
+ format %{ "divpd $dst,$src\t! div packed2D" %}
+ ins_encode %{
+ __ divpd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (DivVD src1 src2));
+ format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (DivVD src (LoadVector mem)));
+ format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (DivVD src1 src2));
+ format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (DivVD src (LoadVector mem)));
+ format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// ------------------------------ LeftShift -----------------------------------
+
+// Shorts/Chars vector left shift
+instruct vsll2S(vecS dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVS dst shift));
+ format %{ "psllw $dst,$shift\t! left shift packed2S" %}
+ ins_encode %{
+ __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_imm(vecS dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVS dst shift));
+ format %{ "psllw $dst,$shift\t! left shift packed2S" %}
+ ins_encode %{
+ __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S(vecD dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVS dst shift));
+ format %{ "psllw $dst,$shift\t! left shift packed4S" %}
+ ins_encode %{
+ __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_imm(vecD dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVS dst shift));
+ format %{ "psllw $dst,$shift\t! left shift packed4S" %}
+ ins_encode %{
+ __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S(vecX dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVS dst shift));
+ format %{ "psllw $dst,$shift\t! left shift packed8S" %}
+ ins_encode %{
+ __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_imm(vecX dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVS dst shift));
+ format %{ "psllw $dst,$shift\t! left shift packed8S" %}
+ ins_encode %{
+ __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Integers vector left shift
+instruct vsll2I(vecD dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVI dst shift));
+ format %{ "pslld $dst,$shift\t! left shift packed2I" %}
+ ins_encode %{
+ __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2I_imm(vecD dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVI dst shift));
+ format %{ "pslld $dst,$shift\t! left shift packed2I" %}
+ ins_encode %{
+ __ pslld($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVI src shift));
+ format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVI src shift));
+ format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I(vecX dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVI dst shift));
+ format %{ "pslld $dst,$shift\t! left shift packed4I" %}
+ ins_encode %{
+ __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I_imm(vecX dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVI dst shift));
+ format %{ "pslld $dst,$shift\t! left shift packed4I" %}
+ ins_encode %{
+ __ pslld($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVI src shift));
+ format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVI src shift));
+ format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVI src shift));
+ format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVI src shift));
+ format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Longs vector left shift
+instruct vsll2L(vecX dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVL dst shift));
+ format %{ "psllq $dst,$shift\t! left shift packed2L" %}
+ ins_encode %{
+ __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2L_imm(vecX dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVL dst shift));
+ format %{ "psllq $dst,$shift\t! left shift packed2L" %}
+ ins_encode %{
+ __ psllq($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVL src shift));
+ format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVL src shift));
+ format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVL src shift));
+ format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVL src shift));
+ format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// ----------------------- LogicalRightShift -----------------------------------
+
+// Shorts/Chars vector logical right shift produces an incorrect Java
+// result for negative data because Java code converts the short value
+// into an int with sign extension before the shift.
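A minimal standalone C++ sketch of the mismatch described above (illustrative only; the variable names are invented, not HotSpot code): Java widens the short to int before the logical shift, so the bits entering the low 16 come from sign extension, while a packed 16-bit shift such as psrlw shifts in zeros.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int16_t s = -4;                                 // 0xFFFC
      // Java scalar semantics: sign-extend to int, then shift logically.
      // (s >>> 3) operates on 0xFFFFFFFC, so the low 16 bits are 0xFFFF.
      uint32_t java_scalar = (uint32_t)(int32_t)s >> 3;
      // A packed 16-bit logical shift (psrlw) shifts zeros straight into
      // the lane: 0xFFFC >> 3 == 0x1FFF, a different low-16-bit result.
      uint16_t packed_lane = (uint16_t)((uint16_t)s >> 3);
      printf("java low16=0x%04x  packed=0x%04x\n",
             (uint16_t)java_scalar, packed_lane);
      return 0;
    }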
+
+// Integers vector logical right shift
+instruct vsrl2I(vecD dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVI dst shift));
+ format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
+ ins_encode %{
+ __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2I_imm(vecD dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVI dst shift));
+ format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
+ ins_encode %{
+ __ psrld($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVI src shift));
+ format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVI src shift));
+ format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I(vecX dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVI dst shift));
+ format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
+ ins_encode %{
+ __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I_imm(vecX dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVI dst shift));
+ format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
+ ins_encode %{
+ __ psrld($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVI src shift));
+ format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVI src shift));
+ format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVI src shift));
+ format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVI src shift));
+ format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Longs vector logical right shift
+instruct vsrl2L(vecX dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVL dst shift));
+ format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
+ ins_encode %{
+ __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2L_imm(vecX dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVL dst shift));
+ format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
+ ins_encode %{
+ __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVL src shift));
+ format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVL src shift));
+ format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVL src shift));
+ format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVL src shift));
+ format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// ------------------- ArithmeticRightShift -----------------------------------
+
+// Shorts/Chars vector arithmetic right shift
+instruct vsra2S(vecS dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVS dst shift));
+ format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
+ ins_encode %{
+ __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_imm(vecS dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVS dst shift));
+ format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
+ ins_encode %{
+ __ psraw($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVS src shift));
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVS src shift));
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S(vecD dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (RShiftVS dst shift));
+ format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
+ ins_encode %{
+ __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_imm(vecD dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (RShiftVS dst shift));
+ format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
+ ins_encode %{
+ __ psraw($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (RShiftVS src shift));
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (RShiftVS src shift));
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S(vecX dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (RShiftVS dst shift));
+ format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
+ ins_encode %{
+ __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_imm(vecX dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (RShiftVS dst shift));
+ format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
+ ins_encode %{
+ __ psraw($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (RShiftVS src shift));
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (RShiftVS src shift));
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (RShiftVS src shift));
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (RShiftVS src shift));
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Integers vector arithmetic right shift
+instruct vsra2I(vecD dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVI dst shift));
+ format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
+ ins_encode %{
+ __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra2I_imm(vecD dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVI dst shift));
+ format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
+ ins_encode %{
+ __ psrad($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVI src shift));
+ format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVI src shift));
+ format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I(vecX dst, regF shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (RShiftVI dst shift));
+ format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
+ ins_encode %{
+ __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I_imm(vecX dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (RShiftVI dst shift));
+ format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
+ ins_encode %{
+ __ psrad($dst$$XMMRegister, (int)$shift$$constant);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (RShiftVI src shift));
+ format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (RShiftVI src shift));
+ format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (RShiftVI src shift));
+ format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (RShiftVI src shift));
+ format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// There are no vector arithmetic right shift instructions for longs.
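For reference, a scalar C++ sketch of how a 64-bit arithmetic right shift can be synthesized from a logical shift via the XOR/subtract sign-extension identity. This is illustrative only, under the assumption 0 <= n < 64; the matcher here simply declines to vectorize such shifts rather than emulating them.

    #include <cstdint>
    #include <cassert>

    // Emulate a 64-bit arithmetic right shift using only a logical shift:
    // shift zeros in, then re-extend the old sign bit with XOR/subtract.
    int64_t sra64(int64_t x, unsigned n) {
      uint64_t logical = (uint64_t)x >> n;        // zeros shifted in
      uint64_t sign    = 1ULL << (63 - n);        // old sign bit's new position
      return (int64_t)((logical ^ sign) - sign);  // sign-extend from that bit
    }

    int main() {
      assert(sra64(-16, 2) == -4);
      assert(sra64(16, 2) == 4);
      return 0;
    }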
+
+
+// --------------------------------- AND --------------------------------------
+
+instruct vand4B(vecS dst, vecS src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 4);
+ match(Set dst (AndV dst src));
+ format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
+ ins_encode %{
+ __ pand($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
+ match(Set dst (AndV src1 src2));
+ format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vand8B(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8);
+ match(Set dst (AndV dst src));
+ format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
+ ins_encode %{
+ __ pand($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
+ match(Set dst (AndV src1 src2));
+ format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vand16B(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (AndV dst src));
+ format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
+ ins_encode %{
+ __ pand($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (AndV src1 src2));
+ format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (AndV src (LoadVector mem)));
+ format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+ match(Set dst (AndV src1 src2));
+ format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+ match(Set dst (AndV src (LoadVector mem)));
+ format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- OR ---------------------------------------
+
+instruct vor4B(vecS dst, vecS src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 4);
+ match(Set dst (OrV dst src));
+ format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
+ ins_encode %{
+ __ por($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
+ match(Set dst (OrV src1 src2));
+ format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vor8B(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8);
+ match(Set dst (OrV dst src));
+ format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
+ ins_encode %{
+ __ por($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
+ match(Set dst (OrV src1 src2));
+ format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vor16B(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (OrV dst src));
+ format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
+ ins_encode %{
+ __ por($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (OrV src1 src2));
+ format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (OrV src (LoadVector mem)));
+ format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+ match(Set dst (OrV src1 src2));
+ format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+ match(Set dst (OrV src (LoadVector mem)));
+ format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- XOR --------------------------------------
+
+instruct vxor4B(vecS dst, vecS src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 4);
+ match(Set dst (XorV dst src));
+ format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
+ match(Set dst (XorV src1 src2));
+ format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vxor8B(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 8);
+ match(Set dst (XorV dst src));
+ format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
+ match(Set dst (XorV src1 src2));
+ format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vxor16B(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (XorV dst src));
+ format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (XorV src1 src2));
+ format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (XorV src (LoadVector mem)));
+ format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
+ ins_encode %{
+ bool vector256 = false;
+ __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+ match(Set dst (XorV src1 src2));
+ format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+ match(Set dst (XorV src (LoadVector mem)));
+ format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
diff --git a/src/cpu/x86/vm/x86_32.ad b/src/cpu/x86/vm/x86_32.ad
index b46d3d688..02e5b3224 100644
--- a/src/cpu/x86/vm/x86_32.ad
+++ b/src/cpu/x86/vm/x86_32.ad
@@ -1367,22 +1367,6 @@ int emit_deopt_handler(CodeBuffer& cbuf) {
return offset;
}
-
-const bool Matcher::match_rule_supported(int opcode) {
- if (!has_match_rule(opcode))
- return false;
-
- switch (opcode) {
- case Op_PopCountI:
- case Op_PopCountL:
- if (!UsePopCountInstruction)
- return false;
- break;
- }
-
- return true; // Per default match rules are supported.
-}
-
int Matcher::regnum_to_fpu_offset(int regnum) {
return regnum - 32; // The FP registers are in the second chunk
}
diff --git a/src/cpu/x86/vm/x86_64.ad b/src/cpu/x86/vm/x86_64.ad
index 68c6d3540..96b71b615 100644
--- a/src/cpu/x86/vm/x86_64.ad
+++ b/src/cpu/x86/vm/x86_64.ad
@@ -1513,22 +1513,6 @@ int emit_deopt_handler(CodeBuffer& cbuf)
return offset;
}
-
-const bool Matcher::match_rule_supported(int opcode) {
- if (!has_match_rule(opcode))
- return false;
-
- switch (opcode) {
- case Op_PopCountI:
- case Op_PopCountL:
- if (!UsePopCountInstruction)
- return false;
- break;
- }
-
- return true; // Per default match rules are supported.
-}
-
int Matcher::regnum_to_fpu_offset(int regnum)
{
return regnum - 32; // The FP registers are in the second chunk
@@ -6427,6 +6411,31 @@ instruct castP2X(rRegL dst, rRegP src)
ins_pipe(ialu_reg_reg); // XXX
%}
+// Convert oop into int for vector alignment masking
+instruct convP2I(rRegI dst, rRegP src)
+%{
+ match(Set dst (ConvL2I (CastP2X src)));
+
+ format %{ "movl $dst, $src\t# ptr -> int" %}
+ ins_encode %{
+ __ movl($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg_reg); // XXX
+%}
+
+// Convert compressed oop into int for vector alignment masking
+// in the case of 32-bit oops (heap < 4GB).
+instruct convN2I(rRegI dst, rRegN src)
+%{
+ predicate(Universe::narrow_oop_shift() == 0);
+ match(Set dst (ConvL2I (CastP2X (DecodeN src))));
+
+ format %{ "movl $dst, $src\t# compressed ptr -> int" %}
+ ins_encode %{
+ __ movl($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg_reg); // XXX
+%}
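A sketch of why these forms arise (hypothetical names, not HotSpot code): the vectorizer masks the low address bits to compute how many scalar pre-loop iterations are needed before an aligned main loop, and only the low 32 bits of the pointer participate in that mask. With narrow_oop_shift() == 0 a compressed oop shares those low bits with the raw address, which is why the DecodeN can be elided.

    #include <cstdint>

    // Mask the low bits of an array address to find its misalignment
    // relative to the vector width; matches the ConvL2I(CastP2X src) shape.
    int alignment_offset(const void* base, int vector_width_bytes) {
      int addr_lo = (int)(intptr_t)base;          // 32-bit view of the pointer
      return addr_lo & (vector_width_bytes - 1);  // alignment mask
    }

    int main() {
      alignas(16) static int a[8];
      return alignment_offset(a, 16);             // 0 for an aligned array
    }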
// Convert oop pointer into compressed form
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
@@ -10049,11 +10058,10 @@ instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
ins_pipe( pipe_slow );
%}
-// The next instructions have long latency and use Int unit. Set high cost.
instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
match(Set dst (MoveI2F src));
effect(DEF dst, USE src);
- ins_cost(300);
+ ins_cost(100);
format %{ "movd $dst,$src\t# MoveI2F" %}
ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
@@ -10064,7 +10072,7 @@ instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
match(Set dst (MoveL2D src));
effect(DEF dst, USE src);
- ins_cost(300);
+ ins_cost(100);
format %{ "movd $dst,$src\t# MoveL2D" %}
ins_encode %{
__ movdq($dst$$XMMRegister, $src$$Register);