aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: iveresov <none@none> 2014-03-12 11:24:26 -0700
committer: iveresov <none@none> 2014-03-12 11:24:26 -0700
commit: 1fb028c71167db3f4b8a9af4ae6c1fc4fc6ecc0a (patch)
tree: 34debf4cffbeb55dbb3bd60c67a3e17cba9e87ba /src
parent: 2bbdf1d4cc262990989e64ace354e8428c81a002 (diff)
8031321: Support Intel bit manipulation instructions
Summary: Add support for BMI1 instructions
Reviewed-by: kvn, roland
Diffstat (limited to 'src')
-rw-r--r-- src/cpu/x86/vm/assembler_x86.cpp | 138
-rw-r--r-- src/cpu/x86/vm/assembler_x86.hpp | 49
-rw-r--r-- src/cpu/x86/vm/globals_x86.hpp | 6
-rw-r--r-- src/cpu/x86/vm/vm_version_x86.cpp | 42
-rw-r--r-- src/cpu/x86/vm/vm_version_x86.hpp | 19
-rw-r--r-- src/cpu/x86/vm/x86_32.ad | 358
-rw-r--r-- src/cpu/x86/vm/x86_64.ad | 258
-rw-r--r-- src/share/vm/adlc/formssel.cpp | 12
-rw-r--r-- src/share/vm/opto/matcher.cpp | 107
-rw-r--r-- src/share/vm/opto/matcher.hpp | 3
10 files changed, 979 insertions, 13 deletions
diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp
index 49cc9ed65..d305ac185 100644
--- a/src/cpu/x86/vm/assembler_x86.cpp
+++ b/src/cpu/x86/vm/assembler_x86.cpp
@@ -1089,6 +1089,21 @@ void Assembler::andl(Register dst, Register src) {
emit_arith(0x23, 0xC0, dst, src);
}
+// ANDN, 32-bit register form. The x86 .ad rules match it from
+// (AndI (XorI src1 minus_1) src2), i.e. dst = ~src1 & src2 when minus_1 is -1.
+void Assembler::andnl(Register dst, Register src1, Register src2) {
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // VEX prefix in the 0F 38 opcode map with W = 0; the returned value is OR-ed into the last byte.
+ int encode = vex_prefix_0F38_and_encode(dst, src1, src2);
+ emit_int8((unsigned char)0xF2); // opcode byte
+ emit_int8((unsigned char)(0xC0 | encode)); // register-direct operand byte
+}
+
+// ANDN, 32-bit form whose second source (src2) is a memory operand.
+void Assembler::andnl(Register dst, Register src1, Address src2) {
+ InstructionMark im(this);
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // VEX prefix in the 0F 38 opcode map with W = 0, with src1 passed as nds.
+ vex_prefix_0F38(dst, src1, src2);
+ emit_int8((unsigned char)0xF2); // opcode byte
+ emit_operand(dst, src2); // operand bytes for dst and the memory address
+}
+
void Assembler::bsfl(Register dst, Register src) {
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_int8(0x0F);
@@ -1110,6 +1125,51 @@ void Assembler::bswapl(Register reg) { // bswap
emit_int8((unsigned char)(0xC8 | encode));
}
+// BLSI, 32-bit register form. The x86 .ad rules match it from
+// (AndI (SubI imm_zero src) src), i.e. dst = -src & src when imm_zero is 0.
+void Assembler::blsil(Register dst, Register src) {
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // rbx is not a data operand: it fills the helper's first register slot while the real
+ // destination is passed as nds. NOTE(review): presumably rbx's encoding is the /3
+ // opcode extension of the 0F 38 F3 group - confirm against the Intel SDM.
+ int encode = vex_prefix_0F38_and_encode(rbx, dst, src);
+ emit_int8((unsigned char)0xF3); // opcode byte shared with blsmskl and blsrl
+ emit_int8((unsigned char)(0xC0 | encode)); // register-direct operand byte
+}
+
+// BLSI, 32-bit form with the source read from memory.
+void Assembler::blsil(Register dst, Address src) {
+ InstructionMark im(this);
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // rbx stands in the register slot (presumably as the /3 opcode extension - confirm
+ // against the Intel SDM); the real destination is passed as nds.
+ vex_prefix_0F38(rbx, dst, src);
+ emit_int8((unsigned char)0xF3); // opcode byte
+ emit_operand(rbx, src); // same placeholder register is used for the operand bytes
+}
+
+// BLSMSK, 32-bit register form. The x86 .ad rules match it from
+// (XorI (AddI src minus_1) src), i.e. dst = (src - 1) ^ src when minus_1 is -1.
+void Assembler::blsmskl(Register dst, Register src) {
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // rdx is not a data operand: it fills the helper's first register slot while the real
+ // destination is passed as nds. NOTE(review): presumably rdx's encoding is the /2
+ // opcode extension of the 0F 38 F3 group - confirm against the Intel SDM.
+ int encode = vex_prefix_0F38_and_encode(rdx, dst, src);
+ emit_int8((unsigned char)0xF3); // opcode byte shared with blsil and blsrl
+ emit_int8((unsigned char)(0xC0 | encode)); // register-direct operand byte
+}
+
+// BLSMSK, 32-bit form with the source read from memory.
+void Assembler::blsmskl(Register dst, Address src) {
+ InstructionMark im(this);
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // rdx stands in the register slot (presumably as the /2 opcode extension - confirm
+ // against the Intel SDM); the real destination is passed as nds.
+ vex_prefix_0F38(rdx, dst, src);
+ emit_int8((unsigned char)0xF3); // opcode byte
+ emit_operand(rdx, src); // same placeholder register is used for the operand bytes
+}
+
+// BLSR, 32-bit register form. The x86 .ad rules match it from
+// (AndI (AddI src minus_1) src), i.e. dst = (src - 1) & src when minus_1 is -1.
+void Assembler::blsrl(Register dst, Register src) {
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // rcx is not a data operand: it fills the helper's first register slot while the real
+ // destination is passed as nds. NOTE(review): presumably rcx's encoding is the /1
+ // opcode extension of the 0F 38 F3 group - confirm against the Intel SDM.
+ int encode = vex_prefix_0F38_and_encode(rcx, dst, src);
+ emit_int8((unsigned char)0xF3); // opcode byte shared with blsil and blsmskl
+ emit_int8((unsigned char)(0xC0 | encode)); // register-direct operand byte
+}
+
+// BLSR, 32-bit form with the source read from memory.
+void Assembler::blsrl(Register dst, Address src) {
+ InstructionMark im(this);
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // rcx stands in the register slot (presumably as the /1 opcode extension - confirm
+ // against the Intel SDM); the real destination is passed as nds.
+ vex_prefix_0F38(rcx, dst, src);
+ emit_int8((unsigned char)0xF3); // opcode byte
+ emit_operand(rcx, src); // same placeholder register is used for the operand bytes
+}
+
void Assembler::call(Label& L, relocInfo::relocType rtype) {
// suspect disp32 is always good
int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
@@ -2878,6 +2938,24 @@ void Assembler::testl(Register dst, Address src) {
emit_operand(dst, src);
}
+// TZCNT, 32-bit register form: emits the F3 byte, whatever prefix
+// prefix_and_encode() produces, then 0F BC and the register-direct operand byte.
+void Assembler::tzcntl(Register dst, Register src) {
+ assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
+ emit_int8((unsigned char)0xF3);
+ int encode = prefix_and_encode(dst->encoding(), src->encoding());
+ emit_int8(0x0F);
+ emit_int8((unsigned char)0xBC);
+ // Cast the whole OR expression, not just the constant, as tzcntq and the other emitters do.
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// TZCNT, 64-bit register form: same byte sequence as tzcntl, but the prefix comes
+// from prefixq_and_encode() instead of prefix_and_encode().
+void Assembler::tzcntq(Register dst, Register src) {
+ assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
+ emit_int8((unsigned char)0xF3); // emitted before the prefix generated below
+ int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+ emit_int8(0x0F);
+ emit_int8((unsigned char)0xBC);
+ emit_int8((unsigned char)(0xC0 | encode)); // register-direct operand byte
+}
+
void Assembler::ucomisd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
@@ -4837,6 +4915,21 @@ void Assembler::andq(Register dst, Register src) {
emit_arith(0x23, 0xC0, dst, src);
}
+// ANDN, 64-bit register form. The x86_64 .ad rules match it from
+// (AndL (XorL src1 minus_1) src2), i.e. dst = ~src1 & src2 when minus_1 is -1.
+void Assembler::andnq(Register dst, Register src1, Register src2) {
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // Same encoding as andnl except that the _q helper sets VEX.W.
+ int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2);
+ emit_int8((unsigned char)0xF2); // opcode byte
+ emit_int8((unsigned char)(0xC0 | encode)); // register-direct operand byte
+}
+
+// ANDN, 64-bit form whose second source (src2) is a memory operand.
+void Assembler::andnq(Register dst, Register src1, Address src2) {
+ InstructionMark im(this);
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // Same encoding as the 32-bit memory form except that the _q helper sets VEX.W.
+ vex_prefix_0F38_q(dst, src1, src2);
+ emit_int8((unsigned char)0xF2); // opcode byte
+ emit_operand(dst, src2); // operand bytes for dst and the memory address
+}
+
void Assembler::bsfq(Register dst, Register src) {
int encode = prefixq_and_encode(dst->encoding(), src->encoding());
emit_int8(0x0F);
@@ -4858,6 +4951,51 @@ void Assembler::bswapq(Register reg) {
emit_int8((unsigned char)(0xC8 | encode));
}
+// BLSI, 64-bit register form. The x86_64 .ad rules match it from
+// (AndL (SubL imm_zero src) src), i.e. dst = -src & src when imm_zero is 0.
+void Assembler::blsiq(Register dst, Register src) {
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // rbx fills the helper's first register slot and the real destination is passed as nds.
+ // NOTE(review): presumably rbx's encoding is the /3 opcode extension - confirm in the Intel SDM.
+ int encode = vex_prefix_0F38_and_encode_q(rbx, dst, src);
+ emit_int8((unsigned char)0xF3); // opcode byte
+ emit_int8((unsigned char)(0xC0 | encode)); // register-direct operand byte
+}
+
+// BLSI, 64-bit form with the source read from memory.
+void Assembler::blsiq(Register dst, Address src) {
+ InstructionMark im(this);
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // rbx stands in the register slot (presumably the /3 opcode extension - confirm in the
+ // Intel SDM); the real destination is passed as nds and the _q helper sets VEX.W.
+ vex_prefix_0F38_q(rbx, dst, src);
+ emit_int8((unsigned char)0xF3); // opcode byte
+ emit_operand(rbx, src); // same placeholder register is used for the operand bytes
+}
+
+// BLSMSK, 64-bit register form. The x86_64 .ad rules match it from
+// (XorL (AddL src minus_1) src), i.e. dst = (src - 1) ^ src when minus_1 is -1.
+void Assembler::blsmskq(Register dst, Register src) {
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // rdx fills the helper's first register slot and the real destination is passed as nds.
+ // NOTE(review): presumably rdx's encoding is the /2 opcode extension - confirm in the Intel SDM.
+ int encode = vex_prefix_0F38_and_encode_q(rdx, dst, src);
+ emit_int8((unsigned char)0xF3); // opcode byte
+ emit_int8((unsigned char)(0xC0 | encode)); // register-direct operand byte
+}
+
+// BLSMSK, 64-bit form with the source read from memory.
+void Assembler::blsmskq(Register dst, Address src) {
+ InstructionMark im(this);
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // rdx stands in the register slot (presumably the /2 opcode extension - confirm in the
+ // Intel SDM); the real destination is passed as nds and the _q helper sets VEX.W.
+ vex_prefix_0F38_q(rdx, dst, src);
+ emit_int8((unsigned char)0xF3); // opcode byte
+ emit_operand(rdx, src); // same placeholder register is used for the operand bytes
+}
+
+// BLSR, 64-bit register form. The x86_64 .ad rules match it from
+// (AndL (AddL src minus_1) src), i.e. dst = (src - 1) & src when minus_1 is -1.
+void Assembler::blsrq(Register dst, Register src) {
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // rcx fills the helper's first register slot and the real destination is passed as nds.
+ // NOTE(review): presumably rcx's encoding is the /1 opcode extension - confirm in the Intel SDM.
+ int encode = vex_prefix_0F38_and_encode_q(rcx, dst, src);
+ emit_int8((unsigned char)0xF3); // opcode byte
+ emit_int8((unsigned char)(0xC0 | encode)); // register-direct operand byte
+}
+
+// BLSR, 64-bit form with the source read from memory.
+void Assembler::blsrq(Register dst, Address src) {
+ InstructionMark im(this);
+ assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+ // rcx stands in the register slot (presumably the /1 opcode extension - confirm in the
+ // Intel SDM); the real destination is passed as nds and the _q helper sets VEX.W.
+ vex_prefix_0F38_q(rcx, dst, src);
+ emit_int8((unsigned char)0xF3); // opcode byte
+ emit_operand(rcx, src); // same placeholder register is used for the operand bytes
+}
+
void Assembler::cdqq() {
prefix(REX_W);
emit_int8((unsigned char)0x99);
diff --git a/src/cpu/x86/vm/assembler_x86.hpp b/src/cpu/x86/vm/assembler_x86.hpp
index 1ad66bd6a..95ca231ce 100644
--- a/src/cpu/x86/vm/assembler_x86.hpp
+++ b/src/cpu/x86/vm/assembler_x86.hpp
@@ -590,10 +590,35 @@ private:
vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
}
+ // Emits a VEX prefix that selects the 0F 38 opcode map for a general-register
+ // instruction with a memory source: no SIMD prefix, VEX.W clear, 128-bit length.
+ void vex_prefix_0F38(Register dst, Register nds, Address src) {
+ vex_prefix(src, nds->encoding(), dst->encoding(),
+ VEX_SIMD_NONE, VEX_OPCODE_0F_38,
+ /* vex_w */ false, /* vector256 */ false);
+ }
+
+ // 64-bit variant of the 0F 38 memory-source prefix helper: identical arguments
+ // except that VEX.W is set.
+ void vex_prefix_0F38_q(Register dst, Register nds, Address src) {
+ vex_prefix(src, nds->encoding(), dst->encoding(),
+ VEX_SIMD_NONE, VEX_OPCODE_0F_38,
+ /* vex_w */ true, /* vector256 */ false);
+ }
int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
VexSimdPrefix pre, VexOpcode opc,
bool vex_w, bool vector256);
+ // Register-register form: emits the 0F 38 VEX prefix (no SIMD prefix, VEX.W clear,
+ // 128-bit length) and returns whatever vex_prefix_and_encode() returns for the
+ // three register encodings.
+ int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) {
+ return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
+ VEX_SIMD_NONE, VEX_OPCODE_0F_38,
+ /* vex_w */ false, /* vector256 */ false);
+ }
+ // 64-bit variant of the register-register 0F 38 prefix helper: identical arguments
+ // except that VEX.W is set.
+ int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) {
+ return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
+ VEX_SIMD_NONE, VEX_OPCODE_0F_38,
+ /* vex_w */ true, /* vector256 */ false);
+ }
int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
VexSimdPrefix pre, bool vector256 = false,
VexOpcode opc = VEX_OPCODE_0F) {
@@ -897,6 +922,27 @@ private:
void andq(Register dst, Address src);
void andq(Register dst, Register src);
+ // BMI instructions
+ // The 'l' suffix is the 32-bit form, 'q' the 64-bit form (VEX.W set by the _q helpers).
+ // andn: the .ad rules match it from (And (Xor src1 minus_1) src2).
+ void andnl(Register dst, Register src1, Register src2);
+ void andnl(Register dst, Register src1, Address src2);
+ void andnq(Register dst, Register src1, Register src2);
+ void andnq(Register dst, Register src1, Address src2);
+
+ // blsi: the .ad rules match it from (And (Sub imm_zero src) src).
+ void blsil(Register dst, Register src);
+ void blsil(Register dst, Address src);
+ void blsiq(Register dst, Register src);
+ void blsiq(Register dst, Address src);
+
+ // blsmsk: the .ad rules match it from (Xor (Add src minus_1) src).
+ void blsmskl(Register dst, Register src);
+ void blsmskl(Register dst, Address src);
+ void blsmskq(Register dst, Register src);
+ void blsmskq(Register dst, Address src);
+
+ // blsr: the .ad rules match it from (And (Add src minus_1) src).
+ void blsrl(Register dst, Register src);
+ void blsrl(Register dst, Address src);
+ void blsrq(Register dst, Register src);
+ void blsrq(Register dst, Address src);
+
+
void bsfl(Register dst, Register src);
void bsrl(Register dst, Register src);
@@ -1574,6 +1620,9 @@ private:
void testq(Register dst, int32_t imm32);
void testq(Register dst, Register src);
+ // BMI - count trailing zeros
+ void tzcntl(Register dst, Register src);
+ void tzcntq(Register dst, Register src);
// Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
void ucomisd(XMMRegister dst, Address src);
diff --git a/src/cpu/x86/vm/globals_x86.hpp b/src/cpu/x86/vm/globals_x86.hpp
index b194ffbcf..1bc5d1e68 100644
--- a/src/cpu/x86/vm/globals_x86.hpp
+++ b/src/cpu/x86/vm/globals_x86.hpp
@@ -134,5 +134,11 @@ define_pd_global(uintx, TypeProfileLevel, 111);
\
product(bool, UseCountLeadingZerosInstruction, false, \
"Use count leading zeros instruction") \
+ \
+ product(bool, UseCountTrailingZerosInstruction, false, \
+ "Use count trailing zeros instruction") \
+ \
+ product(bool, UseBMI1Instructions, false, \
+ "Use BMI instructions")
#endif // CPU_X86_VM_GLOBALS_X86_HPP
diff --git a/src/cpu/x86/vm/vm_version_x86.cpp b/src/cpu/x86/vm/vm_version_x86.cpp
index de38b4a3a..b949e4e29 100644
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -429,7 +429,7 @@ void VM_Version::get_processor_features() {
}
char buf[256];
- jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping,
(supports_cmov() ? ", cmov" : ""),
@@ -455,7 +455,9 @@ void VM_Version::get_processor_features() {
(supports_ht() ? ", ht": ""),
(supports_tsc() ? ", tsc": ""),
(supports_tscinv_bit() ? ", tscinvbit": ""),
- (supports_tscinv() ? ", tscinv": ""));
+ (supports_tscinv() ? ", tscinv": ""),
+ (supports_bmi1() ? ", bmi1" : ""),
+ (supports_bmi2() ? ", bmi2" : ""));
_features_str = strdup(buf);
// UseSSE is set to the smaller of what hardware supports and what
@@ -600,13 +602,6 @@ void VM_Version::get_processor_features() {
}
}
- // Use count leading zeros count instruction if available.
- if (supports_lzcnt()) {
- if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
- UseCountLeadingZerosInstruction = true;
- }
- }
-
// some defaults for AMD family 15h
if ( cpu_family() == 0x15 ) {
// On family 15h processors default is no sw prefetch
@@ -692,6 +687,35 @@ void VM_Version::get_processor_features() {
}
#endif // COMPILER2
+ // Use count leading zeros count instruction if available.
+ if (supports_lzcnt()) {
+ if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
+ UseCountLeadingZerosInstruction = true;
+ }
+ } else if (UseCountLeadingZerosInstruction) {
+ warning("lzcnt instruction is not available on this CPU");
+ FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
+ }
+
+ if (supports_bmi1()) {
+ if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
+ UseBMI1Instructions = true;
+ }
+ } else if (UseBMI1Instructions) {
+ warning("BMI1 instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseBMI1Instructions, false);
+ }
+
+ // Use count trailing zeros instruction if available
+ if (supports_bmi1()) {
+ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
+ UseCountTrailingZerosInstruction = UseBMI1Instructions;
+ }
+ } else if (UseCountTrailingZerosInstruction) {
+ warning("tzcnt instruction is not available on this CPU");
+ FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
+ }
+
// Use population count instruction if available.
if (supports_popcnt()) {
if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
diff --git a/src/cpu/x86/vm/vm_version_x86.hpp b/src/cpu/x86/vm/vm_version_x86.hpp
index 86e9b662d..07d644518 100644
--- a/src/cpu/x86/vm/vm_version_x86.hpp
+++ b/src/cpu/x86/vm/vm_version_x86.hpp
@@ -141,7 +141,8 @@ public:
struct {
uint32_t LahfSahf : 1,
CmpLegacy : 1,
- : 4,
+ : 3,
+ lzcnt_intel : 1,
lzcnt : 1,
sse4a : 1,
misalignsse : 1,
@@ -251,7 +252,9 @@ protected:
CPU_AVX2 = (1 << 18),
CPU_AES = (1 << 19),
CPU_ERMS = (1 << 20), // enhanced 'rep movsb/stosb' instructions
- CPU_CLMUL = (1 << 21) // carryless multiply for CRC
+ CPU_CLMUL = (1 << 21), // carryless multiply for CRC
+ CPU_BMI1 = (1 << 22),
+ CPU_BMI2 = (1 << 23)
} cpuFeatureFlags;
enum {
@@ -423,6 +426,8 @@ protected:
if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
result |= CPU_AVX2;
}
+ if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
+ result |= CPU_BMI1;
if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
result |= CPU_TSC;
if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
@@ -444,6 +449,13 @@ protected:
if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
result |= CPU_SSE4A;
}
+ // Intel features.
+ if(is_intel()) {
+ if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
+ result |= CPU_BMI2;
+ if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
+ result |= CPU_LZCNT;
+ }
return result;
}
@@ -560,7 +572,8 @@ public:
static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; }
static bool supports_erms() { return (_cpuFeatures & CPU_ERMS) != 0; }
static bool supports_clmul() { return (_cpuFeatures & CPU_CLMUL) != 0; }
-
+ // True when the CPU_BMI1 / CPU_BMI2 bit is set in _cpuFeatures.
+ static bool supports_bmi1() { return (_cpuFeatures & CPU_BMI1) != 0; }
+ static bool supports_bmi2() { return (_cpuFeatures & CPU_BMI2) != 0; }
// Intel features
static bool is_intel_family_core() { return is_intel() &&
extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
diff --git a/src/cpu/x86/vm/x86_32.ad b/src/cpu/x86/vm/x86_32.ad
index f4a68abc7..f0bd571a8 100644
--- a/src/cpu/x86/vm/x86_32.ad
+++ b/src/cpu/x86/vm/x86_32.ad
@@ -5155,6 +5155,19 @@ instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
%}
+// Int count-trailing-zeros as a single TZCNT; selected only when
+// UseCountTrailingZerosInstruction is set (the _bsf rule covers the other case).
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
+ predicate(UseCountTrailingZerosInstruction);
+ match(Set dst (CountTrailingZerosI src));
+ effect(KILL cr);
+
+ format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
+ ins_encode %{
+ __ tzcntl($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
+ predicate(!UseCountTrailingZerosInstruction);
match(Set dst (CountTrailingZerosI src));
effect(KILL cr);
@@ -5174,6 +5187,30 @@ instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
%}
+// Long count-trailing-zeros on 32-bit x86, where the source is a register pair:
+// count the low word and, unless the branch is taken, count the high word and add 32.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
+ predicate(UseCountTrailingZerosInstruction);
+ match(Set dst (CountTrailingZerosL src));
+ effect(TEMP dst, KILL cr);
+
+ format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
+ "JNC done\n\t"
+ "TZCNT $dst, $src.hi\n\t"
+ "ADD $dst, 32\n"
+ "done:" %}
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+ Label done;
+ __ tzcntl(Rdst, Rsrc);
+ // Carry clear ends the sequence with the low-word count.
+ // NOTE(review): presumably TZCNT sets CF only for a zero source - confirm in the Intel SDM.
+ __ jccb(Assembler::carryClear, done);
+ __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
+ __ addl(Rdst, BitsPerInt);
+ __ bind(done);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
+ predicate(!UseCountTrailingZerosInstruction);
match(Set dst (CountTrailingZerosL src));
effect(TEMP dst, KILL cr);
@@ -8017,6 +8054,123 @@ instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
ins_pipe( ialu_mem_imm );
%}
+// BMI1 instructions
+// Fuses the int pattern (src1 ^ minus_1) & src2 into one ANDN when UseBMI1Instructions is on.
+instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
+ match(Set dst (AndI (XorI src1 minus_1) src2));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ format %{ "ANDNL $dst, $src1, $src2" %}
+
+ ins_encode %{
+ __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Same ANDN fusion as the register rule, with the second operand folded in as a LoadI.
+instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
+ match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ ins_cost(125);
+ format %{ "ANDNL $dst, $src1, $src2" %}
+
+ ins_encode %{
+ __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
+ %}
+ ins_pipe(ialu_reg_mem);
+%}
+
+// Fuses the int pattern (imm_zero - src) & src into one BLSI when UseBMI1Instructions is on.
+instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
+ match(Set dst (AndI (SubI imm_zero src) src));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ format %{ "BLSIL $dst, $src" %}
+
+ ins_encode %{
+ __ blsil($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// BLSI with a memory source: the match tree names (LoadI src) twice, so the same
+// load feeds both the SubI and the AndI.
+instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
+ match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ ins_cost(125);
+ format %{ "BLSIL $dst, $src" %}
+
+ ins_encode %{
+ __ blsil($dst$$Register, $src$$Address);
+ %}
+ ins_pipe(ialu_reg_mem);
+%}
+
+// Fuses the int pattern (src + minus_1) ^ src into one BLSMSK when UseBMI1Instructions is on.
+instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
+%{
+ match(Set dst (XorI (AddI src minus_1) src));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ format %{ "BLSMSKL $dst, $src" %}
+
+ ins_encode %{
+ __ blsmskl($dst$$Register, $src$$Register);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// BLSMSK with a memory source: the match tree names (LoadI src) twice, so the same
+// load feeds both the AddI and the XorI.
+instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
+%{
+ match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ ins_cost(125);
+ format %{ "BLSMSKL $dst, $src" %}
+
+ ins_encode %{
+ __ blsmskl($dst$$Register, $src$$Address);
+ %}
+
+ ins_pipe(ialu_reg_mem);
+%}
+
+// Fuses the int pattern (src + minus_1) & src into one BLSR when UseBMI1Instructions is on.
+instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
+%{
+ match(Set dst (AndI (AddI src minus_1) src) );
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ format %{ "BLSRL $dst, $src" %}
+
+ ins_encode %{
+ __ blsrl($dst$$Register, $src$$Register);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// BLSR with a memory source: the match tree names (LoadI src) twice, so the same
+// load feeds both the AddI and the AndI.
+instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
+%{
+ match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ ins_cost(125);
+ format %{ "BLSRL $dst, $src" %}
+
+ ins_encode %{
+ __ blsrl($dst$$Register, $src$$Address);
+ %}
+
+ ins_pipe(ialu_reg_mem);
+%}
+
// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
@@ -8639,6 +8793,210 @@ instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
ins_pipe( ialu_reg_long_mem );
%}
+// BMI1 instructions
+// Long ANDN on 32-bit x86: the operation is bitwise, so it is applied independently
+// to the low and high halves of the register pairs.
+instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
+ match(Set dst (AndL (XorL src1 minus_1) src2));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr, TEMP dst);
+
+ format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
+ "ANDNL $dst.hi, $src1.hi, $src2.hi"
+ %}
+
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ Register Rsrc2 = $src2$$Register;
+ __ andnl(Rdst, Rsrc1, Rsrc2);
+ __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
+ %}
+ ins_pipe(ialu_reg_reg_long);
+%}
+
+// Long ANDN on 32-bit x86 with a memory second operand: the low half uses the
+// operand's address and the high half uses the same address with disp + 4.
+instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
+ match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr, TEMP dst);
+
+ ins_cost(125);
+ format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
+ "ANDNL $dst.hi, $src1.hi, $src2+4"
+ %}
+
+ ins_encode %{
+ Register Rdst = $dst$$Register;
+ Register Rsrc1 = $src1$$Register;
+ // Address of the upper 32 bits of the long in memory.
+ Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
+
+ __ andnl(Rdst, Rsrc1, $src2$$Address);
+ __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
+ %}
+ ins_pipe(ialu_reg_mem);
+%}
+
+// Long BLSI on 32-bit x86: the high half is pre-cleared and only computed from
+// src.hi when the branch after the low-half BLSI is not taken.
+instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
+ match(Set dst (AndL (SubL imm_zero src) src));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr, TEMP dst);
+
+ format %{ "MOVL $dst.hi, 0\n\t"
+ "BLSIL $dst.lo, $src.lo\n\t"
+ "JNZ done\n\t"
+ "BLSIL $dst.hi, $src.hi\n"
+ "done:"
+ %}
+
+ ins_encode %{
+ Label done;
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+ __ movl(HIGH_FROM_LOW(Rdst), 0);
+ __ blsil(Rdst, Rsrc);
+ // NOTE(review): presumably ZF reflects the low-half result, so a non-zero low half
+ // means the high half stays 0 - confirm the flag behaviour in the Intel SDM.
+ __ jccb(Assembler::notZero, done);
+ __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
+ __ bind(done);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Long BLSI on 32-bit x86 with a memory source: same low/high sequence as the
+// register rule, reading the high half from the operand's address with disp + 4.
+instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
+ match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr, TEMP dst);
+
+ ins_cost(125);
+ format %{ "MOVL $dst.hi, 0\n\t"
+ "BLSIL $dst.lo, $src\n\t"
+ "JNZ done\n\t"
+ "BLSIL $dst.hi, $src+4\n"
+ "done:"
+ %}
+
+ ins_encode %{
+ Label done;
+ Register Rdst = $dst$$Register;
+ // Address of the upper 32 bits of the long in memory.
+ Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
+
+ __ movl(HIGH_FROM_LOW(Rdst), 0);
+ __ blsil(Rdst, $src$$Address);
+ // The high half is computed only when this branch is not taken.
+ __ jccb(Assembler::notZero, done);
+ __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
+ __ bind(done);
+ %}
+ ins_pipe(ialu_reg_mem);
+%}
+
+// Long BLSMSK on 32-bit x86: the high half is pre-cleared and only computed from
+// src.hi when the carry flag is set after the low-half BLSMSK.
+instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
+%{
+ match(Set dst (XorL (AddL src minus_1) src));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr, TEMP dst);
+
+ format %{ "MOVL $dst.hi, 0\n\t"
+ "BLSMSKL $dst.lo, $src.lo\n\t"
+ "JNC done\n\t"
+ "BLSMSKL $dst.hi, $src.hi\n"
+ "done:"
+ %}
+
+ ins_encode %{
+ Label done;
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+ __ movl(HIGH_FROM_LOW(Rdst), 0);
+ __ blsmskl(Rdst, Rsrc);
+ // NOTE(review): presumably CF is set only when the low half is zero, i.e. when the
+ // borrow reaches the high half - confirm the flag behaviour in the Intel SDM.
+ __ jccb(Assembler::carryClear, done);
+ __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
+ __ bind(done);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// Long BLSMSK on 32-bit x86 with a memory source: same low/high sequence as the
+// register rule, reading the high half from the operand's address with disp + 4.
+instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
+%{
+ match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr, TEMP dst);
+
+ ins_cost(125);
+ format %{ "MOVL $dst.hi, 0\n\t"
+ "BLSMSKL $dst.lo, $src\n\t"
+ "JNC done\n\t"
+ "BLSMSKL $dst.hi, $src+4\n"
+ "done:"
+ %}
+
+ ins_encode %{
+ Label done;
+ Register Rdst = $dst$$Register;
+ // Address of the upper 32 bits of the long in memory.
+ Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
+
+ __ movl(HIGH_FROM_LOW(Rdst), 0);
+ __ blsmskl(Rdst, $src$$Address);
+ // The high half is computed only when the carry flag is set here.
+ __ jccb(Assembler::carryClear, done);
+ __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
+ __ bind(done);
+ %}
+
+ ins_pipe(ialu_reg_mem);
+%}
+
+// Long BLSR on 32-bit x86: the high half starts as a copy of src.hi and is only
+// replaced by BLSR(src.hi) when the carry flag is set after the low-half BLSR.
+instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
+%{
+ match(Set dst (AndL (AddL src minus_1) src) );
+ predicate(UseBMI1Instructions);
+ effect(KILL cr, TEMP dst);
+
+ format %{ "MOVL $dst.hi, $src.hi\n\t"
+ "BLSRL $dst.lo, $src.lo\n\t"
+ "JNC done\n\t"
+ "BLSRL $dst.hi, $src.hi\n"
+ "done:"
+ %}
+
+ ins_encode %{
+ Label done;
+ Register Rdst = $dst$$Register;
+ Register Rsrc = $src$$Register;
+ __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
+ __ blsrl(Rdst, Rsrc);
+ // NOTE(review): presumably CF is set only when the low half is zero, i.e. when the
+ // lowest set bit lives in the high half - confirm the flag behaviour in the Intel SDM.
+ __ jccb(Assembler::carryClear, done);
+ __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
+ __ bind(done);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// Long BLSR on 32-bit x86 with a memory source: same low/high sequence as the
+// register rule, reading the high half from the operand's address with disp + 4.
+instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
+%{
+ match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr, TEMP dst);
+
+ ins_cost(125);
+ format %{ "MOVL $dst.hi, $src+4\n\t"
+ "BLSRL $dst.lo, $src\n\t"
+ "JNC done\n\t"
+ "BLSRL $dst.hi, $src+4\n"
+ "done:"
+ %}
+
+ ins_encode %{
+ Label done;
+ Register Rdst = $dst$$Register;
+ // Address of the upper 32 bits of the long in memory.
+ Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
+ __ movl(HIGH_FROM_LOW(Rdst), src_hi);
+ __ blsrl(Rdst, $src$$Address);
+ // The high half is recomputed only when the carry flag is set here.
+ __ jccb(Assembler::carryClear, done);
+ __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
+ __ bind(done);
+ %}
+
+ ins_pipe(ialu_reg_mem);
+%}
+
// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
match(Set dst (OrL dst src));
diff --git a/src/cpu/x86/vm/x86_64.ad b/src/cpu/x86/vm/x86_64.ad
index f82b8f381..220aff9a8 100644
--- a/src/cpu/x86/vm/x86_64.ad
+++ b/src/cpu/x86/vm/x86_64.ad
@@ -6014,6 +6014,19 @@ instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
%}
+// Int count-trailing-zeros as a single tzcntl; selected only when
+// UseCountTrailingZerosInstruction is set (the _bsf rule covers the other case).
instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
+ predicate(UseCountTrailingZerosInstruction);
+ match(Set dst (CountTrailingZerosI src));
+ effect(KILL cr);
+
+ format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
+ ins_encode %{
+ __ tzcntl($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
+ predicate(!UseCountTrailingZerosInstruction);
match(Set dst (CountTrailingZerosI src));
effect(KILL cr);
@@ -6033,6 +6046,19 @@ instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
%}
+// Long count-trailing-zeros as a single tzcntq; selected only when
+// UseCountTrailingZerosInstruction is set (the _bsf rule covers the other case).
instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
+ predicate(UseCountTrailingZerosInstruction);
+ match(Set dst (CountTrailingZerosL src));
+ effect(KILL cr);
+
+ format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
+ ins_encode %{
+ __ tzcntq($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
+ predicate(!UseCountTrailingZerosInstruction);
match(Set dst (CountTrailingZerosL src));
effect(KILL cr);
@@ -8612,6 +8638,122 @@ instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
ins_pipe(ialu_mem_imm);
%}
+// BMI1 instructions
+// Fuses the int pattern (src1 ^ minus_1) & LoadI(src2) into one andnl with a memory operand.
+instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
+ match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ ins_cost(125);
+ format %{ "andnl $dst, $src1, $src2" %}
+
+ ins_encode %{
+ __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
+ %}
+ ins_pipe(ialu_reg_mem);
+%}
+
+// Fuses the int pattern (src1 ^ minus_1) & src2 into one andnl when UseBMI1Instructions is on.
+instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
+ match(Set dst (AndI (XorI src1 minus_1) src2));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ format %{ "andnl $dst, $src1, $src2" %}
+
+ ins_encode %{
+ __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Fuses the int pattern (imm_zero - src) & src into one blsil when UseBMI1Instructions is on.
+instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
+ match(Set dst (AndI (SubI imm_zero src) src));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ format %{ "blsil $dst, $src" %}
+
+ ins_encode %{
+ __ blsil($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// blsil with a memory source: the match tree names (LoadI src) twice, so the same
+// load feeds both the SubI and the AndI.
+instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{
+ match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ ins_cost(125);
+ format %{ "blsil $dst, $src" %}
+
+ ins_encode %{
+ __ blsil($dst$$Register, $src$$Address);
+ %}
+ ins_pipe(ialu_reg_mem);
+%}
+
+// blsmskl with a memory source: the match tree names (LoadI src) twice, so the same
+// load feeds both the AddI and the XorI.
+instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
+%{
+ match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ ins_cost(125);
+ format %{ "blsmskl $dst, $src" %}
+
+ ins_encode %{
+ __ blsmskl($dst$$Register, $src$$Address);
+ %}
+ ins_pipe(ialu_reg_mem);
+%}
+
+// Fuses the int pattern (src + minus_1) ^ src into one blsmskl when UseBMI1Instructions is on.
+instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
+%{
+ match(Set dst (XorI (AddI src minus_1) src));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ format %{ "blsmskl $dst, $src" %}
+
+ ins_encode %{
+ __ blsmskl($dst$$Register, $src$$Register);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// Fuses the int pattern (src + minus_1) & src into one blsrl when UseBMI1Instructions is on.
+instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
+%{
+ match(Set dst (AndI (AddI src minus_1) src) );
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ format %{ "blsrl $dst, $src" %}
+
+ ins_encode %{
+ __ blsrl($dst$$Register, $src$$Register);
+ %}
+
+ // Register-only rule: use the register pipeline class, like the other reg-reg BMI1 rules.
+ ins_pipe(ialu_reg);
+%}
+
+// blsrl with a memory source: the match tree names (LoadI src) twice, so the same
+// load feeds both the AddI and the AndI.
+instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
+%{
+ match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ ins_cost(125);
+ format %{ "blsrl $dst, $src" %}
+
+ ins_encode %{
+ __ blsrl($dst$$Register, $src$$Address);
+ %}
+
+ // Memory-operand rule: use the reg/mem pipeline class, like the other memory BMI1 rules.
+ ins_pipe(ialu_reg_mem);
+%}
+
// Or Instructions
// Or Register with Register
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
@@ -8843,6 +8985,122 @@ instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
ins_pipe(ialu_mem_imm);
%}
+// BMI1 instructions
+// Fuses the long pattern (src1 ^ minus_1) & LoadL(src2) into one andnq with a memory operand.
+instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
+ match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ ins_cost(125);
+ format %{ "andnq $dst, $src1, $src2" %}
+
+ ins_encode %{
+ __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
+ %}
+ ins_pipe(ialu_reg_mem);
+%}
+
+// Fuses the long pattern (src1 ^ minus_1) & src2 into one andnq when UseBMI1Instructions is on.
+instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
+ match(Set dst (AndL (XorL src1 minus_1) src2));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ format %{ "andnq $dst, $src1, $src2" %}
+
+ ins_encode %{
+ __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
+ %}
+ // Register-only rule: use the register pipeline class, as andnI_rReg_rReg_rReg does.
+ ins_pipe(ialu_reg);
+%}
+
+// Fuses the long pattern (imm_zero - src) & src into one blsiq when UseBMI1Instructions is on.
+instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
+ match(Set dst (AndL (SubL imm_zero src) src));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ format %{ "blsiq $dst, $src" %}
+
+ ins_encode %{
+ __ blsiq($dst$$Register, $src$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// blsiq with a memory source: the match tree names (LoadL src) twice, so the same
+// load feeds both the SubL and the AndL.
+instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
+ match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ ins_cost(125);
+ format %{ "blsiq $dst, $src" %}
+
+ ins_encode %{
+ __ blsiq($dst$$Register, $src$$Address);
+ %}
+ ins_pipe(ialu_reg_mem);
+%}
+
+// blsmskq with a memory source: the match tree names (LoadL src) twice, so the same
+// load feeds both the AddL and the XorL.
+instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
+%{
+ match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ ins_cost(125);
+ format %{ "blsmskq $dst, $src" %}
+
+ ins_encode %{
+ __ blsmskq($dst$$Register, $src$$Address);
+ %}
+ ins_pipe(ialu_reg_mem);
+%}
+
+// Fuses the long pattern (src + minus_1) ^ src into one blsmskq when UseBMI1Instructions is on.
+instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
+%{
+ match(Set dst (XorL (AddL src minus_1) src));
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ format %{ "blsmskq $dst, $src" %}
+
+ ins_encode %{
+ __ blsmskq($dst$$Register, $src$$Register);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// Fuses the long pattern (src + minus_1) & src into one blsrq when UseBMI1Instructions is on.
+instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
+%{
+ match(Set dst (AndL (AddL src minus_1) src) );
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ format %{ "blsrq $dst, $src" %}
+
+ ins_encode %{
+ __ blsrq($dst$$Register, $src$$Register);
+ %}
+
+ ins_pipe(ialu_reg);
+%}
+
+// blsrq with a memory source: the match tree names (LoadL src) twice, so the same
+// load feeds both the AddL and the AndL.
+instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
+%{
+ match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
+ predicate(UseBMI1Instructions);
+ effect(KILL cr);
+
+ ins_cost(125);
+ format %{ "blsrq $dst, $src" %}
+
+ ins_encode %{
+ __ blsrq($dst$$Register, $src$$Address);
+ %}
+
+ // Memory-operand rule: use the reg/mem pipeline class, like the other memory BMI1 rules.
+ ins_pipe(ialu_reg_mem);
+%}
+
// Or Instructions
// Or Register with Register
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
diff --git a/src/share/vm/adlc/formssel.cpp b/src/share/vm/adlc/formssel.cpp
index 01aedd36a..b7cd6736e 100644
--- a/src/share/vm/adlc/formssel.cpp
+++ b/src/share/vm/adlc/formssel.cpp
@@ -649,6 +649,7 @@ int InstructForm::memory_operand(FormDict &globals) const {
int USE_of_memory = 0;
int DEF_of_memory = 0;
const char* last_memory_DEF = NULL; // to test DEF/USE pairing in asserts
+ const char* last_memory_USE = NULL;
Component *unique = NULL;
Component *comp = NULL;
ComponentList &components = (ComponentList &)_components;
@@ -670,7 +671,16 @@ int InstructForm::memory_operand(FormDict &globals) const {
assert(0 == strcmp(last_memory_DEF, comp->_name), "every memory DEF is followed by a USE of the same name");
last_memory_DEF = NULL;
}
- USE_of_memory++;
+ // Handles same memory being used multiple times in the case of BMI1 instructions.
+ if (last_memory_USE != NULL) {
+ if (strcmp(comp->_name, last_memory_USE) != 0) {
+ USE_of_memory++;
+ }
+ } else {
+ USE_of_memory++;
+ }
+ last_memory_USE = comp->_name;
+
if (DEF_of_memory == 0) // defs take precedence
unique = comp;
} else {
diff --git a/src/share/vm/opto/matcher.cpp b/src/share/vm/opto/matcher.cpp
index 5088deb7c..81e7455a8 100644
--- a/src/share/vm/opto/matcher.cpp
+++ b/src/share/vm/opto/matcher.cpp
@@ -1908,6 +1908,105 @@ OptoReg::Name Matcher::find_receiver( bool is_outgoing ) {
return OptoReg::as_OptoReg(regs.first());
}
+// This function identifies sub-graphs in which a 'load' node is an
+// input to two different nodes, such that the sub-graph can be matched
+// with BMI instructions like blsi, blsr, etc.
+// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
+// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
+// refers to the same node.
+#ifdef X86
+// Recognizes the generic fused-operation shape (op1 (op2 Con{ConType} mop) mop),
+// in which both 'mop' inputs are the very same node.
+// This is a temporary solution until we make DAGs expressible in ADL.
+template<typename ConType>
+class FusedPatternMatcher {
+  Node* _op1_node;   // candidate root of the pattern (op1)
+  Node* _mop_node;   // the node expected as input of both op1 and op2
+  int   _con_op;     // opcode the constant input of op2 must have
+
+  // Returns the input index (1 or 2) of 'n' whose opcode is 'next_op', or -1
+  // when there is no such input (or when either input is NULL).
+  // A 'next_op_idx' of -1 means 'n' is commutative, so in(1) is tried first
+  // and then in(2); otherwise only in(next_op_idx) is examined.
+  static int match_next(Node* n, int next_op, int next_op_idx) {
+    if (n->in(1) == NULL || n->in(2) == NULL) {
+      return -1;
+    }
+
+    if (next_op_idx != -1) {
+      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
+      return (n->in(next_op_idx)->Opcode() == next_op) ? next_op_idx : -1;
+    }
+
+    // n is commutative, try rotations
+    if (n->in(1)->Opcode() == next_op) {
+      return 1;
+    }
+    if (n->in(2)->Opcode() == next_op) {
+      return 2;
+    }
+    return -1;
+  }
+
+public:
+  FusedPatternMatcher(Node* op1_node, Node *mop_node, int con_op) :
+    _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
+
+  // Returns true when _op1_node is the root of (op1 (op2 con mop) mop) with
+  // 'con' equal to con_value.
+  //   op1, op1_op2_idx - op1 and the index of the op1->op2 edge, -1 if op1 is commutative
+  //   op2, op2_con_idx - op2 and the index of the op2->con edge, -1 if op2 is commutative
+  //   con_value        - value the constant input of op2 must hold
+  // The match is rejected when the mop node has more than two outputs or the
+  // op2 node has more than one.
+  bool match(int op1, int op1_op2_idx,
+             int op2, int op2_con_idx,
+             typename ConType::NativeType con_value) {
+    if (_op1_node->Opcode() != op1 || _mop_node->outcnt() > 2) {
+      return false;
+    }
+
+    const int op2_edge = match_next(_op1_node, op2, op1_op2_idx);
+    if (op2_edge == -1) {
+      return false;
+    }
+
+    // Memory operation must be the other edge (1 <-> 2)
+    const int outer_mop_edge = (op2_edge & 1) + 1;
+    // Check that the mop node is really what we want
+    if (_op1_node->in(outer_mop_edge) != _mop_node) {
+      return false;
+    }
+
+    Node* op2_node = _op1_node->in(op2_edge);
+    if (op2_node->outcnt() > 1) {
+      return false;
+    }
+    assert(op2_node->Opcode() == op2, "Should be");
+
+    const int con_edge = match_next(op2_node, _con_op, op2_con_idx);
+    if (con_edge == -1) {
+      return false;
+    }
+
+    // Memory operation must be the other edge (1 <-> 2)
+    const int inner_mop_edge = (con_edge & 1) + 1;
+    // Check that the memory operation is the same node
+    if (op2_node->in(inner_mop_edge) != _mop_node) {
+      return false;
+    }
+
+    // Now check the constant
+    const Type* con_type = op2_node->in(con_edge)->bottom_type();
+    return con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value;
+  }
+};
+
+
+// Reports whether 'n', together with its input 'm', forms one of the fused
+// shapes checked below. 'm' must be a LoadI or LoadL; for each width the
+// shapes tried, in order, are
+//   (And (Sub 0 m) m), (And (Add m -1) m) and (Xor (Add m -1) m).
+// Returns false if either node is NULL or 'm' is any other kind of node.
+bool Matcher::is_bmi_pattern(Node *n, Node *m) {
+  if (n == NULL || m == NULL) {
+    return false;
+  }
+
+  switch (m->Opcode()) {
+    case Op_LoadI: {
+      FusedPatternMatcher<TypeInt> int_matcher(n, m, Op_ConI);
+      if (int_matcher.match(Op_AndI, -1, Op_SubI, 1, 0)) {
+        return true;
+      }
+      if (int_matcher.match(Op_AndI, -1, Op_AddI, -1, -1)) {
+        return true;
+      }
+      return int_matcher.match(Op_XorI, -1, Op_AddI, -1, -1);
+    }
+    case Op_LoadL: {
+      FusedPatternMatcher<TypeLong> long_matcher(n, m, Op_ConL);
+      if (long_matcher.match(Op_AndL, -1, Op_SubL, 1, 0)) {
+        return true;
+      }
+      if (long_matcher.match(Op_AndL, -1, Op_AddL, -1, -1)) {
+        return true;
+      }
+      return long_matcher.match(Op_XorL, -1, Op_AddL, -1, -1);
+    }
+    default:
+      return false;
+  }
+}
+#endif // X86
+
// A method-klass-holder may be passed in the inline_cache_reg
// and then expanded into the inline_cache_reg and a method_oop register
// defined in ad_<arch>.cpp
@@ -2063,6 +2162,14 @@ void Matcher::find_shared( Node *n ) {
set_shared(m->in(AddPNode::Base)->in(1));
}
+ // if 'n' and 'm' are part of a graph for BMI instruction, clone this node.
+#ifdef X86
+ if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
+ mstack.push(m, Visit);
+ continue;
+ }
+#endif
+
// Clone addressing expressions as they are "free" in memory access instructions
if( mem_op && i == MemNode::Address && mop == Op_AddP ) {
// Some inputs for address expression are not put on stack
diff --git a/src/share/vm/opto/matcher.hpp b/src/share/vm/opto/matcher.hpp
index 5ebf59068..6d90b9fb7 100644
--- a/src/share/vm/opto/matcher.hpp
+++ b/src/share/vm/opto/matcher.hpp
@@ -79,6 +79,9 @@ class Matcher : public PhaseTransform {
// Find shared Nodes, or Nodes that otherwise are Matcher roots
void find_shared( Node *n );
+#ifdef X86
+ bool is_bmi_pattern(Node *n, Node *m);
+#endif
// Debug and profile information for nodes in old space:
GrowableArray<Node_Notes*>* _old_node_note_array;