author     kvn <none@none>  2012-06-15 01:25:19 -0700
committer  kvn <none@none>  2012-06-15 01:25:19 -0700
commit     68446ffadeedf06b663e39278a07cd6171a28b2e (patch)
tree       c872b74df1f9eeb5dde27cf0e472c47bfb6fd82c /src/share/vm/opto
parent     d62195ef3f73f37d98d1b6dfbe12ed1fa5a51bc0 (diff)
7119644: Increase superword's vector size up to 256 bits
Summary: Increase vector size up to 256-bits for YMM AVX registers on x86.
Reviewed-by: never, twisti, roland
Diffstat (limited to 'src/share/vm/opto')
-rw-r--r--  src/share/vm/opto/c2_globals.hpp   |    7
-rw-r--r--  src/share/vm/opto/chaitin.cpp      |  165
-rw-r--r--  src/share/vm/opto/chaitin.hpp      |   17
-rw-r--r--  src/share/vm/opto/classes.hpp      |   72
-rw-r--r--  src/share/vm/opto/compile.cpp      |   30
-rw-r--r--  src/share/vm/opto/ifg.cpp          |   31
-rw-r--r--  src/share/vm/opto/lcm.cpp          |    8
-rw-r--r--  src/share/vm/opto/loopnode.cpp     |    6
-rw-r--r--  src/share/vm/opto/machnode.cpp     |    6
-rw-r--r--  src/share/vm/opto/machnode.hpp     |    7
-rw-r--r--  src/share/vm/opto/matcher.cpp      |   98
-rw-r--r--  src/share/vm/opto/matcher.hpp      |   17
-rw-r--r--  src/share/vm/opto/memnode.cpp      |    3
-rw-r--r--  src/share/vm/opto/mulnode.hpp      |    6
-rw-r--r--  src/share/vm/opto/node.cpp         |    5
-rw-r--r--  src/share/vm/opto/node.hpp         |   21
-rw-r--r--  src/share/vm/opto/opcodes.cpp      |    6
-rw-r--r--  src/share/vm/opto/opcodes.hpp      |    6
-rw-r--r--  src/share/vm/opto/postaloc.cpp     |   81
-rw-r--r--  src/share/vm/opto/reg_split.cpp    |   36
-rw-r--r--  src/share/vm/opto/regmask.cpp      |  165
-rw-r--r--  src/share/vm/opto/regmask.hpp      |   61
-rw-r--r--  src/share/vm/opto/superword.cpp    |  496
-rw-r--r--  src/share/vm/opto/superword.hpp    |   16
-rw-r--r--  src/share/vm/opto/type.cpp         |  151
-rw-r--r--  src/share/vm/opto/type.hpp         |   85
-rw-r--r--  src/share/vm/opto/vectornode.cpp   |  460
-rw-r--r--  src/share/vm/opto/vectornode.hpp   |  868
28 files changed, 1477 insertions(+), 1453 deletions(-)
diff --git a/src/share/vm/opto/c2_globals.hpp b/src/share/vm/opto/c2_globals.hpp
index 4d5424da5..947d806a3 100644
--- a/src/share/vm/opto/c2_globals.hpp
+++ b/src/share/vm/opto/c2_globals.hpp
@@ -81,6 +81,13 @@
product(intx, MaxLoopPad, (OptoLoopAlignment-1), \
"Align a loop if padding size in bytes is less or equal to this value") \
\
+ product(intx, MaxVectorSize, 32, \
+ "Max vector size in bytes, " \
+ "actual size could be less depending on elements type") \
+ \
+ product(bool, AlignVector, false, \
+ "Perform vector store/load alignment in loop") \
+ \
product(intx, NumberOfLoopInstrToAlign, 4, \
"Number of first instructions in a loop to align") \
\
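A hedged aside on how the two new flags are meant to be read (the helper below is illustrative and not part of the patch): MaxVectorSize caps vector width in bytes, so the lane count superword can use for a given element type follows from dividing that cap by the element size; per-platform limits still apply through Matcher::max_vector_size()/min_vector_size(), declared later in this change (matcher.hpp).

  // Illustrative only: lane count implied by MaxVectorSize for one element type.
  static int max_lanes_for(BasicType bt) {
    int elem_bytes = type2aelembytes(bt);        // 1 for T_BYTE, 4 for T_INT, 8 for T_DOUBLE
    return (int)(MaxVectorSize / elem_bytes);    // default 32 bytes -> 8 ints or 4 doubles (YMM)
  }

AlignVector, off by default, additionally asks superword to align its vector loads and stores inside the loop.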
diff --git a/src/share/vm/opto/chaitin.cpp b/src/share/vm/opto/chaitin.cpp
index a74114add..0ed9b9627 100644
--- a/src/share/vm/opto/chaitin.cpp
+++ b/src/share/vm/opto/chaitin.cpp
@@ -75,6 +75,7 @@ void LRG::dump( ) const {
// Flags
if( _is_oop ) tty->print("Oop ");
if( _is_float ) tty->print("Float ");
+ if( _is_vector ) tty->print("Vector ");
if( _was_spilled1 ) tty->print("Spilled ");
if( _was_spilled2 ) tty->print("Spilled2 ");
if( _direct_conflict ) tty->print("Direct_conflict ");
@@ -479,16 +480,18 @@ void PhaseChaitin::Register_Allocate() {
// Move important info out of the live_arena to longer lasting storage.
alloc_node_regs(_names.Size());
- for( uint i=0; i < _names.Size(); i++ ) {
- if( _names[i] ) { // Live range associated with Node?
- LRG &lrg = lrgs( _names[i] );
- if( lrg.num_regs() == 1 ) {
- _node_regs[i].set1( lrg.reg() );
+ for (uint i=0; i < _names.Size(); i++) {
+ if (_names[i]) { // Live range associated with Node?
+ LRG &lrg = lrgs(_names[i]);
+ if (!lrg.alive()) {
+ _node_regs[i].set_bad();
+ } else if (lrg.num_regs() == 1) {
+ _node_regs[i].set1(lrg.reg());
} else { // Must be a register-pair
- if( !lrg._fat_proj ) { // Must be aligned adjacent register pair
+ if (!lrg._fat_proj) { // Must be aligned adjacent register pair
// Live ranges record the highest register in their mask.
// We want the low register for the AD file writer's convenience.
- _node_regs[i].set2( OptoReg::add(lrg.reg(),-1) );
+ _node_regs[i].set2( OptoReg::add(lrg.reg(),(1-lrg.num_regs())) );
} else { // Misaligned; extract 2 bits
OptoReg::Name hi = lrg.reg(); // Get hi register
lrg.Remove(hi); // Yank from mask
@@ -568,7 +571,7 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
// Check for float-vs-int live range (used in register-pressure
// calculations)
const Type *n_type = n->bottom_type();
- if( n_type->is_floatingpoint() )
+ if (n_type->is_floatingpoint())
lrg._is_float = 1;
// Check for twice prior spilling. Once prior spilling might have
@@ -599,18 +602,28 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
// Limit result register mask to acceptable registers
const RegMask &rm = n->out_RegMask();
lrg.AND( rm );
+
+ int ireg = n->ideal_reg();
+ assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP,
+ "oops must be in Op_RegP's" );
+
+ // Check for vector live range (only if vector register is used).
+ // On SPARC a vector uses RegD, which could be misaligned, so it is not
+ // processed as a vector in RA.
+ if (RegMask::is_vector(ireg))
+ lrg._is_vector = 1;
+ assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD,
+ "vector must be in vector registers");
+
// Check for bound register masks
const RegMask &lrgmask = lrg.mask();
- if( lrgmask.is_bound1() || lrgmask.is_bound2() )
+ if (lrgmask.is_bound(ireg))
lrg._is_bound = 1;
// Check for maximum frequency value
- if( lrg._maxfreq < b->_freq )
+ if (lrg._maxfreq < b->_freq)
lrg._maxfreq = b->_freq;
- int ireg = n->ideal_reg();
- assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP,
- "oops must be in Op_RegP's" );
// Check for oop-iness, or long/double
// Check for multi-kill projection
switch( ireg ) {
@@ -689,7 +702,7 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
// AND changes how we count interferences. A mis-aligned
// double can interfere with TWO aligned pairs, or effectively
// FOUR registers!
- if( rm.is_misaligned_Pair() ) {
+ if (rm.is_misaligned_pair()) {
lrg._fat_proj = 1;
lrg._is_bound = 1;
}
@@ -706,6 +719,33 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
lrg.set_reg_pressure(1);
#endif
break;
+ case Op_VecS:
+ assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
+ assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
+ lrg.set_num_regs(RegMask::SlotsPerVecS);
+ lrg.set_reg_pressure(1);
+ break;
+ case Op_VecD:
+ assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecD), "sanity");
+ assert(RegMask::num_registers(Op_VecD) == RegMask::SlotsPerVecD, "sanity");
+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecD), "vector should be aligned");
+ lrg.set_num_regs(RegMask::SlotsPerVecD);
+ lrg.set_reg_pressure(1);
+ break;
+ case Op_VecX:
+ assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecX), "sanity");
+ assert(RegMask::num_registers(Op_VecX) == RegMask::SlotsPerVecX, "sanity");
+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecX), "vector should be aligned");
+ lrg.set_num_regs(RegMask::SlotsPerVecX);
+ lrg.set_reg_pressure(1);
+ break;
+ case Op_VecY:
+ assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecY), "sanity");
+ assert(RegMask::num_registers(Op_VecY) == RegMask::SlotsPerVecY, "sanity");
+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecY), "vector should be aligned");
+ lrg.set_num_regs(RegMask::SlotsPerVecY);
+ lrg.set_reg_pressure(1);
+ break;
default:
ShouldNotReachHere();
}
@@ -763,24 +803,38 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
} else {
lrg.AND( rm );
}
+
// Check for bound register masks
const RegMask &lrgmask = lrg.mask();
- if( lrgmask.is_bound1() || lrgmask.is_bound2() )
+ int kreg = n->in(k)->ideal_reg();
+ bool is_vect = RegMask::is_vector(kreg);
+ assert(n->in(k)->bottom_type()->isa_vect() == NULL ||
+ is_vect || kreg == Op_RegD,
+ "vector must be in vector registers");
+ if (lrgmask.is_bound(kreg))
lrg._is_bound = 1;
+
// If this use of a double forces a mis-aligned double,
// flag as '_fat_proj' - really flag as allowing misalignment
// AND changes how we count interferences. A mis-aligned
// double can interfere with TWO aligned pairs, or effectively
// FOUR registers!
- if( lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_Pair() ) {
+#ifdef ASSERT
+ if (is_vect) {
+ assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned");
+ assert(!lrg._fat_proj, "sanity");
+ assert(RegMask::num_registers(kreg) == lrg.num_regs(), "sanity");
+ }
+#endif
+ if (!is_vect && lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_pair()) {
lrg._fat_proj = 1;
lrg._is_bound = 1;
}
// if the LRG is an unaligned pair, we will have to spill
// so clear the LRG's register mask if it is not already spilled
- if ( !n->is_SpillCopy() &&
- (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&
- lrgmask.is_misaligned_Pair()) {
+ if (!is_vect && !n->is_SpillCopy() &&
+ (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&
+ lrgmask.is_misaligned_pair()) {
lrg.Clear();
}
@@ -793,12 +847,14 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
} // end for all blocks
// Final per-liverange setup
- for( uint i2=0; i2<_maxlrg; i2++ ) {
+ for (uint i2=0; i2<_maxlrg; i2++) {
LRG &lrg = lrgs(i2);
- if( lrg.num_regs() == 2 && !lrg._fat_proj )
- lrg.ClearToPairs();
+ assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
+ if (lrg.num_regs() > 1 && !lrg._fat_proj) {
+ lrg.clear_to_sets();
+ }
lrg.compute_set_mask_size();
- if( lrg.not_free() ) { // Handle case where we lose from the start
+ if (lrg.not_free()) { // Handle case where we lose from the start
lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
lrg._direct_conflict = 1;
}
@@ -1104,22 +1160,17 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
// Choose a color which is legal for him
RegMask tempmask = lrg.mask();
tempmask.AND(lrgs(copy_lrg).mask());
- OptoReg::Name reg;
- if( lrg.num_regs() == 1 ) {
- reg = tempmask.find_first_elem();
- } else {
- tempmask.ClearToPairs();
- reg = tempmask.find_first_pair();
- }
- if( OptoReg::is_valid(reg) )
+ tempmask.clear_to_sets(lrg.num_regs());
+ OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
+ if (OptoReg::is_valid(reg))
return reg;
}
}
// If no bias info exists, just go with the register selection ordering
- if( lrg.num_regs() == 2 ) {
- // Find an aligned pair
- return OptoReg::add(lrg.mask().find_first_pair(),chunk);
+ if (lrg._is_vector || lrg.num_regs() == 2) {
+ // Find an aligned set
+ return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
}
// CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
@@ -1149,6 +1200,7 @@ OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {
// Use a heuristic to "bias" the color choice
return bias_color(lrg, chunk);
+ assert(!lrg._is_vector, "should not be vector here" );
assert( lrg.num_regs() >= 2, "dead live ranges do not color" );
// Fat-proj case or misaligned double argument.
@@ -1238,14 +1290,16 @@ uint PhaseChaitin::Select( ) {
}
//assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness");
// Aligned pairs need aligned masks
- if( lrg->num_regs() == 2 && !lrg->_fat_proj )
- lrg->ClearToPairs();
+ assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
+ if (lrg->num_regs() > 1 && !lrg->_fat_proj) {
+ lrg->clear_to_sets();
+ }
// Check if a color is available and if so pick the color
OptoReg::Name reg = choose_color( *lrg, chunk );
#ifdef SPARC
debug_only(lrg->compute_set_mask_size());
- assert(lrg->num_regs() != 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");
+ assert(lrg->num_regs() < 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");
#endif
//---------------
@@ -1277,17 +1331,16 @@ uint PhaseChaitin::Select( ) {
// If the live range is not bound, then we actually had some choices
// to make. In this case, the mask has more bits in it than the colors
// chosen. Restrict the mask to just what was picked.
- if( lrg->num_regs() == 1 ) { // Size 1 live range
- lrg->Clear(); // Clear the mask
- lrg->Insert(reg); // Set regmask to match selected reg
- lrg->set_mask_size(1);
- } else if( !lrg->_fat_proj ) {
- // For pairs, also insert the low bit of the pair
- assert( lrg->num_regs() == 2, "unbound fatproj???" );
+ int n_regs = lrg->num_regs();
+ assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
+ if (n_regs == 1 || !lrg->_fat_proj) {
+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecY, "sanity");
lrg->Clear(); // Clear the mask
lrg->Insert(reg); // Set regmask to match selected reg
- lrg->Insert(OptoReg::add(reg,-1));
- lrg->set_mask_size(2);
+ // For vectors and pairs, also insert the low bit of the pair
+ for (int i = 1; i < n_regs; i++)
+ lrg->Insert(OptoReg::add(reg,-i));
+ lrg->set_mask_size(n_regs);
} else { // Else fatproj
// mask must be equal to fatproj bits, by definition
}
@@ -1860,12 +1913,20 @@ char *PhaseChaitin::dump_register( const Node *n, char *buf ) const {
sprintf(buf,"L%d",lidx); // No register binding yet
} else if( !lidx ) { // Special, not allocated value
strcpy(buf,"Special");
- } else if( (lrgs(lidx).num_regs() == 1)
- ? !lrgs(lidx).mask().is_bound1()
- : !lrgs(lidx).mask().is_bound2() ) {
- sprintf(buf,"L%d",lidx); // No register binding yet
- } else { // Hah! We have a bound machine register
- print_reg( lrgs(lidx).reg(), this, buf );
+ } else {
+ if (lrgs(lidx)._is_vector) {
+ if (lrgs(lidx).mask().is_bound_set(lrgs(lidx).num_regs()))
+ print_reg( lrgs(lidx).reg(), this, buf ); // a bound machine register
+ else
+ sprintf(buf,"L%d",lidx); // No register binding yet
+ } else if( (lrgs(lidx).num_regs() == 1)
+ ? lrgs(lidx).mask().is_bound1()
+ : lrgs(lidx).mask().is_bound_pair() ) {
+ // Hah! We have a bound machine register
+ print_reg( lrgs(lidx).reg(), this, buf );
+ } else {
+ sprintf(buf,"L%d",lidx); // No register binding yet
+ }
}
}
return buf+strlen(buf);
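The chaitin changes above replace the pair-only RegMask helpers (ClearToPairs, is_bound2, find_first_pair) with set-oriented ones sized by the live range's num_regs(); their implementations appear in the regmask.cpp hunks further down. As a standalone illustration of the bit math, assuming 32-bit mask words and a 4-slot set such as Op_VecX, this hypothetical helper mirrors what clear_to_sets(4) does to a single word:

  // Keep only nibbles (aligned groups of four 32-bit slots) that are fully set.
  static int clear_word_to_sets4(int bits) {
    int sets = bits & 0x11111111;      // start from the low bit of every nibble
    for (int j = 1; j < 4; j++)
      sets = bits & (sets << 1);       // walk up; only fully populated nibbles survive
    sets |= (sets >> 1);               // smear the surviving high bit ...
    sets |= (sets >> 2);               // ... back down to refill the nibble
    return sets;                       // e.g. 0x00FF0F70 -> 0x00FF0F00
  }

A live range that must color to a VecX therefore keeps only mask bits forming complete, aligned 4-slot groups, which is what the clear_to_sets() and find_first_set() calls above rely on.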
diff --git a/src/share/vm/opto/chaitin.hpp b/src/share/vm/opto/chaitin.hpp
index 1e6be63c4..c10f18d74 100644
--- a/src/share/vm/opto/chaitin.hpp
+++ b/src/share/vm/opto/chaitin.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -99,8 +99,15 @@ public:
void set_mask_size( int size ) {
assert((size == 65535) || (size == (int)_mask.Size()), "");
_mask_size = size;
- debug_only(_msize_valid=1;)
- debug_only( if( _num_regs == 2 && !_fat_proj ) _mask.VerifyPairs(); )
+#ifdef ASSERT
+ _msize_valid=1;
+ if (_is_vector) {
+ assert(!_fat_proj, "sanity");
+ _mask.verify_sets(_num_regs);
+ } else if (_num_regs == 2 && !_fat_proj) {
+ _mask.verify_pairs();
+ }
+#endif
}
void compute_set_mask_size() { set_mask_size(compute_mask_size()); }
int mask_size() const { assert( _msize_valid, "mask size not valid" );
@@ -116,7 +123,8 @@ public:
void Set_All() { _mask.Set_All(); debug_only(_msize_valid=1); _mask_size = RegMask::CHUNK_SIZE; }
void Insert( OptoReg::Name reg ) { _mask.Insert(reg); debug_only(_msize_valid=0;) }
void Remove( OptoReg::Name reg ) { _mask.Remove(reg); debug_only(_msize_valid=0;) }
- void ClearToPairs() { _mask.ClearToPairs(); debug_only(_msize_valid=0;) }
+ void clear_to_pairs() { _mask.clear_to_pairs(); debug_only(_msize_valid=0;) }
+ void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) }
// Number of registers this live range uses when it colors
private:
@@ -150,6 +158,7 @@ public:
uint _is_oop:1, // Live-range holds an oop
_is_float:1, // True if in float registers
+ _is_vector:1, // True if in vector registers
_was_spilled1:1, // True if prior spilling on def
_was_spilled2:1, // True if twice prior spilling on def
_is_bound:1, // live range starts life with no
diff --git a/src/share/vm/opto/classes.hpp b/src/share/vm/opto/classes.hpp
index 03b5107c3..bdf18b51f 100644
--- a/src/share/vm/opto/classes.hpp
+++ b/src/share/vm/opto/classes.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -245,14 +245,12 @@ macro(XorI)
macro(XorL)
macro(Vector)
macro(AddVB)
-macro(AddVC)
macro(AddVS)
macro(AddVI)
macro(AddVL)
macro(AddVF)
macro(AddVD)
macro(SubVB)
-macro(SubVC)
macro(SubVS)
macro(SubVI)
macro(SubVL)
@@ -263,74 +261,36 @@ macro(MulVD)
macro(DivVF)
macro(DivVD)
macro(LShiftVB)
-macro(LShiftVC)
macro(LShiftVS)
macro(LShiftVI)
-macro(URShiftVB)
-macro(URShiftVC)
-macro(URShiftVS)
-macro(URShiftVI)
+macro(RShiftVB)
+macro(RShiftVS)
+macro(RShiftVI)
macro(AndV)
macro(OrV)
macro(XorV)
-macro(VectorLoad)
-macro(Load16B)
-macro(Load8B)
-macro(Load4B)
-macro(Load8C)
-macro(Load4C)
-macro(Load2C)
-macro(Load8S)
-macro(Load4S)
-macro(Load2S)
-macro(Load4I)
-macro(Load2I)
-macro(Load2L)
-macro(Load4F)
-macro(Load2F)
-macro(Load2D)
-macro(VectorStore)
-macro(Store16B)
-macro(Store8B)
-macro(Store4B)
-macro(Store8C)
-macro(Store4C)
-macro(Store2C)
-macro(Store4I)
-macro(Store2I)
-macro(Store2L)
-macro(Store4F)
-macro(Store2F)
-macro(Store2D)
+macro(LoadVector)
+macro(StoreVector)
macro(Pack)
macro(PackB)
macro(PackS)
-macro(PackC)
macro(PackI)
macro(PackL)
macro(PackF)
macro(PackD)
-macro(Pack2x1B)
-macro(Pack2x2B)
-macro(Replicate16B)
-macro(Replicate8B)
-macro(Replicate4B)
-macro(Replicate8S)
-macro(Replicate4S)
-macro(Replicate2S)
-macro(Replicate8C)
-macro(Replicate4C)
-macro(Replicate2C)
-macro(Replicate4I)
-macro(Replicate2I)
-macro(Replicate2L)
-macro(Replicate4F)
-macro(Replicate2F)
-macro(Replicate2D)
+macro(Pack2L)
+macro(Pack2D)
+macro(ReplicateB)
+macro(ReplicateS)
+macro(ReplicateI)
+macro(ReplicateL)
+macro(ReplicateF)
+macro(ReplicateD)
macro(Extract)
macro(ExtractB)
-macro(ExtractS)
+macro(ExtractUB)
macro(ExtractC)
+macro(ExtractS)
macro(ExtractI)
macro(ExtractL)
macro(ExtractF)
diff --git a/src/share/vm/opto/compile.cpp b/src/share/vm/opto/compile.cpp
index 5c4b5145a..5331d033f 100644
--- a/src/share/vm/opto/compile.cpp
+++ b/src/share/vm/opto/compile.cpp
@@ -2591,38 +2591,12 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {
}
break;
- case Op_Load16B:
- case Op_Load8B:
- case Op_Load4B:
- case Op_Load8S:
- case Op_Load4S:
- case Op_Load2S:
- case Op_Load8C:
- case Op_Load4C:
- case Op_Load2C:
- case Op_Load4I:
- case Op_Load2I:
- case Op_Load2L:
- case Op_Load4F:
- case Op_Load2F:
- case Op_Load2D:
- case Op_Store16B:
- case Op_Store8B:
- case Op_Store4B:
- case Op_Store8C:
- case Op_Store4C:
- case Op_Store2C:
- case Op_Store4I:
- case Op_Store2I:
- case Op_Store2L:
- case Op_Store4F:
- case Op_Store2F:
- case Op_Store2D:
+ case Op_LoadVector:
+ case Op_StoreVector:
break;
case Op_PackB:
case Op_PackS:
- case Op_PackC:
case Op_PackI:
case Op_PackF:
case Op_PackL:
diff --git a/src/share/vm/opto/ifg.cpp b/src/share/vm/opto/ifg.cpp
index 3a2254565..4827a17d8 100644
--- a/src/share/vm/opto/ifg.cpp
+++ b/src/share/vm/opto/ifg.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -416,6 +416,7 @@ uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) {
if( lrgs(lidx).mask().is_UP() &&
lrgs(lidx).mask_size() &&
!lrgs(lidx)._is_float &&
+ !lrgs(lidx)._is_vector &&
lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) )
cnt += lrgs(lidx).reg_pressure();
}
@@ -430,7 +431,7 @@ uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) {
while ((lidx = elements.next()) != 0) {
if( lrgs(lidx).mask().is_UP() &&
lrgs(lidx).mask_size() &&
- lrgs(lidx)._is_float )
+ (lrgs(lidx)._is_float || lrgs(lidx)._is_vector))
cnt += lrgs(lidx).reg_pressure();
}
return cnt;
@@ -439,8 +440,8 @@ uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) {
//------------------------------lower_pressure---------------------------------
// Adjust register pressure down by 1. Capture last hi-to-low transition,
static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) {
- if( lrg->mask().is_UP() && lrg->mask_size() ) {
- if( lrg->_is_float ) {
+ if (lrg->mask().is_UP() && lrg->mask_size()) {
+ if (lrg->_is_float || lrg->_is_vector) {
pressure[1] -= lrg->reg_pressure();
if( pressure[1] == (uint)FLOATPRESSURE ) {
hrp_index[1] = where;
@@ -522,8 +523,8 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
LRG &lrg = lrgs(lidx);
lrg._area += cost;
// Compute initial register pressure
- if( lrg.mask().is_UP() && lrg.mask_size() ) {
- if( lrg._is_float ) { // Count float pressure
+ if (lrg.mask().is_UP() && lrg.mask_size()) {
+ if (lrg._is_float || lrg._is_vector) { // Count float pressure
pressure[1] += lrg.reg_pressure();
#ifdef EXACT_PRESSURE
if( pressure[1] > b->_freg_pressure )
@@ -681,13 +682,10 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
// according to its bindings.
const RegMask &rmask = lrgs(r).mask();
if( lrgs(r).is_bound() && !(n->rematerialize()) && rmask.is_NotEmpty() ) {
- // Smear odd bits; leave only aligned pairs of bits.
- RegMask r2mask = rmask;
- r2mask.SmearToPairs();
// Check for common case
int r_size = lrgs(r).num_regs();
OptoReg::Name r_reg = (r_size == 1) ? rmask.find_first_elem() : OptoReg::Physical;
-
+ // Smear odd bits
IndexSetIterator elements(&liveout);
uint l;
while ((l = elements.next()) != 0) {
@@ -701,10 +699,15 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
// Remove the bits from LRG 'r' from LRG 'l' so 'l' no
// longer interferes with 'r'. If 'l' requires aligned
// adjacent pairs, subtract out bit pairs.
- if( lrg.num_regs() == 2 && !lrg._fat_proj ) {
+ assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
+ if (lrg.num_regs() > 1 && !lrg._fat_proj) {
+ RegMask r2mask = rmask;
+ // Leave only aligned set of bits.
+ r2mask.smear_to_sets(lrg.num_regs());
+ // This includes the vector case.
lrg.SUBTRACT( r2mask );
lrg.compute_set_mask_size();
- } else if( r_size != 1 ) {
+ } else if( r_size != 1 ) { // fat proj
lrg.SUBTRACT( rmask );
lrg.compute_set_mask_size();
} else { // Common case: size 1 bound removal
@@ -763,8 +766,8 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
// Newly live things assumed live from here to top of block
lrg._area += cost;
// Adjust register pressure
- if( lrg.mask().is_UP() && lrg.mask_size() ) {
- if( lrg._is_float ) {
+ if (lrg.mask().is_UP() && lrg.mask_size()) {
+ if (lrg._is_float || lrg._is_vector) {
pressure[1] += lrg.reg_pressure();
#ifdef EXACT_PRESSURE
if( pressure[1] > b->_freg_pressure )
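In the interference subtraction above, the mask of a bound live range is now smeared up to the neighbor's set size before being SUBTRACTed, so a vector neighbor loses every slot of any aligned group the bound range touches. A standalone sketch of the smear for one 32-bit mask word and a 4-slot (Op_VecX) neighbor, mirroring the smear_to_sets() added in regmask.cpp below:

  // A bound double pinned to slots 10..11 (bits 0x00000C00) must knock a VecX
  // neighbor out of the entire aligned group 8..11.
  static int smear_word_to_sets4(int bits) {
    int sets = 0;
    for (int j = 0; j < 4; j++) {      // collect a marker for every nibble that
      sets |= (bits & 0x11111111);     // has any bit set at this offset
      bits >>= 1;
    }
    sets |= (sets << 1);               // expand the marker ...
    sets |= (sets << 2);               // ... to cover the whole nibble
    return sets;                       // e.g. 0x00000C00 -> 0x00000F00
  }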
diff --git a/src/share/vm/opto/lcm.cpp b/src/share/vm/opto/lcm.cpp
index 1ad9f0b1f..2f272eb55 100644
--- a/src/share/vm/opto/lcm.cpp
+++ b/src/share/vm/opto/lcm.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -139,6 +139,7 @@ void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowe
int iop = mach->ideal_Opcode();
switch( iop ) {
case Op_LoadB:
+ case Op_LoadUB:
case Op_LoadUS:
case Op_LoadD:
case Op_LoadF:
@@ -445,6 +446,11 @@ Node *Block::select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &read
if( e->is_MachNullCheck() && e->in(1) == n )
continue;
+ // Schedule IV increment last.
+ if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd &&
+ e->in(1)->in(1) == n && n->is_iteratively_computed())
+ continue;
+
uint n_choice = 2;
// See if this instruction is consumed by a branch. If so, then (as the
diff --git a/src/share/vm/opto/loopnode.cpp b/src/share/vm/opto/loopnode.cpp
index afd96045a..43def7314 100644
--- a/src/share/vm/opto/loopnode.cpp
+++ b/src/share/vm/opto/loopnode.cpp
@@ -2751,7 +2751,8 @@ int PhaseIdealLoop::build_loop_tree_impl( Node *n, int pre_order ) {
// Do not count uncommon calls
if( !n->is_CallStaticJava() || !n->as_CallStaticJava()->_name ) {
Node *iff = n->in(0)->in(0);
- if( !iff->is_If() ||
+ // Vectorized loops must not have any calls.
+ if( UseSuperWord || !iff->is_If() ||
(n->in(0)->Opcode() == Op_IfFalse &&
(1.0 - iff->as_If()->_prob) >= 0.01) ||
(iff->as_If()->_prob >= 0.01) )
@@ -3216,7 +3217,8 @@ void PhaseIdealLoop::build_loop_late_post( Node *n ) {
case Op_ModF:
case Op_ModD:
case Op_LoadB: // Same with Loads; they can sink
- case Op_LoadUS: // during loop optimizations.
+ case Op_LoadUB: // during loop optimizations.
+ case Op_LoadUS:
case Op_LoadD:
case Op_LoadF:
case Op_LoadI:
diff --git a/src/share/vm/opto/machnode.cpp b/src/share/vm/opto/machnode.cpp
index 7bc587785..88bb23b37 100644
--- a/src/share/vm/opto/machnode.cpp
+++ b/src/share/vm/opto/machnode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -439,9 +439,9 @@ bool MachNode::rematerialize() const {
// Don't remateralize somebody with bound inputs - it stretches a
// fixed register lifetime.
uint idx = oper_input_base();
- if( req() > idx ) {
+ if (req() > idx) {
const RegMask &rm = in_RegMask(idx);
- if( rm.is_bound1() || rm.is_bound2() )
+ if (rm.is_bound(ideal_reg()))
return false;
}
diff --git a/src/share/vm/opto/machnode.hpp b/src/share/vm/opto/machnode.hpp
index 566e031d1..4db1154e8 100644
--- a/src/share/vm/opto/machnode.hpp
+++ b/src/share/vm/opto/machnode.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -319,6 +319,7 @@ public:
class MachTypeNode : public MachNode {
virtual uint size_of() const { return sizeof(*this); } // Size is bigger
public:
+ MachTypeNode( ) {}
const Type *_bottom_type;
virtual const class Type *bottom_type() const { return _bottom_type; }
@@ -370,12 +371,12 @@ public:
//------------------------------MachConstantNode-------------------------------
// Machine node that holds a constant which is stored in the constant table.
-class MachConstantNode : public MachNode {
+class MachConstantNode : public MachTypeNode {
protected:
Compile::Constant _constant; // This node's constant.
public:
- MachConstantNode() : MachNode() {
+ MachConstantNode() : MachTypeNode() {
init_class_id(Class_MachConstant);
}
diff --git a/src/share/vm/opto/matcher.cpp b/src/share/vm/opto/matcher.cpp
index 397385670..f6bb30769 100644
--- a/src/share/vm/opto/matcher.cpp
+++ b/src/share/vm/opto/matcher.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -35,6 +35,7 @@
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/type.hpp"
+#include "opto/vectornode.hpp"
#include "runtime/atomic.hpp"
#include "runtime/os.hpp"
#ifdef TARGET_ARCH_MODEL_x86_32
@@ -58,18 +59,6 @@
OptoReg::Name OptoReg::c_frame_pointer;
-
-
-const int Matcher::base2reg[Type::lastype] = {
- Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
- Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
- Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
- 0, 0/*abio*/,
- Op_RegP /* Return address */, 0, /* the memories */
- Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
- 0 /*bottom*/
-};
-
const RegMask *Matcher::idealreg2regmask[_last_machine_leaf];
RegMask Matcher::mreg2regmask[_last_Mach_Reg];
RegMask Matcher::STACK_ONLY_mask;
@@ -107,6 +96,10 @@ Matcher::Matcher( Node_List &proj_list ) :
idealreg2spillmask [Op_RegF] = NULL;
idealreg2spillmask [Op_RegD] = NULL;
idealreg2spillmask [Op_RegP] = NULL;
+ idealreg2spillmask [Op_VecS] = NULL;
+ idealreg2spillmask [Op_VecD] = NULL;
+ idealreg2spillmask [Op_VecX] = NULL;
+ idealreg2spillmask [Op_VecY] = NULL;
idealreg2debugmask [Op_RegI] = NULL;
idealreg2debugmask [Op_RegN] = NULL;
@@ -114,6 +107,10 @@ Matcher::Matcher( Node_List &proj_list ) :
idealreg2debugmask [Op_RegF] = NULL;
idealreg2debugmask [Op_RegD] = NULL;
idealreg2debugmask [Op_RegP] = NULL;
+ idealreg2debugmask [Op_VecS] = NULL;
+ idealreg2debugmask [Op_VecD] = NULL;
+ idealreg2debugmask [Op_VecX] = NULL;
+ idealreg2debugmask [Op_VecY] = NULL;
idealreg2mhdebugmask[Op_RegI] = NULL;
idealreg2mhdebugmask[Op_RegN] = NULL;
@@ -121,6 +118,10 @@ Matcher::Matcher( Node_List &proj_list ) :
idealreg2mhdebugmask[Op_RegF] = NULL;
idealreg2mhdebugmask[Op_RegD] = NULL;
idealreg2mhdebugmask[Op_RegP] = NULL;
+ idealreg2mhdebugmask[Op_VecS] = NULL;
+ idealreg2mhdebugmask[Op_VecD] = NULL;
+ idealreg2mhdebugmask[Op_VecX] = NULL;
+ idealreg2mhdebugmask[Op_VecY] = NULL;
debug_only(_mem_node = NULL;) // Ideal memory node consumed by mach node
}
@@ -134,7 +135,7 @@ OptoReg::Name Matcher::warp_incoming_stk_arg( VMReg reg ) {
warped = OptoReg::add(warped, C->out_preserve_stack_slots());
if( warped >= _in_arg_limit )
_in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen
- if (!RegMask::can_represent(warped)) {
+ if (!RegMask::can_represent_arg(warped)) {
// the compiler cannot represent this method's calling sequence
C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence");
return OptoReg::Bad;
@@ -302,7 +303,7 @@ void Matcher::match( ) {
_out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
assert( is_even(_out_arg_limit), "out_preserve must be even" );
- if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) {
+ if (!RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1))) {
// the compiler cannot represent this method's calling sequence
C->record_method_not_compilable("must be able to represent all call arguments in reg mask");
}
@@ -428,7 +429,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
void Matcher::init_first_stack_mask() {
// Allocate storage for spill masks as masks for the appropriate load type.
- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * 3*6);
+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+4));
idealreg2spillmask [Op_RegN] = &rms[0];
idealreg2spillmask [Op_RegI] = &rms[1];
@@ -451,6 +452,11 @@ void Matcher::init_first_stack_mask() {
idealreg2mhdebugmask[Op_RegD] = &rms[16];
idealreg2mhdebugmask[Op_RegP] = &rms[17];
+ idealreg2spillmask [Op_VecS] = &rms[18];
+ idealreg2spillmask [Op_VecD] = &rms[19];
+ idealreg2spillmask [Op_VecX] = &rms[20];
+ idealreg2spillmask [Op_VecY] = &rms[21];
+
OptoReg::Name i;
// At first, start with the empty mask
@@ -462,7 +468,7 @@ void Matcher::init_first_stack_mask() {
C->FIRST_STACK_mask().Insert(i);
// Add in all bits past the outgoing argument area
- guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)),
+ guarantee(RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1)),
"must be able to represent all call arguments in reg mask");
init = _out_arg_limit;
for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
@@ -472,21 +478,48 @@ void Matcher::init_first_stack_mask() {
C->FIRST_STACK_mask().set_AllStack();
// Make spill masks. Registers for their class, plus FIRST_STACK_mask.
+ RegMask aligned_stack_mask = C->FIRST_STACK_mask();
+ // Keep spill masks aligned.
+ aligned_stack_mask.clear_to_pairs();
+ assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+
+ *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
#ifdef _LP64
*idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN];
idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask());
+ idealreg2spillmask[Op_RegP]->OR(aligned_stack_mask);
+#else
+ idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
#endif
*idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI];
idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask());
*idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL];
- idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask());
+ idealreg2spillmask[Op_RegL]->OR(aligned_stack_mask);
*idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF];
idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask());
*idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
- idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask());
- *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
- idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
+ idealreg2spillmask[Op_RegD]->OR(aligned_stack_mask);
+ if (Matcher::vector_size_supported(T_BYTE,4)) {
+ *idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS];
+ idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask());
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,2)) {
+ *idealreg2spillmask[Op_VecD] = *idealreg2regmask[Op_VecD];
+ idealreg2spillmask[Op_VecD]->OR(aligned_stack_mask);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,4)) {
+ aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecX);
+ assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+ *idealreg2spillmask[Op_VecX] = *idealreg2regmask[Op_VecX];
+ idealreg2spillmask[Op_VecX]->OR(aligned_stack_mask);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,8)) {
+ aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecY);
+ assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+ *idealreg2spillmask[Op_VecY] = *idealreg2regmask[Op_VecY];
+ idealreg2spillmask[Op_VecY]->OR(aligned_stack_mask);
+ }
if (UseFPUForSpilling) {
// This mask logic assumes that the spill operations are
// symmetric and that the registers involved are the same size.
@@ -807,6 +840,25 @@ void Matcher::init_spill_mask( Node *ret ) {
idealreg2regmask[Op_RegF] = &spillF->out_RegMask();
idealreg2regmask[Op_RegD] = &spillD->out_RegMask();
idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
+
+ // Vector regmasks.
+ if (Matcher::vector_size_supported(T_BYTE,4)) {
+ TypeVect::VECTS = TypeVect::make(T_BYTE, 4);
+ MachNode *spillVectS = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS));
+ idealreg2regmask[Op_VecS] = &spillVectS->out_RegMask();
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,2)) {
+ MachNode *spillVectD = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTD));
+ idealreg2regmask[Op_VecD] = &spillVectD->out_RegMask();
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,4)) {
+ MachNode *spillVectX = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTX));
+ idealreg2regmask[Op_VecX] = &spillVectX->out_RegMask();
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,8)) {
+ MachNode *spillVectY = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTY));
+ idealreg2regmask[Op_VecY] = &spillVectY->out_RegMask();
+ }
}
#ifdef ASSERT
@@ -1063,7 +1115,7 @@ OptoReg::Name Matcher::warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out
// that is killed by the call.
if( warped >= out_arg_limit_per_call )
out_arg_limit_per_call = OptoReg::add(warped,1);
- if (!RegMask::can_represent(warped)) {
+ if (!RegMask::can_represent_arg(warped)) {
C->record_method_not_compilable_all_tiers("unsupported calling sequence");
return OptoReg::Bad;
}
@@ -1251,7 +1303,7 @@ MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
// this killed area.
uint r_cnt = mcall->tf()->range()->cnt();
MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
- if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) {
+ if (!RegMask::can_represent_arg(OptoReg::Name(out_arg_limit_per_call-1))) {
C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence");
} else {
for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
diff --git a/src/share/vm/opto/matcher.hpp b/src/share/vm/opto/matcher.hpp
index e6aae28b3..0597cb543 100644
--- a/src/share/vm/opto/matcher.hpp
+++ b/src/share/vm/opto/matcher.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -250,10 +250,21 @@ public:
static const bool convL2FSupported(void);
// Vector width in bytes
- static const uint vector_width_in_bytes(void);
+ static const int vector_width_in_bytes(BasicType bt);
+
+ // Limits on vector size (number of elements).
+ static const int max_vector_size(const BasicType bt);
+ static const int min_vector_size(const BasicType bt);
+ static const bool vector_size_supported(const BasicType bt, int size) {
+ return (Matcher::max_vector_size(bt) >= size &&
+ Matcher::min_vector_size(bt) <= size);
+ }
// Vector ideal reg
- static const uint vector_ideal_reg(void);
+ static const int vector_ideal_reg(int len);
+
+ // CPU supports misaligned vectors store/load.
+ static const bool misaligned_vectors_ok();
// Used to determine a "low complexity" 64-bit constant. (Zero is simple.)
// The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI).
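The new vector_ideal_reg(int len) query maps a vector's size in bytes onto one of the VecS..VecY ideal register classes added in opcodes.hpp. The real table lives in the platform AD files, which are outside this src/share/vm/opto listing, so the sketch below is only a hedged restatement consistent with RegMask::num_registers() further down:

  // Illustrative only: byte size of a vector -> ideal register class.
  static int vector_ideal_reg_sketch(int size_in_bytes) {
    switch (size_in_bytes) {
    case  4: return Op_VecS;   // 1 x 32-bit slot
    case  8: return Op_VecD;   // 2 slots
    case 16: return Op_VecX;   // 4 slots (XMM)
    case 32: return Op_VecY;   // 8 slots (YMM, the new 256-bit case)
    }
    ShouldNotReachHere();
    return 0;
  }

misaligned_vectors_ok() lets each platform report whether unaligned vector memory accesses are acceptable (cf. the AlignVector flag added in c2_globals.hpp).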
diff --git a/src/share/vm/opto/memnode.cpp b/src/share/vm/opto/memnode.cpp
index e28e09226..799c2ba14 100644
--- a/src/share/vm/opto/memnode.cpp
+++ b/src/share/vm/opto/memnode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1543,6 +1543,7 @@ const Type *LoadNode::Value( PhaseTransform *phase ) const {
// had an original form like p1:(AddP x x (LShiftL quux 3)), where the
// expression (LShiftL quux 3) independently optimized to the constant 8.
if ((t->isa_int() == NULL) && (t->isa_long() == NULL)
+ && (_type->isa_vect() == NULL)
&& Opcode() != Op_LoadKlass && Opcode() != Op_LoadNKlass) {
// t might actually be lower than _type, if _type is a unique
// concrete subclass of abstract class t.
diff --git a/src/share/vm/opto/mulnode.hpp b/src/share/vm/opto/mulnode.hpp
index 11cc77145..c3adc433f 100644
--- a/src/share/vm/opto/mulnode.hpp
+++ b/src/share/vm/opto/mulnode.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -41,7 +41,9 @@ class PhaseTransform;
class MulNode : public Node {
virtual uint hash() const;
public:
- MulNode( Node *in1, Node *in2 ): Node(0,in1,in2) {}
+ MulNode( Node *in1, Node *in2 ): Node(0,in1,in2) {
+ init_class_id(Class_Mul);
+ }
// Handle algebraic identities here. If we have an identity, return the Node
// we are equivalent to. We look for "add of zero" as an identity.
diff --git a/src/share/vm/opto/node.cpp b/src/share/vm/opto/node.cpp
index 4bd752fed..2cb44ad0e 100644
--- a/src/share/vm/opto/node.cpp
+++ b/src/share/vm/opto/node.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1576,6 +1576,9 @@ void Node::dump() const {
} else {
tty->print("no type");
}
+ } else if (t->isa_vect() && this->is_MachSpillCopy()) {
+ // Dump MachSpillcopy vector type.
+ t->dump();
}
if (is_new) {
debug_only(dump_orig(debug_orig()));
diff --git a/src/share/vm/opto/node.hpp b/src/share/vm/opto/node.hpp
index 5ddae2f0b..f63a967b6 100644
--- a/src/share/vm/opto/node.hpp
+++ b/src/share/vm/opto/node.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -100,6 +100,7 @@ class MemBarNode;
class MemBarStoreStoreNode;
class MemNode;
class MergeMemNode;
+class MulNode;
class MultiNode;
class MultiBranchNode;
class NeverBranchNode;
@@ -133,8 +134,8 @@ class Type;
class TypeNode;
class UnlockNode;
class VectorNode;
-class VectorLoadNode;
-class VectorStoreNode;
+class LoadVectorNode;
+class StoreVectorNode;
class VectorSet;
typedef void (*NFunc)(Node&,void*);
extern "C" {
@@ -609,9 +610,9 @@ public:
DEFINE_CLASS_ID(Mem, Node, 4)
DEFINE_CLASS_ID(Load, Mem, 0)
- DEFINE_CLASS_ID(VectorLoad, Load, 0)
+ DEFINE_CLASS_ID(LoadVector, Load, 0)
DEFINE_CLASS_ID(Store, Mem, 1)
- DEFINE_CLASS_ID(VectorStore, Store, 0)
+ DEFINE_CLASS_ID(StoreVector, Store, 0)
DEFINE_CLASS_ID(LoadStore, Mem, 2)
DEFINE_CLASS_ID(Region, Node, 5)
@@ -629,8 +630,9 @@ public:
DEFINE_CLASS_ID(AddP, Node, 9)
DEFINE_CLASS_ID(BoxLock, Node, 10)
DEFINE_CLASS_ID(Add, Node, 11)
- DEFINE_CLASS_ID(Vector, Node, 12)
- DEFINE_CLASS_ID(ClearArray, Node, 13)
+ DEFINE_CLASS_ID(Mul, Node, 12)
+ DEFINE_CLASS_ID(Vector, Node, 13)
+ DEFINE_CLASS_ID(ClearArray, Node, 14)
_max_classes = ClassMask_ClearArray
};
@@ -752,6 +754,7 @@ public:
DEFINE_CLASS_QUERY(MemBar)
DEFINE_CLASS_QUERY(MemBarStoreStore)
DEFINE_CLASS_QUERY(MergeMem)
+ DEFINE_CLASS_QUERY(Mul)
DEFINE_CLASS_QUERY(Multi)
DEFINE_CLASS_QUERY(MultiBranch)
DEFINE_CLASS_QUERY(Parm)
@@ -767,8 +770,8 @@ public:
DEFINE_CLASS_QUERY(Sub)
DEFINE_CLASS_QUERY(Type)
DEFINE_CLASS_QUERY(Vector)
- DEFINE_CLASS_QUERY(VectorLoad)
- DEFINE_CLASS_QUERY(VectorStore)
+ DEFINE_CLASS_QUERY(LoadVector)
+ DEFINE_CLASS_QUERY(StoreVector)
DEFINE_CLASS_QUERY(Unlock)
#undef DEFINE_CLASS_QUERY
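The renamed class IDs come with the usual generated queries, so later passes can test for the new node kinds without switching on size-specific opcodes; a hedged usage sketch built only from the declarations above:

  // Illustrative only; relies on the DEFINE_CLASS_QUERY accessors generated above.
  static bool is_vector_mem_access(const Node* n) {
    if (n->is_LoadVector() || n->is_StoreVector()) {
      // Element type and length now live in the node's TypeVect bottom type
      // rather than in a size-specific opcode (Load4I, Store8B, ...).
      return n->bottom_type()->isa_vect() != NULL;
    }
    return false;
  }

The new Class_Mul id serves the same purpose for superword, which can now recognize any multiply flavor via n->is_Mul().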
diff --git a/src/share/vm/opto/opcodes.cpp b/src/share/vm/opto/opcodes.cpp
index 58489db0a..83310568b 100644
--- a/src/share/vm/opto/opcodes.cpp
+++ b/src/share/vm/opto/opcodes.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -38,6 +38,10 @@ const char *NodeClassNames[] = {
"RegD",
"RegL",
"RegFlags",
+ "VecS",
+ "VecD",
+ "VecX",
+ "VecY",
"_last_machine_leaf",
#include "classes.hpp"
"_last_class_name",
diff --git a/src/share/vm/opto/opcodes.hpp b/src/share/vm/opto/opcodes.hpp
index 9eb5b8a7c..4baec83fe 100644
--- a/src/share/vm/opto/opcodes.hpp
+++ b/src/share/vm/opto/opcodes.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -36,6 +36,10 @@ enum Opcodes {
macro(RegF) // Machine float register
macro(RegD) // Machine double register
macro(RegL) // Machine long register
+ macro(VecS) // Machine vectors register
+ macro(VecD) // Machine vectord register
+ macro(VecX) // Machine vectorx register
+ macro(VecY) // Machine vectory register
macro(RegFlags) // Machine flags register
_last_machine_leaf, // Split between regular opcodes and machine
#include "classes.hpp"
diff --git a/src/share/vm/opto/postaloc.cpp b/src/share/vm/opto/postaloc.cpp
index 1a7553bc5..8e24d353d 100644
--- a/src/share/vm/opto/postaloc.cpp
+++ b/src/share/vm/opto/postaloc.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,13 +27,15 @@
#include "opto/chaitin.hpp"
#include "opto/machnode.hpp"
-// see if this register kind does not requires two registers
-static bool is_single_register(uint x) {
-#ifdef _LP64
- return (x != Op_RegD && x != Op_RegL && x != Op_RegP);
-#else
- return (x != Op_RegD && x != Op_RegL);
-#endif
+// See if this register (or register pair, or vector) already contains the value.
+static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs,
+ Node_List& value) {
+ for (int i = 0; i < n_regs; i++) {
+ OptoReg::Name nreg = OptoReg::add(reg,-i);
+ if (value[nreg] != val)
+ return false;
+ }
+ return true;
}
//---------------------------may_be_copy_of_callee-----------------------------
@@ -167,9 +169,11 @@ int PhaseChaitin::use_prior_register( Node *n, uint idx, Node *def, Block *curre
const RegMask &use_mask = n->in_RegMask(idx);
bool can_use = ( RegMask::can_represent(def_reg) ? (use_mask.Member(def_reg) != 0)
: (use_mask.is_AllStack() != 0));
- // Check for a copy to or from a misaligned pair.
- can_use = can_use && !use_mask.is_misaligned_Pair() && !def_lrg.mask().is_misaligned_Pair();
-
+ if (!RegMask::is_vector(def->ideal_reg())) {
+ // Check for a copy to or from a misaligned pair.
+ // This is a workaround for SPARC, which can have misaligned pairs.
+ can_use = can_use && !use_mask.is_misaligned_pair() && !def_lrg.mask().is_misaligned_pair();
+ }
if (!can_use)
return 0;
@@ -263,18 +267,16 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
val = skip_copies(n->in(k));
}
- if( val == x ) return blk_adjust; // No progress?
+ if (val == x) return blk_adjust; // No progress?
- bool single = is_single_register(val->ideal_reg());
+ int n_regs = RegMask::num_registers(val->ideal_reg());
uint val_idx = n2lidx(val);
OptoReg::Name val_reg = lrgs(val_idx).reg();
// See if it happens to already be in the correct register!
// (either Phi's direct register, or the common case of the name
// never-clobbered original-def register)
- if( value[val_reg] == val &&
- // Doubles check both halves
- ( single || value[val_reg-1] == val ) ) {
+ if (register_contains_value(val, val_reg, n_regs, value)) {
blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd);
if( n->in(k) == regnd[val_reg] ) // Success! Quit trying
return blk_adjust;
@@ -306,7 +308,7 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
}
Node *vv = value[reg];
- if( !single ) { // Doubles check for aligned-adjacent pair
+ if (n_regs > 1) { // Doubles check for aligned-adjacent pair
if( (reg&1)==0 ) continue; // Wrong half of a pair
if( vv != value[reg-1] ) continue; // Not a complete pair
}
@@ -526,8 +528,9 @@ void PhaseChaitin::post_allocate_copy_removal() {
if( pidx ) {
value.map(preg,phi);
regnd.map(preg,phi);
- OptoReg::Name preg_lo = OptoReg::add(preg,-1);
- if( !is_single_register(phi->ideal_reg()) ) {
+ int n_regs = RegMask::num_registers(phi->ideal_reg());
+ for (int l = 1; l < n_regs; l++) {
+ OptoReg::Name preg_lo = OptoReg::add(preg,-l);
value.map(preg_lo,phi);
regnd.map(preg_lo,phi);
}
@@ -568,13 +571,17 @@ void PhaseChaitin::post_allocate_copy_removal() {
value.map(ureg,valdef); // record improved reaching-def info
regnd.map(ureg, def);
// Record other half of doubles
- OptoReg::Name ureg_lo = OptoReg::add(ureg,-1);
- if( !is_single_register(def->ideal_reg()) &&
- ( !RegMask::can_represent(ureg_lo) ||
- lrgs(useidx).mask().Member(ureg_lo) ) && // Nearly always adjacent
- !value[ureg_lo] ) {
- value.map(ureg_lo,valdef); // record improved reaching-def info
- regnd.map(ureg_lo, def);
+ uint def_ideal_reg = def->ideal_reg();
+ int n_regs = RegMask::num_registers(def_ideal_reg);
+ bool is_vec = RegMask::is_vector(def_ideal_reg);
+ for (int l = 1; l < n_regs; l++) {
+ OptoReg::Name ureg_lo = OptoReg::add(ureg,-l);
+ if (!value[ureg_lo] &&
+ (!RegMask::can_represent(ureg_lo) ||
+ lrgs(useidx).mask().Member(ureg_lo))) { // Nearly always adjacent
+ value.map(ureg_lo,valdef); // record improved reaching-def info
+ regnd.map(ureg_lo, def);
+ }
}
}
}
@@ -607,7 +614,8 @@ void PhaseChaitin::post_allocate_copy_removal() {
}
uint n_ideal_reg = n->ideal_reg();
- if( is_single_register(n_ideal_reg) ) {
+ int n_regs = RegMask::num_registers(n_ideal_reg);
+ if (n_regs == 1) {
// If Node 'n' does not change the value mapped by the register,
// then 'n' is a useless copy. Do not update the register->node
// mapping so 'n' will go dead.
@@ -625,6 +633,25 @@ void PhaseChaitin::post_allocate_copy_removal() {
assert( n->is_Copy(), "" );
j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
}
+ } else if (RegMask::is_vector(n_ideal_reg)) {
+ // If Node 'n' does not change the value mapped by the register,
+ // then 'n' is a useless copy. Do not update the register->node
+ // mapping so 'n' will go dead.
+ if (!register_contains_value(val, nreg, n_regs, value)) {
+ // Update the mapping: record new Node defined by the register
+ regnd.map(nreg,n);
+ // Update mapping for defined *value*, which is the defined
+ // Node after skipping all copies.
+ value.map(nreg,val);
+ for (int l = 1; l < n_regs; l++) {
+ OptoReg::Name nreg_lo = OptoReg::add(nreg,-l);
+ regnd.map(nreg_lo, n );
+ value.map(nreg_lo,val);
+ }
+ } else if (n->is_Copy()) {
+ // Note: a vector can't be a constant and can't be a copy of a callee.
+ j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
+ }
} else {
// If the value occupies a register pair, record same info
// in both registers.
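The downward walks above (OptoReg::add(reg, -l)) follow from the convention, restated in chaitin.cpp, that a live range records the HIGHEST slot of its set. A hypothetical helper condensing the pattern used for the phi and def maps, assuming an Op_VecY value:

  // Illustrative only: record one node in every slot of its aligned set.
  static void map_whole_set(OptoReg::Name reg, Node* n, Node_List& value, Node_List& regnd) {
    int n_regs = RegMask::num_registers(Op_VecY);   // 8 adjacent 32-bit slots for 256 bits
    value.map(reg, n);                              // highest slot of the set
    regnd.map(reg, n);
    for (int l = 1; l < n_regs; l++) {
      OptoReg::Name lo = OptoReg::add(reg, -l);     // the lower slots of the same set
      value.map(lo, n);
      regnd.map(lo, n);
    }
  }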
diff --git a/src/share/vm/opto/reg_split.cpp b/src/share/vm/opto/reg_split.cpp
index 63a11fe8f..cae363bea 100644
--- a/src/share/vm/opto/reg_split.cpp
+++ b/src/share/vm/opto/reg_split.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -74,12 +74,13 @@ Node *PhaseChaitin::get_spillcopy_wide( Node *def, Node *use, uint uidx ) {
const RegMask *w_i_mask = w_mask->overlap( *i_mask ) ? w_mask : i_mask;
const RegMask *w_o_mask;
+ int num_regs = RegMask::num_registers(ireg);
+ bool is_vect = RegMask::is_vector(ireg);
if( w_mask->overlap( *o_mask ) && // Overlap AND
- ((ireg != Op_RegL && ireg != Op_RegD // Single use or aligned
-#ifdef _LP64
- && ireg != Op_RegP
-#endif
- ) || o_mask->is_aligned_Pairs()) ) {
+ ((num_regs == 1) // Single use or aligned
+ || is_vect // or vector
+ || !is_vect && o_mask->is_aligned_pairs()) ) {
+ assert(!is_vect || o_mask->is_aligned_sets(num_regs), "vectors are aligned");
// Don't come here for mis-aligned doubles
w_o_mask = w_mask;
} else { // wide ideal mask does not overlap with o_mask
@@ -400,15 +401,17 @@ bool PhaseChaitin::is_high_pressure( Block *b, LRG *lrg, uint insidx ) {
// CNC - Turned off 7/8/99, causes too much spilling
// if( lrg->_is_bound ) return false;
+ // Use float pressure numbers for vectors.
+ bool is_float_or_vector = lrg->_is_float || lrg->_is_vector;
// Not yet reached the high-pressure cutoff point, so low pressure
- uint hrp_idx = lrg->_is_float ? b->_fhrp_index : b->_ihrp_index;
+ uint hrp_idx = is_float_or_vector ? b->_fhrp_index : b->_ihrp_index;
if( insidx < hrp_idx ) return false;
// Register pressure for the block as a whole depends on reg class
- int block_pres = lrg->_is_float ? b->_freg_pressure : b->_reg_pressure;
+ int block_pres = is_float_or_vector ? b->_freg_pressure : b->_reg_pressure;
// Bound live ranges will split at the binding points first;
// Intermediate splits should assume the live range's register set
// got "freed up" and that num_regs will become INT_PRESSURE.
- int bound_pres = lrg->_is_float ? FLOATPRESSURE : INTPRESSURE;
+ int bound_pres = is_float_or_vector ? FLOATPRESSURE : INTPRESSURE;
// Effective register pressure limit.
int lrg_pres = (lrg->get_invalid_mask_size() > lrg->num_regs())
? (lrg->get_invalid_mask_size() >> (lrg->num_regs()-1)) : bound_pres;
@@ -794,12 +797,15 @@ uint PhaseChaitin::Split( uint maxlrg ) {
if( i < n->req() ) break;
insert_point--;
}
+ uint orig_eidx = b->end_idx();
maxlrg = split_DEF( n1, b, insert_point, maxlrg, Reachblock, debug_defs, splits, slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
- insidx++;
+ // Spill of NULL check mem op goes into the following block.
+ if (b->end_idx() > orig_eidx)
+ insidx++;
}
// This is a new DEF, so update UP
UPblock[slidx] = false;
@@ -960,7 +966,7 @@ uint PhaseChaitin::Split( uint maxlrg ) {
// Grab register mask info
const RegMask &dmask = def->out_RegMask();
const RegMask &umask = n->in_RegMask(inpidx);
-
+ bool is_vect = RegMask::is_vector(def->ideal_reg());
assert(inpidx < oopoff, "cannot use-split oop map info");
bool dup = UPblock[slidx];
@@ -972,7 +978,7 @@ uint PhaseChaitin::Split( uint maxlrg ) {
if( !umask.is_AllStack() &&
(int)umask.Size() <= lrgs(useidx).num_regs() &&
(!def->rematerialize() ||
- umask.is_misaligned_Pair())) {
+ !is_vect && umask.is_misaligned_pair())) {
// These need a Split regardless of overlap or pressure
// SPLIT - NO DEF - NO CISC SPILL
maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
@@ -1123,10 +1129,12 @@ uint PhaseChaitin::Split( uint maxlrg ) {
// Grab UP info for DEF
const RegMask &dmask = n->out_RegMask();
bool defup = dmask.is_UP();
+ int ireg = n->ideal_reg();
+ bool is_vect = RegMask::is_vector(ireg);
// Only split at Def if this is a HRP block or bound (and spilled once)
if( !n->rematerialize() &&
- (((dmask.is_bound1() || dmask.is_bound2() || dmask.is_misaligned_Pair()) &&
- (deflrg._direct_conflict || deflrg._must_spill)) ||
+ (((dmask.is_bound(ireg) || !is_vect && dmask.is_misaligned_pair()) &&
+ (deflrg._direct_conflict || deflrg._must_spill)) ||
// Check for LRG being up in a register and we are inside a high
// pressure area. Spill it down immediately.
(defup && is_high_pressure(b,&deflrg,insidx))) ) {
diff --git a/src/share/vm/opto/regmask.cpp b/src/share/vm/opto/regmask.cpp
index ce220f01b..59413388c 100644
--- a/src/share/vm/opto/regmask.cpp
+++ b/src/share/vm/opto/regmask.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -129,11 +129,34 @@ const RegMask RegMask::Empty(
0
);
+//=============================================================================
+bool RegMask::is_vector(uint ireg) {
+ return (ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY);
+}
+
+int RegMask::num_registers(uint ireg) {
+ switch(ireg) {
+ case Op_VecY:
+ return 8;
+ case Op_VecX:
+ return 4;
+ case Op_VecD:
+ case Op_RegD:
+ case Op_RegL:
+#ifdef _LP64
+ case Op_RegP:
+#endif
+ return 2;
+ }
+ // Op_VecS and the remaining ideal registers.
+ return 1;
+}
+
//------------------------------find_first_pair--------------------------------
// Find the lowest-numbered register pair in the mask. Return the
// HIGHEST register number in the pair, or BAD if no pairs.
OptoReg::Name RegMask::find_first_pair() const {
- VerifyPairs();
+ verify_pairs();
for( int i = 0; i < RM_SIZE; i++ ) {
if( _A[i] ) { // Found some bits
int bit = _A[i] & -_A[i]; // Extract low bit
@@ -146,30 +169,30 @@ OptoReg::Name RegMask::find_first_pair() const {
//------------------------------ClearToPairs-----------------------------------
// Clear out partial bits; leave only bit pairs
-void RegMask::ClearToPairs() {
+void RegMask::clear_to_pairs() {
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair
bits |= (bits>>1); // Smear 1 hi-bit into a pair
_A[i] = bits;
}
- VerifyPairs();
+ verify_pairs();
}
//------------------------------SmearToPairs-----------------------------------
// Smear out partial bits; leave only bit pairs
-void RegMask::SmearToPairs() {
+void RegMask::smear_to_pairs() {
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
bits |= ((bits & 0x55555555)<<1); // Smear lo bit hi per pair
bits |= ((bits & 0xAAAAAAAA)>>1); // Smear hi bit lo per pair
_A[i] = bits;
}
- VerifyPairs();
+ verify_pairs();
}
//------------------------------is_aligned_pairs-------------------------------
-bool RegMask::is_aligned_Pairs() const {
+bool RegMask::is_aligned_pairs() const {
// Assert that the register mask contains only bit pairs.
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
@@ -204,7 +227,7 @@ int RegMask::is_bound1() const {
//------------------------------is_bound2--------------------------------------
// Return TRUE if the mask contains an adjacent pair of bits and no other bits.
-int RegMask::is_bound2() const {
+int RegMask::is_bound_pair() const {
if( is_AllStack() ) return false;
int bit = -1; // Set to hold the one bit allowed
@@ -226,6 +249,132 @@ int RegMask::is_bound2() const {
return true;
}
+static int low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 };
+//------------------------------find_first_set---------------------------------
+// Find the lowest-numbered register set in the mask. Return the
+// HIGHEST register number in the set, or BAD if no sets.
+// Works also for size 1.
+OptoReg::Name RegMask::find_first_set(int size) const {
+ verify_sets(size);
+ for (int i = 0; i < RM_SIZE; i++) {
+ if (_A[i]) { // Found some bits
+ int bit = _A[i] & -_A[i]; // Extract low bit
+      // Convert to bit number, return hi bit in set
+ return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1));
+ }
+ }
+ return OptoReg::Bad;
+}
+
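Because verify_sets() guarantees the mask holds only aligned sets, the lowest set's highest member is simply the lowest set bit plus (size - 1). A stand-alone, single-word check with assumed values:

    #include <cassert>

    int main() {
      unsigned word = 0x00000F00u;   // one aligned 4-bit set: registers 8..11
      int size = 4;
      int low = 0;
      while (((word >> low) & 1u) == 0) low++;   // index of the lowest set bit
      assert(low == 8);
      assert(low + (size - 1) == 11);            // what find_first_set() reports
      return 0;
    }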
+//------------------------------clear_to_sets----------------------------------
+// Clear out partial bits; leave only aligned adjacent bit sets
+void RegMask::clear_to_sets(int size) {
+ if (size == 1) return;
+ assert(2 <= size && size <= 8, "update low bits table");
+ assert(is_power_of_2(size), "sanity");
+ int low_bits_mask = low_bits[size>>2];
+ for (int i = 0; i < RM_SIZE; i++) {
+ int bits = _A[i];
+ int sets = (bits & low_bits_mask);
+ for (int j = 1; j < size; j++) {
+ sets = (bits & (sets<<1)); // filter bits which produce whole sets
+ }
+ sets |= (sets>>1); // Smear 1 hi-bit into a set
+ if (size > 2) {
+ sets |= (sets>>2); // Smear 2 hi-bits into a set
+ if (size > 4) {
+ sets |= (sets>>4); // Smear 4 hi-bits into a set
+ }
+ }
+ _A[i] = sets;
+ }
+ verify_sets(size);
+}
+
+//------------------------------smear_to_sets----------------------------------
+// Smear out partial bits to aligned adjacent bit sets
+void RegMask::smear_to_sets(int size) {
+ if (size == 1) return;
+ assert(2 <= size && size <= 8, "update low bits table");
+ assert(is_power_of_2(size), "sanity");
+ int low_bits_mask = low_bits[size>>2];
+ for (int i = 0; i < RM_SIZE; i++) {
+ int bits = _A[i];
+ int sets = 0;
+ for (int j = 0; j < size; j++) {
+ sets |= (bits & low_bits_mask); // collect partial bits
+ bits = bits>>1;
+ }
+ sets |= (sets<<1); // Smear 1 lo-bit into a set
+ if (size > 2) {
+ sets |= (sets<<2); // Smear 2 lo-bits into a set
+ if (size > 4) {
+ sets |= (sets<<4); // Smear 4 lo-bits into a set
+ }
+ }
+ _A[i] = sets;
+ }
+ verify_sets(size);
+}
+
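A stand-alone, single-word illustration of what the two routines above compute for size == 4 (the same bit tricks re-implemented outside HotSpot purely for illustration): clear_to_sets() drops any 4-bit group that is not completely populated, while smear_to_sets() fills any group that has at least one bit.

    #include <cassert>
    #include <cstdint>

    static uint32_t clear_to_sets4(uint32_t bits) {
      const uint32_t low = 0x11111111u;       // one candidate bit per 4-bit set
      uint32_t sets = bits & low;
      for (int j = 1; j < 4; j++)
        sets = bits & (sets << 1);            // survives only if the whole set is present
      sets |= sets >> 1;                      // smear the surviving hi-bit
      sets |= sets >> 2;                      // back over all 4 positions
      return sets;
    }

    static uint32_t smear_to_sets4(uint32_t bits) {
      const uint32_t low = 0x11111111u;
      uint32_t sets = 0;
      for (int j = 0; j < 4; j++) {           // collect partial bits per set
        sets |= bits & low;
        bits >>= 1;
      }
      sets |= sets << 1;                      // expand each marked set
      sets |= sets << 2;
      return sets;
    }

    int main() {
      // Bits 0..2 form a partial set, bits 4..7 a full one.
      assert(clear_to_sets4(0x000000F7u) == 0x000000F0u);
      assert(smear_to_sets4(0x000000F7u) == 0x000000FFu);
      return 0;
    }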
+//------------------------------is_aligned_sets-------------------------------
+bool RegMask::is_aligned_sets(int size) const {
+ if (size == 1) return true;
+ assert(2 <= size && size <= 8, "update low bits table");
+ assert(is_power_of_2(size), "sanity");
+ int low_bits_mask = low_bits[size>>2];
+ // Assert that the register mask contains only bit sets.
+ for (int i = 0; i < RM_SIZE; i++) {
+ int bits = _A[i];
+    while (bits) {          // Check bits for sets
+      int bit = bits & -bits; // Extract low bit
+      // A low bit that is not at a set-aligned position means the set is misaligned.
+ if ((bit & low_bits_mask) == 0) return false;
+ // Do extra work since (bit << size) may overflow.
+ int hi_bit = bit << (size-1); // high bit
+ int set = hi_bit + ((hi_bit-1) & ~(bit-1));
+ // Check for aligned adjacent bits in this set
+ if ((bits & set) != set) return false;
+ bits -= set; // Remove this set
+ }
+ }
+ return true;
+}
+
+//------------------------------is_bound_set-----------------------------------
+// Return TRUE if the mask contains one adjacent set of bits and no other bits.
+// Works also for size 1.
+int RegMask::is_bound_set(int size) const {
+ if( is_AllStack() ) return false;
+ assert(1 <= size && size <= 8, "update low bits table");
+ int bit = -1; // Set to hold the one bit allowed
+ for (int i = 0; i < RM_SIZE; i++) {
+ if (_A[i] ) { // Found some bits
+ if (bit != -1)
+ return false; // Already had bits, so fail
+ bit = _A[i] & -_A[i]; // Extract 1 bit from mask
+ int hi_bit = bit << (size-1); // high bit
+ if (hi_bit != 0) { // Bit set stays in same word?
+ int set = hi_bit + ((hi_bit-1) & ~(bit-1));
+ if (set != _A[i])
+ return false; // Require adjacent bit set and no more bits
+      } else { // Else it's a split-set case
+ if (((-1) & ~(bit-1)) != _A[i])
+ return false; // Found many bits, so fail
+ i++; // Skip iteration forward and check high part
+ assert(size <= 8, "update next code");
+        // The lower 24 bits should be 0 since it is a split case and size <= 8.
+ int set = bit>>24;
+ set = set & -set; // Remove sign extension.
+ set = (((set << size) - 1) >> 8);
+ if (_A[i] != set) return false; // Require 1 lo bit in next word
+ }
+ }
+ }
+ // True for both the empty mask and for a bit set
+ return true;
+}
+
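A reduced sketch of the bound-set test for the common case where the set does not straddle a 32-bit word (the split-word branch above handles the rest); the values are assumptions for illustration:

    #include <cassert>
    #include <cstdint>

    // Empty mask, or exactly one run of `size` adjacent bits within one word.
    static bool is_bound_set_1word(uint32_t word, int size) {
      if (word == 0) return true;                    // empty mask counts as bound
      uint32_t bit = word & (0u - word);             // lowest set bit
      uint64_t set = ((uint64_t)bit << size) - bit;  // `size` adjacent bits from `bit`
      return (set >> 32) == 0 && word == (uint32_t)set;
    }

    int main() {
      assert(is_bound_set_1word(0x000000F0u, 4));    // one 4-bit run: bound
      assert(!is_bound_set_1word(0x000000F1u, 4));   // stray extra bit: not bound
      assert(is_bound_set_1word(0u, 4));             // empty mask
      return 0;
    }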
//------------------------------is_UP------------------------------------------
// UP means register only, Register plus stack, or stack only is DOWN
bool RegMask::is_UP() const {
diff --git a/src/share/vm/opto/regmask.hpp b/src/share/vm/opto/regmask.hpp
index e50ff84ca..e4c31dcef 100644
--- a/src/share/vm/opto/regmask.hpp
+++ b/src/share/vm/opto/regmask.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -113,7 +113,11 @@ public:
// the controlling alignment constraint. Note that this alignment
// requirement is internal to the allocator, and independent of any
// particular platform.
- enum { SlotsPerLong = 2 };
+ enum { SlotsPerLong = 2,
+ SlotsPerVecS = 1,
+ SlotsPerVecD = 2,
+ SlotsPerVecX = 4,
+ SlotsPerVecY = 8 };
// A constructor only used by the ADLC output. All mask fields are filled
// in directly. Calls to this look something like RM(1,2,3,4);
@@ -193,20 +197,53 @@ public:
OptoReg::Name find_first_pair() const;
// Clear out partial bits; leave only aligned adjacent bit pairs.
- void ClearToPairs();
+ void clear_to_pairs();
// Smear out partial bits; leave only aligned adjacent bit pairs.
- void SmearToPairs();
+ void smear_to_pairs();
// Verify that the mask contains only aligned adjacent bit pairs
- void VerifyPairs() const { assert( is_aligned_Pairs(), "mask is not aligned, adjacent pairs" ); }
+ void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); }
// Test that the mask contains only aligned adjacent bit pairs
- bool is_aligned_Pairs() const;
+ bool is_aligned_pairs() const;
// mask is a pair of misaligned registers
- bool is_misaligned_Pair() const { return Size()==2 && !is_aligned_Pairs();}
+ bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); }
// Test for single register
int is_bound1() const;
// Test for a single adjacent pair
- int is_bound2() const;
+ int is_bound_pair() const;
+  // Test for a single adjacent set of an ideal register's size.
+ int is_bound(uint ireg) const {
+ if (is_vector(ireg)) {
+ if (is_bound_set(num_registers(ireg)))
+ return true;
+ } else if (is_bound1() || is_bound_pair()) {
+ return true;
+ }
+ return false;
+ }
+
+ // Find the lowest-numbered register set in the mask. Return the
+ // HIGHEST register number in the set, or BAD if no sets.
+ // Assert that the mask contains only bit sets.
+ OptoReg::Name find_first_set(int size) const;
+
+ // Clear out partial bits; leave only aligned adjacent bit sets of size.
+ void clear_to_sets(int size);
+ // Smear out partial bits to aligned adjacent bit sets.
+ void smear_to_sets(int size);
+ // Verify that the mask contains only aligned adjacent bit sets
+ void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); }
+ // Test that the mask contains only aligned adjacent bit sets
+ bool is_aligned_sets(int size) const;
+
+ // mask is a set of misaligned registers
+ bool is_misaligned_set(int size) const { return (int)Size()==size && !is_aligned_sets(size);}
+
+ // Test for a single adjacent set
+ int is_bound_set(int size) const;
+
+ static bool is_vector(uint ireg);
+ static int num_registers(uint ireg);
// Fast overlap test. Non-zero if any registers in common.
int overlap( const RegMask &rm ) const {
@@ -280,9 +317,15 @@ public:
static bool can_represent(OptoReg::Name reg) {
// NOTE: -1 in computation reflects the usage of the last
- // bit of the regmask as an infinite stack flag.
+    // bit of the regmask as an infinite stack flag; see can_represent_arg()
+    // below for the extra slack that keeps the mask aligned for VecY.
return (int)reg < (int)(CHUNK_SIZE-1);
}
+ static bool can_represent_arg(OptoReg::Name reg) {
+ // NOTE: -SlotsPerVecY in computation reflects the need
+ // to keep mask aligned for largest value (VecY).
+ return (int)reg < (int)(CHUNK_SIZE-SlotsPerVecY);
+ }
};
// Do not use this constant directly in client code!
diff --git a/src/share/vm/opto/superword.cpp b/src/share/vm/opto/superword.cpp
index ae46e7f17..78db4b5ba 100644
--- a/src/share/vm/opto/superword.cpp
+++ b/src/share/vm/opto/superword.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -67,6 +67,10 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
//------------------------------transform_loop---------------------------
void SuperWord::transform_loop(IdealLoopTree* lpt) {
+ assert(UseSuperWord, "should be");
+ // Do vectors exist on this architecture?
+ if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
+
assert(lpt->_head->is_CountedLoop(), "must be");
CountedLoopNode *cl = lpt->_head->as_CountedLoop();
@@ -89,15 +93,12 @@ void SuperWord::transform_loop(IdealLoopTree* lpt) {
Node *pre_opaq1 = pre_end->limit();
if (pre_opaq1->Opcode() != Op_Opaque1) return;
- // Do vectors exist on this architecture?
- if (vector_width_in_bytes() == 0) return;
-
init(); // initialize data structures
set_lpt(lpt);
set_lp(cl);
- // For now, define one block which is the entire loop body
+ // For now, define one block which is the entire loop body
set_bb(cl);
assert(_packset.length() == 0, "packset must be empty");
@@ -177,7 +178,7 @@ void SuperWord::find_adjacent_refs() {
Node_List memops;
for (int i = 0; i < _block.length(); i++) {
Node* n = _block.at(i);
- if (n->is_Mem() && in_bb(n) &&
+ if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&
is_java_primitive(n->as_Mem()->memory_type())) {
int align = memory_alignment(n->as_Mem(), 0);
if (align != bottom_align) {
@@ -185,54 +186,130 @@ void SuperWord::find_adjacent_refs() {
}
}
}
- if (memops.size() == 0) return;
- // Find a memory reference to align to. The pre-loop trip count
- // is modified to align this reference to a vector-aligned address
- find_align_to_ref(memops);
- if (align_to_ref() == NULL) return;
+ Node_List align_to_refs;
+ int best_iv_adjustment = 0;
+ MemNode* best_align_to_mem_ref = NULL;
- SWPointer align_to_ref_p(align_to_ref(), this);
- int offset = align_to_ref_p.offset_in_bytes();
- int scale = align_to_ref_p.scale_in_bytes();
- int vw = vector_width_in_bytes();
- int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
- int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
+ while (memops.size() != 0) {
+ // Find a memory reference to align to.
+ MemNode* mem_ref = find_align_to_ref(memops);
+ if (mem_ref == NULL) break;
+ align_to_refs.push(mem_ref);
+ int iv_adjustment = get_iv_adjustment(mem_ref);
-#ifndef PRODUCT
- if (TraceSuperWord)
- tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d scale = %d iv_stride = %d",
- offset, iv_adjustment, align_to_ref_p.memory_size(), align_to_ref_p.scale_in_bytes(), iv_stride());
-#endif
+ if (best_align_to_mem_ref == NULL) {
+        // Record the memory reference that is best among all memory operations
+        // to use for alignment. The pre-loop trip count is modified to align
+ // this reference to a vector-aligned address.
+ best_align_to_mem_ref = mem_ref;
+ best_iv_adjustment = iv_adjustment;
+ }
- // Set alignment relative to "align_to_ref"
- for (int i = memops.size() - 1; i >= 0; i--) {
- MemNode* s = memops.at(i)->as_Mem();
- SWPointer p2(s, this);
- if (p2.comparable(align_to_ref_p)) {
- int align = memory_alignment(s, iv_adjustment);
- set_alignment(s, align);
- } else {
- memops.remove(i);
+ SWPointer align_to_ref_p(mem_ref, this);
+ // Set alignment relative to "align_to_ref" for all related memory operations.
+ for (int i = memops.size() - 1; i >= 0; i--) {
+ MemNode* s = memops.at(i)->as_Mem();
+ if (isomorphic(s, mem_ref)) {
+ SWPointer p2(s, this);
+ if (p2.comparable(align_to_ref_p)) {
+ int align = memory_alignment(s, iv_adjustment);
+ set_alignment(s, align);
+ }
+ }
}
- }
- // Create initial pack pairs of memory operations
- for (uint i = 0; i < memops.size(); i++) {
- Node* s1 = memops.at(i);
- for (uint j = 0; j < memops.size(); j++) {
- Node* s2 = memops.at(j);
- if (s1 != s2 && are_adjacent_refs(s1, s2)) {
+ // Create initial pack pairs of memory operations for which
+ // alignment is set and vectors will be aligned.
+ bool create_pack = true;
+ if (memory_alignment(mem_ref, best_iv_adjustment) != 0) {
+ if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
+ // Can't allow vectorization of unaligned memory accesses with the
+        // same type, since they could be overlapping accesses to the same array.
+ create_pack = false;
+ } else {
+ // Allow independent (different type) unaligned memory operations
+ // if HW supports them.
+ if (!Matcher::misaligned_vectors_ok()) {
+ create_pack = false;
+ } else {
+ // Check if packs of the same memory type but
+ // with a different alignment were created before.
+ for (uint i = 0; i < align_to_refs.size(); i++) {
+ MemNode* mr = align_to_refs.at(i)->as_Mem();
+ if (same_velt_type(mr, mem_ref) &&
+ memory_alignment(mr, iv_adjustment) != 0)
+ create_pack = false;
+ }
+ }
+ }
+ }
+ if (create_pack) {
+ for (uint i = 0; i < memops.size(); i++) {
+ Node* s1 = memops.at(i);
int align = alignment(s1);
- if (stmts_can_pack(s1, s2, align)) {
- Node_List* pair = new Node_List();
- pair->push(s1);
- pair->push(s2);
- _packset.append(pair);
+ if (align == top_align) continue;
+ for (uint j = 0; j < memops.size(); j++) {
+ Node* s2 = memops.at(j);
+ if (alignment(s2) == top_align) continue;
+ if (s1 != s2 && are_adjacent_refs(s1, s2)) {
+ if (stmts_can_pack(s1, s2, align)) {
+ Node_List* pair = new Node_List();
+ pair->push(s1);
+ pair->push(s2);
+ _packset.append(pair);
+ }
+ }
+ }
+ }
+ } else { // Don't create unaligned pack
+ // First, remove remaining memory ops of the same type from the list.
+ for (int i = memops.size() - 1; i >= 0; i--) {
+ MemNode* s = memops.at(i)->as_Mem();
+ if (same_velt_type(s, mem_ref)) {
+ memops.remove(i);
+ }
+ }
+
+ // Second, remove already constructed packs of the same type.
+ for (int i = _packset.length() - 1; i >= 0; i--) {
+ Node_List* p = _packset.at(i);
+ MemNode* s = p->at(0)->as_Mem();
+ if (same_velt_type(s, mem_ref)) {
+ remove_pack_at(i);
}
}
+
+      // If needed, find the best memory reference for loop alignment again.
+ if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
+ // Put memory ops from remaining packs back on memops list for
+ // the best alignment search.
+ uint orig_msize = memops.size();
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p = _packset.at(i);
+ MemNode* s = p->at(0)->as_Mem();
+ assert(!same_velt_type(s, mem_ref), "sanity");
+ memops.push(s);
+ }
+        best_align_to_mem_ref = find_align_to_ref(memops);
+ if (best_align_to_mem_ref == NULL) break;
+ best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
+ // Restore list.
+ while (memops.size() > orig_msize)
+ (void)memops.pop();
+ }
+ } // unaligned memory accesses
+
+ // Remove used mem nodes.
+ for (int i = memops.size() - 1; i >= 0; i--) {
+ MemNode* m = memops.at(i)->as_Mem();
+ if (alignment(m) != top_align) {
+ memops.remove(i);
+ }
}
- }
+
+  } // while (memops.size() != 0)
+ set_align_to_ref(best_align_to_mem_ref);
#ifndef PRODUCT
if (TraceSuperWord) {
@@ -246,7 +323,7 @@ void SuperWord::find_adjacent_refs() {
// Find a memory reference to align the loop induction variable to.
// Looks first at stores then at loads, looking for a memory reference
// with the largest number of references similar to it.
-void SuperWord::find_align_to_ref(Node_List &memops) {
+MemNode* SuperWord::find_align_to_ref(Node_List &memops) {
GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
// Count number of comparable memory ops
@@ -270,20 +347,28 @@ void SuperWord::find_align_to_ref(Node_List &memops) {
}
}
- // Find Store (or Load) with the greatest number of "comparable" references
+ // Find Store (or Load) with the greatest number of "comparable" references,
+ // biggest vector size, smallest data size and smallest iv offset.
int max_ct = 0;
+ int max_vw = 0;
int max_idx = -1;
int min_size = max_jint;
int min_iv_offset = max_jint;
for (uint j = 0; j < memops.size(); j++) {
MemNode* s = memops.at(j)->as_Mem();
if (s->is_Store()) {
+ int vw = vector_width_in_bytes(velt_basic_type(s));
+ assert(vw > 1, "sanity");
SWPointer p(s, this);
- if (cmp_ct.at(j) > max_ct ||
- cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
- data_size(s) == min_size &&
- p.offset_in_bytes() < min_iv_offset)) {
+ if (cmp_ct.at(j) > max_ct ||
+ cmp_ct.at(j) == max_ct &&
+ (vw > max_vw ||
+ vw == max_vw &&
+ (data_size(s) < min_size ||
+ data_size(s) == min_size &&
+ (p.offset_in_bytes() < min_iv_offset)))) {
max_ct = cmp_ct.at(j);
+ max_vw = vw;
max_idx = j;
min_size = data_size(s);
min_iv_offset = p.offset_in_bytes();
@@ -295,12 +380,18 @@ void SuperWord::find_align_to_ref(Node_List &memops) {
for (uint j = 0; j < memops.size(); j++) {
MemNode* s = memops.at(j)->as_Mem();
if (s->is_Load()) {
+ int vw = vector_width_in_bytes(velt_basic_type(s));
+ assert(vw > 1, "sanity");
SWPointer p(s, this);
- if (cmp_ct.at(j) > max_ct ||
- cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
- data_size(s) == min_size &&
- p.offset_in_bytes() < min_iv_offset)) {
+ if (cmp_ct.at(j) > max_ct ||
+ cmp_ct.at(j) == max_ct &&
+ (vw > max_vw ||
+ vw == max_vw &&
+ (data_size(s) < min_size ||
+ data_size(s) == min_size &&
+ (p.offset_in_bytes() < min_iv_offset)))) {
max_ct = cmp_ct.at(j);
+ max_vw = vw;
max_idx = j;
min_size = data_size(s);
min_iv_offset = p.offset_in_bytes();
@@ -309,10 +400,7 @@ void SuperWord::find_align_to_ref(Node_List &memops) {
}
}
- if (max_ct > 0)
- set_align_to_ref(memops.at(max_idx)->as_Mem());
-
-#ifndef PRODUCT
+#ifdef ASSERT
if (TraceSuperWord && Verbose) {
tty->print_cr("\nVector memops after find_align_to_refs");
for (uint i = 0; i < memops.size(); i++) {
@@ -321,6 +409,17 @@ void SuperWord::find_align_to_ref(Node_List &memops) {
}
}
#endif
+
+ if (max_ct > 0) {
+#ifdef ASSERT
+ if (TraceSuperWord) {
+ tty->print("\nVector align to node: ");
+ memops.at(max_idx)->as_Mem()->dump();
+ }
+#endif
+ return memops.at(max_idx)->as_Mem();
+ }
+ return NULL;
}
//------------------------------ref_is_alignable---------------------------
@@ -341,7 +440,9 @@ bool SuperWord::ref_is_alignable(SWPointer& p) {
// If initial offset from start of object is computable,
// compute alignment within the vector.
- int vw = vector_width_in_bytes();
+ BasicType bt = velt_basic_type(p.mem());
+ int vw = vector_width_in_bytes(bt);
+ assert(vw > 1, "sanity");
if (vw % span == 0) {
Node* init_nd = pre_end->init_trip();
if (init_nd->is_Con() && p.invar() == NULL) {
@@ -361,6 +462,26 @@ bool SuperWord::ref_is_alignable(SWPointer& p) {
return false;
}
+//---------------------------get_iv_adjustment---------------------------
+// Calculate the loop's iv adjustment for this memory op.
+int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
+ SWPointer align_to_ref_p(mem_ref, this);
+ int offset = align_to_ref_p.offset_in_bytes();
+ int scale = align_to_ref_p.scale_in_bytes();
+ BasicType bt = velt_basic_type(mem_ref);
+ int vw = vector_width_in_bytes(bt);
+ assert(vw > 1, "sanity");
+ int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
+ int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
+
+#ifndef PRODUCT
+ if (TraceSuperWord)
+ tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
+ offset, iv_adjustment, align_to_ref_p.memory_size(), scale, iv_stride(), vw);
+#endif
+ return iv_adjustment;
+}
+
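A worked instance of the formula above, with assumed numbers: a reference whose first element sits 12 bytes into a 16-byte vector span, iterated with a positive stride, needs 4 more bytes of scalar pre-loop iterations to reach a vector boundary.

    #include <cassert>

    int main() {
      int offset = 12;        // offset_in_bytes() of the reference (assumed)
      int vw = 16;            // vector width in bytes (assumed, e.g. 4 ints)
      int stride_sign = 1;    // scale * iv_stride() > 0
      int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
      assert(iv_adjustment == 4);
      return 0;
    }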
//---------------------------dependence_graph---------------------------
// Construct dependency graph.
// Add dependence edges to load/store nodes for memory dependence
@@ -488,9 +609,13 @@ void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &p
bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
// Do not use superword for non-primitives
- if((s1->is_Mem() && !is_java_primitive(s1->as_Mem()->memory_type())) ||
- (s2->is_Mem() && !is_java_primitive(s2->as_Mem()->memory_type())))
+ BasicType bt1 = velt_basic_type(s1);
+ BasicType bt2 = velt_basic_type(s2);
+ if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
return false;
+ if (Matcher::max_vector_size(bt1) < 2) {
+ return false; // No vectors for this type
+ }
if (isomorphic(s1, s2)) {
if (independent(s1, s2)) {
@@ -552,7 +677,7 @@ bool SuperWord::isomorphic(Node* s1, Node* s2) {
if (s1->Opcode() != s2->Opcode()) return false;
if (s1->req() != s2->req()) return false;
if (s1->in(0) != s2->in(0)) return false;
- if (velt_type(s1) != velt_type(s2)) return false;
+ if (!same_velt_type(s1, s2)) return false;
return true;
}
@@ -595,14 +720,16 @@ bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) {
//------------------------------set_alignment---------------------------
void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
set_alignment(s1, align);
- set_alignment(s2, align + data_size(s1));
+ if (align == top_align || align == bottom_align) {
+ set_alignment(s2, align);
+ } else {
+ set_alignment(s2, align + data_size(s1));
+ }
}
//------------------------------data_size---------------------------
int SuperWord::data_size(Node* s) {
- const Type* t = velt_type(s);
- BasicType bt = t->array_element_basic_type();
- int bsize = type2aelembytes(bt);
+ int bsize = type2aelembytes(velt_basic_type(s));
assert(bsize != 0, "valid size");
return bsize;
}
@@ -631,9 +758,9 @@ void SuperWord::extend_packlist() {
//------------------------------follow_use_defs---------------------------
// Extend the packset by visiting operand definitions of nodes in pack p
bool SuperWord::follow_use_defs(Node_List* p) {
+ assert(p->size() == 2, "just checking");
Node* s1 = p->at(0);
Node* s2 = p->at(1);
- assert(p->size() == 2, "just checking");
assert(s1->req() == s2->req(), "just checking");
assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
@@ -718,7 +845,12 @@ bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
if (i1 != i2) {
- return false;
+ if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
+          // Further analysis relies on operand positions matching.
+ u2->swap_edges(i1, i2);
+ } else {
+ return false;
+ }
}
} while (i1 < ct);
return true;
@@ -727,7 +859,7 @@ bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
//------------------------------est_savings---------------------------
// Estimate the savings from executing s1 and s2 as a pack
int SuperWord::est_savings(Node* s1, Node* s2) {
- int save = 2 - 1; // 2 operations per instruction in packed form
+ int save_in = 2 - 1; // 2 operations per instruction in packed form
// inputs
for (uint i = 1; i < s1->req(); i++) {
@@ -735,17 +867,18 @@ int SuperWord::est_savings(Node* s1, Node* s2) {
Node* x2 = s2->in(i);
if (x1 != x2) {
if (are_adjacent_refs(x1, x2)) {
- save += adjacent_profit(x1, x2);
+ save_in += adjacent_profit(x1, x2);
} else if (!in_packset(x1, x2)) {
- save -= pack_cost(2);
+ save_in -= pack_cost(2);
} else {
- save += unpack_cost(2);
+ save_in += unpack_cost(2);
}
}
}
// uses of result
uint ct = 0;
+ int save_use = 0;
for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
Node* s1_use = s1->fast_out(i);
for (int j = 0; j < _packset.length(); j++) {
@@ -756,7 +889,7 @@ int SuperWord::est_savings(Node* s1, Node* s2) {
if (p->at(p->size()-1) == s2_use) {
ct++;
if (are_adjacent_refs(s1_use, s2_use)) {
- save += adjacent_profit(s1_use, s2_use);
+ save_use += adjacent_profit(s1_use, s2_use);
}
}
}
@@ -764,10 +897,10 @@ int SuperWord::est_savings(Node* s1, Node* s2) {
}
}
- if (ct < s1->outcnt()) save += unpack_cost(1);
- if (ct < s2->outcnt()) save += unpack_cost(1);
+ if (ct < s1->outcnt()) save_use += unpack_cost(1);
+ if (ct < s2->outcnt()) save_use += unpack_cost(1);
- return save;
+ return MAX2(save_in, save_use);
}
//------------------------------costs---------------------------
@@ -778,8 +911,9 @@ int SuperWord::unpack_cost(int ct) { return ct; }
//------------------------------combine_packs---------------------------
// Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
void SuperWord::combine_packs() {
- bool changed;
- do {
+ bool changed = true;
+  // Combine packs regardless of the max vector size.
+ while (changed) {
changed = false;
for (int i = 0; i < _packset.length(); i++) {
Node_List* p1 = _packset.at(i);
@@ -787,6 +921,7 @@ void SuperWord::combine_packs() {
for (int j = 0; j < _packset.length(); j++) {
Node_List* p2 = _packset.at(j);
if (p2 == NULL) continue;
+ if (i == j) continue;
if (p1->at(p1->size()-1) == p2->at(0)) {
for (uint k = 1; k < p2->size(); k++) {
p1->push(p2->at(k));
@@ -796,8 +931,39 @@ void SuperWord::combine_packs() {
}
}
}
- } while (changed);
+ }
+
+  // Split packs whose size is greater than the max vector size.
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p1 = _packset.at(i);
+ if (p1 != NULL) {
+ BasicType bt = velt_basic_type(p1->at(0));
+ uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector
+ assert(is_power_of_2(max_vlen), "sanity");
+ uint psize = p1->size();
+ if (!is_power_of_2(psize)) {
+        // Skip packs that can't become vectors.
+        // case 1: for(...) { a[i] = i; }      element values differ (i+x)
+        // case 2: for(...) { a[i] = b[i+1]; } can't align both the load and the store
+ _packset.at_put(i, NULL);
+ continue;
+ }
+ if (psize > max_vlen) {
+ Node_List* pack = new Node_List();
+ for (uint j = 0; j < psize; j++) {
+ pack->push(p1->at(j));
+ if (pack->size() >= max_vlen) {
+ assert(is_power_of_2(pack->size()), "sanity");
+ _packset.append(pack);
+ pack = new Node_List();
+ }
+ }
+ _packset.at_put(i, NULL);
+ }
+ }
+ }
+ // Compress list.
for (int i = _packset.length() - 1; i >= 0; i--) {
Node_List* p1 = _packset.at(i);
if (p1 == NULL) {
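A minimal sketch of the splitting step above, using plain integers in place of the compiler's Node_List packs (max_vlen and the pack contents are assumptions): an 8-statement pack with max_vlen == 4 becomes two 4-lane packs, and a pack whose size is not a power of two is discarded.

    #include <cassert>
    #include <vector>

    static bool is_pow2(size_t n) { return n != 0 && (n & (n - 1)) == 0; }

    static std::vector<std::vector<int>> split_pack(const std::vector<int>& p,
                                                    size_t max_vlen) {
      std::vector<std::vector<int>> out;
      if (!is_pow2(p.size())) return out;            // e.g. a[i] = i or a[i] = b[i+1]
      if (p.size() <= max_vlen) { out.push_back(p); return out; }
      std::vector<int> cur;
      for (int s : p) {                              // cut into max_vlen-sized pieces
        cur.push_back(s);
        if (cur.size() == max_vlen) { out.push_back(cur); cur.clear(); }
      }
      return out;
    }

    int main() {
      std::vector<int> pack = {0, 1, 2, 3, 4, 5, 6, 7};   // 8 packed statements
      assert(split_pack(pack, 4).size() == 2);            // two 4-lane vectors
      assert(split_pack({0, 1, 2}, 4).empty());           // size 3: not vectorizable
      return 0;
    }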
@@ -880,8 +1046,7 @@ void SuperWord::filter_packs() {
// Can code be generated for pack p?
bool SuperWord::implemented(Node_List* p) {
Node* p0 = p->at(0);
- int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0));
- return vopc > 0 && Matcher::has_match_rule(vopc);
+ return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0));
}
//------------------------------profitable---------------------------
@@ -939,36 +1104,36 @@ void SuperWord::schedule() {
}
//-------------------------------remove_and_insert-------------------
-//remove "current" from its current position in the memory graph and insert
-//it after the appropriate insertion point (lip or uip)
+// Remove "current" from its current position in the memory graph and insert
+// it after the appropriate insertion point (lip or uip).
void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,
Node *uip, Unique_Node_List &sched_before) {
Node* my_mem = current->in(MemNode::Memory);
- _igvn.rehash_node_delayed(current);
- _igvn.hash_delete(my_mem);
+ bool sched_up = sched_before.member(current);
- //remove current_store from its current position in the memmory graph
+  // remove current_store from its current position in the memory graph
for (DUIterator i = current->outs(); current->has_out(i); i++) {
Node* use = current->out(i);
if (use->is_Mem()) {
assert(use->in(MemNode::Memory) == current, "must be");
- _igvn.rehash_node_delayed(use);
if (use == prev) { // connect prev to my_mem
- use->set_req(MemNode::Memory, my_mem);
+ _igvn.replace_input_of(use, MemNode::Memory, my_mem);
+ --i; //deleted this edge; rescan position
} else if (sched_before.member(use)) {
- _igvn.hash_delete(uip);
- use->set_req(MemNode::Memory, uip);
+ if (!sched_up) { // Will be moved together with current
+ _igvn.replace_input_of(use, MemNode::Memory, uip);
+ --i; //deleted this edge; rescan position
+ }
} else {
- _igvn.hash_delete(lip);
- use->set_req(MemNode::Memory, lip);
+ if (sched_up) { // Will be moved together with current
+ _igvn.replace_input_of(use, MemNode::Memory, lip);
+ --i; //deleted this edge; rescan position
+ }
}
- --i; //deleted this edge; rescan position
}
}
- bool sched_up = sched_before.member(current);
Node *insert_pt = sched_up ? uip : lip;
- _igvn.hash_delete(insert_pt);
// all uses of insert_pt's memory state should use current's instead
for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) {
@@ -988,7 +1153,7 @@ void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,
}
//connect current to insert_pt
- current->set_req(MemNode::Memory, insert_pt);
+ _igvn.replace_input_of(current, MemNode::Memory, insert_pt);
}
//------------------------------co_locate_pack----------------------------------
@@ -1025,7 +1190,7 @@ void SuperWord::co_locate_pack(Node_List* pk) {
if (use->is_Mem() && use != previous)
memops.push(use);
}
- if(current == first) break;
+ if (current == first) break;
previous = current;
current = current->in(MemNode::Memory)->as_Mem();
}
@@ -1038,27 +1203,37 @@ void SuperWord::co_locate_pack(Node_List* pk) {
Node *s2 = memops.at(j);
if (!independent(s1, s2)) {
if (in_pack(s2, pk) || schedule_before_pack.member(s2)) {
- schedule_before_pack.push(s1); //s1 must be scheduled before
+ schedule_before_pack.push(s1); // s1 must be scheduled before
Node_List* mem_pk = my_pack(s1);
if (mem_pk != NULL) {
for (uint ii = 0; ii < mem_pk->size(); ii++) {
- Node* s = mem_pk->at(ii); // follow partner
+ Node* s = mem_pk->at(ii); // follow partner
if (memops.member(s) && !schedule_before_pack.member(s))
schedule_before_pack.push(s);
}
}
+ break;
}
}
}
}
}
- MemNode* lower_insert_pt = last;
Node* upper_insert_pt = first->in(MemNode::Memory);
+ // Following code moves loads connected to upper_insert_pt below aliased stores.
+ // Collect such loads here and reconnect them back to upper_insert_pt later.
+ memops.clear();
+ for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) {
+ Node* use = upper_insert_pt->out(i);
+ if (!use->is_Store())
+ memops.push(use);
+ }
+
+ MemNode* lower_insert_pt = last;
previous = last; //previous store in pk
current = last->in(MemNode::Memory)->as_Mem();
- //start scheduling from "last" to "first"
+ // start scheduling from "last" to "first"
while (true) {
assert(in_bb(current), "stay in block");
assert(in_pack(previous, pk), "previous stays in pack");
@@ -1066,16 +1241,13 @@ void SuperWord::co_locate_pack(Node_List* pk) {
if (in_pack(current, pk)) {
// Forward users of my memory state (except "previous) to my input memory state
- _igvn.hash_delete(current);
for (DUIterator i = current->outs(); current->has_out(i); i++) {
Node* use = current->out(i);
if (use->is_Mem() && use != previous) {
assert(use->in(MemNode::Memory) == current, "must be");
if (schedule_before_pack.member(use)) {
- _igvn.hash_delete(upper_insert_pt);
_igvn.replace_input_of(use, MemNode::Memory, upper_insert_pt);
} else {
- _igvn.hash_delete(lower_insert_pt);
_igvn.replace_input_of(use, MemNode::Memory, lower_insert_pt);
}
--i; // deleted this edge; rescan position
@@ -1089,6 +1261,14 @@ void SuperWord::co_locate_pack(Node_List* pk) {
if (current == first) break;
current = my_mem->as_Mem();
} // end while
+
+ // Reconnect loads back to upper_insert_pt.
+ for (uint i = 0; i < memops.size(); i++) {
+ Node *ld = memops.at(i);
+ if (ld->in(MemNode::Memory) != upper_insert_pt) {
+ _igvn.replace_input_of(ld, MemNode::Memory, upper_insert_pt);
+ }
+ }
} else if (pk->at(0)->is_Load()) { //load
// all loads in the pack should have the same memory state. By default,
// we use the memory state of the last load. However, if any load could
@@ -1149,35 +1329,30 @@ void SuperWord::output() {
Node* vn = NULL;
Node* low_adr = p->at(0);
Node* first = executed_first(p);
+ int opc = n->Opcode();
if (n->is_Load()) {
- int opc = n->Opcode();
Node* ctl = n->in(MemNode::Control);
Node* mem = first->in(MemNode::Memory);
Node* adr = low_adr->in(MemNode::Address);
const TypePtr* atyp = n->adr_type();
- vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen);
-
+ vn = LoadVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
} else if (n->is_Store()) {
// Promote value to be stored to vector
Node* val = vector_opd(p, MemNode::ValueIn);
-
- int opc = n->Opcode();
Node* ctl = n->in(MemNode::Control);
Node* mem = first->in(MemNode::Memory);
Node* adr = low_adr->in(MemNode::Address);
const TypePtr* atyp = n->adr_type();
- vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
-
+ vn = StoreVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
} else if (n->req() == 3) {
// Promote operands to vector
Node* in1 = vector_opd(p, 1);
Node* in2 = vector_opd(p, 2);
- vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n));
-
+ vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n));
} else {
ShouldNotReachHere();
}
-
+ assert(vn != NULL, "sanity");
_phase->_igvn.register_new_node_with_optimizer(vn);
_phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
for (uint j = 0; j < p->size(); j++) {
@@ -1185,6 +1360,12 @@ void SuperWord::output() {
_igvn.replace_node(pm, vn);
}
_igvn._worklist.push(vn);
+#ifdef ASSERT
+ if (TraceSuperWord) {
+ tty->print("new Vector node: ");
+ vn->dump();
+ }
+#endif
}
}
}
@@ -1207,10 +1388,10 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
}
if (same_opd) {
- if (opd->is_Vector() || opd->is_VectorLoad()) {
+ if (opd->is_Vector() || opd->is_LoadVector()) {
return opd; // input is matching vector
}
- assert(!opd->is_VectorStore(), "such vector is not expected here");
+ assert(!opd->is_StoreVector(), "such vector is not expected here");
// Convert scalar input to vector with the same number of elements as
// p0's vector. Use p0's type because size of operand's container in
// vector should match p0's size regardless operand's size.
@@ -1219,12 +1400,18 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
_phase->_igvn.register_new_node_with_optimizer(vn);
_phase->set_ctrl(vn, _phase->get_ctrl(opd));
+#ifdef ASSERT
+ if (TraceSuperWord) {
+ tty->print("new Vector node: ");
+ vn->dump();
+ }
+#endif
return vn;
}
// Insert pack operation
- const Type* p0_t = velt_type(p0);
- PackNode* pk = PackNode::make(_phase->C, opd, p0_t);
+ BasicType bt = velt_basic_type(p0);
+ PackNode* pk = PackNode::make(_phase->C, opd, vlen, bt);
DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )
for (uint i = 1; i < vlen; i++) {
@@ -1232,10 +1419,16 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
Node* in = pi->in(opd_idx);
assert(my_pack(in) == NULL, "Should already have been unpacked");
assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
- pk->add_opd(in);
+ pk->add_opd(i, in);
}
_phase->_igvn.register_new_node_with_optimizer(pk);
_phase->set_ctrl(pk, _phase->get_ctrl(opd));
+#ifdef ASSERT
+ if (TraceSuperWord) {
+ tty->print("new Pack node: ");
+ pk->dump();
+ }
+#endif
return pk;
}
@@ -1273,16 +1466,15 @@ void SuperWord::insert_extracts(Node_List* p) {
// Insert extract operation
_igvn.hash_delete(def);
int def_pos = alignment(def) / data_size(def);
- const Type* def_t = velt_type(def);
- Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t);
+ Node* ex = ExtractNode::make(_phase->C, def, def_pos, velt_basic_type(def));
_phase->_igvn.register_new_node_with_optimizer(ex);
_phase->set_ctrl(ex, _phase->get_ctrl(def));
_igvn.replace_input_of(use, idx, ex);
_igvn._worklist.push(def);
bb_insert_after(ex, bb_idx(def));
- set_velt_type(ex, def_t);
+ set_velt_type(ex, velt_type(def));
}
}
@@ -1509,10 +1701,7 @@ void SuperWord::compute_vector_element_type() {
// Initial type
for (int i = 0; i < _block.length(); i++) {
Node* n = _block.at(i);
- const Type* t = n->is_Mem() ? Type::get_const_basic_type(n->as_Mem()->memory_type())
- : _igvn.type(n);
- const Type* vt = container_type(t);
- set_velt_type(n, vt);
+ set_velt_type(n, container_type(n));
}
// Propagate narrowed type backwards through operations
@@ -1543,7 +1732,7 @@ void SuperWord::compute_vector_element_type() {
bool same_type = true;
for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
Node *use = in->fast_out(k);
- if (!in_bb(use) || velt_type(use) != vt) {
+ if (!in_bb(use) || !same_velt_type(use, n)) {
same_type = false;
break;
}
@@ -1575,20 +1764,24 @@ int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) {
if (!p.valid()) {
return bottom_align;
}
+ int vw = vector_width_in_bytes(velt_basic_type(s));
+ if (vw < 2) {
+ return bottom_align; // No vectors for this type
+ }
int offset = p.offset_in_bytes();
offset += iv_adjust_in_bytes;
- int off_rem = offset % vector_width_in_bytes();
- int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes();
+ int off_rem = offset % vw;
+ int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
return off_mod;
}
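The explicit off_rem >= 0 test matters because C++ '%' keeps the sign of the dividend; a small stand-alone check with assumed values:

    #include <cassert>

    int main() {
      int offset = -4;   // assumed byte offset after iv adjustment
      int vw = 16;       // assumed vector width in bytes
      int off_rem = offset % vw;                            // -4, not 12
      int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;  // normalize to [0, vw)
      assert(off_mod == 12);
      return 0;
    }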
//---------------------------container_type---------------------------
// Smallest type containing range of values
-const Type* SuperWord::container_type(const Type* t) {
- const Type* tp = t->make_ptr();
- if (tp && tp->isa_aryptr()) {
- t = tp->is_aryptr()->elem();
+const Type* SuperWord::container_type(Node* n) {
+ if (n->is_Mem()) {
+ return Type::get_const_basic_type(n->as_Mem()->memory_type());
}
+ const Type* t = _igvn.type(n);
if (t->basic_type() == T_INT) {
if (t->higher_equal(TypeInt::BOOL)) return TypeInt::BOOL;
if (t->higher_equal(TypeInt::BYTE)) return TypeInt::BYTE;
@@ -1599,11 +1792,22 @@ const Type* SuperWord::container_type(const Type* t) {
return t;
}
+bool SuperWord::same_velt_type(Node* n1, Node* n2) {
+ const Type* vt1 = velt_type(n1);
+  const Type* vt2 = velt_type(n2);
+  if (vt1->basic_type() == T_INT && vt2->basic_type() == T_INT) {
+    // Compare vector element sizes for integer types.
+ return data_size(n1) == data_size(n2);
+ }
+ return vt1 == vt2;
+}
+
//-------------------------vector_opd_range-----------------------
// (Start, end] half-open range defining which operands are vector
void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) {
switch (n->Opcode()) {
- case Op_LoadB: case Op_LoadUS:
+ case Op_LoadB: case Op_LoadUB:
+ case Op_LoadS: case Op_LoadUS:
case Op_LoadI: case Op_LoadL:
case Op_LoadF: case Op_LoadD:
case Op_LoadP:
@@ -1721,6 +1925,7 @@ void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
SWPointer align_to_ref_p(align_to_ref, this);
+ assert(align_to_ref_p.valid(), "sanity");
// Given:
// lim0 == original pre loop limit
@@ -1773,10 +1978,12 @@ void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
// N = (V - (e - lim0)) % V
// lim = lim0 - (V - (e - lim0)) % V
+ int vw = vector_width_in_bytes(velt_basic_type(align_to_ref));
+ assert(vw > 1, "sanity");
int stride = iv_stride();
int scale = align_to_ref_p.scale_in_bytes();
int elt_size = align_to_ref_p.memory_size();
- int v_align = vector_width_in_bytes() / elt_size;
+ int v_align = vw / elt_size;
int k = align_to_ref_p.offset_in_bytes() / elt_size;
Node *kn = _igvn.intcon(k);
@@ -1796,6 +2003,25 @@ void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
_phase->_igvn.register_new_node_with_optimizer(e);
_phase->set_ctrl(e, pre_ctrl);
}
+ if (vw > ObjectAlignmentInBytes) {
+    // Incorporate the base address: e += (base & (vw-1)) >>> log2(elt_size)
+ Node* mask = _igvn.MakeConX(~(-1 << exact_log2(vw)));
+ Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base());
+ _phase->_igvn.register_new_node_with_optimizer(xbase);
+ Node* masked_xbase = new (_phase->C, 3) AndXNode(xbase, mask);
+ _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
+#ifdef _LP64
+ masked_xbase = new (_phase->C, 2) ConvL2INode(masked_xbase);
+ _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
+#endif
+ Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
+ Node* bref = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt);
+ _phase->_igvn.register_new_node_with_optimizer(bref);
+ _phase->set_ctrl(bref, pre_ctrl);
+ e = new (_phase->C, 3) AddINode(e, bref);
+ _phase->_igvn.register_new_node_with_optimizer(e);
+ _phase->set_ctrl(e, pre_ctrl);
+ }
// compute e +/- lim0
if (scale < 0) {
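A numeric sketch of the new term above (the address and sizes are assumptions): when the vector width exceeds ObjectAlignmentInBytes, the array base's misalignment within a vector span, expressed in elements, is folded into the pre-loop limit adjustment.

    #include <cassert>
    #include <cstdint>

    int main() {
      uintptr_t base = 0x7f0000001010u;  // assumed array base address
      int vw = 32;                       // 256-bit vectors
      int log2_elt = 2;                  // 4-byte ints
      int bref = (int)((base & (uintptr_t)(vw - 1)) >> log2_elt);
      assert(bref == 4);                 // 16 bytes into the 32-byte span == 4 ints
      return 0;
    }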
diff --git a/src/share/vm/opto/superword.hpp b/src/share/vm/opto/superword.hpp
index 509376712..97224e4d7 100644
--- a/src/share/vm/opto/superword.hpp
+++ b/src/share/vm/opto/superword.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -264,7 +264,10 @@ class SuperWord : public ResourceObj {
_iv = lp->as_CountedLoop()->phi()->as_Phi(); }
int iv_stride() { return lp()->as_CountedLoop()->stride_con(); }
- int vector_width_in_bytes() { return Matcher::vector_width_in_bytes(); }
+ int vector_width_in_bytes(BasicType bt) {
+ return MIN2(ABS(iv_stride())*type2aelembytes(bt),
+ Matcher::vector_width_in_bytes(bt));
+ }
MemNode* align_to_ref() { return _align_to_ref; }
void set_align_to_ref(MemNode* m) { _align_to_ref = m; }
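The MIN2 above caps the usable vector width by how many adjacent elements one main-loop iteration actually touches after unrolling. With assumed numbers, a loop unrolled 4 times over ints on a machine whose Matcher reports 32-byte vectors still gets only 16-byte vectors:

    #include <algorithm>
    #include <cassert>

    int main() {
      int iv_stride = 4;        // assumed post-unroll stride (elements per iteration)
      int elem_bytes = 4;       // T_INT
      int matcher_vw = 32;      // assumed platform maximum (AVX)
      int vw = std::min(iv_stride * elem_bytes, matcher_vw);
      assert(vw == 16);         // limited by the unroll factor, not the hardware
      return 0;
    }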
@@ -298,7 +301,9 @@ class SuperWord : public ResourceObj {
// vector element type
const Type* velt_type(Node* n) { return _node_info.adr_at(bb_idx(n))->_velt_type; }
+ BasicType velt_basic_type(Node* n) { return velt_type(n)->array_element_basic_type(); }
void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; }
+ bool same_velt_type(Node* n1, Node* n2);
// my_pack
Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }
@@ -311,7 +316,9 @@ class SuperWord : public ResourceObj {
// Find the adjacent memory references and create pack pairs for them.
void find_adjacent_refs();
// Find a memory reference to align the loop induction variable to.
- void find_align_to_ref(Node_List &memops);
+ MemNode* find_align_to_ref(Node_List &memops);
+  // Calculate the loop's iv adjustment for this memory op.
+ int get_iv_adjustment(MemNode* mem);
// Can the preloop align the reference to position zero in the vector?
bool ref_is_alignable(SWPointer& p);
// Construct dependency graph.
@@ -394,7 +401,7 @@ class SuperWord : public ResourceObj {
// (Start, end] half-open range defining which operands are vector
void vector_opd_range(Node* n, uint* start, uint* end);
// Smallest type containing range of values
- static const Type* container_type(const Type* t);
+ const Type* container_type(Node* n);
// Adjust pre-loop limit so that in main loop, a load/store reference
// to align_to_ref will be a position zero in the vector.
void align_initial_loop_index(MemNode* align_to_ref);
@@ -462,6 +469,7 @@ class SWPointer VALUE_OBJ_CLASS_SPEC {
Node* base() { return _base; }
Node* adr() { return _adr; }
+ MemNode* mem() { return _mem; }
int scale_in_bytes() { return _scale; }
Node* invar() { return _invar; }
bool negate_invar() { return _negate_invar; }
diff --git a/src/share/vm/opto/type.cpp b/src/share/vm/opto/type.cpp
index af118139e..30ec56959 100644
--- a/src/share/vm/opto/type.cpp
+++ b/src/share/vm/opto/type.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -60,6 +60,10 @@ const BasicType Type::_basic_type[Type::lastype] = {
T_ILLEGAL, // Tuple
T_ARRAY, // Array
+ T_ILLEGAL, // VectorS
+ T_ILLEGAL, // VectorD
+ T_ILLEGAL, // VectorX
+ T_ILLEGAL, // VectorY
T_ADDRESS, // AnyPtr // shows up in factory methods for NULL_PTR
T_ADDRESS, // RawPtr
@@ -414,6 +418,24 @@ void Type::Initialize_shared(Compile* current) {
// get_zero_type() should not happen for T_CONFLICT
_zero_type[T_CONFLICT]= NULL;
+  // Predefined vector types; this requires _const_basic_type[] to be initialized.
+ if (Matcher::vector_size_supported(T_BYTE,4)) {
+ TypeVect::VECTS = TypeVect::make(T_BYTE,4);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,2)) {
+ TypeVect::VECTD = TypeVect::make(T_FLOAT,2);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,4)) {
+ TypeVect::VECTX = TypeVect::make(T_FLOAT,4);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,8)) {
+ TypeVect::VECTY = TypeVect::make(T_FLOAT,8);
+ }
+ mreg2type[Op_VecS] = TypeVect::VECTS;
+ mreg2type[Op_VecD] = TypeVect::VECTD;
+ mreg2type[Op_VecX] = TypeVect::VECTX;
+ mreg2type[Op_VecY] = TypeVect::VECTY;
+
// Restore working type arena.
current->set_type_arena(save);
current->set_type_dict(NULL);
@@ -668,6 +690,10 @@ const Type::TYPES Type::dual_type[Type::lastype] = {
Bad, // Tuple - handled in v-call
Bad, // Array - handled in v-call
+ Bad, // VectorS - handled in v-call
+ Bad, // VectorD - handled in v-call
+ Bad, // VectorX - handled in v-call
+ Bad, // VectorY - handled in v-call
Bad, // AnyPtr - handled in v-call
Bad, // RawPtr - handled in v-call
@@ -728,8 +754,8 @@ void Type::dump_on(outputStream *st) const {
//------------------------------data-------------------------------------------
const char * const Type::msg[Type::lastype] = {
"bad","control","top","int:","long:","half", "narrowoop:",
- "tuple:", "aryptr",
- "anyptr:", "rawptr:", "java:", "inst:", "ary:", "klass:",
+ "tuple:", "array:", "vectors:", "vectord:", "vectorx:", "vectory:",
+ "anyptr:", "rawptr:", "java:", "inst:", "aryptr:", "klass:",
"func", "abIO", "return_address", "memory",
"float_top", "ftcon:", "float",
"double_top", "dblcon:", "double",
@@ -790,7 +816,7 @@ void Type::typerr( const Type *t ) const {
//------------------------------isa_oop_ptr------------------------------------
// Return true if type is an oop pointer type. False for raw pointers.
static char isa_oop_ptr_tbl[Type::lastype] = {
- 0,0,0,0,0,0,0/*narrowoop*/,0/*tuple*/, 0/*ary*/,
+ 0,0,0,0,0,0,0/*narrowoop*/,0/*tuple*/, 0/*array*/, 0, 0, 0, 0/*vector*/,
0/*anyptr*/,0/*rawptr*/,1/*OopPtr*/,1/*InstPtr*/,1/*AryPtr*/,1/*KlassPtr*/,
0/*func*/,0,0/*return_address*/,0,
/*floats*/0,0,0, /*doubles*/0,0,0,
@@ -1926,6 +1952,121 @@ bool TypeAry::ary_must_be_exact() const {
return false;
}
+//==============================TypeVect=======================================
+// Convenience common pre-built types.
+const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors
+const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors
+const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors
+const TypeVect *TypeVect::VECTY = NULL; // 256-bit vectors
+
+//------------------------------make-------------------------------------------
+const TypeVect* TypeVect::make(const Type *elem, uint length) {
+ BasicType elem_bt = elem->array_element_basic_type();
+ assert(is_java_primitive(elem_bt), "only primitive types in vector");
+ assert(length > 1 && is_power_of_2(length), "vector length is power of 2");
+ assert(Matcher::vector_size_supported(elem_bt, length), "length in range");
+ int size = length * type2aelembytes(elem_bt);
+ switch (Matcher::vector_ideal_reg(size)) {
+ case Op_VecS:
+ return (TypeVect*)(new TypeVectS(elem, length))->hashcons();
+ case Op_VecD:
+ case Op_RegD:
+ return (TypeVect*)(new TypeVectD(elem, length))->hashcons();
+ case Op_VecX:
+ return (TypeVect*)(new TypeVectX(elem, length))->hashcons();
+ case Op_VecY:
+ return (TypeVect*)(new TypeVectY(elem, length))->hashcons();
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
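The predefined VECTS/VECTD/VECTX/VECTY types line up with the four ideal vector registers by total byte size; the real lookup goes through Matcher::vector_ideal_reg(). A small illustrative mapping using the byte sizes this change targets (the returned names are just strings in the sketch):

    #include <cassert>
    #include <cstring>

    static const char* ideal_vect_reg(int elem_bytes, int length) {
      switch (elem_bytes * length) {
        case 4:  return "VecS";   // e.g. 4 x byte
        case 8:  return "VecD";   // e.g. 2 x float
        case 16: return "VecX";   // e.g. 4 x float
        case 32: return "VecY";   // e.g. 8 x float (256-bit AVX)
        default: return "unsupported";
      }
    }

    int main() {
      assert(std::strcmp(ideal_vect_reg(4, 8), "VecY") == 0);   // 8 floats
      assert(std::strcmp(ideal_vect_reg(1, 4), "VecS") == 0);   // 4 bytes
      return 0;
    }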
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeVect::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is Vector
+ switch (t->base()) { // switch on original type
+
+ case Bottom: // Ye Olde Default
+ return t;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case VectorS:
+ case VectorD:
+ case VectorX:
+ case VectorY: { // Meeting 2 vectors?
+ const TypeVect* v = t->is_vect();
+ assert( base() == v->base(), "");
+ assert(length() == v->length(), "");
+ assert(element_basic_type() == v->element_basic_type(), "");
+ return TypeVect::make(_elem->xmeet(v->_elem), _length);
+ }
+ case Top:
+ break;
+ }
+ return this;
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeVect::xdual() const {
+ return new TypeVect(base(), _elem->dual(), _length);
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeVect::eq(const Type *t) const {
+ const TypeVect *v = t->is_vect();
+ return (_elem == v->_elem) && (_length == v->_length);
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeVect::hash(void) const {
+ return (intptr_t)_elem + (intptr_t)_length;
+}
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Vector is singleton if all elements are the same
+// constant value (when vector is created with Replicate code).
+bool TypeVect::singleton(void) const {
+// There is no Con node for vectors yet.
+// return _elem->singleton();
+ return false;
+}
+
+bool TypeVect::empty(void) const {
+ return _elem->empty();
+}
+
+//------------------------------dump2------------------------------------------
+#ifndef PRODUCT
+void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
+ switch (base()) {
+ case VectorS:
+ st->print("vectors["); break;
+ case VectorD:
+ st->print("vectord["); break;
+ case VectorX:
+ st->print("vectorx["); break;
+ case VectorY:
+ st->print("vectory["); break;
+ default:
+ ShouldNotReachHere();
+ }
+ st->print("%d]:{", _length);
+ _elem->dump2(d, depth, st);
+ st->print("}");
+}
+#endif
+
+
//=============================================================================
// Convenience common pre-built types.
const TypePtr *TypePtr::NULL_PTR;
@@ -4140,7 +4281,7 @@ void TypeFunc::dump2( Dict &d, uint depth, outputStream *st ) const {
// Print a 'flattened' signature
static const char * const flat_type_msg[Type::lastype] = {
"bad","control","top","int","long","_", "narrowoop",
- "tuple:", "array:",
+ "tuple:", "array:", "vectors:", "vectord:", "vectorx:", "vectory:",
"ptr", "rawptr", "ptr", "ptr", "ptr", "ptr",
"func", "abIO", "return_address", "mem",
"float_top", "ftcon:", "flt",
diff --git a/src/share/vm/opto/type.hpp b/src/share/vm/opto/type.hpp
index 133ce78f0..a4b5487d7 100644
--- a/src/share/vm/opto/type.hpp
+++ b/src/share/vm/opto/type.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,11 @@ class TypeLong;
class TypeNarrowOop;
class TypeAry;
class TypeTuple;
+class TypeVect;
+class TypeVectS;
+class TypeVectD;
+class TypeVectX;
+class TypeVectY;
class TypePtr;
class TypeRawPtr;
class TypeOopPtr;
@@ -78,6 +83,10 @@ public:
Tuple, // Method signature or object layout
Array, // Array types
+ VectorS, // 32bit Vector types
+ VectorD, // 64bit Vector types
+ VectorX, // 128bit Vector types
+ VectorY, // 256bit Vector types
AnyPtr, // Any old raw, klass, inst, or array pointer
RawPtr, // Raw (non-oop) pointers
@@ -222,6 +231,8 @@ public:
const TypeF *isa_float_constant() const; // Returns NULL if not a FloatCon
const TypeTuple *is_tuple() const; // Collection of fields, NOT a pointer
const TypeAry *is_ary() const; // Array, NOT array pointer
+ const TypeVect *is_vect() const; // Vector
+ const TypeVect *isa_vect() const; // Returns NULL if not a Vector
const TypePtr *is_ptr() const; // Asserts it is a ptr type
const TypePtr *isa_ptr() const; // Returns NULL if not ptr type
const TypeRawPtr *isa_rawptr() const; // NOT Java oop
@@ -574,6 +585,69 @@ public:
#endif
};
+//------------------------------TypeVect---------------------------------------
+// Class of Vector Types
+class TypeVect : public Type {
+ const Type* _elem; // Vector's element type
+ const uint _length; // Elements in vector (power of 2)
+
+protected:
+ TypeVect(TYPES t, const Type* elem, uint length) : Type(t),
+ _elem(elem), _length(length) {}
+
+public:
+ const Type* element_type() const { return _elem; }
+ BasicType element_basic_type() const { return _elem->array_element_basic_type(); }
+ uint length() const { return _length; }
+ uint length_in_bytes() const {
+ return _length * type2aelembytes(element_basic_type());
+ }
+
+ virtual bool eq(const Type *t) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+
+ static const TypeVect *make(const BasicType elem_bt, uint length) {
+ // Use bottom primitive type.
+ return make(get_const_basic_type(elem_bt), length);
+ }
+ // Used directly by Replicate nodes to construct singleton vector.
+ static const TypeVect *make(const Type* elem, uint length);
+
+ virtual const Type *xmeet( const Type *t) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ static const TypeVect *VECTS;
+ static const TypeVect *VECTD;
+ static const TypeVect *VECTX;
+ static const TypeVect *VECTY;
+
+#ifndef PRODUCT
+ virtual void dump2(Dict &d, uint, outputStream *st) const; // Specialized per-Type dumping
+#endif
+};
+
+class TypeVectS : public TypeVect {
+ friend class TypeVect;
+ TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {}
+};
+
+class TypeVectD : public TypeVect {
+ friend class TypeVect;
+ TypeVectD(const Type* elem, uint length) : TypeVect(VectorD, elem, length) {}
+};
+
+class TypeVectX : public TypeVect {
+ friend class TypeVect;
+ TypeVectX(const Type* elem, uint length) : TypeVect(VectorX, elem, length) {}
+};
+
+class TypeVectY : public TypeVect {
+ friend class TypeVect;
+ TypeVectY(const Type* elem, uint length) : TypeVect(VectorY, elem, length) {}
+};
+
//------------------------------TypePtr----------------------------------------
// Class of machine Pointer Types: raw data, instances or arrays.
// If the _base enum is AnyPtr, then this refers to all of the above.
@@ -1113,6 +1187,15 @@ inline const TypeAry *Type::is_ary() const {
return (TypeAry*)this;
}
+inline const TypeVect *Type::is_vect() const {
+ assert( _base >= VectorS && _base <= VectorY, "Not a Vector" );
+ return (TypeVect*)this;
+}
+
+inline const TypeVect *Type::isa_vect() const {
+ return (_base >= VectorS && _base <= VectorY) ? (TypeVect*)this : NULL;
+}
+
inline const TypePtr *Type::is_ptr() const {
// AnyPtr is the first Ptr and KlassPtr the last, with no non-ptrs between.
assert(_base >= AnyPtr && _base <= KlassPtr, "Not a pointer");
diff --git a/src/share/vm/opto/vectornode.cpp b/src/share/vm/opto/vectornode.cpp
index 885a1c898..c786754cd 100644
--- a/src/share/vm/opto/vectornode.cpp
+++ b/src/share/vm/opto/vectornode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,147 +28,16 @@
//------------------------------VectorNode--------------------------------------
-// Return vector type for an element type and vector length.
-const Type* VectorNode::vect_type(BasicType elt_bt, uint len) {
- assert(len <= VectorNode::max_vlen(elt_bt), "len in range");
- switch(elt_bt) {
- case T_BOOLEAN:
- case T_BYTE:
- switch(len) {
- case 2: return TypeInt::CHAR;
- case 4: return TypeInt::INT;
- case 8: return TypeLong::LONG;
- }
- break;
- case T_CHAR:
- case T_SHORT:
- switch(len) {
- case 2: return TypeInt::INT;
- case 4: return TypeLong::LONG;
- }
- break;
- case T_INT:
- switch(len) {
- case 2: return TypeLong::LONG;
- }
- break;
- case T_LONG:
- break;
- case T_FLOAT:
- switch(len) {
- case 2: return Type::DOUBLE;
- }
- break;
- case T_DOUBLE:
- break;
- }
- ShouldNotReachHere();
- return NULL;
-}
-
-// Scalar promotion
-VectorNode* VectorNode::scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t) {
- BasicType bt = opd_t->array_element_basic_type();
- assert(vlen <= VectorNode::max_vlen(bt), "vlen in range");
- switch (bt) {
- case T_BOOLEAN:
- case T_BYTE:
- if (vlen == 16) return new (C, 2) Replicate16BNode(s);
- if (vlen == 8) return new (C, 2) Replicate8BNode(s);
- if (vlen == 4) return new (C, 2) Replicate4BNode(s);
- break;
- case T_CHAR:
- if (vlen == 8) return new (C, 2) Replicate8CNode(s);
- if (vlen == 4) return new (C, 2) Replicate4CNode(s);
- if (vlen == 2) return new (C, 2) Replicate2CNode(s);
- break;
- case T_SHORT:
- if (vlen == 8) return new (C, 2) Replicate8SNode(s);
- if (vlen == 4) return new (C, 2) Replicate4SNode(s);
- if (vlen == 2) return new (C, 2) Replicate2SNode(s);
- break;
- case T_INT:
- if (vlen == 4) return new (C, 2) Replicate4INode(s);
- if (vlen == 2) return new (C, 2) Replicate2INode(s);
- break;
- case T_LONG:
- if (vlen == 2) return new (C, 2) Replicate2LNode(s);
- break;
- case T_FLOAT:
- if (vlen == 4) return new (C, 2) Replicate4FNode(s);
- if (vlen == 2) return new (C, 2) Replicate2FNode(s);
- break;
- case T_DOUBLE:
- if (vlen == 2) return new (C, 2) Replicate2DNode(s);
- break;
- }
- ShouldNotReachHere();
- return NULL;
-}
-
-// Return initial Pack node. Additional operands added with add_opd() calls.
-PackNode* PackNode::make(Compile* C, Node* s, const Type* opd_t) {
- BasicType bt = opd_t->array_element_basic_type();
- switch (bt) {
- case T_BOOLEAN:
- case T_BYTE:
- return new (C, 2) PackBNode(s);
- case T_CHAR:
- return new (C, 2) PackCNode(s);
- case T_SHORT:
- return new (C, 2) PackSNode(s);
- case T_INT:
- return new (C, 2) PackINode(s);
- case T_LONG:
- return new (C, 2) PackLNode(s);
- case T_FLOAT:
- return new (C, 2) PackFNode(s);
- case T_DOUBLE:
- return new (C, 2) PackDNode(s);
- }
- ShouldNotReachHere();
- return NULL;
-}
-
-// Create a binary tree form for Packs. [lo, hi) (half-open) range
-Node* PackNode::binaryTreePack(Compile* C, int lo, int hi) {
- int ct = hi - lo;
- assert(is_power_of_2(ct), "power of 2");
- int mid = lo + ct/2;
- Node* n1 = ct == 2 ? in(lo) : binaryTreePack(C, lo, mid);
- Node* n2 = ct == 2 ? in(lo+1) : binaryTreePack(C, mid, hi );
- int rslt_bsize = ct * type2aelembytes(elt_basic_type());
- if (bottom_type()->is_floatingpoint()) {
- switch (rslt_bsize) {
- case 8: return new (C, 3) PackFNode(n1, n2);
- case 16: return new (C, 3) PackDNode(n1, n2);
- }
- } else {
- assert(bottom_type()->isa_int() || bottom_type()->isa_long(), "int or long");
- switch (rslt_bsize) {
- case 2: return new (C, 3) Pack2x1BNode(n1, n2);
- case 4: return new (C, 3) Pack2x2BNode(n1, n2);
- case 8: return new (C, 3) PackINode(n1, n2);
- case 16: return new (C, 3) PackLNode(n1, n2);
- }
- }
- ShouldNotReachHere();
- return NULL;
-}
-
// Return the vector operator for the specified scalar operation
-// and vector length. One use is to check if the code generator
+// and vector length. Also used to check if the code generator
// supports the vector operation.
-int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
- BasicType bt = opd_t->array_element_basic_type();
- if (!(is_power_of_2(vlen) && vlen <= max_vlen(bt)))
- return 0; // unimplemented
+int VectorNode::opcode(int sopc, uint vlen, BasicType bt) {
switch (sopc) {
case Op_AddI:
switch (bt) {
case T_BOOLEAN:
case T_BYTE: return Op_AddVB;
- case T_CHAR: return Op_AddVC;
+ case T_CHAR:
case T_SHORT: return Op_AddVS;
case T_INT: return Op_AddVI;
}
@@ -186,7 +55,7 @@ int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
switch (bt) {
case T_BOOLEAN:
case T_BYTE: return Op_SubVB;
- case T_CHAR: return Op_SubVC;
+ case T_CHAR:
case T_SHORT: return Op_SubVS;
case T_INT: return Op_SubVI;
}
@@ -216,18 +85,18 @@ int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
switch (bt) {
case T_BOOLEAN:
case T_BYTE: return Op_LShiftVB;
- case T_CHAR: return Op_LShiftVC;
+ case T_CHAR:
case T_SHORT: return Op_LShiftVS;
case T_INT: return Op_LShiftVI;
}
ShouldNotReachHere();
- case Op_URShiftI:
+ case Op_RShiftI:
switch (bt) {
case T_BOOLEAN:
- case T_BYTE: return Op_URShiftVB;
- case T_CHAR: return Op_URShiftVC;
- case T_SHORT: return Op_URShiftVS;
- case T_INT: return Op_URShiftVI;
+ case T_BYTE: return Op_RShiftVB;
+ case T_CHAR:
+ case T_SHORT: return Op_RShiftVS;
+ case T_INT: return Op_RShiftVI;
}
ShouldNotReachHere();
case Op_AndI:
@@ -241,13 +110,14 @@ int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
return Op_XorV;
case Op_LoadB:
+ case Op_LoadUB:
case Op_LoadUS:
case Op_LoadS:
case Op_LoadI:
case Op_LoadL:
case Op_LoadF:
case Op_LoadD:
- return VectorLoadNode::opcode(sopc, vlen);
+ return Op_LoadVector;
case Op_StoreB:
case Op_StoreC:
@@ -255,211 +125,170 @@ int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
case Op_StoreL:
case Op_StoreF:
case Op_StoreD:
- return VectorStoreNode::opcode(sopc, vlen);
- }
- return 0; // Unimplemented
-}
-
-// Helper for above.
-int VectorLoadNode::opcode(int sopc, uint vlen) {
- switch (sopc) {
- case Op_LoadB:
- switch (vlen) {
- case 2: return 0; // Unimplemented
- case 4: return Op_Load4B;
- case 8: return Op_Load8B;
- case 16: return Op_Load16B;
- }
- break;
- case Op_LoadUS:
- switch (vlen) {
- case 2: return Op_Load2C;
- case 4: return Op_Load4C;
- case 8: return Op_Load8C;
- }
- break;
- case Op_LoadS:
- switch (vlen) {
- case 2: return Op_Load2S;
- case 4: return Op_Load4S;
- case 8: return Op_Load8S;
- }
- break;
- case Op_LoadI:
- switch (vlen) {
- case 2: return Op_Load2I;
- case 4: return Op_Load4I;
- }
- break;
- case Op_LoadL:
- if (vlen == 2) return Op_Load2L;
- break;
- case Op_LoadF:
- switch (vlen) {
- case 2: return Op_Load2F;
- case 4: return Op_Load4F;
- }
- break;
- case Op_LoadD:
- if (vlen == 2) return Op_Load2D;
- break;
+ return Op_StoreVector;
}
return 0; // Unimplemented
}
-// Helper for above
-int VectorStoreNode::opcode(int sopc, uint vlen) {
- switch (sopc) {
- case Op_StoreB:
- switch (vlen) {
- case 2: return 0; // Unimplemented
- case 4: return Op_Store4B;
- case 8: return Op_Store8B;
- case 16: return Op_Store16B;
- }
- break;
- case Op_StoreC:
- switch (vlen) {
- case 2: return Op_Store2C;
- case 4: return Op_Store4C;
- case 8: return Op_Store8C;
- }
- break;
- case Op_StoreI:
- switch (vlen) {
- case 2: return Op_Store2I;
- case 4: return Op_Store4I;
- }
- break;
- case Op_StoreL:
- if (vlen == 2) return Op_Store2L;
- break;
- case Op_StoreF:
- switch (vlen) {
- case 2: return Op_Store2F;
- case 4: return Op_Store4F;
- }
- break;
- case Op_StoreD:
- if (vlen == 2) return Op_Store2D;
- break;
+bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
+ if (is_java_primitive(bt) &&
+ (vlen > 1) && is_power_of_2(vlen) &&
+ Matcher::vector_size_supported(bt, vlen)) {
+ int vopc = VectorNode::opcode(opc, vlen, bt);
+ return vopc > 0 && Matcher::has_match_rule(vopc);
}
- return 0; // Unimplemented
+ return false;
}
// Return the vector version of a scalar operation node.
-VectorNode* VectorNode::make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* opd_t) {
- int vopc = opcode(sopc, vlen, opd_t);
+VectorNode* VectorNode::make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt) {
+ const TypeVect* vt = TypeVect::make(bt, vlen);
+ int vopc = VectorNode::opcode(opc, vlen, bt);
switch (vopc) {
- case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vlen);
- case Op_AddVC: return new (C, 3) AddVCNode(n1, n2, vlen);
- case Op_AddVS: return new (C, 3) AddVSNode(n1, n2, vlen);
- case Op_AddVI: return new (C, 3) AddVINode(n1, n2, vlen);
- case Op_AddVL: return new (C, 3) AddVLNode(n1, n2, vlen);
- case Op_AddVF: return new (C, 3) AddVFNode(n1, n2, vlen);
- case Op_AddVD: return new (C, 3) AddVDNode(n1, n2, vlen);
-
- case Op_SubVB: return new (C, 3) SubVBNode(n1, n2, vlen);
- case Op_SubVC: return new (C, 3) SubVCNode(n1, n2, vlen);
- case Op_SubVS: return new (C, 3) SubVSNode(n1, n2, vlen);
- case Op_SubVI: return new (C, 3) SubVINode(n1, n2, vlen);
- case Op_SubVL: return new (C, 3) SubVLNode(n1, n2, vlen);
- case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vlen);
- case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vlen);
-
- case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vlen);
- case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vlen);
-
- case Op_DivVF: return new (C, 3) DivVFNode(n1, n2, vlen);
- case Op_DivVD: return new (C, 3) DivVDNode(n1, n2, vlen);
-
- case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vlen);
- case Op_LShiftVC: return new (C, 3) LShiftVCNode(n1, n2, vlen);
- case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vlen);
- case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vlen);
+ case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vt);
+ case Op_AddVS: return new (C, 3) AddVSNode(n1, n2, vt);
+ case Op_AddVI: return new (C, 3) AddVINode(n1, n2, vt);
+ case Op_AddVL: return new (C, 3) AddVLNode(n1, n2, vt);
+ case Op_AddVF: return new (C, 3) AddVFNode(n1, n2, vt);
+ case Op_AddVD: return new (C, 3) AddVDNode(n1, n2, vt);
+
+ case Op_SubVB: return new (C, 3) SubVBNode(n1, n2, vt);
+ case Op_SubVS: return new (C, 3) SubVSNode(n1, n2, vt);
+ case Op_SubVI: return new (C, 3) SubVINode(n1, n2, vt);
+ case Op_SubVL: return new (C, 3) SubVLNode(n1, n2, vt);
+ case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vt);
+ case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vt);
+
+ case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vt);
+ case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vt);
+
+ case Op_DivVF: return new (C, 3) DivVFNode(n1, n2, vt);
+ case Op_DivVD: return new (C, 3) DivVDNode(n1, n2, vt);
+
+ case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vt);
+ case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vt);
+ case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vt);
+
+ case Op_RShiftVB: return new (C, 3) RShiftVBNode(n1, n2, vt);
+ case Op_RShiftVS: return new (C, 3) RShiftVSNode(n1, n2, vt);
+ case Op_RShiftVI: return new (C, 3) RShiftVINode(n1, n2, vt);
+
+ case Op_AndV: return new (C, 3) AndVNode(n1, n2, vt);
+ case Op_OrV: return new (C, 3) OrVNode (n1, n2, vt);
+ case Op_XorV: return new (C, 3) XorVNode(n1, n2, vt);
+ }
+ ShouldNotReachHere();
+ return NULL;
- case Op_URShiftVB: return new (C, 3) URShiftVBNode(n1, n2, vlen);
- case Op_URShiftVC: return new (C, 3) URShiftVCNode(n1, n2, vlen);
- case Op_URShiftVS: return new (C, 3) URShiftVSNode(n1, n2, vlen);
- case Op_URShiftVI: return new (C, 3) URShiftVINode(n1, n2, vlen);
+}
- case Op_AndV: return new (C, 3) AndVNode(n1, n2, vlen, opd_t->array_element_basic_type());
- case Op_OrV: return new (C, 3) OrVNode (n1, n2, vlen, opd_t->array_element_basic_type());
- case Op_XorV: return new (C, 3) XorVNode(n1, n2, vlen, opd_t->array_element_basic_type());
+// Scalar promotion
+VectorNode* VectorNode::scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t) {
+ BasicType bt = opd_t->array_element_basic_type();
+ const TypeVect* vt = opd_t->singleton() ? TypeVect::make(opd_t, vlen)
+ : TypeVect::make(bt, vlen);
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ return new (C, 2) ReplicateBNode(s, vt);
+ case T_CHAR:
+ case T_SHORT:
+ return new (C, 2) ReplicateSNode(s, vt);
+ case T_INT:
+ return new (C, 2) ReplicateINode(s, vt);
+ case T_LONG:
+ return new (C, 2) ReplicateLNode(s, vt);
+ case T_FLOAT:
+ return new (C, 2) ReplicateFNode(s, vt);
+ case T_DOUBLE:
+ return new (C, 2) ReplicateDNode(s, vt);
}
ShouldNotReachHere();
return NULL;
}
-// Return the vector version of a scalar load node.
-VectorLoadNode* VectorLoadNode::make(Compile* C, int opc, Node* ctl, Node* mem,
- Node* adr, const TypePtr* atyp, uint vlen) {
- int vopc = opcode(opc, vlen);
-
- switch(vopc) {
- case Op_Load16B: return new (C, 3) Load16BNode(ctl, mem, adr, atyp);
- case Op_Load8B: return new (C, 3) Load8BNode(ctl, mem, adr, atyp);
- case Op_Load4B: return new (C, 3) Load4BNode(ctl, mem, adr, atyp);
-
- case Op_Load8C: return new (C, 3) Load8CNode(ctl, mem, adr, atyp);
- case Op_Load4C: return new (C, 3) Load4CNode(ctl, mem, adr, atyp);
- case Op_Load2C: return new (C, 3) Load2CNode(ctl, mem, adr, atyp);
-
- case Op_Load8S: return new (C, 3) Load8SNode(ctl, mem, adr, atyp);
- case Op_Load4S: return new (C, 3) Load4SNode(ctl, mem, adr, atyp);
- case Op_Load2S: return new (C, 3) Load2SNode(ctl, mem, adr, atyp);
-
- case Op_Load4I: return new (C, 3) Load4INode(ctl, mem, adr, atyp);
- case Op_Load2I: return new (C, 3) Load2INode(ctl, mem, adr, atyp);
+// Return initial Pack node. Additional operands added with add_opd() calls.
+PackNode* PackNode::make(Compile* C, Node* s, uint vlen, BasicType bt) {
+ const TypeVect* vt = TypeVect::make(bt, vlen);
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ return new (C, vlen+1) PackBNode(s, vt);
+ case T_CHAR:
+ case T_SHORT:
+ return new (C, vlen+1) PackSNode(s, vt);
+ case T_INT:
+ return new (C, vlen+1) PackINode(s, vt);
+ case T_LONG:
+ return new (C, vlen+1) PackLNode(s, vt);
+ case T_FLOAT:
+ return new (C, vlen+1) PackFNode(s, vt);
+ case T_DOUBLE:
+ return new (C, vlen+1) PackDNode(s, vt);
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
- case Op_Load2L: return new (C, 3) Load2LNode(ctl, mem, adr, atyp);
+// Create a binary tree form for Packs. [lo, hi) (half-open) range
+Node* PackNode::binaryTreePack(Compile* C, int lo, int hi) {
+ int ct = hi - lo;
+ assert(is_power_of_2(ct), "power of 2");
+ if (ct == 2) {
+ PackNode* pk = PackNode::make(C, in(lo), 2, vect_type()->element_basic_type());
+ pk->add_opd(1, in(lo+1));
+ return pk;
- case Op_Load4F: return new (C, 3) Load4FNode(ctl, mem, adr, atyp);
- case Op_Load2F: return new (C, 3) Load2FNode(ctl, mem, adr, atyp);
+ } else {
+ int mid = lo + ct/2;
+ Node* n1 = binaryTreePack(C, lo, mid);
+ Node* n2 = binaryTreePack(C, mid, hi );
- case Op_Load2D: return new (C, 3) Load2DNode(ctl, mem, adr, atyp);
+ BasicType bt = vect_type()->element_basic_type();
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ return new (C, 3) PackSNode(n1, n2, TypeVect::make(T_SHORT, 2));
+ case T_CHAR:
+ case T_SHORT:
+ return new (C, 3) PackINode(n1, n2, TypeVect::make(T_INT, 2));
+ case T_INT:
+ return new (C, 3) PackLNode(n1, n2, TypeVect::make(T_LONG, 2));
+ case T_LONG:
+ return new (C, 3) Pack2LNode(n1, n2, TypeVect::make(T_LONG, 2));
+ case T_FLOAT:
+ return new (C, 3) PackDNode(n1, n2, TypeVect::make(T_DOUBLE, 2));
+ case T_DOUBLE:
+ return new (C, 3) Pack2DNode(n1, n2, TypeVect::make(T_DOUBLE, 2));
+ }
+ ShouldNotReachHere();
}
- ShouldNotReachHere();
+ return NULL;
+}
+
+// Return the vector version of a scalar load node.
+LoadVectorNode* LoadVectorNode::make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, uint vlen, BasicType bt) {
+ const TypeVect* vt = TypeVect::make(bt, vlen);
+ return new (C, 3) LoadVectorNode(ctl, mem, adr, atyp, vt);
return NULL;
}
// Return the vector version of a scalar store node.
-VectorStoreNode* VectorStoreNode::make(Compile* C, int opc, Node* ctl, Node* mem,
+StoreVectorNode* StoreVectorNode::make(Compile* C, int opc, Node* ctl, Node* mem,
Node* adr, const TypePtr* atyp, Node* val,
uint vlen) {
- int vopc = opcode(opc, vlen);
-
- switch(vopc) {
- case Op_Store16B: return new (C, 4) Store16BNode(ctl, mem, adr, atyp, val);
- case Op_Store8B: return new (C, 4) Store8BNode(ctl, mem, adr, atyp, val);
- case Op_Store4B: return new (C, 4) Store4BNode(ctl, mem, adr, atyp, val);
-
- case Op_Store8C: return new (C, 4) Store8CNode(ctl, mem, adr, atyp, val);
- case Op_Store4C: return new (C, 4) Store4CNode(ctl, mem, adr, atyp, val);
- case Op_Store2C: return new (C, 4) Store2CNode(ctl, mem, adr, atyp, val);
-
- case Op_Store4I: return new (C, 4) Store4INode(ctl, mem, adr, atyp, val);
- case Op_Store2I: return new (C, 4) Store2INode(ctl, mem, adr, atyp, val);
-
- case Op_Store2L: return new (C, 4) Store2LNode(ctl, mem, adr, atyp, val);
-
- case Op_Store4F: return new (C, 4) Store4FNode(ctl, mem, adr, atyp, val);
- case Op_Store2F: return new (C, 4) Store2FNode(ctl, mem, adr, atyp, val);
-
- case Op_Store2D: return new (C, 4) Store2DNode(ctl, mem, adr, atyp, val);
- }
- ShouldNotReachHere();
- return NULL;
+ return new (C, 4) StoreVectorNode(ctl, mem, adr, atyp, val);
}
// Extract a scalar element of vector.
-Node* ExtractNode::make(Compile* C, Node* v, uint position, const Type* opd_t) {
- BasicType bt = opd_t->array_element_basic_type();
- assert(position < VectorNode::max_vlen(bt), "pos in range");
+Node* ExtractNode::make(Compile* C, Node* v, uint position, BasicType bt) {
+ assert((int)position < Matcher::max_vector_size(bt), "pos in range");
ConINode* pos = ConINode::make(C, (int)position);
switch (bt) {
case T_BOOLEAN:
+ return new (C, 3) ExtractUBNode(v, pos);
case T_BYTE:
return new (C, 3) ExtractBNode(v, pos);
case T_CHAR:
@@ -478,3 +307,4 @@ Node* ExtractNode::make(Compile* C, Node* v, uint position, const Type* opd_t) {
ShouldNotReachHere();
return NULL;
}
+
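
A rough sketch of how a client such as SuperWord is expected to drive the reworked factories (simplified, hypothetical caller; the real policy lives in superword.cpp): VectorNode::implemented() checks that the element type is a Java primitive, that the length is a power of two accepted by Matcher::vector_size_supported(), and that a match rule exists for the vector opcode; only then is a node built through VectorNode::make(), which types it with TypeVect::make(bt, vlen).

  // Hypothetical helper: vectorize an integer add of 'vlen' lanes, or
  // return NULL to keep the scalar form.
  static Node* make_vector_add(Compile* C, Node* a, Node* b, uint vlen) {
    const BasicType bt = T_INT;
    if (!VectorNode::implemented(Op_AddI, vlen, bt)) {
      return NULL;                      // code generator has no AddVI rule
    }
    return VectorNode::make(C, Op_AddI, a, b, vlen, bt);   // AddVINode
  }
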
diff --git a/src/share/vm/opto/vectornode.hpp b/src/share/vm/opto/vectornode.hpp
index 7d1905c7f..602ee94c5 100644
--- a/src/share/vm/opto/vectornode.hpp
+++ b/src/share/vm/opto/vectornode.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,48 +31,32 @@
//------------------------------VectorNode--------------------------------------
// Vector Operation
-class VectorNode : public Node {
- virtual uint size_of() const { return sizeof(*this); }
- protected:
- uint _length; // vector length
- virtual BasicType elt_basic_type() const = 0; // Vector element basic type
-
- static const Type* vect_type(BasicType elt_bt, uint len);
- static const Type* vect_type(const Type* elt_type, uint len) {
- return vect_type(elt_type->array_element_basic_type(), len);
- }
-
+class VectorNode : public TypeNode {
public:
- friend class VectorLoadNode; // For vect_type
- friend class VectorStoreNode; // ditto.
- VectorNode(Node* n1, uint vlen) : Node(NULL, n1), _length(vlen) {
+ VectorNode(Node* n1, const TypeVect* vt) : TypeNode(vt, 2) {
init_class_id(Class_Vector);
+ init_req(1, n1);
}
- VectorNode(Node* n1, Node* n2, uint vlen) : Node(NULL, n1, n2), _length(vlen) {
+ VectorNode(Node* n1, Node* n2, const TypeVect* vt) : TypeNode(vt, 3) {
init_class_id(Class_Vector);
+ init_req(1, n1);
+ init_req(2, n2);
}
- virtual int Opcode() const;
- uint length() const { return _length; } // Vector length
+ const TypeVect* vect_type() const { return type()->is_vect(); }
+ uint length() const { return vect_type()->length(); } // Vector length
- static uint max_vlen(BasicType bt) { // max vector length
- return (uint)(Matcher::vector_width_in_bytes() / type2aelembytes(bt));
- }
-
- // Element and vector type
- const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
- const Type* vect_type() const { return vect_type(elt_basic_type(), length()); }
-
- virtual const Type *bottom_type() const { return vect_type(); }
- virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
+ virtual int Opcode() const;
- // Vector opcode from scalar opcode
- static int opcode(int sopc, uint vlen, const Type* opd_t);
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(vect_type()->length_in_bytes()); }
static VectorNode* scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t);
- static VectorNode* make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* elt_t);
+ static VectorNode* make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt);
+
+ static int opcode(int opc, uint vlen, BasicType bt);
+ static bool implemented(int opc, uint vlen, BasicType bt);
};
@@ -81,981 +65,393 @@ class VectorNode : public Node {
//------------------------------AddVBNode---------------------------------------
// Vector add byte
class AddVBNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- AddVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
- virtual int Opcode() const;
-};
-
-//------------------------------AddVCNode---------------------------------------
-// Vector add char
-class AddVCNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
public:
- AddVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------AddVSNode---------------------------------------
-// Vector add short
+// Vector add char/short
class AddVSNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
public:
- AddVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------AddVINode---------------------------------------
// Vector add int
class AddVINode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
public:
- AddVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------AddVLNode---------------------------------------
// Vector add long
class AddVLNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
public:
- AddVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------AddVFNode---------------------------------------
// Vector add float
class AddVFNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- AddVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------AddVDNode---------------------------------------
// Vector add double
class AddVDNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- AddVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------SubVBNode---------------------------------------
// Vector subtract byte
class SubVBNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
public:
- SubVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
- virtual int Opcode() const;
-};
-
-//------------------------------SubVCNode---------------------------------------
-// Vector subtract char
-class SubVCNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- SubVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------SubVSNode---------------------------------------
// Vector subtract short
class SubVSNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
public:
- SubVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------SubVINode---------------------------------------
// Vector subtract int
class SubVINode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
public:
- SubVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------SubVLNode---------------------------------------
// Vector subtract long
class SubVLNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
public:
- SubVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------SubVFNode---------------------------------------
// Vector subtract float
class SubVFNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- SubVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------SubVDNode---------------------------------------
// Vector subtract double
class SubVDNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- SubVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------MulVFNode---------------------------------------
// Vector multiply float
class MulVFNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- MulVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ MulVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------MulVDNode---------------------------------------
// Vector multiply double
class MulVDNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- MulVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ MulVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------DivVFNode---------------------------------------
// Vector divide float
class DivVFNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- DivVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ DivVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------DivVDNode---------------------------------------
// Vector Divide double
class DivVDNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- DivVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ DivVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------LShiftVBNode---------------------------------------
// Vector lshift byte
class LShiftVBNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- LShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
- virtual int Opcode() const;
-};
-
-//------------------------------LShiftVCNode---------------------------------------
-// Vector lshift chars
-class LShiftVCNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
public:
- LShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ LShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------LShiftVSNode---------------------------------------
// Vector lshift shorts
class LShiftVSNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
public:
- LShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ LShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------LShiftVINode---------------------------------------
// Vector lshift ints
class LShiftVINode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
public:
- LShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ LShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------RShiftVBNode---------------------------------------
// Vector arithmetic right shift bytes
-class URShiftVBNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- URShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
- virtual int Opcode() const;
-};
-
-//------------------------------URShiftVCNode---------------------------------------
-// Vector urshift char
-class URShiftVCNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
+class RShiftVBNode : public VectorNode {
public:
- URShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ RShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------RShiftVSNode---------------------------------------
// Vector arithmetic right shift shorts
-class URShiftVSNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
+class RShiftVSNode : public VectorNode {
public:
- URShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ RShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------RShiftVINode---------------------------------------
// Vector arithmetic right shift ints
-class URShiftVINode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
+class RShiftVINode : public VectorNode {
public:
- URShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ RShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------AndVNode---------------------------------------
// Vector and
class AndVNode : public VectorNode {
- protected:
- BasicType _bt;
- virtual BasicType elt_basic_type() const { return _bt; }
public:
- AndVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ AndVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------OrVNode---------------------------------------
// Vector or
class OrVNode : public VectorNode {
- protected:
- BasicType _bt;
- virtual BasicType elt_basic_type() const { return _bt; }
public:
- OrVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ OrVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------XorVNode---------------------------------------
// Vector xor
class XorVNode : public VectorNode {
- protected:
- BasicType _bt;
- virtual BasicType elt_basic_type() const { return _bt; }
public:
- XorVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ XorVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
-//================================= M E M O R Y ==================================
-
-
-//------------------------------VectorLoadNode--------------------------------------
-// Vector Load from memory
-class VectorLoadNode : public LoadNode {
- virtual uint size_of() const { return sizeof(*this); }
-
- protected:
- virtual BasicType elt_basic_type() const = 0; // Vector element basic type
- // For use in constructor
- static const Type* vect_type(const Type* elt_type, uint len) {
- return VectorNode::vect_type(elt_type, len);
- }
+//================================= M E M O R Y ===============================
+//------------------------------LoadVectorNode---------------------------------
+// Load Vector from memory
+class LoadVectorNode : public LoadNode {
public:
- VectorLoadNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *rt)
- : LoadNode(c,mem,adr,at,rt) {
- init_class_id(Class_VectorLoad);
+ LoadVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt)
+ : LoadNode(c, mem, adr, at, vt) {
+ init_class_id(Class_LoadVector);
}
- virtual int Opcode() const;
-
- virtual uint length() const = 0; // Vector length
-
- // Element and vector type
- const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
- const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); }
-
- virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
- virtual BasicType memory_type() const { return T_VOID; }
- virtual int memory_size() const { return length()*type2aelembytes(elt_basic_type()); }
-
- // Vector opcode from scalar opcode
- static int opcode(int sopc, uint vlen);
-
- static VectorLoadNode* make(Compile* C, int opc, Node* ctl, Node* mem,
- Node* adr, const TypePtr* atyp, uint vlen);
-};
-
-//------------------------------Load16BNode--------------------------------------
-// Vector load of 16 bytes (8bits signed) from memory
-class Load16BNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Load16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,16)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store16B; }
- virtual uint length() const { return 16; }
-};
-
-//------------------------------Load8BNode--------------------------------------
-// Vector load of 8 bytes (8bits signed) from memory
-class Load8BNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Load8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store8B; }
- virtual uint length() const { return 8; }
-};
-
-//------------------------------Load4BNode--------------------------------------
-// Vector load of 4 bytes (8bits signed) from memory
-class Load4BNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Load4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4B; }
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Load8CNode--------------------------------------
-// Vector load of 8 chars (16bits unsigned) from memory
-class Load8CNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Load8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store8C; }
- virtual uint length() const { return 8; }
-};
-
-//------------------------------Load4CNode--------------------------------------
-// Vector load of 4 chars (16bits unsigned) from memory
-class Load4CNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Load4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4C; }
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Load2CNode--------------------------------------
-// Vector load of 2 chars (16bits unsigned) from memory
-class Load2CNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Load2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2C; }
- virtual uint length() const { return 2; }
-};
-
-//------------------------------Load8SNode--------------------------------------
-// Vector load of 8 shorts (16bits signed) from memory
-class Load8SNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Load8SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store8C; }
- virtual uint length() const { return 8; }
-};
-
-//------------------------------Load4SNode--------------------------------------
-// Vector load of 4 shorts (16bits signed) from memory
-class Load4SNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Load4SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4C; }
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Load2SNode--------------------------------------
-// Vector load of 2 shorts (16bits signed) from memory
-class Load2SNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Load2SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2C; }
- virtual uint length() const { return 2; }
-};
-
-//------------------------------Load4INode--------------------------------------
-// Vector load of 4 integers (32bits signed) from memory
-class Load4INode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Load4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4I; }
- virtual uint length() const { return 4; }
-};
-//------------------------------Load2INode--------------------------------------
-// Vector load of 2 integers (32bits signed) from memory
-class Load2INode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Load2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2I; }
- virtual uint length() const { return 2; }
-};
+ const TypeVect* vect_type() const { return type()->is_vect(); }
+ uint length() const { return vect_type()->length(); } // Vector length
-//------------------------------Load2LNode--------------------------------------
-// Vector load of 2 longs (64bits signed) from memory
-class Load2LNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
- public:
- Load2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeLong *tl = TypeLong::LONG)
- : VectorLoadNode(c,mem,adr,at,vect_type(tl,2)) {}
virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2L; }
- virtual uint length() const { return 2; }
-};
-//------------------------------Load4FNode--------------------------------------
-// Vector load of 4 floats (32bits) from memory
-class Load4FNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Load4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT)
- : VectorLoadNode(c,mem,adr,at,vect_type(t,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4F; }
- virtual uint length() const { return 4; }
-};
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(memory_size()); }
+ virtual BasicType memory_type() const { return T_VOID; }
+ virtual int memory_size() const { return vect_type()->length_in_bytes(); }
-//------------------------------Load2FNode--------------------------------------
-// Vector load of 2 floats (32bits) from memory
-class Load2FNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Load2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT)
- : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2F; }
- virtual uint length() const { return 2; }
-};
+ virtual int store_Opcode() const { return Op_StoreVector; }
-//------------------------------Load2DNode--------------------------------------
-// Vector load of 2 doubles (64bits) from memory
-class Load2DNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
- public:
- Load2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::DOUBLE)
- : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2D; }
- virtual uint length() const { return 2; }
+ static LoadVectorNode* make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, uint vlen, BasicType bt);
};
-
-//------------------------------VectorStoreNode--------------------------------------
-// Vector Store to memory
-class VectorStoreNode : public StoreNode {
- virtual uint size_of() const { return sizeof(*this); }
-
- protected:
- virtual BasicType elt_basic_type() const = 0; // Vector element basic type
-
+//------------------------------StoreVectorNode--------------------------------
+// Store Vector to memory
+class StoreVectorNode : public StoreNode {
public:
- VectorStoreNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : StoreNode(c,mem,adr,at,val) {
- init_class_id(Class_VectorStore);
+ StoreVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : StoreNode(c, mem, adr, at, val) {
+ assert(val->is_Vector() || val->is_LoadVector(), "sanity");
+ init_class_id(Class_StoreVector);
}
- virtual int Opcode() const;
- virtual uint length() const = 0; // Vector length
+ const TypeVect* vect_type() const { return in(MemNode::ValueIn)->bottom_type()->is_vect(); }
+ uint length() const { return vect_type()->length(); } // Vector length
- // Element and vector type
- const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
- const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); }
+ virtual int Opcode() const;
- virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(memory_size()); }
virtual BasicType memory_type() const { return T_VOID; }
- virtual int memory_size() const { return length()*type2aelembytes(elt_basic_type()); }
-
- // Vector opcode from scalar opcode
- static int opcode(int sopc, uint vlen);
+ virtual int memory_size() const { return vect_type()->length_in_bytes(); }
- static VectorStoreNode* make(Compile* C, int opc, Node* ctl, Node* mem,
+ static StoreVectorNode* make(Compile* C, int opc, Node* ctl, Node* mem,
Node* adr, const TypePtr* atyp, Node* val,
uint vlen);
};
-//------------------------------Store16BNode--------------------------------------
-// Vector store of 16 bytes (8bits signed) to memory
-class Store16BNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Store16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 16; }
-};
-//------------------------------Store8BNode--------------------------------------
-// Vector store of 8 bytes (8bits signed) to memory
-class Store8BNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Store8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 8; }
-};
-
-//------------------------------Store4BNode--------------------------------------
-// Vector store of 4 bytes (8bits signed) to memory
-class Store4BNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Store4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Store8CNode--------------------------------------
-// Vector store of 8 chars (16bits signed/unsigned) to memory
-class Store8CNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Store8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 8; }
-};
-
-//------------------------------Store4CNode--------------------------------------
-// Vector store of 4 chars (16bits signed/unsigned) to memory
-class Store4CNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Store4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Store2CNode--------------------------------------
-// Vector store of 2 chars (16bits signed/unsigned) to memory
-class Store2CNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Store2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 2; }
-};
-
-//------------------------------Store4INode--------------------------------------
-// Vector store of 4 integers (32bits signed) to memory
-class Store4INode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Store4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Store2INode--------------------------------------
-// Vector store of 2 integers (32bits signed) to memory
-class Store2INode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Store2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 2; }
-};
+//=========================Promote_Scalar_to_Vector============================
-//------------------------------Store2LNode--------------------------------------
-// Vector store of 2 longs (64bits signed) to memory
-class Store2LNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
+//------------------------------ReplicateBNode---------------------------------
+// Replicate byte scalar to be vector
+class ReplicateBNode : public VectorNode {
public:
- Store2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
+ ReplicateBNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
- virtual uint length() const { return 2; }
};
-//------------------------------Store4FNode--------------------------------------
-// Vector store of 4 floats (32bits) to memory
-class Store4FNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
+//------------------------------ReplicateSNode---------------------------------
+// Replicate short scalar to be vector
+class ReplicateSNode : public VectorNode {
public:
- Store4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
+ ReplicateSNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
- virtual uint length() const { return 4; }
};
-//------------------------------Store2FNode--------------------------------------
-// Vector store of 2 floats (32bits) to memory
-class Store2FNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
+//------------------------------ReplicateINode---------------------------------
+// Replicate int scalar to be vector
+class ReplicateINode : public VectorNode {
public:
- Store2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
+ ReplicateINode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
- virtual uint length() const { return 2; }
};
-//------------------------------Store2DNode--------------------------------------
-// Vector store of 2 doubles (64bits) to memory
-class Store2DNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+//------------------------------ReplicateLNode---------------------------------
+// Replicate long scalar to be vector
+class ReplicateLNode : public VectorNode {
public:
- Store2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
+ ReplicateLNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
- virtual uint length() const { return 2; }
};
-//=========================Promote_Scalar_to_Vector====================================
-
-//------------------------------Replicate16BNode---------------------------------------
-// Replicate byte scalar to be vector of 16 bytes
-class Replicate16BNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
+//------------------------------ReplicateFNode---------------------------------
+// Replicate float scalar to be vector
+class ReplicateFNode : public VectorNode {
public:
- Replicate16BNode(Node* in1) : VectorNode(in1, 16) {}
+ ReplicateFNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
};
-//------------------------------Replicate8BNode---------------------------------------
-// Replicate byte scalar to be vector of 8 bytes
-class Replicate8BNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
+//------------------------------ReplicateDNode---------------------------------
+// Replicate double scalar to be vector
+class ReplicateDNode : public VectorNode {
public:
- Replicate8BNode(Node* in1) : VectorNode(in1, 8) {}
+ ReplicateDNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
};
-//------------------------------Replicate4BNode---------------------------------------
-// Replicate byte scalar to be vector of 4 bytes
-class Replicate4BNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Replicate4BNode(Node* in1) : VectorNode(in1, 4) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate8CNode---------------------------------------
-// Replicate char scalar to be vector of 8 chars
-class Replicate8CNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Replicate8CNode(Node* in1) : VectorNode(in1, 8) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate4CNode---------------------------------------
-// Replicate char scalar to be vector of 4 chars
-class Replicate4CNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Replicate4CNode(Node* in1) : VectorNode(in1, 4) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2CNode---------------------------------------
-// Replicate char scalar to be vector of 2 chars
-class Replicate2CNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Replicate2CNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate8SNode---------------------------------------
-// Replicate short scalar to be vector of 8 shorts
-class Replicate8SNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Replicate8SNode(Node* in1) : VectorNode(in1, 8) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate4SNode---------------------------------------
-// Replicate short scalar to be vector of 4 shorts
-class Replicate4SNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Replicate4SNode(Node* in1) : VectorNode(in1, 4) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2SNode---------------------------------------
-// Replicate short scalar to be vector of 2 shorts
-class Replicate2SNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Replicate2SNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate4INode---------------------------------------
-// Replicate int scalar to be vector of 4 ints
-class Replicate4INode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Replicate4INode(Node* in1) : VectorNode(in1, 4) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2INode---------------------------------------
-// Replicate int scalar to be vector of 2 ints
-class Replicate2INode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Replicate2INode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2LNode---------------------------------------
-// Replicate long scalar to be vector of 2 longs
-class Replicate2LNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
- public:
- Replicate2LNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate4FNode---------------------------------------
-// Replicate float scalar to be vector of 4 floats
-class Replicate4FNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Replicate4FNode(Node* in1) : VectorNode(in1, 4) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2FNode---------------------------------------
-// Replicate float scalar to be vector of 2 floats
-class Replicate2FNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Replicate2FNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2DNode---------------------------------------
-// Replicate double scalar to be vector of 2 doubles
-class Replicate2DNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
- public:
- Replicate2DNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
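Every removed Replicate class above followed the same pattern: the element type came from an elt_basic_type() override and the lane count was hard-wired into the constructor, so each new vector width would have required another family of classes. In the reworked scheme a single class per element kind remains and the shape travels in the TypeVect. A hedged sketch of how a replicate might now be created (TypeVect::make and the scalar_d input are assumptions for illustration; the actual factory lives on the vectornode.cpp side of this patch):

    // The TypeVect carries both the element type and the lane count, so the
    // same ReplicateDNode covers 2 x double (128-bit XMM) and 4 x double
    // (256-bit YMM) vectors without adding subclasses.
    const TypeVect* vt  = TypeVect::make(T_DOUBLE, 4);             // assumed factory
    ReplicateDNode* rep = new (C, 2) ReplicateDNode(scalar_d, vt); // 2 edges: control + scalar
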
-//========================Pack_Scalars_into_a_Vector==============================
+//========================Pack_Scalars_into_a_Vector===========================
//------------------------------PackNode---------------------------------------
// Pack parent class (not for code generation).
class PackNode : public VectorNode {
public:
- PackNode(Node* in1) : VectorNode(in1, 1) {}
- PackNode(Node* in1, Node* n2) : VectorNode(in1, n2, 2) {}
+ PackNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
+ PackNode(Node* in1, Node* n2, const TypeVect* vt) : VectorNode(in1, n2, vt) {}
virtual int Opcode() const;
- void add_opd(Node* n) {
- add_req(n);
- _length++;
- assert(_length == req() - 1, "vector length matches edge count");
+ void add_opd(uint i, Node* n) {
+ init_req(i+1, n);
}
// Create a binary tree form for Packs. [lo, hi) (half-open) range
Node* binaryTreePack(Compile* C, int lo, int hi);
- static PackNode* make(Compile* C, Node* s, const Type* elt_t);
+ static PackNode* make(Compile* C, Node* s, uint vlen, BasicType bt);
};
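PackNode likewise takes its shape from the TypeVect, and operands are now installed by index rather than by appending edges. Roughly how a caller such as the superword pass might assemble a pack with this interface (a sketch only; C, vlen, the element type T_INT and the scalar[] array are assumed to be in scope):

    // Lane 0 goes in through the factory; the remaining lanes are set by
    // index. Per the declaration above, add_opd(j, n) stores lane j at
    // edge j+1 (edge 0 is the control input).
    PackNode* p = PackNode::make(C, scalar[0], vlen, T_INT);
    for (uint j = 1; j < vlen; j++) {
      p->add_opd(j, scalar[j]);
    }
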
//------------------------------PackBNode---------------------------------------
// Pack byte scalars into vector
class PackBNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- PackBNode(Node* in1) : PackNode(in1) {}
- virtual int Opcode() const;
-};
-
-//------------------------------PackCNode---------------------------------------
-// Pack char scalars into vector
-class PackCNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
public:
- PackCNode(Node* in1) : PackNode(in1) {}
+ PackBNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
virtual int Opcode() const;
};
//------------------------------PackSNode---------------------------------------
// Pack short scalars into a vector
class PackSNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
public:
- PackSNode(Node* in1) : PackNode(in1) {}
+ PackSNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackSNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------PackINode---------------------------------------
// Pack integer scalars into a vector
class PackINode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
public:
- PackINode(Node* in1) : PackNode(in1) {}
- PackINode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ PackINode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackINode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------PackLNode---------------------------------------
// Pack long scalars into a vector
class PackLNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
public:
- PackLNode(Node* in1) : PackNode(in1) {}
- PackLNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ PackLNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackLNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Pack2LNode--------------------------------------
+// Pack 2 long scalars into a vector
+class Pack2LNode : public PackNode {
+ public:
+ Pack2LNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------PackFNode---------------------------------------
// Pack float scalars into vector
class PackFNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- PackFNode(Node* in1) : PackNode(in1) {}
- PackFNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ PackFNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackFNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------PackDNode---------------------------------------
// Pack double scalars into a vector
class PackDNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- PackDNode(Node* in1) : PackNode(in1) {}
- PackDNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ PackDNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackDNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
-// The Pack2xN nodes assist code generation. They are created from
-// Pack4C, etc. nodes in final_graph_reshape in the form of a
-// balanced, binary tree.
-
-//------------------------------Pack2x1BNode-----------------------------------------
-// Pack 2 1-byte integers into vector of 2 bytes
-class Pack2x1BNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
+//------------------------------Pack2DNode--------------------------------------
+// Pack 2 double scalars into a vector
+class Pack2DNode : public PackNode {
public:
- Pack2x1BNode(Node *in1, Node* in2) : PackNode(in1, in2) {}
+ Pack2DNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
- virtual uint ideal_reg() const { return Op_RegI; }
};
-//------------------------------Pack2x2BNode---------------------------------------
-// Pack 2 2-byte integers into vector of 4 bytes
-class Pack2x2BNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Pack2x2BNode(Node *in1, Node* in2) : PackNode(in1, in2) {}
- virtual int Opcode() const;
- virtual uint ideal_reg() const { return Op_RegI; }
-};
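The two-operand Pack2LNode/Pack2DNode declared above take over the role the removed note ("The Pack2xN nodes assist code generation...") describes for the old Pack2x1B/Pack2x2B classes: a wide pack is reshaped into a balanced binary tree whose innermost nodes each combine a pair of inputs, which is what the [lo, hi) half-open range in binaryTreePack is for. A standalone model of that recursion (plain C++, not the HotSpot implementation; whether the real code bottoms out at pairs exactly this way is an assumption):

    #include <cstdio>

    // Pack operands in the half-open range [lo, hi) into a balanced binary
    // tree: pairs of scalars first, then pairs of partial vectors.
    static void binary_tree_pack(const char* const* opd, int lo, int hi, int depth) {
      int ct = hi - lo;                            // assumed to be a power of two
      if (ct == 2) {
        std::printf("%*spack2(%s, %s)\n", depth * 2, "", opd[lo], opd[lo + 1]);
        return;
      }
      int mid = lo + ct / 2;
      binary_tree_pack(opd, lo, mid, depth + 1);   // left half
      binary_tree_pack(opd, mid, hi, depth + 1);   // right half
      std::printf("%*scombine [%d, %d)\n", depth * 2, "", lo, hi);
    }

    int main() {
      const char* lanes[] = { "l0", "l1", "l2", "l3" };
      binary_tree_pack(lanes, 0, 4, 0);            // 4 lanes -> two pack2 leaves + one combine
      return 0;
    }
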
//========================Extract_Scalar_from_Vector===============================
@@ -1069,7 +465,7 @@ class ExtractNode : public Node {
virtual int Opcode() const;
uint pos() const { return in(2)->get_int(); }
- static Node* make(Compile* C, Node* v, uint position, const Type* opd_t);
+ static Node* make(Compile* C, Node* v, uint position, BasicType bt);
};
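Since the factory now takes a BasicType rather than a Type*, a caller only needs the element kind and the lane index. A usage sketch (assuming a Compile* C and a byte-vector-valued Node* vec are in scope; illustrative only):

    // make() is expected to pick the matching Extract{UB,B,C,S,I,L,F,D}
    // subclass from the BasicType; the lane index becomes the ConI input
    // that pos() reads back via in(2)->get_int().
    Node* lane3 = ExtractNode::make(C, vec, 3, T_BYTE);
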
//------------------------------ExtractBNode---------------------------------------
@@ -1082,6 +478,16 @@ class ExtractBNode : public ExtractNode {
virtual uint ideal_reg() const { return Op_RegI; }
};
+//------------------------------ExtractUBNode--------------------------------------
+// Extract a boolean from a vector at position "pos"
+class ExtractUBNode : public ExtractNode {
+ public:
+ ExtractUBNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
//------------------------------ExtractCNode---------------------------------------
// Extract a char from a vector at position "pos"
class ExtractCNode : public ExtractNode {