author     kvn <none@none>  2012-06-15 01:25:19 -0700
committer  kvn <none@none>  2012-06-15 01:25:19 -0700
commit     68446ffadeedf06b663e39278a07cd6171a28b2e (patch)
tree       c872b74df1f9eeb5dde27cf0e472c47bfb6fd82c /src/share/vm/opto
parent     d62195ef3f73f37d98d1b6dfbe12ed1fa5a51bc0 (diff)
7119644: Increase superword's vector size up to 256 bits
Summary: Increase vector size up to 256-bits for YMM AVX registers on x86.
Reviewed-by: never, twisti, roland
Diffstat (limited to 'src/share/vm/opto')
-rw-r--r--  src/share/vm/opto/c2_globals.hpp   |    7
-rw-r--r--  src/share/vm/opto/chaitin.cpp      |  165
-rw-r--r--  src/share/vm/opto/chaitin.hpp      |   17
-rw-r--r--  src/share/vm/opto/classes.hpp      |   72
-rw-r--r--  src/share/vm/opto/compile.cpp      |   30
-rw-r--r--  src/share/vm/opto/ifg.cpp          |   31
-rw-r--r--  src/share/vm/opto/lcm.cpp          |    8
-rw-r--r--  src/share/vm/opto/loopnode.cpp     |    6
-rw-r--r--  src/share/vm/opto/machnode.cpp     |    6
-rw-r--r--  src/share/vm/opto/machnode.hpp     |    7
-rw-r--r--  src/share/vm/opto/matcher.cpp      |   98
-rw-r--r--  src/share/vm/opto/matcher.hpp      |   17
-rw-r--r--  src/share/vm/opto/memnode.cpp      |    3
-rw-r--r--  src/share/vm/opto/mulnode.hpp      |    6
-rw-r--r--  src/share/vm/opto/node.cpp         |    5
-rw-r--r--  src/share/vm/opto/node.hpp         |   21
-rw-r--r--  src/share/vm/opto/opcodes.cpp      |    6
-rw-r--r--  src/share/vm/opto/opcodes.hpp      |    6
-rw-r--r--  src/share/vm/opto/postaloc.cpp     |   81
-rw-r--r--  src/share/vm/opto/reg_split.cpp    |   36
-rw-r--r--  src/share/vm/opto/regmask.cpp      |  165
-rw-r--r--  src/share/vm/opto/regmask.hpp      |   61
-rw-r--r--  src/share/vm/opto/superword.cpp    |  496
-rw-r--r--  src/share/vm/opto/superword.hpp    |   16
-rw-r--r--  src/share/vm/opto/type.cpp         |  151
-rw-r--r--  src/share/vm/opto/type.hpp         |   85
-rw-r--r--  src/share/vm/opto/vectornode.cpp   |  460
-rw-r--r--  src/share/vm/opto/vectornode.hpp   |  868
28 files changed, 1477 insertions(+), 1453 deletions(-)
diff --git a/src/share/vm/opto/c2_globals.hpp b/src/share/vm/opto/c2_globals.hpp
index 4d5424da5..947d806a3 100644
--- a/src/share/vm/opto/c2_globals.hpp
+++ b/src/share/vm/opto/c2_globals.hpp
@@ -81,6 +81,13 @@
product(intx, MaxLoopPad, (OptoLoopAlignment-1), \
"Align a loop if padding size in bytes is less or equal to this value") \
\
+ product(intx, MaxVectorSize, 32, \
+ "Max vector size in bytes, " \
+ "actual size could be less depending on elements type") \
+ \
+ product(bool, AlignVector, false, \
+ "Perform vector store/load alignment in loop") \
+ \
product(intx, NumberOfLoopInstrToAlign, 4, \
"Number of first instructions in a loop to align") \
\
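A hedged aside on how the two new flags are meant to be read (the helper below is illustrative and not part of the patch): MaxVectorSize caps vector width in bytes, so the lane count superword can use for a given element type follows from dividing that cap by the element size; per-platform limits still apply through Matcher::max_vector_size()/min_vector_size(), declared later in this change (matcher.hpp).

  // Illustrative only: lane count implied by MaxVectorSize for one element type.
  static int max_lanes_for(BasicType bt) {
    int elem_bytes = type2aelembytes(bt);        // 1 for T_BYTE, 4 for T_INT, 8 for T_DOUBLE
    return (int)(MaxVectorSize / elem_bytes);    // default 32 bytes -> 8 ints or 4 doubles (YMM)
  }

AlignVector, off by default, additionally asks superword to align its vector loads and stores inside the loop.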
diff --git a/src/share/vm/opto/chaitin.cpp b/src/share/vm/opto/chaitin.cpp
index a74114add..0ed9b9627 100644
--- a/src/share/vm/opto/chaitin.cpp
+++ b/src/share/vm/opto/chaitin.cpp
@@ -75,6 +75,7 @@ void LRG::dump( ) const {
// Flags
if( _is_oop ) tty->print("Oop ");
if( _is_float ) tty->print("Float ");
+ if( _is_vector ) tty->print("Vector ");
if( _was_spilled1 ) tty->print("Spilled ");
if( _was_spilled2 ) tty->print("Spilled2 ");
if( _direct_conflict ) tty->print("Direct_conflict ");
@@ -479,16 +480,18 @@ void PhaseChaitin::Register_Allocate() {
// Move important info out of the live_arena to longer lasting storage.
alloc_node_regs(_names.Size());
- for( uint i=0; i < _names.Size(); i++ ) {
- if( _names[i] ) { // Live range associated with Node?
- LRG &lrg = lrgs( _names[i] );
- if( lrg.num_regs() == 1 ) {
- _node_regs[i].set1( lrg.reg() );
+ for (uint i=0; i < _names.Size(); i++) {
+ if (_names[i]) { // Live range associated with Node?
+ LRG &lrg = lrgs(_names[i]);
+ if (!lrg.alive()) {
+ _node_regs[i].set_bad();
+ } else if (lrg.num_regs() == 1) {
+ _node_regs[i].set1(lrg.reg());
} else { // Must be a register-pair
- if( !lrg._fat_proj ) { // Must be aligned adjacent register pair
+ if (!lrg._fat_proj) { // Must be aligned adjacent register pair
// Live ranges record the highest register in their mask.
// We want the low register for the AD file writer's convenience.
- _node_regs[i].set2( OptoReg::add(lrg.reg(),-1) );
+ _node_regs[i].set2( OptoReg::add(lrg.reg(),(1-lrg.num_regs())) );
} else { // Misaligned; extract 2 bits
OptoReg::Name hi = lrg.reg(); // Get hi register
lrg.Remove(hi); // Yank from mask
@@ -568,7 +571,7 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
// Check for float-vs-int live range (used in register-pressure
// calculations)
const Type *n_type = n->bottom_type();
- if( n_type->is_floatingpoint() )
+ if (n_type->is_floatingpoint())
lrg._is_float = 1;
// Check for twice prior spilling. Once prior spilling might have
@@ -599,18 +602,28 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
// Limit result register mask to acceptable registers
const RegMask &rm = n->out_RegMask();
lrg.AND( rm );
+
+ int ireg = n->ideal_reg();
+ assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP,
+ "oops must be in Op_RegP's" );
+
+ // Check for vector live range (only if vector register is used).
+ // On SPARC a vector uses RegD, which could be misaligned, so it is not
+ // processed as a vector in RA.
+ if (RegMask::is_vector(ireg))
+ lrg._is_vector = 1;
+ assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD,
+ "vector must be in vector registers");
+
// Check for bound register masks
const RegMask &lrgmask = lrg.mask();
- if( lrgmask.is_bound1() || lrgmask.is_bound2() )
+ if (lrgmask.is_bound(ireg))
lrg._is_bound = 1;
// Check for maximum frequency value
- if( lrg._maxfreq < b->_freq )
+ if (lrg._maxfreq < b->_freq)
lrg._maxfreq = b->_freq;
- int ireg = n->ideal_reg();
- assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP,
- "oops must be in Op_RegP's" );
// Check for oop-iness, or long/double
// Check for multi-kill projection
switch( ireg ) {
@@ -689,7 +702,7 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
// AND changes how we count interferences. A mis-aligned
// double can interfere with TWO aligned pairs, or effectively
// FOUR registers!
- if( rm.is_misaligned_Pair() ) {
+ if (rm.is_misaligned_pair()) {
lrg._fat_proj = 1;
lrg._is_bound = 1;
}
@@ -706,6 +719,33 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
lrg.set_reg_pressure(1);
#endif
break;
+ case Op_VecS:
+ assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
+ assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
+ lrg.set_num_regs(RegMask::SlotsPerVecS);
+ lrg.set_reg_pressure(1);
+ break;
+ case Op_VecD:
+ assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecD), "sanity");
+ assert(RegMask::num_registers(Op_VecD) == RegMask::SlotsPerVecD, "sanity");
+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecD), "vector should be aligned");
+ lrg.set_num_regs(RegMask::SlotsPerVecD);
+ lrg.set_reg_pressure(1);
+ break;
+ case Op_VecX:
+ assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecX), "sanity");
+ assert(RegMask::num_registers(Op_VecX) == RegMask::SlotsPerVecX, "sanity");
+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecX), "vector should be aligned");
+ lrg.set_num_regs(RegMask::SlotsPerVecX);
+ lrg.set_reg_pressure(1);
+ break;
+ case Op_VecY:
+ assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecY), "sanity");
+ assert(RegMask::num_registers(Op_VecY) == RegMask::SlotsPerVecY, "sanity");
+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecY), "vector should be aligned");
+ lrg.set_num_regs(RegMask::SlotsPerVecY);
+ lrg.set_reg_pressure(1);
+ break;
default:
ShouldNotReachHere();
}
@@ -763,24 +803,38 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
} else {
lrg.AND( rm );
}
+
// Check for bound register masks
const RegMask &lrgmask = lrg.mask();
- if( lrgmask.is_bound1() || lrgmask.is_bound2() )
+ int kreg = n->in(k)->ideal_reg();
+ bool is_vect = RegMask::is_vector(kreg);
+ assert(n->in(k)->bottom_type()->isa_vect() == NULL ||
+ is_vect || kreg == Op_RegD,
+ "vector must be in vector registers");
+ if (lrgmask.is_bound(kreg))
lrg._is_bound = 1;
+
// If this use of a double forces a mis-aligned double,
// flag as '_fat_proj' - really flag as allowing misalignment
// AND changes how we count interferences. A mis-aligned
// double can interfere with TWO aligned pairs, or effectively
// FOUR registers!
- if( lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_Pair() ) {
+#ifdef ASSERT
+ if (is_vect) {
+ assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned");
+ assert(!lrg._fat_proj, "sanity");
+ assert(RegMask::num_registers(kreg) == lrg.num_regs(), "sanity");
+ }
+#endif
+ if (!is_vect && lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_pair()) {
lrg._fat_proj = 1;
lrg._is_bound = 1;
}
// if the LRG is an unaligned pair, we will have to spill
// so clear the LRG's register mask if it is not already spilled
- if ( !n->is_SpillCopy() &&
- (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&
- lrgmask.is_misaligned_Pair()) {
+ if (!is_vect && !n->is_SpillCopy() &&
+ (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&
+ lrgmask.is_misaligned_pair()) {
lrg.Clear();
}
@@ -793,12 +847,14 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
} // end for all blocks
// Final per-liverange setup
- for( uint i2=0; i2<_maxlrg; i2++ ) {
+ for (uint i2=0; i2<_maxlrg; i2++) {
LRG &lrg = lrgs(i2);
- if( lrg.num_regs() == 2 && !lrg._fat_proj )
- lrg.ClearToPairs();
+ assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
+ if (lrg.num_regs() > 1 && !lrg._fat_proj) {
+ lrg.clear_to_sets();
+ }
lrg.compute_set_mask_size();
- if( lrg.not_free() ) { // Handle case where we lose from the start
+ if (lrg.not_free()) { // Handle case where we lose from the start
lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
lrg._direct_conflict = 1;
}
@@ -1104,22 +1160,17 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
// Choose a color which is legal for him
RegMask tempmask = lrg.mask();
tempmask.AND(lrgs(copy_lrg).mask());
- OptoReg::Name reg;
- if( lrg.num_regs() == 1 ) {
- reg = tempmask.find_first_elem();
- } else {
- tempmask.ClearToPairs();
- reg = tempmask.find_first_pair();
- }
- if( OptoReg::is_valid(reg) )
+ tempmask.clear_to_sets(lrg.num_regs());
+ OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
+ if (OptoReg::is_valid(reg))
return reg;
}
}
// If no bias info exists, just go with the register selection ordering
- if( lrg.num_regs() == 2 ) {
- // Find an aligned pair
- return OptoReg::add(lrg.mask().find_first_pair(),chunk);
+ if (lrg._is_vector || lrg.num_regs() == 2) {
+ // Find an aligned set
+ return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
}
// CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
@@ -1149,6 +1200,7 @@ OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {
// Use a heuristic to "bias" the color choice
return bias_color(lrg, chunk);
+ assert(!lrg._is_vector, "should not be vector here" );
assert( lrg.num_regs() >= 2, "dead live ranges do not color" );
// Fat-proj case or misaligned double argument.
@@ -1238,14 +1290,16 @@ uint PhaseChaitin::Select( ) {
}
//assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness");
// Aligned pairs need aligned masks
- if( lrg->num_regs() == 2 && !lrg->_fat_proj )
- lrg->ClearToPairs();
+ assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
+ if (lrg->num_regs() > 1 && !lrg->_fat_proj) {
+ lrg->clear_to_sets();
+ }
// Check if a color is available and if so pick the color
OptoReg::Name reg = choose_color( *lrg, chunk );
#ifdef SPARC
debug_only(lrg->compute_set_mask_size());
- assert(lrg->num_regs() != 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");
+ assert(lrg->num_regs() < 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");
#endif
//---------------
@@ -1277,17 +1331,16 @@ uint PhaseChaitin::Select( ) {
// If the live range is not bound, then we actually had some choices
// to make. In this case, the mask has more bits in it than the colors
// chosen. Restrict the mask to just what was picked.
- if( lrg->num_regs() == 1 ) { // Size 1 live range
- lrg->Clear(); // Clear the mask
- lrg->Insert(reg); // Set regmask to match selected reg
- lrg->set_mask_size(1);
- } else if( !lrg->_fat_proj ) {
- // For pairs, also insert the low bit of the pair
- assert( lrg->num_regs() == 2, "unbound fatproj???" );
+ int n_regs = lrg->num_regs();
+ assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
+ if (n_regs == 1 || !lrg->_fat_proj) {
+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecY, "sanity");
lrg->Clear(); // Clear the mask
lrg->Insert(reg); // Set regmask to match selected reg
- lrg->Insert(OptoReg::add(reg,-1));
- lrg->set_mask_size(2);
+ // For vectors and pairs, also insert the low bit of the pair
+ for (int i = 1; i < n_regs; i++)
+ lrg->Insert(OptoReg::add(reg,-i));
+ lrg->set_mask_size(n_regs);
} else { // Else fatproj
// mask must be equal to fatproj bits, by definition
}
@@ -1860,12 +1913,20 @@ char *PhaseChaitin::dump_register( const Node *n, char *buf ) const {
sprintf(buf,"L%d",lidx); // No register binding yet
} else if( !lidx ) { // Special, not allocated value
strcpy(buf,"Special");
- } else if( (lrgs(lidx).num_regs() == 1)
- ? !lrgs(lidx).mask().is_bound1()
- : !lrgs(lidx).mask().is_bound2() ) {
- sprintf(buf,"L%d",lidx); // No register binding yet
- } else { // Hah! We have a bound machine register
- print_reg( lrgs(lidx).reg(), this, buf );
+ } else {
+ if (lrgs(lidx)._is_vector) {
+ if (lrgs(lidx).mask().is_bound_set(lrgs(lidx).num_regs()))
+ print_reg( lrgs(lidx).reg(), this, buf ); // a bound machine register
+ else
+ sprintf(buf,"L%d",lidx); // No register binding yet
+ } else if( (lrgs(lidx).num_regs() == 1)
+ ? lrgs(lidx).mask().is_bound1()
+ : lrgs(lidx).mask().is_bound_pair() ) {
+ // Hah! We have a bound machine register
+ print_reg( lrgs(lidx).reg(), this, buf );
+ } else {
+ sprintf(buf,"L%d",lidx); // No register binding yet
+ }
}
}
return buf+strlen(buf);
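The chaitin changes above replace the pair-only RegMask helpers (ClearToPairs, is_bound2, find_first_pair) with set-oriented ones sized by the live range's num_regs(); their implementations appear in the regmask.cpp hunks further down. As a standalone illustration of the bit math, assuming 32-bit mask words and a 4-slot set such as Op_VecX, this hypothetical helper mirrors what clear_to_sets(4) does to a single word:

  // Keep only nibbles (aligned groups of four 32-bit slots) that are fully set.
  static int clear_word_to_sets4(int bits) {
    int sets = bits & 0x11111111;      // start from the low bit of every nibble
    for (int j = 1; j < 4; j++)
      sets = bits & (sets << 1);       // walk up; only fully populated nibbles survive
    sets |= (sets >> 1);               // smear the surviving high bit ...
    sets |= (sets >> 2);               // ... back down to refill the nibble
    return sets;                       // e.g. 0x00FF0F70 -> 0x00FF0F00
  }

A live range that must color to a VecX therefore keeps only mask bits forming complete, aligned 4-slot groups, which is what the clear_to_sets() and find_first_set() calls above rely on.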
diff --git a/src/share/vm/opto/chaitin.hpp b/src/share/vm/opto/chaitin.hpp
index 1e6be63c4..c10f18d74 100644
--- a/src/share/vm/opto/chaitin.hpp
+++ b/src/share/vm/opto/chaitin.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -99,8 +99,15 @@ public:
void set_mask_size( int size ) {
assert((size == 65535) || (size == (int)_mask.Size()), "");
_mask_size = size;
- debug_only(_msize_valid=1;)
- debug_only( if( _num_regs == 2 && !_fat_proj ) _mask.VerifyPairs(); )
+#ifdef ASSERT
+ _msize_valid=1;
+ if (_is_vector) {
+ assert(!_fat_proj, "sanity");
+ _mask.verify_sets(_num_regs);
+ } else if (_num_regs == 2 && !_fat_proj) {
+ _mask.verify_pairs();
+ }
+#endif
}
void compute_set_mask_size() { set_mask_size(compute_mask_size()); }
int mask_size() const { assert( _msize_valid, "mask size not valid" );
@@ -116,7 +123,8 @@ public:
void Set_All() { _mask.Set_All(); debug_only(_msize_valid=1); _mask_size = RegMask::CHUNK_SIZE; }
void Insert( OptoReg::Name reg ) { _mask.Insert(reg); debug_only(_msize_valid=0;) }
void Remove( OptoReg::Name reg ) { _mask.Remove(reg); debug_only(_msize_valid=0;) }
- void ClearToPairs() { _mask.ClearToPairs(); debug_only(_msize_valid=0;) }
+ void clear_to_pairs() { _mask.clear_to_pairs(); debug_only(_msize_valid=0;) }
+ void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) }
// Number of registers this live range uses when it colors
private:
@@ -150,6 +158,7 @@ public:
uint _is_oop:1, // Live-range holds an oop
_is_float:1, // True if in float registers
+ _is_vector:1, // True if in vector registers
_was_spilled1:1, // True if prior spilling on def
_was_spilled2:1, // True if twice prior spilling on def
_is_bound:1, // live range starts life with no
diff --git a/src/share/vm/opto/classes.hpp b/src/share/vm/opto/classes.hpp
index 03b5107c3..bdf18b51f 100644
--- a/src/share/vm/opto/classes.hpp
+++ b/src/share/vm/opto/classes.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -245,14 +245,12 @@ macro(XorI)
macro(XorL)
macro(Vector)
macro(AddVB)
-macro(AddVC)
macro(AddVS)
macro(AddVI)
macro(AddVL)
macro(AddVF)
macro(AddVD)
macro(SubVB)
-macro(SubVC)
macro(SubVS)
macro(SubVI)
macro(SubVL)
@@ -263,74 +261,36 @@ macro(MulVD)
macro(DivVF)
macro(DivVD)
macro(LShiftVB)
-macro(LShiftVC)
macro(LShiftVS)
macro(LShiftVI)
-macro(URShiftVB)
-macro(URShiftVC)
-macro(URShiftVS)
-macro(URShiftVI)
+macro(RShiftVB)
+macro(RShiftVS)
+macro(RShiftVI)
macro(AndV)
macro(OrV)
macro(XorV)
-macro(VectorLoad)
-macro(Load16B)
-macro(Load8B)
-macro(Load4B)
-macro(Load8C)
-macro(Load4C)
-macro(Load2C)
-macro(Load8S)
-macro(Load4S)
-macro(Load2S)
-macro(Load4I)
-macro(Load2I)
-macro(Load2L)
-macro(Load4F)
-macro(Load2F)
-macro(Load2D)
-macro(VectorStore)
-macro(Store16B)
-macro(Store8B)
-macro(Store4B)
-macro(Store8C)
-macro(Store4C)
-macro(Store2C)
-macro(Store4I)
-macro(Store2I)
-macro(Store2L)
-macro(Store4F)
-macro(Store2F)
-macro(Store2D)
+macro(LoadVector)
+macro(StoreVector)
macro(Pack)
macro(PackB)
macro(PackS)
-macro(PackC)
macro(PackI)
macro(PackL)
macro(PackF)
macro(PackD)
-macro(Pack2x1B)
-macro(Pack2x2B)
-macro(Replicate16B)
-macro(Replicate8B)
-macro(Replicate4B)
-macro(Replicate8S)
-macro(Replicate4S)
-macro(Replicate2S)
-macro(Replicate8C)
-macro(Replicate4C)
-macro(Replicate2C)
-macro(Replicate4I)
-macro(Replicate2I)
-macro(Replicate2L)
-macro(Replicate4F)
-macro(Replicate2F)
-macro(Replicate2D)
+macro(Pack2L)
+macro(Pack2D)
+macro(ReplicateB)
+macro(ReplicateS)
+macro(ReplicateI)
+macro(ReplicateL)
+macro(ReplicateF)
+macro(ReplicateD)
macro(Extract)
macro(ExtractB)
-macro(ExtractS)
+macro(ExtractUB)
macro(ExtractC)
+macro(ExtractS)
macro(ExtractI)
macro(ExtractL)
macro(ExtractF)
diff --git a/src/share/vm/opto/compile.cpp b/src/share/vm/opto/compile.cpp
index 5c4b5145a..5331d033f 100644
--- a/src/share/vm/opto/compile.cpp
+++ b/src/share/vm/opto/compile.cpp
@@ -2591,38 +2591,12 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {
}
break;
- case Op_Load16B:
- case Op_Load8B:
- case Op_Load4B:
- case Op_Load8S:
- case Op_Load4S:
- case Op_Load2S:
- case Op_Load8C:
- case Op_Load4C:
- case Op_Load2C:
- case Op_Load4I:
- case Op_Load2I:
- case Op_Load2L:
- case Op_Load4F:
- case Op_Load2F:
- case Op_Load2D:
- case Op_Store16B:
- case Op_Store8B:
- case Op_Store4B:
- case Op_Store8C:
- case Op_Store4C:
- case Op_Store2C:
- case Op_Store4I:
- case Op_Store2I:
- case Op_Store2L:
- case Op_Store4F:
- case Op_Store2F:
- case Op_Store2D:
+ case Op_LoadVector:
+ case Op_StoreVector:
break;
case Op_PackB:
case Op_PackS:
- case Op_PackC:
case Op_PackI:
case Op_PackF:
case Op_PackL:
diff --git a/src/share/vm/opto/ifg.cpp b/src/share/vm/opto/ifg.cpp
index 3a2254565..4827a17d8 100644
--- a/src/share/vm/opto/ifg.cpp
+++ b/src/share/vm/opto/ifg.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -416,6 +416,7 @@ uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) {
if( lrgs(lidx).mask().is_UP() &&
lrgs(lidx).mask_size() &&
!lrgs(lidx)._is_float &&
+ !lrgs(lidx)._is_vector &&
lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) )
cnt += lrgs(lidx).reg_pressure();
}
@@ -430,7 +431,7 @@ uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) {
while ((lidx = elements.next()) != 0) {
if( lrgs(lidx).mask().is_UP() &&
lrgs(lidx).mask_size() &&
- lrgs(lidx)._is_float )
+ (lrgs(lidx)._is_float || lrgs(lidx)._is_vector))
cnt += lrgs(lidx).reg_pressure();
}
return cnt;
@@ -439,8 +440,8 @@ uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) {
//------------------------------lower_pressure---------------------------------
// Adjust register pressure down by 1. Capture last hi-to-low transition,
static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) {
- if( lrg->mask().is_UP() && lrg->mask_size() ) {
- if( lrg->_is_float ) {
+ if (lrg->mask().is_UP() && lrg->mask_size()) {
+ if (lrg->_is_float || lrg->_is_vector) {
pressure[1] -= lrg->reg_pressure();
if( pressure[1] == (uint)FLOATPRESSURE ) {
hrp_index[1] = where;
@@ -522,8 +523,8 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
LRG &lrg = lrgs(lidx);
lrg._area += cost;
// Compute initial register pressure
- if( lrg.mask().is_UP() && lrg.mask_size() ) {
- if( lrg._is_float ) { // Count float pressure
+ if (lrg.mask().is_UP() && lrg.mask_size()) {
+ if (lrg._is_float || lrg._is_vector) { // Count float pressure
pressure[1] += lrg.reg_pressure();
#ifdef EXACT_PRESSURE
if( pressure[1] > b->_freg_pressure )
@@ -681,13 +682,10 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
// according to its bindings.
const RegMask &rmask = lrgs(r).mask();
if( lrgs(r).is_bound() && !(n->rematerialize()) && rmask.is_NotEmpty() ) {
- // Smear odd bits; leave only aligned pairs of bits.
- RegMask r2mask = rmask;
- r2mask.SmearToPairs();
// Check for common case
int r_size = lrgs(r).num_regs();
OptoReg::Name r_reg = (r_size == 1) ? rmask.find_first_elem() : OptoReg::Physical;
-
+ // Smear odd bits
IndexSetIterator elements(&liveout);
uint l;
while ((l = elements.next()) != 0) {
@@ -701,10 +699,15 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
// Remove the bits from LRG 'r' from LRG 'l' so 'l' no
// longer interferes with 'r'. If 'l' requires aligned
// adjacent pairs, subtract out bit pairs.
- if( lrg.num_regs() == 2 && !lrg._fat_proj ) {
+ assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
+ if (lrg.num_regs() > 1 && !lrg._fat_proj) {
+ RegMask r2mask = rmask;
+ // Leave only aligned set of bits.
+ r2mask.smear_to_sets(lrg.num_regs());
+ // This includes the vector case.
lrg.SUBTRACT( r2mask );
lrg.compute_set_mask_size();
- } else if( r_size != 1 ) {
+ } else if( r_size != 1 ) { // fat proj
lrg.SUBTRACT( rmask );
lrg.compute_set_mask_size();
} else { // Common case: size 1 bound removal
@@ -763,8 +766,8 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
// Newly live things assumed live from here to top of block
lrg._area += cost;
// Adjust register pressure
- if( lrg.mask().is_UP() && lrg.mask_size() ) {
- if( lrg._is_float ) {
+ if (lrg.mask().is_UP() && lrg.mask_size()) {
+ if (lrg._is_float || lrg._is_vector) {
pressure[1] += lrg.reg_pressure();
#ifdef EXACT_PRESSURE
if( pressure[1] > b->_freg_pressure )
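In the interference subtraction above, the mask of a bound live range is now smeared up to the neighbor's set size before being SUBTRACTed, so a vector neighbor loses every slot of any aligned group the bound range touches. A standalone sketch of the smear for one 32-bit mask word and a 4-slot (Op_VecX) neighbor, mirroring the smear_to_sets() added in regmask.cpp below:

  // A bound double pinned to slots 10..11 (bits 0x00000C00) must knock a VecX
  // neighbor out of the entire aligned group 8..11.
  static int smear_word_to_sets4(int bits) {
    int sets = 0;
    for (int j = 0; j < 4; j++) {      // collect a marker for every nibble that
      sets |= (bits & 0x11111111);     // has any bit set at this offset
      bits >>= 1;
    }
    sets |= (sets << 1);               // expand the marker ...
    sets |= (sets << 2);               // ... to cover the whole nibble
    return sets;                       // e.g. 0x00000C00 -> 0x00000F00
  }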
diff --git a/src/share/vm/opto/lcm.cpp b/src/share/vm/opto/lcm.cpp
index 1ad9f0b1f..2f272eb55 100644
--- a/src/share/vm/opto/lcm.cpp
+++ b/src/share/vm/opto/lcm.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -139,6 +139,7 @@ void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowe
int iop = mach->ideal_Opcode();
switch( iop ) {
case Op_LoadB:
+ case Op_LoadUB:
case Op_LoadUS:
case Op_LoadD:
case Op_LoadF:
@@ -445,6 +446,11 @@ Node *Block::select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &read
if( e->is_MachNullCheck() && e->in(1) == n )
continue;
+ // Schedule IV increment last.
+ if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd &&
+ e->in(1)->in(1) == n && n->is_iteratively_computed())
+ continue;
+
uint n_choice = 2;
// See if this instruction is consumed by a branch. If so, then (as the
diff --git a/src/share/vm/opto/loopnode.cpp b/src/share/vm/opto/loopnode.cpp
index afd96045a..43def7314 100644
--- a/src/share/vm/opto/loopnode.cpp
+++ b/src/share/vm/opto/loopnode.cpp
@@ -2751,7 +2751,8 @@ int PhaseIdealLoop::build_loop_tree_impl( Node *n, int pre_order ) {
// Do not count uncommon calls
if( !n->is_CallStaticJava() || !n->as_CallStaticJava()->_name ) {
Node *iff = n->in(0)->in(0);
- if( !iff->is_If() ||
+ // Vectorized loops must not have any calls.
+ if( UseSuperWord || !iff->is_If() ||
(n->in(0)->Opcode() == Op_IfFalse &&
(1.0 - iff->as_If()->_prob) >= 0.01) ||
(iff->as_If()->_prob >= 0.01) )
@@ -3216,7 +3217,8 @@ void PhaseIdealLoop::build_loop_late_post( Node *n ) {
case Op_ModF:
case Op_ModD:
case Op_LoadB: // Same with Loads; they can sink
- case Op_LoadUS: // during loop optimizations.
+ case Op_LoadUB: // during loop optimizations.
+ case Op_LoadUS:
case Op_LoadD:
case Op_LoadF:
case Op_LoadI:
diff --git a/src/share/vm/opto/machnode.cpp b/src/share/vm/opto/machnode.cpp
index 7bc587785..88bb23b37 100644
--- a/src/share/vm/opto/machnode.cpp
+++ b/src/share/vm/opto/machnode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -439,9 +439,9 @@ bool MachNode::rematerialize() const {
// Don't remateralize somebody with bound inputs - it stretches a
// fixed register lifetime.
uint idx = oper_input_base();
- if( req() > idx ) {
+ if (req() > idx) {
const RegMask &rm = in_RegMask(idx);
- if( rm.is_bound1() || rm.is_bound2() )
+ if (rm.is_bound(ideal_reg()))
return false;
}
diff --git a/src/share/vm/opto/machnode.hpp b/src/share/vm/opto/machnode.hpp
index 566e031d1..4db1154e8 100644
--- a/src/share/vm/opto/machnode.hpp
+++ b/src/share/vm/opto/machnode.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -319,6 +319,7 @@ public:
class MachTypeNode : public MachNode {
virtual uint size_of() const { return sizeof(*this); } // Size is bigger
public:
+ MachTypeNode( ) {}
const Type *_bottom_type;
virtual const class Type *bottom_type() const { return _bottom_type; }
@@ -370,12 +371,12 @@ public:
//------------------------------MachConstantNode-------------------------------
// Machine node that holds a constant which is stored in the constant table.
-class MachConstantNode : public MachNode {
+class MachConstantNode : public MachTypeNode {
protected:
Compile::Constant _constant; // This node's constant.
public:
- MachConstantNode() : MachNode() {
+ MachConstantNode() : MachTypeNode() {
init_class_id(Class_MachConstant);
}
diff --git a/src/share/vm/opto/matcher.cpp b/src/share/vm/opto/matcher.cpp
index 397385670..f6bb30769 100644
--- a/src/share/vm/opto/matcher.cpp
+++ b/src/share/vm/opto/matcher.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -35,6 +35,7 @@
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/type.hpp"
+#include "opto/vectornode.hpp"
#include "runtime/atomic.hpp"
#include "runtime/os.hpp"
#ifdef TARGET_ARCH_MODEL_x86_32
@@ -58,18 +59,6 @@
OptoReg::Name OptoReg::c_frame_pointer;
-
-
-const int Matcher::base2reg[Type::lastype] = {
- Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
- Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
- Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
- 0, 0/*abio*/,
- Op_RegP /* Return address */, 0, /* the memories */
- Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
- 0 /*bottom*/
-};
-
const RegMask *Matcher::idealreg2regmask[_last_machine_leaf];
RegMask Matcher::mreg2regmask[_last_Mach_Reg];
RegMask Matcher::STACK_ONLY_mask;
@@ -107,6 +96,10 @@ Matcher::Matcher( Node_List &proj_list ) :
idealreg2spillmask [Op_RegF] = NULL;
idealreg2spillmask [Op_RegD] = NULL;
idealreg2spillmask [Op_RegP] = NULL;
+ idealreg2spillmask [Op_VecS] = NULL;
+ idealreg2spillmask [Op_VecD] = NULL;
+ idealreg2spillmask [Op_VecX] = NULL;
+ idealreg2spillmask [Op_VecY] = NULL;
idealreg2debugmask [Op_RegI] = NULL;
idealreg2debugmask [Op_RegN] = NULL;
@@ -114,6 +107,10 @@ Matcher::Matcher( Node_List &proj_list ) :
idealreg2debugmask [Op_RegF] = NULL;
idealreg2debugmask [Op_RegD] = NULL;
idealreg2debugmask [Op_RegP] = NULL;
+ idealreg2debugmask [Op_VecS] = NULL;
+ idealreg2debugmask [Op_VecD] = NULL;
+ idealreg2debugmask [Op_VecX] = NULL;
+ idealreg2debugmask [Op_VecY] = NULL;
idealreg2mhdebugmask[Op_RegI] = NULL;
idealreg2mhdebugmask[Op_RegN] = NULL;
@@ -121,6 +118,10 @@ Matcher::Matcher( Node_List &proj_list ) :
idealreg2mhdebugmask[Op_RegF] = NULL;
idealreg2mhdebugmask[Op_RegD] = NULL;
idealreg2mhdebugmask[Op_RegP] = NULL;
+ idealreg2mhdebugmask[Op_VecS] = NULL;
+ idealreg2mhdebugmask[Op_VecD] = NULL;
+ idealreg2mhdebugmask[Op_VecX] = NULL;
+ idealreg2mhdebugmask[Op_VecY] = NULL;
debug_only(_mem_node = NULL;) // Ideal memory node consumed by mach node
}
@@ -134,7 +135,7 @@ OptoReg::Name Matcher::warp_incoming_stk_arg( VMReg reg ) {
warped = OptoReg::add(warped, C->out_preserve_stack_slots());
if( warped >= _in_arg_limit )
_in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen
- if (!RegMask::can_represent(warped)) {
+ if (!RegMask::can_represent_arg(warped)) {
// the compiler cannot represent this method's calling sequence
C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence");
return OptoReg::Bad;
@@ -302,7 +303,7 @@ void Matcher::match( ) {
_out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
assert( is_even(_out_arg_limit), "out_preserve must be even" );
- if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) {
+ if (!RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1))) {
// the compiler cannot represent this method's calling sequence
C->record_method_not_compilable("must be able to represent all call arguments in reg mask");
}
@@ -428,7 +429,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
void Matcher::init_first_stack_mask() {
// Allocate storage for spill masks as masks for the appropriate load type.
- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * 3*6);
+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+4));
idealreg2spillmask [Op_RegN] = &rms[0];
idealreg2spillmask [Op_RegI] = &rms[1];
@@ -451,6 +452,11 @@ void Matcher::init_first_stack_mask() {
idealreg2mhdebugmask[Op_RegD] = &rms[16];
idealreg2mhdebugmask[Op_RegP] = &rms[17];
+ idealreg2spillmask [Op_VecS] = &rms[18];
+ idealreg2spillmask [Op_VecD] = &rms[19];
+ idealreg2spillmask [Op_VecX] = &rms[20];
+ idealreg2spillmask [Op_VecY] = &rms[21];
+
OptoReg::Name i;
// At first, start with the empty mask
@@ -462,7 +468,7 @@ void Matcher::init_first_stack_mask() {
C->FIRST_STACK_mask().Insert(i);
// Add in all bits past the outgoing argument area
- guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)),
+ guarantee(RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1)),
"must be able to represent all call arguments in reg mask");
init = _out_arg_limit;
for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
@@ -472,21 +478,48 @@ void Matcher::init_first_stack_mask() {
C->FIRST_STACK_mask().set_AllStack();
// Make spill masks. Registers for their class, plus FIRST_STACK_mask.
+ RegMask aligned_stack_mask = C->FIRST_STACK_mask();
+ // Keep spill masks aligned.
+ aligned_stack_mask.clear_to_pairs();
+ assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+
+ *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
#ifdef _LP64
*idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN];
idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask());
+ idealreg2spillmask[Op_RegP]->OR(aligned_stack_mask);
+#else
+ idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
#endif
*idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI];
idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask());
*idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL];
- idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask());
+ idealreg2spillmask[Op_RegL]->OR(aligned_stack_mask);
*idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF];
idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask());
*idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
- idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask());
- *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
- idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
+ idealreg2spillmask[Op_RegD]->OR(aligned_stack_mask);
+ if (Matcher::vector_size_supported(T_BYTE,4)) {
+ *idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS];
+ idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask());
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,2)) {
+ *idealreg2spillmask[Op_VecD] = *idealreg2regmask[Op_VecD];
+ idealreg2spillmask[Op_VecD]->OR(aligned_stack_mask);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,4)) {
+ aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecX);
+ assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+ *idealreg2spillmask[Op_VecX] = *idealreg2regmask[Op_VecX];
+ idealreg2spillmask[Op_VecX]->OR(aligned_stack_mask);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,8)) {
+ aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecY);
+ assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+ *idealreg2spillmask[Op_VecY] = *idealreg2regmask[Op_VecY];
+ idealreg2spillmask[Op_VecY]->OR(aligned_stack_mask);
+ }
if (UseFPUForSpilling) {
// This mask logic assumes that the spill operations are
// symmetric and that the registers involved are the same size.
@@ -807,6 +840,25 @@ void Matcher::init_spill_mask( Node *ret ) {
idealreg2regmask[Op_RegF] = &spillF->out_RegMask();
idealreg2regmask[Op_RegD] = &spillD->out_RegMask();
idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
+
+ // Vector regmasks.
+ if (Matcher::vector_size_supported(T_BYTE,4)) {
+ TypeVect::VECTS = TypeVect::make(T_BYTE, 4);
+ MachNode *spillVectS = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS));
+ idealreg2regmask[Op_VecS] = &spillVectS->out_RegMask();
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,2)) {
+ MachNode *spillVectD = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTD));
+ idealreg2regmask[Op_VecD] = &spillVectD->out_RegMask();
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,4)) {
+ MachNode *spillVectX = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTX));
+ idealreg2regmask[Op_VecX] = &spillVectX->out_RegMask();
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,8)) {
+ MachNode *spillVectY = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTY));
+ idealreg2regmask[Op_VecY] = &spillVectY->out_RegMask();
+ }
}
#ifdef ASSERT
@@ -1063,7 +1115,7 @@ OptoReg::Name Matcher::warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out
// that is killed by the call.
if( warped >= out_arg_limit_per_call )
out_arg_limit_per_call = OptoReg::add(warped,1);
- if (!RegMask::can_represent(warped)) {
+ if (!RegMask::can_represent_arg(warped)) {
C->record_method_not_compilable_all_tiers("unsupported calling sequence");
return OptoReg::Bad;
}
@@ -1251,7 +1303,7 @@ MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
// this killed area.
uint r_cnt = mcall->tf()->range()->cnt();
MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
- if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) {
+ if (!RegMask::can_represent_arg(OptoReg::Name(out_arg_limit_per_call-1))) {
C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence");
} else {
for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
diff --git a/src/share/vm/opto/matcher.hpp b/src/share/vm/opto/matcher.hpp
index e6aae28b3..0597cb543 100644
--- a/src/share/vm/opto/matcher.hpp
+++ b/src/share/vm/opto/matcher.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -250,10 +250,21 @@ public:
static const bool convL2FSupported(void);
// Vector width in bytes
- static const uint vector_width_in_bytes(void);
+ static const int vector_width_in_bytes(BasicType bt);
+
+ // Limits on vector size (number of elements).
+ static const int max_vector_size(const BasicType bt);
+ static const int min_vector_size(const BasicType bt);
+ static const bool vector_size_supported(const BasicType bt, int size) {
+ return (Matcher::max_vector_size(bt) >= size &&
+ Matcher::min_vector_size(bt) <= size);
+ }
// Vector ideal reg
- static const uint vector_ideal_reg(void);
+ static const int vector_ideal_reg(int len);
+
+ // CPU supports misaligned vectors store/load.
+ static const bool misaligned_vectors_ok();
// Used to determine a "low complexity" 64-bit constant. (Zero is simple.)
// The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI).
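The new vector_ideal_reg(int len) query maps a vector's size in bytes onto one of the VecS..VecY ideal register classes added in opcodes.hpp. The real table lives in the platform AD files, which are outside this src/share/vm/opto listing, so the sketch below is only a hedged restatement consistent with RegMask::num_registers() further down:

  // Illustrative only: byte size of a vector -> ideal register class.
  static int vector_ideal_reg_sketch(int size_in_bytes) {
    switch (size_in_bytes) {
    case  4: return Op_VecS;   // 1 x 32-bit slot
    case  8: return Op_VecD;   // 2 slots
    case 16: return Op_VecX;   // 4 slots (XMM)
    case 32: return Op_VecY;   // 8 slots (YMM, the new 256-bit case)
    }
    ShouldNotReachHere();
    return 0;
  }

misaligned_vectors_ok() lets each platform report whether unaligned vector memory accesses are acceptable (cf. the AlignVector flag added in c2_globals.hpp).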
diff --git a/src/share/vm/opto/memnode.cpp b/src/share/vm/opto/memnode.cpp
index e28e09226..799c2ba14 100644
--- a/src/share/vm/opto/memnode.cpp
+++ b/src/share/vm/opto/memnode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1543,6 +1543,7 @@ const Type *LoadNode::Value( PhaseTransform *phase ) const {
// had an original form like p1:(AddP x x (LShiftL quux 3)), where the
// expression (LShiftL quux 3) independently optimized to the constant 8.
if ((t->isa_int() == NULL) && (t->isa_long() == NULL)
+ && (_type->isa_vect() == NULL)
&& Opcode() != Op_LoadKlass && Opcode() != Op_LoadNKlass) {
// t might actually be lower than _type, if _type is a unique
// concrete subclass of abstract class t.
diff --git a/src/share/vm/opto/mulnode.hpp b/src/share/vm/opto/mulnode.hpp
index 11cc77145..c3adc433f 100644
--- a/src/share/vm/opto/mulnode.hpp
+++ b/src/share/vm/opto/mulnode.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -41,7 +41,9 @@ class PhaseTransform;
class MulNode : public Node {
virtual uint hash() const;
public:
- MulNode( Node *in1, Node *in2 ): Node(0,in1,in2) {}
+ MulNode( Node *in1, Node *in2 ): Node(0,in1,in2) {
+ init_class_id(Class_Mul);
+ }
// Handle algebraic identities here. If we have an identity, return the Node
// we are equivalent to. We look for "add of zero" as an identity.
diff --git a/src/share/vm/opto/node.cpp b/src/share/vm/opto/node.cpp
index 4bd752fed..2cb44ad0e 100644
--- a/src/share/vm/opto/node.cpp
+++ b/src/share/vm/opto/node.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1576,6 +1576,9 @@ void Node::dump() const {
} else {
tty->print("no type");
}
+ } else if (t->isa_vect() && this->is_MachSpillCopy()) {
+ // Dump MachSpillcopy vector type.
+ t->dump();
}
if (is_new) {
debug_only(dump_orig(debug_orig()));
diff --git a/src/share/vm/opto/node.hpp b/src/share/vm/opto/node.hpp
index 5ddae2f0b..f63a967b6 100644
--- a/src/share/vm/opto/node.hpp
+++ b/src/share/vm/opto/node.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -100,6 +100,7 @@ class MemBarNode;
class MemBarStoreStoreNode;
class MemNode;
class MergeMemNode;
+class MulNode;
class MultiNode;
class MultiBranchNode;
class NeverBranchNode;
@@ -133,8 +134,8 @@ class Type;
class TypeNode;
class UnlockNode;
class VectorNode;
-class VectorLoadNode;
-class VectorStoreNode;
+class LoadVectorNode;
+class StoreVectorNode;
class VectorSet;
typedef void (*NFunc)(Node&,void*);
extern "C" {
@@ -609,9 +610,9 @@ public:
DEFINE_CLASS_ID(Mem, Node, 4)
DEFINE_CLASS_ID(Load, Mem, 0)
- DEFINE_CLASS_ID(VectorLoad, Load, 0)
+ DEFINE_CLASS_ID(LoadVector, Load, 0)
DEFINE_CLASS_ID(Store, Mem, 1)
- DEFINE_CLASS_ID(VectorStore, Store, 0)
+ DEFINE_CLASS_ID(StoreVector, Store, 0)
DEFINE_CLASS_ID(LoadStore, Mem, 2)
DEFINE_CLASS_ID(Region, Node, 5)
@@ -629,8 +630,9 @@ public:
DEFINE_CLASS_ID(AddP, Node, 9)
DEFINE_CLASS_ID(BoxLock, Node, 10)
DEFINE_CLASS_ID(Add, Node, 11)
- DEFINE_CLASS_ID(Vector, Node, 12)
- DEFINE_CLASS_ID(ClearArray, Node, 13)
+ DEFINE_CLASS_ID(Mul, Node, 12)
+ DEFINE_CLASS_ID(Vector, Node, 13)
+ DEFINE_CLASS_ID(ClearArray, Node, 14)
_max_classes = ClassMask_ClearArray
};
@@ -752,6 +754,7 @@ public:
DEFINE_CLASS_QUERY(MemBar)
DEFINE_CLASS_QUERY(MemBarStoreStore)
DEFINE_CLASS_QUERY(MergeMem)
+ DEFINE_CLASS_QUERY(Mul)
DEFINE_CLASS_QUERY(Multi)
DEFINE_CLASS_QUERY(MultiBranch)
DEFINE_CLASS_QUERY(Parm)
@@ -767,8 +770,8 @@ public:
DEFINE_CLASS_QUERY(Sub)
DEFINE_CLASS_QUERY(Type)
DEFINE_CLASS_QUERY(Vector)
- DEFINE_CLASS_QUERY(VectorLoad)
- DEFINE_CLASS_QUERY(VectorStore)
+ DEFINE_CLASS_QUERY(LoadVector)
+ DEFINE_CLASS_QUERY(StoreVector)
DEFINE_CLASS_QUERY(Unlock)
#undef DEFINE_CLASS_QUERY
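The renamed class IDs come with the usual generated queries, so later passes can test for the new node kinds without switching on size-specific opcodes; a hedged usage sketch built only from the declarations above:

  // Illustrative only; relies on the DEFINE_CLASS_QUERY accessors generated above.
  static bool is_vector_mem_access(const Node* n) {
    if (n->is_LoadVector() || n->is_StoreVector()) {
      // Element type and length now live in the node's TypeVect bottom type
      // rather than in a size-specific opcode (Load4I, Store8B, ...).
      return n->bottom_type()->isa_vect() != NULL;
    }
    return false;
  }

The new Class_Mul id serves the same purpose for superword, which can now recognize any multiply flavor via n->is_Mul().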
diff --git a/src/share/vm/opto/opcodes.cpp b/src/share/vm/opto/opcodes.cpp
index 58489db0a..83310568b 100644
--- a/src/share/vm/opto/opcodes.cpp
+++ b/src/share/vm/opto/opcodes.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -38,6 +38,10 @@ const char *NodeClassNames[] = {
"RegD",
"RegL",
"RegFlags",
+ "VecS",
+ "VecD",
+ "VecX",
+ "VecY",
"_last_machine_leaf",
#include "classes.hpp"
"_last_class_name",
diff --git a/src/share/vm/opto/opcodes.hpp b/src/share/vm/opto/opcodes.hpp
index 9eb5b8a7c..4baec83fe 100644
--- a/src/share/vm/opto/opcodes.hpp
+++ b/src/share/vm/opto/opcodes.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -36,6 +36,10 @@ enum Opcodes {
macro(RegF) // Machine float register
macro(RegD) // Machine double register
macro(RegL) // Machine long register
+ macro(VecS) // Machine vectors register
+ macro(VecD) // Machine vectord register
+ macro(VecX) // Machine vectorx register
+ macro(VecY) // Machine vectory register
macro(RegFlags) // Machine flags register
_last_machine_leaf, // Split between regular opcodes and machine
#include "classes.hpp"
diff --git a/src/share/vm/opto/postaloc.cpp b/src/share/vm/opto/postaloc.cpp
index 1a7553bc5..8e24d353d 100644
--- a/src/share/vm/opto/postaloc.cpp
+++ b/src/share/vm/opto/postaloc.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,13 +27,15 @@
#include "opto/chaitin.hpp"
#include "opto/machnode.hpp"
-// see if this register kind does not requires two registers
-static bool is_single_register(uint x) {
-#ifdef _LP64
- return (x != Op_RegD && x != Op_RegL && x != Op_RegP);
-#else
- return (x != Op_RegD && x != Op_RegL);
-#endif
+// See if this register (or register pair, or vector) already contains the value.
+static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs,
+ Node_List& value) {
+ for (int i = 0; i < n_regs; i++) {
+ OptoReg::Name nreg = OptoReg::add(reg,-i);
+ if (value[nreg] != val)
+ return false;
+ }
+ return true;
}
//---------------------------may_be_copy_of_callee-----------------------------
@@ -167,9 +169,11 @@ int PhaseChaitin::use_prior_register( Node *n, uint idx, Node *def, Block *curre
const RegMask &use_mask = n->in_RegMask(idx);
bool can_use = ( RegMask::can_represent(def_reg) ? (use_mask.Member(def_reg) != 0)
: (use_mask.is_AllStack() != 0));
- // Check for a copy to or from a misaligned pair.
- can_use = can_use && !use_mask.is_misaligned_Pair() && !def_lrg.mask().is_misaligned_Pair();
-
+ if (!RegMask::is_vector(def->ideal_reg())) {
+ // Check for a copy to or from a misaligned pair.
+ // This is a workaround for SPARC, which can have misaligned pairs.
+ can_use = can_use && !use_mask.is_misaligned_pair() && !def_lrg.mask().is_misaligned_pair();
+ }
if (!can_use)
return 0;
@@ -263,18 +267,16 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
val = skip_copies(n->in(k));
}
- if( val == x ) return blk_adjust; // No progress?
+ if (val == x) return blk_adjust; // No progress?
- bool single = is_single_register(val->ideal_reg());
+ int n_regs = RegMask::num_registers(val->ideal_reg());
uint val_idx = n2lidx(val);
OptoReg::Name val_reg = lrgs(val_idx).reg();
// See if it happens to already be in the correct register!
// (either Phi's direct register, or the common case of the name
// never-clobbered original-def register)
- if( value[val_reg] == val &&
- // Doubles check both halves
- ( single || value[val_reg-1] == val ) ) {
+ if (register_contains_value(val, val_reg, n_regs, value)) {
blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd);
if( n->in(k) == regnd[val_reg] ) // Success! Quit trying
return blk_adjust;
@@ -306,7 +308,7 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
}
Node *vv = value[reg];
- if( !single ) { // Doubles check for aligned-adjacent pair
+ if (n_regs > 1) { // Doubles check for aligned-adjacent pair
if( (reg&1)==0 ) continue; // Wrong half of a pair
if( vv != value[reg-1] ) continue; // Not a complete pair
}
@@ -526,8 +528,9 @@ void PhaseChaitin::post_allocate_copy_removal() {
if( pidx ) {
value.map(preg,phi);
regnd.map(preg,phi);
- OptoReg::Name preg_lo = OptoReg::add(preg,-1);
- if( !is_single_register(phi->ideal_reg()) ) {
+ int n_regs = RegMask::num_registers(phi->ideal_reg());
+ for (int l = 1; l < n_regs; l++) {
+ OptoReg::Name preg_lo = OptoReg::add(preg,-l);
value.map(preg_lo,phi);
regnd.map(preg_lo,phi);
}
@@ -568,13 +571,17 @@ void PhaseChaitin::post_allocate_copy_removal() {
value.map(ureg,valdef); // record improved reaching-def info
regnd.map(ureg, def);
// Record other half of doubles
- OptoReg::Name ureg_lo = OptoReg::add(ureg,-1);
- if( !is_single_register(def->ideal_reg()) &&
- ( !RegMask::can_represent(ureg_lo) ||
- lrgs(useidx).mask().Member(ureg_lo) ) && // Nearly always adjacent
- !value[ureg_lo] ) {
- value.map(ureg_lo,valdef); // record improved reaching-def info
- regnd.map(ureg_lo, def);
+ uint def_ideal_reg = def->ideal_reg();
+ int n_regs = RegMask::num_registers(def_ideal_reg);
+ bool is_vec = RegMask::is_vector(def_ideal_reg);
+ for (int l = 1; l < n_regs; l++) {
+ OptoReg::Name ureg_lo = OptoReg::add(ureg,-l);
+ if (!value[ureg_lo] &&
+ (!RegMask::can_represent(ureg_lo) ||
+ lrgs(useidx).mask().Member(ureg_lo))) { // Nearly always adjacent
+ value.map(ureg_lo,valdef); // record improved reaching-def info
+ regnd.map(ureg_lo, def);
+ }
}
}
}
@@ -607,7 +614,8 @@ void PhaseChaitin::post_allocate_copy_removal() {
}
uint n_ideal_reg = n->ideal_reg();
- if( is_single_register(n_ideal_reg) ) {
+ int n_regs = RegMask::num_registers(n_ideal_reg);
+ if (n_regs == 1) {
// If Node 'n' does not change the value mapped by the register,
// then 'n' is a useless copy. Do not update the register->node
// mapping so 'n' will go dead.
@@ -625,6 +633,25 @@ void PhaseChaitin::post_allocate_copy_removal() {
assert( n->is_Copy(), "" );
j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
}
+ } else if (RegMask::is_vector(n_ideal_reg)) {
+ // If Node 'n' does not change the value mapped by the register,
+ // then 'n' is a useless copy. Do not update the register->node
+ // mapping so 'n' will go dead.
+ if (!register_contains_value(val, nreg, n_regs, value)) {
+ // Update the mapping: record new Node defined by the register
+ regnd.map(nreg,n);
+ // Update mapping for defined *value*, which is the defined
+ // Node after skipping all copies.
+ value.map(nreg,val);
+ for (int l = 1; l < n_regs; l++) {
+ OptoReg::Name nreg_lo = OptoReg::add(nreg,-l);
+ regnd.map(nreg_lo, n );
+ value.map(nreg_lo,val);
+ }
+ } else if (n->is_Copy()) {
+ // Note: a vector can't be a constant and can't be a copy of a callee.
+ j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
+ }
} else {
// If the value occupies a register pair, record same info
// in both registers.
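The downward walks above (OptoReg::add(reg, -l)) follow from the convention, restated in chaitin.cpp, that a live range records the HIGHEST slot of its set. A hypothetical helper condensing the pattern used for the phi and def maps, assuming an Op_VecY value:

  // Illustrative only: record one node in every slot of its aligned set.
  static void map_whole_set(OptoReg::Name reg, Node* n, Node_List& value, Node_List& regnd) {
    int n_regs = RegMask::num_registers(Op_VecY);   // 8 adjacent 32-bit slots for 256 bits
    value.map(reg, n);                              // highest slot of the set
    regnd.map(reg, n);
    for (int l = 1; l < n_regs; l++) {
      OptoReg::Name lo = OptoReg::add(reg, -l);     // the lower slots of the same set
      value.map(lo, n);
      regnd.map(lo, n);
    }
  }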
diff --git a/src/share/vm/opto/reg_split.cpp b/src/share/vm/opto/reg_split.cpp
index 63a11fe8f..cae363bea 100644
--- a/src/share/vm/opto/reg_split.cpp
+++ b/src/share/vm/opto/reg_split.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -74,12 +74,13 @@ Node *PhaseChaitin::get_spillcopy_wide( Node *def, Node *use, uint uidx ) {
const RegMask *w_i_mask = w_mask->overlap( *i_mask ) ? w_mask : i_mask;
const RegMask *w_o_mask;
+ int num_regs = RegMask::num_registers(ireg);
+ bool is_vect = RegMask::is_vector(ireg);
if( w_mask->overlap( *o_mask ) && // Overlap AND
- ((ireg != Op_RegL && ireg != Op_RegD // Single use or aligned
-#ifdef _LP64
- && ireg != Op_RegP
-#endif
- ) || o_mask->is_aligned_Pairs()) ) {
+ ((num_regs == 1) // Single use or aligned
+ || is_vect // or vector
+ || !is_vect && o_mask->is_aligned_pairs()) ) {
+ assert(!is_vect || o_mask->is_aligned_sets(num_regs), "vectors are aligned");
// Don't come here for mis-aligned doubles
w_o_mask = w_mask;
} else { // wide ideal mask does not overlap with o_mask
@@ -400,15 +401,17 @@ bool PhaseChaitin::is_high_pressure( Block *b, LRG *lrg, uint insidx ) {
// CNC - Turned off 7/8/99, causes too much spilling
// if( lrg->_is_bound ) return false;
+ // Use float pressure numbers for vectors.
+ bool is_float_or_vector = lrg->_is_float || lrg->_is_vector;
// Not yet reached the high-pressure cutoff point, so low pressure
- uint hrp_idx = lrg->_is_float ? b->_fhrp_index : b->_ihrp_index;
+ uint hrp_idx = is_float_or_vector ? b->_fhrp_index : b->_ihrp_index;
if( insidx < hrp_idx ) return false;
// Register pressure for the block as a whole depends on reg class
- int block_pres = lrg->_is_float ? b->_freg_pressure : b->_reg_pressure;
+ int block_pres = is_float_or_vector ? b->_freg_pressure : b->_reg_pressure;
// Bound live ranges will split at the binding points first;
// Intermediate splits should assume the live range's register set
// got "freed up" and that num_regs will become INT_PRESSURE.
- int bound_pres = lrg->_is_float ? FLOATPRESSURE : INTPRESSURE;
+ int bound_pres = is_float_or_vector ? FLOATPRESSURE : INTPRESSURE;
// Effective register pressure limit.
int lrg_pres = (lrg->get_invalid_mask_size() > lrg->num_regs())
? (lrg->get_invalid_mask_size() >> (lrg->num_regs()-1)) : bound_pres;
@@ -794,12 +797,15 @@ uint PhaseChaitin::Split( uint maxlrg ) {
if( i < n->req() ) break;
insert_point--;
}
+ uint orig_eidx = b->end_idx();
maxlrg = split_DEF( n1, b, insert_point, maxlrg, Reachblock, debug_defs, splits, slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
- insidx++;
+ // Spill of NULL check mem op goes into the following block.
+ if (b->end_idx() > orig_eidx)
+ insidx++;
}
// This is a new DEF, so update UP
UPblock[slidx] = false;
@@ -960,7 +966,7 @@ uint PhaseChaitin::Split( uint maxlrg ) {
// Grab register mask info
const RegMask &dmask = def->out_RegMask();
const RegMask &umask = n->in_RegMask(inpidx);
-
+ bool is_vect = RegMask::is_vector(def->ideal_reg());
assert(inpidx < oopoff, "cannot use-split oop map info");
bool dup = UPblock[slidx];
@@ -972,7 +978,7 @@ uint PhaseChaitin::Split( uint maxlrg ) {
if( !umask.is_AllStack() &&
(int)umask.Size() <= lrgs(useidx).num_regs() &&
(!def->rematerialize() ||
- umask.is_misaligned_Pair())) {
+ !is_vect && umask.is_misaligned_pair())) {
// These need a Split regardless of overlap or pressure
// SPLIT - NO DEF - NO CISC SPILL
maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
@@ -1123,10 +1129,12 @@ uint PhaseChaitin::Split( uint maxlrg ) {
// Grab UP info for DEF
const RegMask &dmask = n->out_RegMask();
bool defup = dmask.is_UP();
+ int ireg = n->ideal_reg();
+ bool is_vect = RegMask::is_vector(ireg);
// Only split at Def if this is a HRP block or bound (and spilled once)
if( !n->rematerialize() &&
- (((dmask.is_bound1() || dmask.is_bound2() || dmask.is_misaligned_Pair()) &&
- (deflrg._direct_conflict || deflrg._must_spill)) ||
+ (((dmask.is_bound(ireg) || !is_vect && dmask.is_misaligned_pair()) &&
+ (deflrg._direct_conflict || deflrg._must_spill)) ||
// Check for LRG being up in a register and we are inside a high
// pressure area. Spill it down immediately.
(defup && is_high_pressure(b,&deflrg,insidx))) ) {
diff --git a/src/share/vm/opto/regmask.cpp b/src/share/vm/opto/regmask.cpp
index ce220f01b..59413388c 100644
--- a/src/share/vm/opto/regmask.cpp
+++ b/src/share/vm/opto/regmask.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -129,11 +129,34 @@ const RegMask RegMask::Empty(
0
);
+//=============================================================================
+bool RegMask::is_vector(uint ireg) {
+ return (ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY);
+}
+
+int RegMask::num_registers(uint ireg) {
+ switch(ireg) {
+ case Op_VecY:
+ return 8;
+ case Op_VecX:
+ return 4;
+ case Op_VecD:
+ case Op_RegD:
+ case Op_RegL:
+#ifdef _LP64
+ case Op_RegP:
+#endif
+ return 2;
+ }
+ // Op_VecS and the remaining ideal registers.
+ return 1;
+}
+
//------------------------------find_first_pair--------------------------------
// Find the lowest-numbered register pair in the mask. Return the
// HIGHEST register number in the pair, or BAD if no pairs.
OptoReg::Name RegMask::find_first_pair() const {
- VerifyPairs();
+ verify_pairs();
for( int i = 0; i < RM_SIZE; i++ ) {
if( _A[i] ) { // Found some bits
int bit = _A[i] & -_A[i]; // Extract low bit
@@ -146,30 +169,30 @@ OptoReg::Name RegMask::find_first_pair() const {
//------------------------------ClearToPairs-----------------------------------
// Clear out partial bits; leave only bit pairs
-void RegMask::ClearToPairs() {
+void RegMask::clear_to_pairs() {
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair
bits |= (bits>>1); // Smear 1 hi-bit into a pair
_A[i] = bits;
}
- VerifyPairs();
+ verify_pairs();
}
//------------------------------SmearToPairs-----------------------------------
// Smear out partial bits; leave only bit pairs
-void RegMask::SmearToPairs() {
+void RegMask::smear_to_pairs() {
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
bits |= ((bits & 0x55555555)<<1); // Smear lo bit hi per pair
bits |= ((bits & 0xAAAAAAAA)>>1); // Smear hi bit lo per pair
_A[i] = bits;
}
- VerifyPairs();
+ verify_pairs();
}
//------------------------------is_aligned_pairs-------------------------------
-bool RegMask::is_aligned_Pairs() const {
+bool RegMask::is_aligned_pairs() const {
// Assert that the register mask contains only bit pairs.
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
@@ -204,7 +227,7 @@ int RegMask::is_bound1() const {
//------------------------------is_bound2--------------------------------------
// Return TRUE if the mask contains an adjacent pair of bits and no other bits.
-int RegMask::is_bound2() const {
+int RegMask::is_bound_pair() const {
if( is_AllStack() ) return false;
int bit = -1; // Set to hold the one bit allowed
@@ -226,6 +249,132 @@ int RegMask::is_bound2() const {
return true;
}
+static int low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 };
+//------------------------------find_first_set---------------------------------
+// Find the lowest-numbered register set in the mask. Return the
+// HIGHEST register number in the set, or BAD if no sets.
+// Works also for size 1.
+OptoReg::Name RegMask::find_first_set(int size) const {
+ verify_sets(size);
+ for (int i = 0; i < RM_SIZE; i++) {
+ if (_A[i]) { // Found some bits
+ int bit = _A[i] & -_A[i]; // Extract low bit
+      // Convert to bit number, return hi bit in set
+ return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1));
+ }
+ }
+ return OptoReg::Bad;
+}
+
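Because verify_sets() guarantees the mask holds only aligned sets, the lowest set's highest member is simply the lowest set bit plus (size - 1). A stand-alone, single-word check with assumed values:

    #include <cassert>

    int main() {
      unsigned word = 0x00000F00u;   // one aligned 4-bit set: registers 8..11
      int size = 4;
      int low = 0;
      while (((word >> low) & 1u) == 0) low++;   // index of the lowest set bit
      assert(low == 8);
      assert(low + (size - 1) == 11);            // what find_first_set() reports
      return 0;
    }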
+//------------------------------clear_to_sets----------------------------------
+// Clear out partial bits; leave only aligned adjacent bit sets
+void RegMask::clear_to_sets(int size) {
+ if (size == 1) return;
+ assert(2 <= size && size <= 8, "update low bits table");
+ assert(is_power_of_2(size), "sanity");
+ int low_bits_mask = low_bits[size>>2];
+ for (int i = 0; i < RM_SIZE; i++) {
+ int bits = _A[i];
+ int sets = (bits & low_bits_mask);
+ for (int j = 1; j < size; j++) {
+ sets = (bits & (sets<<1)); // filter bits which produce whole sets
+ }
+ sets |= (sets>>1); // Smear 1 hi-bit into a set
+ if (size > 2) {
+ sets |= (sets>>2); // Smear 2 hi-bits into a set
+ if (size > 4) {
+ sets |= (sets>>4); // Smear 4 hi-bits into a set
+ }
+ }
+ _A[i] = sets;
+ }
+ verify_sets(size);
+}
+
+//------------------------------smear_to_sets----------------------------------
+// Smear out partial bits to aligned adjacent bit sets
+void RegMask::smear_to_sets(int size) {
+ if (size == 1) return;
+ assert(2 <= size && size <= 8, "update low bits table");
+ assert(is_power_of_2(size), "sanity");
+ int low_bits_mask = low_bits[size>>2];
+ for (int i = 0; i < RM_SIZE; i++) {
+ int bits = _A[i];
+ int sets = 0;
+ for (int j = 0; j < size; j++) {
+ sets |= (bits & low_bits_mask); // collect partial bits
+ bits = bits>>1;
+ }
+ sets |= (sets<<1); // Smear 1 lo-bit into a set
+ if (size > 2) {
+ sets |= (sets<<2); // Smear 2 lo-bits into a set
+ if (size > 4) {
+ sets |= (sets<<4); // Smear 4 lo-bits into a set
+ }
+ }
+ _A[i] = sets;
+ }
+ verify_sets(size);
+}
+
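A stand-alone, single-word illustration of what the two routines above compute for size == 4 (the same bit tricks re-implemented outside HotSpot purely for illustration): clear_to_sets() drops any 4-bit group that is not completely populated, while smear_to_sets() fills any group that has at least one bit.

    #include <cassert>
    #include <cstdint>

    static uint32_t clear_to_sets4(uint32_t bits) {
      const uint32_t low = 0x11111111u;       // one candidate bit per 4-bit set
      uint32_t sets = bits & low;
      for (int j = 1; j < 4; j++)
        sets = bits & (sets << 1);            // survives only if the whole set is present
      sets |= sets >> 1;                      // smear the surviving hi-bit
      sets |= sets >> 2;                      // back over all 4 positions
      return sets;
    }

    static uint32_t smear_to_sets4(uint32_t bits) {
      const uint32_t low = 0x11111111u;
      uint32_t sets = 0;
      for (int j = 0; j < 4; j++) {           // collect partial bits per set
        sets |= bits & low;
        bits >>= 1;
      }
      sets |= sets << 1;                      // expand each marked set
      sets |= sets << 2;
      return sets;
    }

    int main() {
      // Bits 0..2 form a partial set, bits 4..7 a full one.
      assert(clear_to_sets4(0x000000F7u) == 0x000000F0u);
      assert(smear_to_sets4(0x000000F7u) == 0x000000FFu);
      return 0;
    }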
+//------------------------------is_aligned_sets-------------------------------
+bool RegMask::is_aligned_sets(int size) const {
+ if (size == 1) return true;
+ assert(2 <= size && size <= 8, "update low bits table");
+ assert(is_power_of_2(size), "sanity");
+ int low_bits_mask = low_bits[size>>2];
+ // Assert that the register mask contains only bit sets.
+ for (int i = 0; i < RM_SIZE; i++) {
+ int bits = _A[i];
+    while (bits) {          // Check bits for sets
+      int bit = bits & -bits; // Extract low bit
+      // A low bit that is not at a set-aligned position means the set is misaligned.
+ if ((bit & low_bits_mask) == 0) return false;
+ // Do extra work since (bit << size) may overflow.
+ int hi_bit = bit << (size-1); // high bit
+ int set = hi_bit + ((hi_bit-1) & ~(bit-1));
+ // Check for aligned adjacent bits in this set
+ if ((bits & set) != set) return false;
+ bits -= set; // Remove this set
+ }
+ }
+ return true;
+}
+
+//------------------------------is_bound_set-----------------------------------
+// Return TRUE if the mask contains one adjacent set of bits and no other bits.
+// Works also for size 1.
+int RegMask::is_bound_set(int size) const {
+ if( is_AllStack() ) return false;
+ assert(1 <= size && size <= 8, "update low bits table");
+ int bit = -1; // Set to hold the one bit allowed
+ for (int i = 0; i < RM_SIZE; i++) {
+ if (_A[i] ) { // Found some bits
+ if (bit != -1)
+ return false; // Already had bits, so fail
+ bit = _A[i] & -_A[i]; // Extract 1 bit from mask
+ int hi_bit = bit << (size-1); // high bit
+ if (hi_bit != 0) { // Bit set stays in same word?
+ int set = hi_bit + ((hi_bit-1) & ~(bit-1));
+ if (set != _A[i])
+ return false; // Require adjacent bit set and no more bits
+      } else { // Else it's a split-set case
+ if (((-1) & ~(bit-1)) != _A[i])
+ return false; // Found many bits, so fail
+ i++; // Skip iteration forward and check high part
+ assert(size <= 8, "update next code");
+        // The lower 24 bits should be 0 since it is a split case and size <= 8.
+ int set = bit>>24;
+ set = set & -set; // Remove sign extension.
+ set = (((set << size) - 1) >> 8);
+ if (_A[i] != set) return false; // Require 1 lo bit in next word
+ }
+ }
+ }
+ // True for both the empty mask and for a bit set
+ return true;
+}
+
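A reduced sketch of the bound-set test for the common case where the set does not straddle a 32-bit word (the split-word branch above handles the rest); the values are assumptions for illustration:

    #include <cassert>
    #include <cstdint>

    // Empty mask, or exactly one run of `size` adjacent bits within one word.
    static bool is_bound_set_1word(uint32_t word, int size) {
      if (word == 0) return true;                    // empty mask counts as bound
      uint32_t bit = word & (0u - word);             // lowest set bit
      uint64_t set = ((uint64_t)bit << size) - bit;  // `size` adjacent bits from `bit`
      return (set >> 32) == 0 && word == (uint32_t)set;
    }

    int main() {
      assert(is_bound_set_1word(0x000000F0u, 4));    // one 4-bit run: bound
      assert(!is_bound_set_1word(0x000000F1u, 4));   // stray extra bit: not bound
      assert(is_bound_set_1word(0u, 4));             // empty mask
      return 0;
    }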
//------------------------------is_UP------------------------------------------
// UP means register only, Register plus stack, or stack only is DOWN
bool RegMask::is_UP() const {
diff --git a/src/share/vm/opto/regmask.hpp b/src/share/vm/opto/regmask.hpp
index e50ff84ca..e4c31dcef 100644
--- a/src/share/vm/opto/regmask.hpp
+++ b/src/share/vm/opto/regmask.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -113,7 +113,11 @@ public:
// the controlling alignment constraint. Note that this alignment
// requirement is internal to the allocator, and independent of any
// particular platform.
- enum { SlotsPerLong = 2 };
+ enum { SlotsPerLong = 2,
+ SlotsPerVecS = 1,
+ SlotsPerVecD = 2,
+ SlotsPerVecX = 4,
+ SlotsPerVecY = 8 };
// A constructor only used by the ADLC output. All mask fields are filled
// in directly. Calls to this look something like RM(1,2,3,4);
@@ -193,20 +197,53 @@ public:
OptoReg::Name find_first_pair() const;
// Clear out partial bits; leave only aligned adjacent bit pairs.
- void ClearToPairs();
+ void clear_to_pairs();
// Smear out partial bits; leave only aligned adjacent bit pairs.
- void SmearToPairs();
+ void smear_to_pairs();
// Verify that the mask contains only aligned adjacent bit pairs
- void VerifyPairs() const { assert( is_aligned_Pairs(), "mask is not aligned, adjacent pairs" ); }
+ void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); }
// Test that the mask contains only aligned adjacent bit pairs
- bool is_aligned_Pairs() const;
+ bool is_aligned_pairs() const;
// mask is a pair of misaligned registers
- bool is_misaligned_Pair() const { return Size()==2 && !is_aligned_Pairs();}
+ bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); }
// Test for single register
int is_bound1() const;
// Test for a single adjacent pair
- int is_bound2() const;
+ int is_bound_pair() const;
+  // Test for a single adjacent set of an ideal register's size.
+ int is_bound(uint ireg) const {
+ if (is_vector(ireg)) {
+ if (is_bound_set(num_registers(ireg)))
+ return true;
+ } else if (is_bound1() || is_bound_pair()) {
+ return true;
+ }
+ return false;
+ }
+
+ // Find the lowest-numbered register set in the mask. Return the
+ // HIGHEST register number in the set, or BAD if no sets.
+ // Assert that the mask contains only bit sets.
+ OptoReg::Name find_first_set(int size) const;
+
+ // Clear out partial bits; leave only aligned adjacent bit sets of size.
+ void clear_to_sets(int size);
+ // Smear out partial bits to aligned adjacent bit sets.
+ void smear_to_sets(int size);
+ // Verify that the mask contains only aligned adjacent bit sets
+ void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); }
+ // Test that the mask contains only aligned adjacent bit sets
+ bool is_aligned_sets(int size) const;
+
+ // mask is a set of misaligned registers
+ bool is_misaligned_set(int size) const { return (int)Size()==size && !is_aligned_sets(size);}
+
+ // Test for a single adjacent set
+ int is_bound_set(int size) const;
+
+ static bool is_vector(uint ireg);
+ static int num_registers(uint ireg);
// Fast overlap test. Non-zero if any registers in common.
int overlap( const RegMask &rm ) const {
@@ -280,9 +317,15 @@ public:
static bool can_represent(OptoReg::Name reg) {
// NOTE: -1 in computation reflects the usage of the last
- // bit of the regmask as an infinite stack flag.
+    // bit of the regmask as an infinite stack flag; see can_represent_arg()
+    // below for the extra slack that keeps the mask aligned for VecY.
return (int)reg < (int)(CHUNK_SIZE-1);
}
+ static bool can_represent_arg(OptoReg::Name reg) {
+ // NOTE: -SlotsPerVecY in computation reflects the need
+ // to keep mask aligned for largest value (VecY).
+ return (int)reg < (int)(CHUNK_SIZE-SlotsPerVecY);
+ }
};
// Do not use this constant directly in client code!
diff --git a/src/share/vm/opto/superword.cpp b/src/share/vm/opto/superword.cpp
index ae46e7f17..78db4b5ba 100644
--- a/src/share/vm/opto/superword.cpp
+++ b/src/share/vm/opto/superword.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -67,6 +67,10 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
//------------------------------transform_loop---------------------------
void SuperWord::transform_loop(IdealLoopTree* lpt) {
+ assert(UseSuperWord, "should be");
+ // Do vectors exist on this architecture?
+ if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
+
assert(lpt->_head->is_CountedLoop(), "must be");
CountedLoopNode *cl = lpt->_head->as_CountedLoop();
@@ -89,15 +93,12 @@ void SuperWord::transform_loop(IdealLoopTree* lpt) {
Node *pre_opaq1 = pre_end->limit();
if (pre_opaq1->Opcode() != Op_Opaque1) return;
- // Do vectors exist on this architecture?
- if (vector_width_in_bytes() == 0) return;
-
init(); // initialize data structures
set_lpt(lpt);
set_lp(cl);
- // For now, define one block which is the entire loop body
+ // For now, define one block which is the entire loop body
set_bb(cl);
assert(_packset.length() == 0, "packset must be empty");
@@ -177,7 +178,7 @@ void SuperWord::find_adjacent_refs() {
Node_List memops;
for (int i = 0; i < _block.length(); i++) {
Node* n = _block.at(i);
- if (n->is_Mem() && in_bb(n) &&
+ if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&
is_java_primitive(n->as_Mem()->memory_type())) {
int align = memory_alignment(n->as_Mem(), 0);
if (align != bottom_align) {
@@ -185,54 +186,130 @@ void SuperWord::find_adjacent_refs() {
}
}
}
- if (memops.size() == 0) return;
- // Find a memory reference to align to. The pre-loop trip count
- // is modified to align this reference to a vector-aligned address
- find_align_to_ref(memops);
- if (align_to_ref() == NULL) return;
+ Node_List align_to_refs;
+ int best_iv_adjustment = 0;
+ MemNode* best_align_to_mem_ref = NULL;
- SWPointer align_to_ref_p(align_to_ref(), this);
- int offset = align_to_ref_p.offset_in_bytes();
- int scale = align_to_ref_p.scale_in_bytes();
- int vw = vector_width_in_bytes();
- int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
- int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
+ while (memops.size() != 0) {
+ // Find a memory reference to align to.
+ MemNode* mem_ref = find_align_to_ref(memops);
+ if (mem_ref == NULL) break;
+ align_to_refs.push(mem_ref);
+ int iv_adjustment = get_iv_adjustment(mem_ref);
-#ifndef PRODUCT
- if (TraceSuperWord)
- tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d scale = %d iv_stride = %d",
- offset, iv_adjustment, align_to_ref_p.memory_size(), align_to_ref_p.scale_in_bytes(), iv_stride());
-#endif
+ if (best_align_to_mem_ref == NULL) {
+        // Record the memory reference that is best among all memory operations
+        // to use for alignment. The pre-loop trip count is modified to align
+ // this reference to a vector-aligned address.
+ best_align_to_mem_ref = mem_ref;
+ best_iv_adjustment = iv_adjustment;
+ }
- // Set alignment relative to "align_to_ref"
- for (int i = memops.size() - 1; i >= 0; i--) {
- MemNode* s = memops.at(i)->as_Mem();
- SWPointer p2(s, this);
- if (p2.comparable(align_to_ref_p)) {
- int align = memory_alignment(s, iv_adjustment);
- set_alignment(s, align);
- } else {
- memops.remove(i);
+ SWPointer align_to_ref_p(mem_ref, this);
+ // Set alignment relative to "align_to_ref" for all related memory operations.
+ for (int i = memops.size() - 1; i >= 0; i--) {
+ MemNode* s = memops.at(i)->as_Mem();
+ if (isomorphic(s, mem_ref)) {
+ SWPointer p2(s, this);
+ if (p2.comparable(align_to_ref_p)) {
+ int align = memory_alignment(s, iv_adjustment);
+ set_alignment(s, align);
+ }
+ }
}
- }
- // Create initial pack pairs of memory operations
- for (uint i = 0; i < memops.size(); i++) {
- Node* s1 = memops.at(i);
- for (uint j = 0; j < memops.size(); j++) {
- Node* s2 = memops.at(j);
- if (s1 != s2 && are_adjacent_refs(s1, s2)) {
+ // Create initial pack pairs of memory operations for which
+ // alignment is set and vectors will be aligned.
+ bool create_pack = true;
+ if (memory_alignment(mem_ref, best_iv_adjustment) != 0) {
+ if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
+ // Can't allow vectorization of unaligned memory accesses with the
+        // same type, since they could be overlapping accesses to the same array.
+ create_pack = false;
+ } else {
+ // Allow independent (different type) unaligned memory operations
+ // if HW supports them.
+ if (!Matcher::misaligned_vectors_ok()) {
+ create_pack = false;
+ } else {
+ // Check if packs of the same memory type but
+ // with a different alignment were created before.
+ for (uint i = 0; i < align_to_refs.size(); i++) {
+ MemNode* mr = align_to_refs.at(i)->as_Mem();
+ if (same_velt_type(mr, mem_ref) &&
+ memory_alignment(mr, iv_adjustment) != 0)
+ create_pack = false;
+ }
+ }
+ }
+ }
+ if (create_pack) {
+ for (uint i = 0; i < memops.size(); i++) {
+ Node* s1 = memops.at(i);
int align = alignment(s1);
- if (stmts_can_pack(s1, s2, align)) {
- Node_List* pair = new Node_List();
- pair->push(s1);
- pair->push(s2);
- _packset.append(pair);
+ if (align == top_align) continue;
+ for (uint j = 0; j < memops.size(); j++) {
+ Node* s2 = memops.at(j);
+ if (alignment(s2) == top_align) continue;
+ if (s1 != s2 && are_adjacent_refs(s1, s2)) {
+ if (stmts_can_pack(s1, s2, align)) {
+ Node_List* pair = new Node_List();
+ pair->push(s1);
+ pair->push(s2);
+ _packset.append(pair);
+ }
+ }
+ }
+ }
+ } else { // Don't create unaligned pack
+ // First, remove remaining memory ops of the same type from the list.
+ for (int i = memops.size() - 1; i >= 0; i--) {
+ MemNode* s = memops.at(i)->as_Mem();
+ if (same_velt_type(s, mem_ref)) {
+ memops.remove(i);
+ }
+ }
+
+ // Second, remove already constructed packs of the same type.
+ for (int i = _packset.length() - 1; i >= 0; i--) {
+ Node_List* p = _packset.at(i);
+ MemNode* s = p->at(0)->as_Mem();
+ if (same_velt_type(s, mem_ref)) {
+ remove_pack_at(i);
}
}
+
+      // If needed, find the best memory reference for loop alignment again.
+ if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
+ // Put memory ops from remaining packs back on memops list for
+ // the best alignment search.
+ uint orig_msize = memops.size();
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p = _packset.at(i);
+ MemNode* s = p->at(0)->as_Mem();
+ assert(!same_velt_type(s, mem_ref), "sanity");
+ memops.push(s);
+ }
+        best_align_to_mem_ref = find_align_to_ref(memops);
+ if (best_align_to_mem_ref == NULL) break;
+ best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
+ // Restore list.
+ while (memops.size() > orig_msize)
+ (void)memops.pop();
+ }
+ } // unaligned memory accesses
+
+ // Remove used mem nodes.
+ for (int i = memops.size() - 1; i >= 0; i--) {
+ MemNode* m = memops.at(i)->as_Mem();
+ if (alignment(m) != top_align) {
+ memops.remove(i);
+ }
}
- }
+
+  } // while (memops.size() != 0)
+ set_align_to_ref(best_align_to_mem_ref);
#ifndef PRODUCT
if (TraceSuperWord) {
@@ -246,7 +323,7 @@ void SuperWord::find_adjacent_refs() {
// Find a memory reference to align the loop induction variable to.
// Looks first at stores then at loads, looking for a memory reference
// with the largest number of references similar to it.
-void SuperWord::find_align_to_ref(Node_List &memops) {
+MemNode* SuperWord::find_align_to_ref(Node_List &memops) {
GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
// Count number of comparable memory ops
@@ -270,20 +347,28 @@ void SuperWord::find_align_to_ref(Node_List &memops) {
}
}
- // Find Store (or Load) with the greatest number of "comparable" references
+ // Find Store (or Load) with the greatest number of "comparable" references,
+ // biggest vector size, smallest data size and smallest iv offset.
int max_ct = 0;
+ int max_vw = 0;
int max_idx = -1;
int min_size = max_jint;
int min_iv_offset = max_jint;
for (uint j = 0; j < memops.size(); j++) {
MemNode* s = memops.at(j)->as_Mem();
if (s->is_Store()) {
+ int vw = vector_width_in_bytes(velt_basic_type(s));
+ assert(vw > 1, "sanity");
SWPointer p(s, this);
- if (cmp_ct.at(j) > max_ct ||
- cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
- data_size(s) == min_size &&
- p.offset_in_bytes() < min_iv_offset)) {
+ if (cmp_ct.at(j) > max_ct ||
+ cmp_ct.at(j) == max_ct &&
+ (vw > max_vw ||
+ vw == max_vw &&
+ (data_size(s) < min_size ||
+ data_size(s) == min_size &&
+ (p.offset_in_bytes() < min_iv_offset)))) {
max_ct = cmp_ct.at(j);
+ max_vw = vw;
max_idx = j;
min_size = data_size(s);
min_iv_offset = p.offset_in_bytes();
@@ -295,12 +380,18 @@ void SuperWord::find_align_to_ref(Node_List &memops) {
for (uint j = 0; j < memops.size(); j++) {
MemNode* s = memops.at(j)->as_Mem();
if (s->is_Load()) {
+ int vw = vector_width_in_bytes(velt_basic_type(s));
+ assert(vw > 1, "sanity");
SWPointer p(s, this);
- if (cmp_ct.at(j) > max_ct ||
- cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
- data_size(s) == min_size &&
- p.offset_in_bytes() < min_iv_offset)) {
+ if (cmp_ct.at(j) > max_ct ||
+ cmp_ct.at(j) == max_ct &&
+ (vw > max_vw ||
+ vw == max_vw &&
+ (data_size(s) < min_size ||
+ data_size(s) == min_size &&
+ (p.offset_in_bytes() < min_iv_offset)))) {
max_ct = cmp_ct.at(j);
+ max_vw = vw;
max_idx = j;
min_size = data_size(s);
min_iv_offset = p.offset_in_bytes();
@@ -309,10 +400,7 @@ void SuperWord::find_align_to_ref(Node_List &memops) {
}
}
- if (max_ct > 0)
- set_align_to_ref(memops.at(max_idx)->as_Mem());
-
-#ifndef PRODUCT
+#ifdef ASSERT
if (TraceSuperWord && Verbose) {
tty->print_cr("\nVector memops after find_align_to_refs");
for (uint i = 0; i < memops.size(); i++) {
@@ -321,6 +409,17 @@ void SuperWord::find_align_to_ref(Node_List &memops) {
}
}
#endif
+
+ if (max_ct > 0) {
+#ifdef ASSERT
+ if (TraceSuperWord) {
+ tty->print("\nVector align to node: ");
+ memops.at(max_idx)->as_Mem()->dump();
+ }
+#endif
+ return memops.at(max_idx)->as_Mem();
+ }
+ return NULL;
}
//------------------------------ref_is_alignable---------------------------
@@ -341,7 +440,9 @@ bool SuperWord::ref_is_alignable(SWPointer& p) {
// If initial offset from start of object is computable,
// compute alignment within the vector.
- int vw = vector_width_in_bytes();
+ BasicType bt = velt_basic_type(p.mem());
+ int vw = vector_width_in_bytes(bt);
+ assert(vw > 1, "sanity");
if (vw % span == 0) {
Node* init_nd = pre_end->init_trip();
if (init_nd->is_Con() && p.invar() == NULL) {
@@ -361,6 +462,26 @@ bool SuperWord::ref_is_alignable(SWPointer& p) {
return false;
}
+//---------------------------get_iv_adjustment---------------------------
+// Calculate the loop's iv adjustment for this memory op.
+int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
+ SWPointer align_to_ref_p(mem_ref, this);
+ int offset = align_to_ref_p.offset_in_bytes();
+ int scale = align_to_ref_p.scale_in_bytes();
+ BasicType bt = velt_basic_type(mem_ref);
+ int vw = vector_width_in_bytes(bt);
+ assert(vw > 1, "sanity");
+ int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
+ int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
+
+#ifndef PRODUCT
+ if (TraceSuperWord)
+ tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
+ offset, iv_adjustment, align_to_ref_p.memory_size(), scale, iv_stride(), vw);
+#endif
+ return iv_adjustment;
+}
+
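A worked instance of the formula above, with assumed numbers: a reference whose first element sits 12 bytes into a 16-byte vector span, iterated with a positive stride, needs 4 more bytes of scalar pre-loop iterations to reach a vector boundary.

    #include <cassert>

    int main() {
      int offset = 12;        // offset_in_bytes() of the reference (assumed)
      int vw = 16;            // vector width in bytes (assumed, e.g. 4 ints)
      int stride_sign = 1;    // scale * iv_stride() > 0
      int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
      assert(iv_adjustment == 4);
      return 0;
    }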
//---------------------------dependence_graph---------------------------
// Construct dependency graph.
// Add dependence edges to load/store nodes for memory dependence
@@ -488,9 +609,13 @@ void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &p
bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
// Do not use superword for non-primitives
- if((s1->is_Mem() && !is_java_primitive(s1->as_Mem()->memory_type())) ||
- (s2->is_Mem() && !is_java_primitive(s2->as_Mem()->memory_type())))
+ BasicType bt1 = velt_basic_type(s1);
+ BasicType bt2 = velt_basic_type(s2);
+ if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
return false;
+ if (Matcher::max_vector_size(bt1) < 2) {
+ return false; // No vectors for this type
+ }
if (isomorphic(s1, s2)) {
if (independent(s1, s2)) {
@@ -552,7 +677,7 @@ bool SuperWord::isomorphic(Node* s1, Node* s2) {
if (s1->Opcode() != s2->Opcode()) return false;
if (s1->req() != s2->req()) return false;
if (s1->in(0) != s2->in(0)) return false;
- if (velt_type(s1) != velt_type(s2)) return false;
+ if (!same_velt_type(s1, s2)) return false;
return true;
}
@@ -595,14 +720,16 @@ bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) {
//------------------------------set_alignment---------------------------
void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
set_alignment(s1, align);
- set_alignment(s2, align + data_size(s1));
+ if (align == top_align || align == bottom_align) {
+ set_alignment(s2, align);
+ } else {
+ set_alignment(s2, align + data_size(s1));
+ }
}
//------------------------------data_size---------------------------
int SuperWord::data_size(Node* s) {
- const Type* t = velt_type(s);
- BasicType bt = t->array_element_basic_type();
- int bsize = type2aelembytes(bt);
+ int bsize = type2aelembytes(velt_basic_type(s));
assert(bsize != 0, "valid size");
return bsize;
}
@@ -631,9 +758,9 @@ void SuperWord::extend_packlist() {
//------------------------------follow_use_defs---------------------------
// Extend the packset by visiting operand definitions of nodes in pack p
bool SuperWord::follow_use_defs(Node_List* p) {
+ assert(p->size() == 2, "just checking");
Node* s1 = p->at(0);
Node* s2 = p->at(1);
- assert(p->size() == 2, "just checking");
assert(s1->req() == s2->req(), "just checking");
assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
@@ -718,7 +845,12 @@ bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
if (i1 != i2) {
- return false;
+ if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
+          // Further analysis relies on operand positions matching.
+ u2->swap_edges(i1, i2);
+ } else {
+ return false;
+ }
}
} while (i1 < ct);
return true;
@@ -727,7 +859,7 @@ bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
//------------------------------est_savings---------------------------
// Estimate the savings from executing s1 and s2 as a pack
int SuperWord::est_savings(Node* s1, Node* s2) {
- int save = 2 - 1; // 2 operations per instruction in packed form
+ int save_in = 2 - 1; // 2 operations per instruction in packed form
// inputs
for (uint i = 1; i < s1->req(); i++) {
@@ -735,17 +867,18 @@ int SuperWord::est_savings(Node* s1, Node* s2) {
Node* x2 = s2->in(i);
if (x1 != x2) {
if (are_adjacent_refs(x1, x2)) {
- save += adjacent_profit(x1, x2);
+ save_in += adjacent_profit(x1, x2);
} else if (!in_packset(x1, x2)) {
- save -= pack_cost(2);
+ save_in -= pack_cost(2);
} else {
- save += unpack_cost(2);
+ save_in += unpack_cost(2);
}
}
}
// uses of result
uint ct = 0;
+ int save_use = 0;
for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
Node* s1_use = s1->fast_out(i);
for (int j = 0; j < _packset.length(); j++) {
@@ -756,7 +889,7 @@ int SuperWord::est_savings(Node* s1, Node* s2) {
if (p->at(p->size()-1) == s2_use) {
ct++;
if (are_adjacent_refs(s1_use, s2_use)) {
- save += adjacent_profit(s1_use, s2_use);
+ save_use += adjacent_profit(s1_use, s2_use);
}
}
}
@@ -764,10 +897,10 @@ int SuperWord::est_savings(Node* s1, Node* s2) {
}
}
- if (ct < s1->outcnt()) save += unpack_cost(1);
- if (ct < s2->outcnt()) save += unpack_cost(1);
+ if (ct < s1->outcnt()) save_use += unpack_cost(1);
+ if (ct < s2->outcnt()) save_use += unpack_cost(1);
- return save;
+ return MAX2(save_in, save_use);
}
//------------------------------costs---------------------------
@@ -778,8 +911,9 @@ int SuperWord::unpack_cost(int ct) { return ct; }
//------------------------------combine_packs---------------------------
// Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
void SuperWord::combine_packs() {
- bool changed;
- do {
+ bool changed = true;
+  // Combine packs regardless of the max vector size.
+ while (changed) {
changed = false;
for (int i = 0; i < _packset.length(); i++) {
Node_List* p1 = _packset.at(i);
@@ -787,6 +921,7 @@ void SuperWord::combine_packs() {
for (int j = 0; j < _packset.length(); j++) {
Node_List* p2 = _packset.at(j);
if (p2 == NULL) continue;
+ if (i == j) continue;
if (p1->at(p1->size()-1) == p2->at(0)) {
for (uint k = 1; k < p2->size(); k++) {
p1->push(p2->at(k));
@@ -796,8 +931,39 @@ void SuperWord::combine_packs() {
}
}
}
- } while (changed);
+ }
+
+  // Split packs whose size is greater than the max vector size.
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p1 = _packset.at(i);
+ if (p1 != NULL) {
+ BasicType bt = velt_basic_type(p1->at(0));
+ uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector
+ assert(is_power_of_2(max_vlen), "sanity");
+ uint psize = p1->size();
+ if (!is_power_of_2(psize)) {
+        // Skip packs that can't become vectors.
+        // case 1: for(...) { a[i] = i; }      element values differ (i+x)
+        // case 2: for(...) { a[i] = b[i+1]; } can't align both the load and the store
+ _packset.at_put(i, NULL);
+ continue;
+ }
+ if (psize > max_vlen) {
+ Node_List* pack = new Node_List();
+ for (uint j = 0; j < psize; j++) {
+ pack->push(p1->at(j));
+ if (pack->size() >= max_vlen) {
+ assert(is_power_of_2(pack->size()), "sanity");
+ _packset.append(pack);
+ pack = new Node_List();
+ }
+ }
+ _packset.at_put(i, NULL);
+ }
+ }
+ }
+ // Compress list.
for (int i = _packset.length() - 1; i >= 0; i--) {
Node_List* p1 = _packset.at(i);
if (p1 == NULL) {
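A minimal sketch of the splitting step above, using plain integers in place of the compiler's Node_List packs (max_vlen and the pack contents are assumptions): an 8-statement pack with max_vlen == 4 becomes two 4-lane packs, and a pack whose size is not a power of two is discarded.

    #include <cassert>
    #include <vector>

    static bool is_pow2(size_t n) { return n != 0 && (n & (n - 1)) == 0; }

    static std::vector<std::vector<int>> split_pack(const std::vector<int>& p,
                                                    size_t max_vlen) {
      std::vector<std::vector<int>> out;
      if (!is_pow2(p.size())) return out;            // e.g. a[i] = i or a[i] = b[i+1]
      if (p.size() <= max_vlen) { out.push_back(p); return out; }
      std::vector<int> cur;
      for (int s : p) {                              // cut into max_vlen-sized pieces
        cur.push_back(s);
        if (cur.size() == max_vlen) { out.push_back(cur); cur.clear(); }
      }
      return out;
    }

    int main() {
      std::vector<int> pack = {0, 1, 2, 3, 4, 5, 6, 7};   // 8 packed statements
      assert(split_pack(pack, 4).size() == 2);            // two 4-lane vectors
      assert(split_pack({0, 1, 2}, 4).empty());           // size 3: not vectorizable
      return 0;
    }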
@@ -880,8 +1046,7 @@ void SuperWord::filter_packs() {
// Can code be generated for pack p?
bool SuperWord::implemented(Node_List* p) {
Node* p0 = p->at(0);
- int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0));
- return vopc > 0 && Matcher::has_match_rule(vopc);
+ return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0));
}
//------------------------------profitable---------------------------
@@ -939,36 +1104,36 @@ void SuperWord::schedule() {
}
//-------------------------------remove_and_insert-------------------
-//remove "current" from its current position in the memory graph and insert
-//it after the appropriate insertion point (lip or uip)
+// Remove "current" from its current position in the memory graph and insert
+// it after the appropriate insertion point (lip or uip).
void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,
Node *uip, Unique_Node_List &sched_before) {
Node* my_mem = current->in(MemNode::Memory);
- _igvn.rehash_node_delayed(current);
- _igvn.hash_delete(my_mem);
+ bool sched_up = sched_before.member(current);
- //remove current_store from its current position in the memmory graph
+  // remove current_store from its current position in the memory graph
for (DUIterator i = current->outs(); current->has_out(i); i++) {
Node* use = current->out(i);
if (use->is_Mem()) {
assert(use->in(MemNode::Memory) == current, "must be");
- _igvn.rehash_node_delayed(use);
if (use == prev) { // connect prev to my_mem
- use->set_req(MemNode::Memory, my_mem);
+ _igvn.replace_input_of(use, MemNode::Memory, my_mem);
+ --i; //deleted this edge; rescan position
} else if (sched_before.member(use)) {
- _igvn.hash_delete(uip);
- use->set_req(MemNode::Memory, uip);
+ if (!sched_up) { // Will be moved together with current
+ _igvn.replace_input_of(use, MemNode::Memory, uip);
+ --i; //deleted this edge; rescan position
+ }
} else {
- _igvn.hash_delete(lip);
- use->set_req(MemNode::Memory, lip);
+ if (sched_up) { // Will be moved together with current
+ _igvn.replace_input_of(use, MemNode::Memory, lip);
+ --i; //deleted this edge; rescan position
+ }
}
- --i; //deleted this edge; rescan position
}
}
- bool sched_up = sched_before.member(current);
Node *insert_pt = sched_up ? uip : lip;
- _igvn.hash_delete(insert_pt);
// all uses of insert_pt's memory state should use current's instead
for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) {
@@ -988,7 +1153,7 @@ void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,
}
//connect current to insert_pt
- current->set_req(MemNode::Memory, insert_pt);
+ _igvn.replace_input_of(current, MemNode::Memory, insert_pt);
}
//------------------------------co_locate_pack----------------------------------
@@ -1025,7 +1190,7 @@ void SuperWord::co_locate_pack(Node_List* pk) {
if (use->is_Mem() && use != previous)
memops.push(use);
}
- if(current == first) break;
+ if (current == first) break;
previous = current;
current = current->in(MemNode::Memory)->as_Mem();
}
@@ -1038,27 +1203,37 @@ void SuperWord::co_locate_pack(Node_List* pk) {
Node *s2 = memops.at(j);
if (!independent(s1, s2)) {
if (in_pack(s2, pk) || schedule_before_pack.member(s2)) {
- schedule_before_pack.push(s1); //s1 must be scheduled before
+ schedule_before_pack.push(s1); // s1 must be scheduled before
Node_List* mem_pk = my_pack(s1);
if (mem_pk != NULL) {
for (uint ii = 0; ii < mem_pk->size(); ii++) {
- Node* s = mem_pk->at(ii); // follow partner
+ Node* s = mem_pk->at(ii); // follow partner
if (memops.member(s) && !schedule_before_pack.member(s))
schedule_before_pack.push(s);
}
}
+ break;
}
}
}
}
}
- MemNode* lower_insert_pt = last;
Node* upper_insert_pt = first->in(MemNode::Memory);
+ // Following code moves loads connected to upper_insert_pt below aliased stores.
+ // Collect such loads here and reconnect them back to upper_insert_pt later.
+ memops.clear();
+ for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) {
+ Node* use = upper_insert_pt->out(i);
+ if (!use->is_Store())
+ memops.push(use);
+ }
+
+ MemNode* lower_insert_pt = last;
previous = last; //previous store in pk
current = last->in(MemNode::Memory)->as_Mem();
- //start scheduling from "last" to "first"
+ // start scheduling from "last" to "first"
while (true) {
assert(in_bb(current), "stay in block");
assert(in_pack(previous, pk), "previous stays in pack");
@@ -1066,16 +1241,13 @@ void SuperWord::co_locate_pack(Node_List* pk) {
if (in_pack(current, pk)) {
// Forward users of my memory state (except "previous) to my input memory state
- _igvn.hash_delete(current);
for (DUIterator i = current->outs(); current->has_out(i); i++) {
Node* use = current->out(i);
if (use->is_Mem() && use != previous) {
assert(use->in(MemNode::Memory) == current, "must be");
if (schedule_before_pack.member(use)) {
- _igvn.hash_delete(upper_insert_pt);
_igvn.replace_input_of(use, MemNode::Memory, upper_insert_pt);
} else {
- _igvn.hash_delete(lower_insert_pt);
_igvn.replace_input_of(use, MemNode::Memory, lower_insert_pt);
}
--i; // deleted this edge; rescan position
@@ -1089,6 +1261,14 @@ void SuperWord::co_locate_pack(Node_List* pk) {
if (current == first) break;
current = my_mem->as_Mem();
} // end while
+
+ // Reconnect loads back to upper_insert_pt.
+ for (uint i = 0; i < memops.size(); i++) {
+ Node *ld = memops.at(i);
+ if (ld->in(MemNode::Memory) != upper_insert_pt) {
+ _igvn.replace_input_of(ld, MemNode::Memory, upper_insert_pt);
+ }
+ }
} else if (pk->at(0)->is_Load()) { //load
// all loads in the pack should have the same memory state. By default,
// we use the memory state of the last load. However, if any load could
@@ -1149,35 +1329,30 @@ void SuperWord::output() {
Node* vn = NULL;
Node* low_adr = p->at(0);
Node* first = executed_first(p);
+ int opc = n->Opcode();
if (n->is_Load()) {
- int opc = n->Opcode();
Node* ctl = n->in(MemNode::Control);
Node* mem = first->in(MemNode::Memory);
Node* adr = low_adr->in(MemNode::Address);
const TypePtr* atyp = n->adr_type();
- vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen);
-
+ vn = LoadVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
} else if (n->is_Store()) {
// Promote value to be stored to vector
Node* val = vector_opd(p, MemNode::ValueIn);
-
- int opc = n->Opcode();
Node* ctl = n->in(MemNode::Control);
Node* mem = first->in(MemNode::Memory);
Node* adr = low_adr->in(MemNode::Address);
const TypePtr* atyp = n->adr_type();
- vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
-
+ vn = StoreVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
} else if (n->req() == 3) {
// Promote operands to vector
Node* in1 = vector_opd(p, 1);
Node* in2 = vector_opd(p, 2);
- vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n));
-
+ vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n));
} else {
ShouldNotReachHere();
}
-
+ assert(vn != NULL, "sanity");
_phase->_igvn.register_new_node_with_optimizer(vn);
_phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
for (uint j = 0; j < p->size(); j++) {
@@ -1185,6 +1360,12 @@ void SuperWord::output() {
_igvn.replace_node(pm, vn);
}
_igvn._worklist.push(vn);
+#ifdef ASSERT
+ if (TraceSuperWord) {
+ tty->print("new Vector node: ");
+ vn->dump();
+ }
+#endif
}
}
}
@@ -1207,10 +1388,10 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
}
if (same_opd) {
- if (opd->is_Vector() || opd->is_VectorLoad()) {
+ if (opd->is_Vector() || opd->is_LoadVector()) {
return opd; // input is matching vector
}
- assert(!opd->is_VectorStore(), "such vector is not expected here");
+ assert(!opd->is_StoreVector(), "such vector is not expected here");
// Convert scalar input to vector with the same number of elements as
// p0's vector. Use p0's type because size of operand's container in
// vector should match p0's size regardless operand's size.
@@ -1219,12 +1400,18 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
_phase->_igvn.register_new_node_with_optimizer(vn);
_phase->set_ctrl(vn, _phase->get_ctrl(opd));
+#ifdef ASSERT
+ if (TraceSuperWord) {
+ tty->print("new Vector node: ");
+ vn->dump();
+ }
+#endif
return vn;
}
// Insert pack operation
- const Type* p0_t = velt_type(p0);
- PackNode* pk = PackNode::make(_phase->C, opd, p0_t);
+ BasicType bt = velt_basic_type(p0);
+ PackNode* pk = PackNode::make(_phase->C, opd, vlen, bt);
DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )
for (uint i = 1; i < vlen; i++) {
@@ -1232,10 +1419,16 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
Node* in = pi->in(opd_idx);
assert(my_pack(in) == NULL, "Should already have been unpacked");
assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
- pk->add_opd(in);
+ pk->add_opd(i, in);
}
_phase->_igvn.register_new_node_with_optimizer(pk);
_phase->set_ctrl(pk, _phase->get_ctrl(opd));
+#ifdef ASSERT
+ if (TraceSuperWord) {
+ tty->print("new Pack node: ");
+ pk->dump();
+ }
+#endif
return pk;
}
@@ -1273,16 +1466,15 @@ void SuperWord::insert_extracts(Node_List* p) {
// Insert extract operation
_igvn.hash_delete(def);
int def_pos = alignment(def) / data_size(def);
- const Type* def_t = velt_type(def);
- Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t);
+ Node* ex = ExtractNode::make(_phase->C, def, def_pos, velt_basic_type(def));
_phase->_igvn.register_new_node_with_optimizer(ex);
_phase->set_ctrl(ex, _phase->get_ctrl(def));
_igvn.replace_input_of(use, idx, ex);
_igvn._worklist.push(def);
bb_insert_after(ex, bb_idx(def));
- set_velt_type(ex, def_t);
+ set_velt_type(ex, velt_type(def));
}
}
@@ -1509,10 +1701,7 @@ void SuperWord::compute_vector_element_type() {
// Initial type
for (int i = 0; i < _block.length(); i++) {
Node* n = _block.at(i);
- const Type* t = n->is_Mem() ? Type::get_const_basic_type(n->as_Mem()->memory_type())
- : _igvn.type(n);
- const Type* vt = container_type(t);
- set_velt_type(n, vt);
+ set_velt_type(n, container_type(n));
}
// Propagate narrowed type backwards through operations
@@ -1543,7 +1732,7 @@ void SuperWord::compute_vector_element_type() {
bool same_type = true;
for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
Node *use = in->fast_out(k);
- if (!in_bb(use) || velt_type(use) != vt) {
+ if (!in_bb(use) || !same_velt_type(use, n)) {
same_type = false;
break;
}
@@ -1575,20 +1764,24 @@ int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) {
if (!p.valid()) {
return bottom_align;
}
+ int vw = vector_width_in_bytes(velt_basic_type(s));
+ if (vw < 2) {
+ return bottom_align; // No vectors for this type
+ }
int offset = p.offset_in_bytes();
offset += iv_adjust_in_bytes;
- int off_rem = offset % vector_width_in_bytes();
- int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes();
+ int off_rem = offset % vw;
+ int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
return off_mod;
}
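The explicit off_rem >= 0 test matters because C++ '%' keeps the sign of the dividend; a small stand-alone check with assumed values:

    #include <cassert>

    int main() {
      int offset = -4;   // assumed byte offset after iv adjustment
      int vw = 16;       // assumed vector width in bytes
      int off_rem = offset % vw;                            // -4, not 12
      int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;  // normalize to [0, vw)
      assert(off_mod == 12);
      return 0;
    }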
//---------------------------container_type---------------------------
// Smallest type containing range of values
-const Type* SuperWord::container_type(const Type* t) {
- const Type* tp = t->make_ptr();
- if (tp && tp->isa_aryptr()) {
- t = tp->is_aryptr()->elem();
+const Type* SuperWord::container_type(Node* n) {
+ if (n->is_Mem()) {
+ return Type::get_const_basic_type(n->as_Mem()->memory_type());
}
+ const Type* t = _igvn.type(n);
if (t->basic_type() == T_INT) {
if (t->higher_equal(TypeInt::BOOL)) return TypeInt::BOOL;
if (t->higher_equal(TypeInt::BYTE)) return TypeInt::BYTE;
@@ -1599,11 +1792,22 @@ const Type* SuperWord::container_type(const Type* t) {
return t;
}
+bool SuperWord::same_velt_type(Node* n1, Node* n2) {
+ const Type* vt1 = velt_type(n1);
+  const Type* vt2 = velt_type(n2);
+  if (vt1->basic_type() == T_INT && vt2->basic_type() == T_INT) {
+    // Compare vector element sizes for integer types.
+ return data_size(n1) == data_size(n2);
+ }
+ return vt1 == vt2;
+}
+
//-------------------------vector_opd_range-----------------------
// (Start, end] half-open range defining which operands are vector
void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) {
switch (n->Opcode()) {
- case Op_LoadB: case Op_LoadUS:
+ case Op_LoadB: case Op_LoadUB:
+ case Op_LoadS: case Op_LoadUS:
case Op_LoadI: case Op_LoadL:
case Op_LoadF: case Op_LoadD:
case Op_LoadP:
@@ -1721,6 +1925,7 @@ void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
SWPointer align_to_ref_p(align_to_ref, this);
+ assert(align_to_ref_p.valid(), "sanity");
// Given:
// lim0 == original pre loop limit
@@ -1773,10 +1978,12 @@ void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
// N = (V - (e - lim0)) % V
// lim = lim0 - (V - (e - lim0)) % V
+ int vw = vector_width_in_bytes(velt_basic_type(align_to_ref));
+ assert(vw > 1, "sanity");
int stride = iv_stride();
int scale = align_to_ref_p.scale_in_bytes();
int elt_size = align_to_ref_p.memory_size();
- int v_align = vector_width_in_bytes() / elt_size;
+ int v_align = vw / elt_size;
int k = align_to_ref_p.offset_in_bytes() / elt_size;
Node *kn = _igvn.intcon(k);
@@ -1796,6 +2003,25 @@ void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
_phase->_igvn.register_new_node_with_optimizer(e);
_phase->set_ctrl(e, pre_ctrl);
}
+ if (vw > ObjectAlignmentInBytes) {
+    // Incorporate the base address: e += (base & (vw-1)) >>> log2(elt_size)
+ Node* mask = _igvn.MakeConX(~(-1 << exact_log2(vw)));
+ Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base());
+ _phase->_igvn.register_new_node_with_optimizer(xbase);
+ Node* masked_xbase = new (_phase->C, 3) AndXNode(xbase, mask);
+ _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
+#ifdef _LP64
+ masked_xbase = new (_phase->C, 2) ConvL2INode(masked_xbase);
+ _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
+#endif
+ Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
+ Node* bref = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt);
+ _phase->_igvn.register_new_node_with_optimizer(bref);
+ _phase->set_ctrl(bref, pre_ctrl);
+ e = new (_phase->C, 3) AddINode(e, bref);
+ _phase->_igvn.register_new_node_with_optimizer(e);
+ _phase->set_ctrl(e, pre_ctrl);
+ }
// compute e +/- lim0
if (scale < 0) {
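A numeric sketch of the new term above (the address and sizes are assumptions): when the vector width exceeds ObjectAlignmentInBytes, the array base's misalignment within a vector span, expressed in elements, is folded into the pre-loop limit adjustment.

    #include <cassert>
    #include <cstdint>

    int main() {
      uintptr_t base = 0x7f0000001010u;  // assumed array base address
      int vw = 32;                       // 256-bit vectors
      int log2_elt = 2;                  // 4-byte ints
      int bref = (int)((base & (uintptr_t)(vw - 1)) >> log2_elt);
      assert(bref == 4);                 // 16 bytes into the 32-byte span == 4 ints
      return 0;
    }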
diff --git a/src/share/vm/opto/superword.hpp b/src/share/vm/opto/superword.hpp
index 509376712..97224e4d7 100644
--- a/src/share/vm/opto/superword.hpp
+++ b/src/share/vm/opto/superword.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -264,7 +264,10 @@ class SuperWord : public ResourceObj {
_iv = lp->as_CountedLoop()->phi()->as_Phi(); }
int iv_stride() { return lp()->as_CountedLoop()->stride_con(); }
- int vector_width_in_bytes() { return Matcher::vector_width_in_bytes(); }
+ int vector_width_in_bytes(BasicType bt) {
+ return MIN2(ABS(iv_stride())*type2aelembytes(bt),
+ Matcher::vector_width_in_bytes(bt));
+ }
MemNode* align_to_ref() { return _align_to_ref; }
void set_align_to_ref(MemNode* m) { _align_to_ref = m; }
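The MIN2 above caps the usable vector width by how many adjacent elements one main-loop iteration actually touches after unrolling. With assumed numbers, a loop unrolled 4 times over ints on a machine whose Matcher reports 32-byte vectors still gets only 16-byte vectors:

    #include <algorithm>
    #include <cassert>

    int main() {
      int iv_stride = 4;        // assumed post-unroll stride (elements per iteration)
      int elem_bytes = 4;       // T_INT
      int matcher_vw = 32;      // assumed platform maximum (AVX)
      int vw = std::min(iv_stride * elem_bytes, matcher_vw);
      assert(vw == 16);         // limited by the unroll factor, not the hardware
      return 0;
    }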
@@ -298,7 +301,9 @@ class SuperWord : public ResourceObj {
// vector element type
const Type* velt_type(Node* n) { return _node_info.adr_at(bb_idx(n))->_velt_type; }
+ BasicType velt_basic_type(Node* n) { return velt_type(n)->array_element_basic_type(); }
void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; }
+ bool same_velt_type(Node* n1, Node* n2);
// my_pack
Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }
@@ -311,7 +316,9 @@ class SuperWord : public ResourceObj {
// Find the adjacent memory references and create pack pairs for them.
void find_adjacent_refs();
// Find a memory reference to align the loop induction variable to.
- void find_align_to_ref(Node_List &memops);
+ MemNode* find_align_to_ref(Node_List &memops);
+  // Calculate the loop's iv adjustment for this memory op.
+ int get_iv_adjustment(MemNode* mem);
// Can the preloop align the reference to position zero in the vector?
bool ref_is_alignable(SWPointer& p);
// Construct dependency graph.
@@ -394,7 +401,7 @@ class SuperWord : public ResourceObj {
// (Start, end] half-open range defining which operands are vector
void vector_opd_range(Node* n, uint* start, uint* end);
// Smallest type containing range of values
- static const Type* container_type(const Type* t);
+ const Type* container_type(Node* n);
// Adjust pre-loop limit so that in main loop, a load/store reference
// to align_to_ref will be a position zero in the vector.
void align_initial_loop_index(MemNode* align_to_ref);
@@ -462,6 +469,7 @@ class SWPointer VALUE_OBJ_CLASS_SPEC {
Node* base() { return _base; }
Node* adr() { return _adr; }
+ MemNode* mem() { return _mem; }
int scale_in_bytes() { return _scale; }
Node* invar() { return _invar; }
bool negate_invar() { return _negate_invar; }
diff --git a/src/share/vm/opto/type.cpp b/src/share/vm/opto/type.cpp
index af118139e..30ec56959 100644
--- a/src/share/vm/opto/type.cpp
+++ b/src/share/vm/opto/type.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -60,6 +60,10 @@ const BasicType Type::_basic_type[Type::lastype] = {
T_ILLEGAL, // Tuple
T_ARRAY, // Array
+ T_ILLEGAL, // VectorS
+ T_ILLEGAL, // VectorD
+ T_ILLEGAL, // VectorX
+ T_ILLEGAL, // VectorY
T_ADDRESS, // AnyPtr // shows up in factory methods for NULL_PTR
T_ADDRESS, // RawPtr
@@ -414,6 +418,24 @@ void Type::Initialize_shared(Compile* current) {
// get_zero_type() should not happen for T_CONFLICT
_zero_type[T_CONFLICT]= NULL;
+  // Predefined vector types; this requires _const_basic_type[] to be initialized.
+ if (Matcher::vector_size_supported(T_BYTE,4)) {
+ TypeVect::VECTS = TypeVect::make(T_BYTE,4);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,2)) {
+ TypeVect::VECTD = TypeVect::make(T_FLOAT,2);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,4)) {
+ TypeVect::VECTX = TypeVect::make(T_FLOAT,4);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,8)) {
+ TypeVect::VECTY = TypeVect::make(T_FLOAT,8);
+ }
+ mreg2type[Op_VecS] = TypeVect::VECTS;
+ mreg2type[Op_VecD] = TypeVect::VECTD;
+ mreg2type[Op_VecX] = TypeVect::VECTX;
+ mreg2type[Op_VecY] = TypeVect::VECTY;
+
// Restore working type arena.
current->set_type_arena(save);
current->set_type_dict(NULL);
@@ -668,6 +690,10 @@ const Type::TYPES Type::dual_type[Type::lastype] = {
Bad, // Tuple - handled in v-call
Bad, // Array - handled in v-call
+ Bad, // VectorS - handled in v-call
+ Bad, // VectorD - handled in v-call
+ Bad, // VectorX - handled in v-call
+ Bad, // VectorY - handled in v-call
Bad, // AnyPtr - handled in v-call
Bad, // RawPtr - handled in v-call
@@ -728,8 +754,8 @@ void Type::dump_on(outputStream *st) const {
//------------------------------data-------------------------------------------
const char * const Type::msg[Type::lastype] = {
"bad","control","top","int:","long:","half", "narrowoop:",
- "tuple:", "aryptr",
- "anyptr:", "rawptr:", "java:", "inst:", "ary:", "klass:",
+ "tuple:", "array:", "vectors:", "vectord:", "vectorx:", "vectory:",
+ "anyptr:", "rawptr:", "java:", "inst:", "aryptr:", "klass:",
"func", "abIO", "return_address", "memory",
"float_top", "ftcon:", "float",
"double_top", "dblcon:", "double",
@@ -790,7 +816,7 @@ void Type::typerr( const Type *t ) const {
//------------------------------isa_oop_ptr------------------------------------
// Return true if type is an oop pointer type. False for raw pointers.
static char isa_oop_ptr_tbl[Type::lastype] = {
- 0,0,0,0,0,0,0/*narrowoop*/,0/*tuple*/, 0/*ary*/,
+ 0,0,0,0,0,0,0/*narrowoop*/,0/*tuple*/, 0/*array*/, 0, 0, 0, 0/*vector*/,
0/*anyptr*/,0/*rawptr*/,1/*OopPtr*/,1/*InstPtr*/,1/*AryPtr*/,1/*KlassPtr*/,
0/*func*/,0,0/*return_address*/,0,
/*floats*/0,0,0, /*doubles*/0,0,0,
@@ -1926,6 +1952,121 @@ bool TypeAry::ary_must_be_exact() const {
return false;
}
+//==============================TypeVect=======================================
+// Convenience common pre-built types.
+const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors
+const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors
+const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors
+const TypeVect *TypeVect::VECTY = NULL; // 256-bit vectors
+
+//------------------------------make-------------------------------------------
+const TypeVect* TypeVect::make(const Type *elem, uint length) {
+ BasicType elem_bt = elem->array_element_basic_type();
+ assert(is_java_primitive(elem_bt), "only primitive types in vector");
+ assert(length > 1 && is_power_of_2(length), "vector length is power of 2");
+ assert(Matcher::vector_size_supported(elem_bt, length), "length in range");
+ int size = length * type2aelembytes(elem_bt);
+ switch (Matcher::vector_ideal_reg(size)) {
+ case Op_VecS:
+ return (TypeVect*)(new TypeVectS(elem, length))->hashcons();
+ case Op_VecD:
+ case Op_RegD:
+ return (TypeVect*)(new TypeVectD(elem, length))->hashcons();
+ case Op_VecX:
+ return (TypeVect*)(new TypeVectX(elem, length))->hashcons();
+ case Op_VecY:
+ return (TypeVect*)(new TypeVectY(elem, length))->hashcons();
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
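The predefined VECTS/VECTD/VECTX/VECTY types line up with the four ideal vector registers by total byte size; the real lookup goes through Matcher::vector_ideal_reg(). A small illustrative mapping using the byte sizes this change targets (the returned names are just strings in the sketch):

    #include <cassert>
    #include <cstring>

    static const char* ideal_vect_reg(int elem_bytes, int length) {
      switch (elem_bytes * length) {
        case 4:  return "VecS";   // e.g. 4 x byte
        case 8:  return "VecD";   // e.g. 2 x float
        case 16: return "VecX";   // e.g. 4 x float
        case 32: return "VecY";   // e.g. 8 x float (256-bit AVX)
        default: return "unsupported";
      }
    }

    int main() {
      assert(std::strcmp(ideal_vect_reg(4, 8), "VecY") == 0);   // 8 floats
      assert(std::strcmp(ideal_vect_reg(1, 4), "VecS") == 0);   // 4 bytes
      return 0;
    }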
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeVect::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is Vector
+ switch (t->base()) { // switch on original type
+
+ case Bottom: // Ye Olde Default
+ return t;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case VectorS:
+ case VectorD:
+ case VectorX:
+ case VectorY: { // Meeting 2 vectors?
+ const TypeVect* v = t->is_vect();
+ assert( base() == v->base(), "");
+ assert(length() == v->length(), "");
+ assert(element_basic_type() == v->element_basic_type(), "");
+ return TypeVect::make(_elem->xmeet(v->_elem), _length);
+ }
+ case Top:
+ break;
+ }
+ return this;
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeVect::xdual() const {
+ return new TypeVect(base(), _elem->dual(), _length);
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeVect::eq(const Type *t) const {
+ const TypeVect *v = t->is_vect();
+ return (_elem == v->_elem) && (_length == v->_length);
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeVect::hash(void) const {
+ return (intptr_t)_elem + (intptr_t)_length;
+}
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Vector is singleton if all elements are the same
+// constant value (when vector is created with Replicate code).
+bool TypeVect::singleton(void) const {
+// There is no Con node for vectors yet.
+// return _elem->singleton();
+ return false;
+}
+
+bool TypeVect::empty(void) const {
+ return _elem->empty();
+}
+
+//------------------------------dump2------------------------------------------
+#ifndef PRODUCT
+void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
+ switch (base()) {
+ case VectorS:
+ st->print("vectors["); break;
+ case VectorD:
+ st->print("vectord["); break;
+ case VectorX:
+ st->print("vectorx["); break;
+ case VectorY:
+ st->print("vectory["); break;
+ default:
+ ShouldNotReachHere();
+ }
+ st->print("%d]:{", _length);
+ _elem->dump2(d, depth, st);
+ st->print("}");
+}
+#endif
+
+
//=============================================================================
// Convenience common pre-built types.
const TypePtr *TypePtr::NULL_PTR;
@@ -4140,7 +4281,7 @@ void TypeFunc::dump2( Dict &d, uint depth, outputStream *st ) const {
// Print a 'flattened' signature
static const char * const flat_type_msg[Type::lastype] = {
"bad","control","top","int","long","_", "narrowoop",
- "tuple:", "array:",
+ "tuple:", "array:", "vectors:", "vectord:", "vectorx:", "vectory:",
"ptr", "rawptr", "ptr", "ptr", "ptr", "ptr",
"func", "abIO", "return_address", "mem",
"float_top", "ftcon:", "flt",
diff --git a/src/share/vm/opto/type.hpp b/src/share/vm/opto/type.hpp
index 133ce78f0..a4b5487d7 100644
--- a/src/share/vm/opto/type.hpp
+++ b/src/share/vm/opto/type.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,11 @@ class TypeLong;
class TypeNarrowOop;
class TypeAry;
class TypeTuple;
+class TypeVect;
+class TypeVectS;
+class TypeVectD;
+class TypeVectX;
+class TypeVectY;
class TypePtr;
class TypeRawPtr;
class TypeOopPtr;
@@ -78,6 +83,10 @@ public:
Tuple, // Method signature or object layout
Array, // Array types
+ VectorS, // 32bit Vector types
+ VectorD, // 64bit Vector types
+ VectorX, // 128bit Vector types
+ VectorY, // 256bit Vector types
AnyPtr, // Any old raw, klass, inst, or array pointer
RawPtr, // Raw (non-oop) pointers
@@ -222,6 +231,8 @@ public:
const TypeF *isa_float_constant() const; // Returns NULL if not a FloatCon
const TypeTuple *is_tuple() const; // Collection of fields, NOT a pointer
const TypeAry *is_ary() const; // Array, NOT array pointer
+ const TypeVect *is_vect() const; // Vector
+ const TypeVect *isa_vect() const; // Returns NULL if not a Vector
const TypePtr *is_ptr() const; // Asserts it is a ptr type
const TypePtr *isa_ptr() const; // Returns NULL if not ptr type
const TypeRawPtr *isa_rawptr() const; // NOT Java oop
@@ -574,6 +585,69 @@ public:
#endif
};
+//------------------------------TypeVect---------------------------------------
+// Class of Vector Types
+class TypeVect : public Type {
+ const Type* _elem; // Vector's element type
+ const uint _length; // Elements in vector (power of 2)
+
+protected:
+ TypeVect(TYPES t, const Type* elem, uint length) : Type(t),
+ _elem(elem), _length(length) {}
+
+public:
+ const Type* element_type() const { return _elem; }
+ BasicType element_basic_type() const { return _elem->array_element_basic_type(); }
+ uint length() const { return _length; }
+ uint length_in_bytes() const {
+ return _length * type2aelembytes(element_basic_type());
+ }
+
+ virtual bool eq(const Type *t) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+
+ static const TypeVect *make(const BasicType elem_bt, uint length) {
+ // Use bottom primitive type.
+ return make(get_const_basic_type(elem_bt), length);
+ }
+ // Used directly by Replicate nodes to construct singleton vector.
+ static const TypeVect *make(const Type* elem, uint length);
+
+ virtual const Type *xmeet( const Type *t) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ static const TypeVect *VECTS;
+ static const TypeVect *VECTD;
+ static const TypeVect *VECTX;
+ static const TypeVect *VECTY;
+
+#ifndef PRODUCT
+ virtual void dump2(Dict &d, uint, outputStream *st) const; // Specialized per-Type dumping
+#endif
+};
+
+class TypeVectS : public TypeVect {
+ friend class TypeVect;
+ TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {}
+};
+
+class TypeVectD : public TypeVect {
+ friend class TypeVect;
+ TypeVectD(const Type* elem, uint length) : TypeVect(VectorD, elem, length) {}
+};
+
+class TypeVectX : public TypeVect {
+ friend class TypeVect;
+ TypeVectX(const Type* elem, uint length) : TypeVect(VectorX, elem, length) {}
+};
+
+class TypeVectY : public TypeVect {
+ friend class TypeVect;
+ TypeVectY(const Type* elem, uint length) : TypeVect(VectorY, elem, length) {}
+};
+
//------------------------------TypePtr----------------------------------------
// Class of machine Pointer Types: raw data, instances or arrays.
// If the _base enum is AnyPtr, then this refers to all of the above.
@@ -1113,6 +1187,15 @@ inline const TypeAry *Type::is_ary() const {
return (TypeAry*)this;
}
+inline const TypeVect *Type::is_vect() const {
+ assert( _base >= VectorS && _base <= VectorY, "Not a Vector" );
+ return (TypeVect*)this;
+}
+
+inline const TypeVect *Type::isa_vect() const {
+ return (_base >= VectorS && _base <= VectorY) ? (TypeVect*)this : NULL;
+}
+
inline const TypePtr *Type::is_ptr() const {
// AnyPtr is the first Ptr and KlassPtr the last, with no non-ptrs between.
assert(_base >= AnyPtr && _base <= KlassPtr, "Not a pointer");
diff --git a/src/share/vm/opto/vectornode.cpp b/src/share/vm/opto/vectornode.cpp
index 885a1c898..c786754cd 100644
--- a/src/share/vm/opto/vectornode.cpp
+++ b/src/share/vm/opto/vectornode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,147 +28,16 @@
//------------------------------VectorNode--------------------------------------
-// Return vector type for an element type and vector length.
-const Type* VectorNode::vect_type(BasicType elt_bt, uint len) {
- assert(len <= VectorNode::max_vlen(elt_bt), "len in range");
- switch(elt_bt) {
- case T_BOOLEAN:
- case T_BYTE:
- switch(len) {
- case 2: return TypeInt::CHAR;
- case 4: return TypeInt::INT;
- case 8: return TypeLong::LONG;
- }
- break;
- case T_CHAR:
- case T_SHORT:
- switch(len) {
- case 2: return TypeInt::INT;
- case 4: return TypeLong::LONG;
- }
- break;
- case T_INT:
- switch(len) {
- case 2: return TypeLong::LONG;
- }
- break;
- case T_LONG:
- break;
- case T_FLOAT:
- switch(len) {
- case 2: return Type::DOUBLE;
- }
- break;
- case T_DOUBLE:
- break;
- }
- ShouldNotReachHere();
- return NULL;
-}
-
-// Scalar promotion
-VectorNode* VectorNode::scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t) {
- BasicType bt = opd_t->array_element_basic_type();
- assert(vlen <= VectorNode::max_vlen(bt), "vlen in range");
- switch (bt) {
- case T_BOOLEAN:
- case T_BYTE:
- if (vlen == 16) return new (C, 2) Replicate16BNode(s);
- if (vlen == 8) return new (C, 2) Replicate8BNode(s);
- if (vlen == 4) return new (C, 2) Replicate4BNode(s);
- break;
- case T_CHAR:
- if (vlen == 8) return new (C, 2) Replicate8CNode(s);
- if (vlen == 4) return new (C, 2) Replicate4CNode(s);
- if (vlen == 2) return new (C, 2) Replicate2CNode(s);
- break;
- case T_SHORT:
- if (vlen == 8) return new (C, 2) Replicate8SNode(s);
- if (vlen == 4) return new (C, 2) Replicate4SNode(s);
- if (vlen == 2) return new (C, 2) Replicate2SNode(s);
- break;
- case T_INT:
- if (vlen == 4) return new (C, 2) Replicate4INode(s);
- if (vlen == 2) return new (C, 2) Replicate2INode(s);
- break;
- case T_LONG:
- if (vlen == 2) return new (C, 2) Replicate2LNode(s);
- break;
- case T_FLOAT:
- if (vlen == 4) return new (C, 2) Replicate4FNode(s);
- if (vlen == 2) return new (C, 2) Replicate2FNode(s);
- break;
- case T_DOUBLE:
- if (vlen == 2) return new (C, 2) Replicate2DNode(s);
- break;
- }
- ShouldNotReachHere();
- return NULL;
-}
-
-// Return initial Pack node. Additional operands added with add_opd() calls.
-PackNode* PackNode::make(Compile* C, Node* s, const Type* opd_t) {
- BasicType bt = opd_t->array_element_basic_type();
- switch (bt) {
- case T_BOOLEAN:
- case T_BYTE:
- return new (C, 2) PackBNode(s);
- case T_CHAR:
- return new (C, 2) PackCNode(s);
- case T_SHORT:
- return new (C, 2) PackSNode(s);
- case T_INT:
- return new (C, 2) PackINode(s);
- case T_LONG:
- return new (C, 2) PackLNode(s);
- case T_FLOAT:
- return new (C, 2) PackFNode(s);
- case T_DOUBLE:
- return new (C, 2) PackDNode(s);
- }
- ShouldNotReachHere();
- return NULL;
-}
-
-// Create a binary tree form for Packs. [lo, hi) (half-open) range
-Node* PackNode::binaryTreePack(Compile* C, int lo, int hi) {
- int ct = hi - lo;
- assert(is_power_of_2(ct), "power of 2");
- int mid = lo + ct/2;
- Node* n1 = ct == 2 ? in(lo) : binaryTreePack(C, lo, mid);
- Node* n2 = ct == 2 ? in(lo+1) : binaryTreePack(C, mid, hi );
- int rslt_bsize = ct * type2aelembytes(elt_basic_type());
- if (bottom_type()->is_floatingpoint()) {
- switch (rslt_bsize) {
- case 8: return new (C, 3) PackFNode(n1, n2);
- case 16: return new (C, 3) PackDNode(n1, n2);
- }
- } else {
- assert(bottom_type()->isa_int() || bottom_type()->isa_long(), "int or long");
- switch (rslt_bsize) {
- case 2: return new (C, 3) Pack2x1BNode(n1, n2);
- case 4: return new (C, 3) Pack2x2BNode(n1, n2);
- case 8: return new (C, 3) PackINode(n1, n2);
- case 16: return new (C, 3) PackLNode(n1, n2);
- }
- }
- ShouldNotReachHere();
- return NULL;
-}
-
// Return the vector operator for the specified scalar operation
-// and vector length. One use is to check if the code generator
+// and vector length. Also used to check if the code generator
// supports the vector operation.
-int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
- BasicType bt = opd_t->array_element_basic_type();
- if (!(is_power_of_2(vlen) && vlen <= max_vlen(bt)))
- return 0; // unimplemented
+int VectorNode::opcode(int sopc, uint vlen, BasicType bt) {
switch (sopc) {
case Op_AddI:
switch (bt) {
case T_BOOLEAN:
case T_BYTE: return Op_AddVB;
- case T_CHAR: return Op_AddVC;
+ case T_CHAR:
case T_SHORT: return Op_AddVS;
case T_INT: return Op_AddVI;
}
@@ -186,7 +55,7 @@ int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
switch (bt) {
case T_BOOLEAN:
case T_BYTE: return Op_SubVB;
- case T_CHAR: return Op_SubVC;
+ case T_CHAR:
case T_SHORT: return Op_SubVS;
case T_INT: return Op_SubVI;
}
@@ -216,18 +85,18 @@ int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
switch (bt) {
case T_BOOLEAN:
case T_BYTE: return Op_LShiftVB;
- case T_CHAR: return Op_LShiftVC;
+ case T_CHAR:
case T_SHORT: return Op_LShiftVS;
case T_INT: return Op_LShiftVI;
}
ShouldNotReachHere();
- case Op_URShiftI:
+ case Op_RShiftI:
switch (bt) {
case T_BOOLEAN:
- case T_BYTE: return Op_URShiftVB;
- case T_CHAR: return Op_URShiftVC;
- case T_SHORT: return Op_URShiftVS;
- case T_INT: return Op_URShiftVI;
+ case T_BYTE: return Op_RShiftVB;
+ case T_CHAR:
+ case T_SHORT: return Op_RShiftVS;
+ case T_INT: return Op_RShiftVI;
}
ShouldNotReachHere();
case Op_AndI:
@@ -241,13 +110,14 @@ int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
return Op_XorV;
case Op_LoadB:
+ case Op_LoadUB:
case Op_LoadUS:
case Op_LoadS:
case Op_LoadI:
case Op_LoadL:
case Op_LoadF:
case Op_LoadD:
- return VectorLoadNode::opcode(sopc, vlen);
+ return Op_LoadVector;
case Op_StoreB:
case Op_StoreC:
@@ -255,211 +125,170 @@ int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
case Op_StoreL:
case Op_StoreF:
case Op_StoreD:
- return VectorStoreNode::opcode(sopc, vlen);
- }
- return 0; // Unimplemented
-}
-
-// Helper for above.
-int VectorLoadNode::opcode(int sopc, uint vlen) {
- switch (sopc) {
- case Op_LoadB:
- switch (vlen) {
- case 2: return 0; // Unimplemented
- case 4: return Op_Load4B;
- case 8: return Op_Load8B;
- case 16: return Op_Load16B;
- }
- break;
- case Op_LoadUS:
- switch (vlen) {
- case 2: return Op_Load2C;
- case 4: return Op_Load4C;
- case 8: return Op_Load8C;
- }
- break;
- case Op_LoadS:
- switch (vlen) {
- case 2: return Op_Load2S;
- case 4: return Op_Load4S;
- case 8: return Op_Load8S;
- }
- break;
- case Op_LoadI:
- switch (vlen) {
- case 2: return Op_Load2I;
- case 4: return Op_Load4I;
- }
- break;
- case Op_LoadL:
- if (vlen == 2) return Op_Load2L;
- break;
- case Op_LoadF:
- switch (vlen) {
- case 2: return Op_Load2F;
- case 4: return Op_Load4F;
- }
- break;
- case Op_LoadD:
- if (vlen == 2) return Op_Load2D;
- break;
+ return Op_StoreVector;
}
return 0; // Unimplemented
}
-// Helper for above
-int VectorStoreNode::opcode(int sopc, uint vlen) {
- switch (sopc) {
- case Op_StoreB:
- switch (vlen) {
- case 2: return 0; // Unimplemented
- case 4: return Op_Store4B;
- case 8: return Op_Store8B;
- case 16: return Op_Store16B;
- }
- break;
- case Op_StoreC:
- switch (vlen) {
- case 2: return Op_Store2C;
- case 4: return Op_Store4C;
- case 8: return Op_Store8C;
- }
- break;
- case Op_StoreI:
- switch (vlen) {
- case 2: return Op_Store2I;
- case 4: return Op_Store4I;
- }
- break;
- case Op_StoreL:
- if (vlen == 2) return Op_Store2L;
- break;
- case Op_StoreF:
- switch (vlen) {
- case 2: return Op_Store2F;
- case 4: return Op_Store4F;
- }
- break;
- case Op_StoreD:
- if (vlen == 2) return Op_Store2D;
- break;
+bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
+ if (is_java_primitive(bt) &&
+ (vlen > 1) && is_power_of_2(vlen) &&
+ Matcher::vector_size_supported(bt, vlen)) {
+ int vopc = VectorNode::opcode(opc, vlen, bt);
+ return vopc > 0 && Matcher::has_match_rule(vopc);
}
- return 0; // Unimplemented
+ return false;
}
// Return the vector version of a scalar operation node.
-VectorNode* VectorNode::make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* opd_t) {
- int vopc = opcode(sopc, vlen, opd_t);
+VectorNode* VectorNode::make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt) {
+ const TypeVect* vt = TypeVect::make(bt, vlen);
+ int vopc = VectorNode::opcode(opc, vlen, bt);
switch (vopc) {
- case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vlen);
- case Op_AddVC: return new (C, 3) AddVCNode(n1, n2, vlen);
- case Op_AddVS: return new (C, 3) AddVSNode(n1, n2, vlen);
- case Op_AddVI: return new (C, 3) AddVINode(n1, n2, vlen);
- case Op_AddVL: return new (C, 3) AddVLNode(n1, n2, vlen);
- case Op_AddVF: return new (C, 3) AddVFNode(n1, n2, vlen);
- case Op_AddVD: return new (C, 3) AddVDNode(n1, n2, vlen);
-
- case Op_SubVB: return new (C, 3) SubVBNode(n1, n2, vlen);
- case Op_SubVC: return new (C, 3) SubVCNode(n1, n2, vlen);
- case Op_SubVS: return new (C, 3) SubVSNode(n1, n2, vlen);
- case Op_SubVI: return new (C, 3) SubVINode(n1, n2, vlen);
- case Op_SubVL: return new (C, 3) SubVLNode(n1, n2, vlen);
- case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vlen);
- case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vlen);
-
- case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vlen);
- case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vlen);
-
- case Op_DivVF: return new (C, 3) DivVFNode(n1, n2, vlen);
- case Op_DivVD: return new (C, 3) DivVDNode(n1, n2, vlen);
-
- case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vlen);
- case Op_LShiftVC: return new (C, 3) LShiftVCNode(n1, n2, vlen);
- case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vlen);
- case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vlen);
+ case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vt);
+ case Op_AddVS: return new (C, 3) AddVSNode(n1, n2, vt);
+ case Op_AddVI: return new (C, 3) AddVINode(n1, n2, vt);
+ case Op_AddVL: return new (C, 3) AddVLNode(n1, n2, vt);
+ case Op_AddVF: return new (C, 3) AddVFNode(n1, n2, vt);
+ case Op_AddVD: return new (C, 3) AddVDNode(n1, n2, vt);
+
+ case Op_SubVB: return new (C, 3) SubVBNode(n1, n2, vt);
+ case Op_SubVS: return new (C, 3) SubVSNode(n1, n2, vt);
+ case Op_SubVI: return new (C, 3) SubVINode(n1, n2, vt);
+ case Op_SubVL: return new (C, 3) SubVLNode(n1, n2, vt);
+ case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vt);
+ case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vt);
+
+ case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vt);
+ case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vt);
+
+ case Op_DivVF: return new (C, 3) DivVFNode(n1, n2, vt);
+ case Op_DivVD: return new (C, 3) DivVDNode(n1, n2, vt);
+
+ case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vt);
+ case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vt);
+ case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vt);
+
+ case Op_RShiftVB: return new (C, 3) RShiftVBNode(n1, n2, vt);
+ case Op_RShiftVS: return new (C, 3) RShiftVSNode(n1, n2, vt);
+ case Op_RShiftVI: return new (C, 3) RShiftVINode(n1, n2, vt);
+
+ case Op_AndV: return new (C, 3) AndVNode(n1, n2, vt);
+ case Op_OrV: return new (C, 3) OrVNode (n1, n2, vt);
+ case Op_XorV: return new (C, 3) XorVNode(n1, n2, vt);
+ }
+ ShouldNotReachHere();
+ return NULL;
- case Op_URShiftVB: return new (C, 3) URShiftVBNode(n1, n2, vlen);
- case Op_URShiftVC: return new (C, 3) URShiftVCNode(n1, n2, vlen);
- case Op_URShiftVS: return new (C, 3) URShiftVSNode(n1, n2, vlen);
- case Op_URShiftVI: return new (C, 3) URShiftVINode(n1, n2, vlen);
+}
- case Op_AndV: return new (C, 3) AndVNode(n1, n2, vlen, opd_t->array_element_basic_type());
- case Op_OrV: return new (C, 3) OrVNode (n1, n2, vlen, opd_t->array_element_basic_type());
- case Op_XorV: return new (C, 3) XorVNode(n1, n2, vlen, opd_t->array_element_basic_type());
+// Scalar promotion
+VectorNode* VectorNode::scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t) {
+ BasicType bt = opd_t->array_element_basic_type();
+ const TypeVect* vt = opd_t->singleton() ? TypeVect::make(opd_t, vlen)
+ : TypeVect::make(bt, vlen);
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ return new (C, 2) ReplicateBNode(s, vt);
+ case T_CHAR:
+ case T_SHORT:
+ return new (C, 2) ReplicateSNode(s, vt);
+ case T_INT:
+ return new (C, 2) ReplicateINode(s, vt);
+ case T_LONG:
+ return new (C, 2) ReplicateLNode(s, vt);
+ case T_FLOAT:
+ return new (C, 2) ReplicateFNode(s, vt);
+ case T_DOUBLE:
+ return new (C, 2) ReplicateDNode(s, vt);
}
ShouldNotReachHere();
return NULL;
}
-// Return the vector version of a scalar load node.
-VectorLoadNode* VectorLoadNode::make(Compile* C, int opc, Node* ctl, Node* mem,
- Node* adr, const TypePtr* atyp, uint vlen) {
- int vopc = opcode(opc, vlen);
-
- switch(vopc) {
- case Op_Load16B: return new (C, 3) Load16BNode(ctl, mem, adr, atyp);
- case Op_Load8B: return new (C, 3) Load8BNode(ctl, mem, adr, atyp);
- case Op_Load4B: return new (C, 3) Load4BNode(ctl, mem, adr, atyp);
-
- case Op_Load8C: return new (C, 3) Load8CNode(ctl, mem, adr, atyp);
- case Op_Load4C: return new (C, 3) Load4CNode(ctl, mem, adr, atyp);
- case Op_Load2C: return new (C, 3) Load2CNode(ctl, mem, adr, atyp);
-
- case Op_Load8S: return new (C, 3) Load8SNode(ctl, mem, adr, atyp);
- case Op_Load4S: return new (C, 3) Load4SNode(ctl, mem, adr, atyp);
- case Op_Load2S: return new (C, 3) Load2SNode(ctl, mem, adr, atyp);
-
- case Op_Load4I: return new (C, 3) Load4INode(ctl, mem, adr, atyp);
- case Op_Load2I: return new (C, 3) Load2INode(ctl, mem, adr, atyp);
+// Return initial Pack node. Additional operands added with add_opd() calls.
+PackNode* PackNode::make(Compile* C, Node* s, uint vlen, BasicType bt) {
+ const TypeVect* vt = TypeVect::make(bt, vlen);
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ return new (C, vlen+1) PackBNode(s, vt);
+ case T_CHAR:
+ case T_SHORT:
+ return new (C, vlen+1) PackSNode(s, vt);
+ case T_INT:
+ return new (C, vlen+1) PackINode(s, vt);
+ case T_LONG:
+ return new (C, vlen+1) PackLNode(s, vt);
+ case T_FLOAT:
+ return new (C, vlen+1) PackFNode(s, vt);
+ case T_DOUBLE:
+ return new (C, vlen+1) PackDNode(s, vt);
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
- case Op_Load2L: return new (C, 3) Load2LNode(ctl, mem, adr, atyp);
+// Create a binary tree form for Packs. [lo, hi) (half-open) range
+Node* PackNode::binaryTreePack(Compile* C, int lo, int hi) {
+ int ct = hi - lo;
+ assert(is_power_of_2(ct), "power of 2");
+ if (ct == 2) {
+ PackNode* pk = PackNode::make(C, in(lo), 2, vect_type()->element_basic_type());
+ pk->add_opd(1, in(lo+1));
+ return pk;
- case Op_Load4F: return new (C, 3) Load4FNode(ctl, mem, adr, atyp);
- case Op_Load2F: return new (C, 3) Load2FNode(ctl, mem, adr, atyp);
+ } else {
+ int mid = lo + ct/2;
+ Node* n1 = binaryTreePack(C, lo, mid);
+ Node* n2 = binaryTreePack(C, mid, hi );
- case Op_Load2D: return new (C, 3) Load2DNode(ctl, mem, adr, atyp);
+ BasicType bt = vect_type()->element_basic_type();
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ return new (C, 3) PackSNode(n1, n2, TypeVect::make(T_SHORT, 2));
+ case T_CHAR:
+ case T_SHORT:
+ return new (C, 3) PackINode(n1, n2, TypeVect::make(T_INT, 2));
+ case T_INT:
+ return new (C, 3) PackLNode(n1, n2, TypeVect::make(T_LONG, 2));
+ case T_LONG:
+ return new (C, 3) Pack2LNode(n1, n2, TypeVect::make(T_LONG, 2));
+ case T_FLOAT:
+ return new (C, 3) PackDNode(n1, n2, TypeVect::make(T_DOUBLE, 2));
+ case T_DOUBLE:
+ return new (C, 3) Pack2DNode(n1, n2, TypeVect::make(T_DOUBLE, 2));
+ }
+ ShouldNotReachHere();
}
- ShouldNotReachHere();
+ return NULL;
+}
+
+// Return the vector version of a scalar load node.
+LoadVectorNode* LoadVectorNode::make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, uint vlen, BasicType bt) {
+ const TypeVect* vt = TypeVect::make(bt, vlen);
+ return new (C, 3) LoadVectorNode(ctl, mem, adr, atyp, vt);
return NULL;
}
// Return the vector version of a scalar store node.
-VectorStoreNode* VectorStoreNode::make(Compile* C, int opc, Node* ctl, Node* mem,
+StoreVectorNode* StoreVectorNode::make(Compile* C, int opc, Node* ctl, Node* mem,
Node* adr, const TypePtr* atyp, Node* val,
uint vlen) {
- int vopc = opcode(opc, vlen);
-
- switch(vopc) {
- case Op_Store16B: return new (C, 4) Store16BNode(ctl, mem, adr, atyp, val);
- case Op_Store8B: return new (C, 4) Store8BNode(ctl, mem, adr, atyp, val);
- case Op_Store4B: return new (C, 4) Store4BNode(ctl, mem, adr, atyp, val);
-
- case Op_Store8C: return new (C, 4) Store8CNode(ctl, mem, adr, atyp, val);
- case Op_Store4C: return new (C, 4) Store4CNode(ctl, mem, adr, atyp, val);
- case Op_Store2C: return new (C, 4) Store2CNode(ctl, mem, adr, atyp, val);
-
- case Op_Store4I: return new (C, 4) Store4INode(ctl, mem, adr, atyp, val);
- case Op_Store2I: return new (C, 4) Store2INode(ctl, mem, adr, atyp, val);
-
- case Op_Store2L: return new (C, 4) Store2LNode(ctl, mem, adr, atyp, val);
-
- case Op_Store4F: return new (C, 4) Store4FNode(ctl, mem, adr, atyp, val);
- case Op_Store2F: return new (C, 4) Store2FNode(ctl, mem, adr, atyp, val);
-
- case Op_Store2D: return new (C, 4) Store2DNode(ctl, mem, adr, atyp, val);
- }
- ShouldNotReachHere();
- return NULL;
+ return new (C, 4) StoreVectorNode(ctl, mem, adr, atyp, val);
}
// Extract a scalar element of vector.
-Node* ExtractNode::make(Compile* C, Node* v, uint position, const Type* opd_t) {
- BasicType bt = opd_t->array_element_basic_type();
- assert(position < VectorNode::max_vlen(bt), "pos in range");
+Node* ExtractNode::make(Compile* C, Node* v, uint position, BasicType bt) {
+ assert((int)position < Matcher::max_vector_size(bt), "pos in range");
ConINode* pos = ConINode::make(C, (int)position);
switch (bt) {
case T_BOOLEAN:
+ return new (C, 3) ExtractUBNode(v, pos);
case T_BYTE:
return new (C, 3) ExtractBNode(v, pos);
case T_CHAR:
@@ -478,3 +307,4 @@ Node* ExtractNode::make(Compile* C, Node* v, uint position, const Type* opd_t) {
ShouldNotReachHere();
return NULL;
}
+
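
A rough sketch of how a client such as SuperWord is expected to drive the reworked factories (simplified, hypothetical caller; the real policy lives in superword.cpp): VectorNode::implemented() checks that the element type is a Java primitive, that the length is a power of two accepted by Matcher::vector_size_supported(), and that a match rule exists for the vector opcode; only then is a node built through VectorNode::make(), which types it with TypeVect::make(bt, vlen).

  // Hypothetical helper: vectorize an integer add of 'vlen' lanes, or
  // return NULL to keep the scalar form.
  static Node* make_vector_add(Compile* C, Node* a, Node* b, uint vlen) {
    const BasicType bt = T_INT;
    if (!VectorNode::implemented(Op_AddI, vlen, bt)) {
      return NULL;                      // code generator has no AddVI rule
    }
    return VectorNode::make(C, Op_AddI, a, b, vlen, bt);   // AddVINode
  }
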
diff --git a/src/share/vm/opto/vectornode.hpp b/src/share/vm/opto/vectornode.hpp
index 7d1905c7f..602ee94c5 100644
--- a/src/share/vm/opto/vectornode.hpp
+++ b/src/share/vm/opto/vectornode.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,48 +31,32 @@
//------------------------------VectorNode--------------------------------------
// Vector Operation
-class VectorNode : public Node {
- virtual uint size_of() const { return sizeof(*this); }
- protected:
- uint _length; // vector length
- virtual BasicType elt_basic_type() const = 0; // Vector element basic type
-
- static const Type* vect_type(BasicType elt_bt, uint len);
- static const Type* vect_type(const Type* elt_type, uint len) {
- return vect_type(elt_type->array_element_basic_type(), len);
- }
-
+class VectorNode : public TypeNode {
public:
- friend class VectorLoadNode; // For vect_type
- friend class VectorStoreNode; // ditto.
- VectorNode(Node* n1, uint vlen) : Node(NULL, n1), _length(vlen) {
+ VectorNode(Node* n1, const TypeVect* vt) : TypeNode(vt, 2) {
init_class_id(Class_Vector);
+ init_req(1, n1);
}
- VectorNode(Node* n1, Node* n2, uint vlen) : Node(NULL, n1, n2), _length(vlen) {
+ VectorNode(Node* n1, Node* n2, const TypeVect* vt) : TypeNode(vt, 3) {
init_class_id(Class_Vector);
+ init_req(1, n1);
+ init_req(2, n2);
}
- virtual int Opcode() const;
- uint length() const { return _length; } // Vector length
+ const TypeVect* vect_type() const { return type()->is_vect(); }
+ uint length() const { return vect_type()->length(); } // Vector length
- static uint max_vlen(BasicType bt) { // max vector length
- return (uint)(Matcher::vector_width_in_bytes() / type2aelembytes(bt));
- }
-
- // Element and vector type
- const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
- const Type* vect_type() const { return vect_type(elt_basic_type(), length()); }
-
- virtual const Type *bottom_type() const { return vect_type(); }
- virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
+ virtual int Opcode() const;
- // Vector opcode from scalar opcode
- static int opcode(int sopc, uint vlen, const Type* opd_t);
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(vect_type()->length_in_bytes()); }
static VectorNode* scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t);
- static VectorNode* make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* elt_t);
+ static VectorNode* make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt);
+
+ static int opcode(int opc, uint vlen, BasicType bt);
+ static bool implemented(int opc, uint vlen, BasicType bt);
};
@@ -81,981 +65,393 @@ class VectorNode : public Node {
//------------------------------AddVBNode---------------------------------------
// Vector add byte
class AddVBNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- AddVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
- virtual int Opcode() const;
-};
-
-//------------------------------AddVCNode---------------------------------------
-// Vector add char
-class AddVCNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
public:
- AddVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------AddVSNode---------------------------------------
-// Vector add short
+// Vector add char/short
class AddVSNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
public:
- AddVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------AddVINode---------------------------------------
// Vector add int
class AddVINode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
public:
- AddVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------AddVLNode---------------------------------------
// Vector add long
class AddVLNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
public:
- AddVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------AddVFNode---------------------------------------
// Vector add float
class AddVFNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- AddVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------AddVDNode---------------------------------------
// Vector add double
class AddVDNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- AddVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------SubVBNode---------------------------------------
// Vector subtract byte
class SubVBNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
public:
- SubVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
- virtual int Opcode() const;
-};
-
-//------------------------------SubVCNode---------------------------------------
-// Vector subtract char
-class SubVCNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- SubVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------SubVSNode---------------------------------------
// Vector subtract short
class SubVSNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
public:
- SubVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------SubVINode---------------------------------------
// Vector subtract int
class SubVINode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
public:
- SubVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------SubVLNode---------------------------------------
// Vector subtract long
class SubVLNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
public:
- SubVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------SubVFNode---------------------------------------
// Vector subtract float
class SubVFNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- SubVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------SubVDNode---------------------------------------
// Vector subtract double
class SubVDNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- SubVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------MulVFNode---------------------------------------
// Vector multiply float
class MulVFNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- MulVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ MulVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------MulVDNode---------------------------------------
// Vector multiply double
class MulVDNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- MulVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ MulVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------DivVFNode---------------------------------------
// Vector divide float
class DivVFNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- DivVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ DivVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------DivVDNode---------------------------------------
// Vector Divide double
class DivVDNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- DivVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ DivVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------LShiftVBNode---------------------------------------
// Vector lshift byte
class LShiftVBNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- LShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
- virtual int Opcode() const;
-};
-
-//------------------------------LShiftVCNode---------------------------------------
-// Vector lshift chars
-class LShiftVCNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
public:
- LShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ LShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------LShiftVSNode---------------------------------------
// Vector lshift shorts
class LShiftVSNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
public:
- LShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ LShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------LShiftVINode---------------------------------------
// Vector lshift ints
class LShiftVINode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
public:
- LShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ LShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------RShiftVBNode---------------------------------------
// Vector arithmetic right shift bytes
-class URShiftVBNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- URShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
- virtual int Opcode() const;
-};
-
-//------------------------------URShiftVCNode---------------------------------------
-// Vector urshift char
-class URShiftVCNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
+class RShiftVBNode : public VectorNode {
public:
- URShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ RShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------RShiftVSNode---------------------------------------
// Vector arithmetic right shift shorts
-class URShiftVSNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
+class RShiftVSNode : public VectorNode {
public:
- URShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ RShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------RShiftVINode---------------------------------------
// Vector arithmetic right shift ints
-class URShiftVINode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
+class RShiftVINode : public VectorNode {
public:
- URShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ RShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------AndVNode---------------------------------------
// Vector and
class AndVNode : public VectorNode {
- protected:
- BasicType _bt;
- virtual BasicType elt_basic_type() const { return _bt; }
public:
- AndVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ AndVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------OrVNode---------------------------------------
// Vector or
class OrVNode : public VectorNode {
- protected:
- BasicType _bt;
- virtual BasicType elt_basic_type() const { return _bt; }
public:
- OrVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ OrVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------XorVNode---------------------------------------
// Vector xor
class XorVNode : public VectorNode {
- protected:
- BasicType _bt;
- virtual BasicType elt_basic_type() const { return _bt; }
public:
- XorVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ XorVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
-//================================= M E M O R Y ==================================
-
-
-//------------------------------VectorLoadNode--------------------------------------
-// Vector Load from memory
-class VectorLoadNode : public LoadNode {
- virtual uint size_of() const { return sizeof(*this); }
-
- protected:
- virtual BasicType elt_basic_type() const = 0; // Vector element basic type
- // For use in constructor
- static const Type* vect_type(const Type* elt_type, uint len) {
- return VectorNode::vect_type(elt_type, len);
- }
+//================================= M E M O R Y ===============================
+//------------------------------LoadVectorNode---------------------------------
+// Load Vector from memory
+class LoadVectorNode : public LoadNode {
public:
- VectorLoadNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *rt)
- : LoadNode(c,mem,adr,at,rt) {
- init_class_id(Class_VectorLoad);
+ LoadVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt)
+ : LoadNode(c, mem, adr, at, vt) {
+ init_class_id(Class_LoadVector);
}
- virtual int Opcode() const;
-
- virtual uint length() const = 0; // Vector length
-
- // Element and vector type
- const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
- const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); }
-
- virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
- virtual BasicType memory_type() const { return T_VOID; }
- virtual int memory_size() const { return length()*type2aelembytes(elt_basic_type()); }
-
- // Vector opcode from scalar opcode
- static int opcode(int sopc, uint vlen);
-
- static VectorLoadNode* make(Compile* C, int opc, Node* ctl, Node* mem,
- Node* adr, const TypePtr* atyp, uint vlen);
-};
-
-//------------------------------Load16BNode--------------------------------------
-// Vector load of 16 bytes (8bits signed) from memory
-class Load16BNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Load16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,16)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store16B; }
- virtual uint length() const { return 16; }
-};
-
-//------------------------------Load8BNode--------------------------------------
-// Vector load of 8 bytes (8bits signed) from memory
-class Load8BNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Load8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store8B; }
- virtual uint length() const { return 8; }
-};
-
-//------------------------------Load4BNode--------------------------------------
-// Vector load of 4 bytes (8bits signed) from memory
-class Load4BNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Load4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4B; }
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Load8CNode--------------------------------------
-// Vector load of 8 chars (16bits unsigned) from memory
-class Load8CNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Load8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store8C; }
- virtual uint length() const { return 8; }
-};
-
-//------------------------------Load4CNode--------------------------------------
-// Vector load of 4 chars (16bits unsigned) from memory
-class Load4CNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Load4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4C; }
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Load2CNode--------------------------------------
-// Vector load of 2 chars (16bits unsigned) from memory
-class Load2CNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Load2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2C; }
- virtual uint length() const { return 2; }
-};
-
-//------------------------------Load8SNode--------------------------------------
-// Vector load of 8 shorts (16bits signed) from memory
-class Load8SNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Load8SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store8C; }
- virtual uint length() const { return 8; }
-};
-
-//------------------------------Load4SNode--------------------------------------
-// Vector load of 4 shorts (16bits signed) from memory
-class Load4SNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Load4SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4C; }
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Load2SNode--------------------------------------
-// Vector load of 2 shorts (16bits signed) from memory
-class Load2SNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Load2SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2C; }
- virtual uint length() const { return 2; }
-};
-
-//------------------------------Load4INode--------------------------------------
-// Vector load of 4 integers (32bits signed) from memory
-class Load4INode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Load4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4I; }
- virtual uint length() const { return 4; }
-};
-//------------------------------Load2INode--------------------------------------
-// Vector load of 2 integers (32bits signed) from memory
-class Load2INode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Load2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2I; }
- virtual uint length() const { return 2; }
-};
+ const TypeVect* vect_type() const { return type()->is_vect(); }
+ uint length() const { return vect_type()->length(); } // Vector length
-//------------------------------Load2LNode--------------------------------------
-// Vector load of 2 longs (64bits signed) from memory
-class Load2LNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
- public:
- Load2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeLong *tl = TypeLong::LONG)
- : VectorLoadNode(c,mem,adr,at,vect_type(tl,2)) {}
virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2L; }
- virtual uint length() const { return 2; }
-};
-//------------------------------Load4FNode--------------------------------------
-// Vector load of 4 floats (32bits) from memory
-class Load4FNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Load4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT)
- : VectorLoadNode(c,mem,adr,at,vect_type(t,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4F; }
- virtual uint length() const { return 4; }
-};
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(memory_size()); }
+ virtual BasicType memory_type() const { return T_VOID; }
+ virtual int memory_size() const { return vect_type()->length_in_bytes(); }
-//------------------------------Load2FNode--------------------------------------
-// Vector load of 2 floats (32bits) from memory
-class Load2FNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Load2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT)
- : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2F; }
- virtual uint length() const { return 2; }
-};
+ virtual int store_Opcode() const { return Op_StoreVector; }
-//------------------------------Load2DNode--------------------------------------
-// Vector load of 2 doubles (64bits) from memory
-class Load2DNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
- public:
- Load2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::DOUBLE)
- : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2D; }
- virtual uint length() const { return 2; }
+ static LoadVectorNode* make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, uint vlen, BasicType bt);
};
-
-//------------------------------VectorStoreNode--------------------------------------
-// Vector Store to memory
-class VectorStoreNode : public StoreNode {
- virtual uint size_of() const { return sizeof(*this); }
-
- protected:
- virtual BasicType elt_basic_type() const = 0; // Vector element basic type
-
+//------------------------------StoreVectorNode--------------------------------
+// Store Vector to memory
+class StoreVectorNode : public StoreNode {
public:
- VectorStoreNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : StoreNode(c,mem,adr,at,val) {
- init_class_id(Class_VectorStore);
+ StoreVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : StoreNode(c, mem, adr, at, val) {
+ assert(val->is_Vector() || val->is_LoadVector(), "sanity");
+ init_class_id(Class_StoreVector);
}
- virtual int Opcode() const;
- virtual uint length() const = 0; // Vector length
+ const TypeVect* vect_type() const { return in(MemNode::ValueIn)->bottom_type()->is_vect(); }
+ uint length() const { return vect_type()->length(); } // Vector length
- // Element and vector type
- const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
- const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); }
+ virtual int Opcode() const;
- virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(memory_size()); }
virtual BasicType memory_type() const { return T_VOID; }
- virtual int memory_size() const { return length()*type2aelembytes(elt_basic_type()); }
-
- // Vector opcode from scalar opcode
- static int opcode(int sopc, uint vlen);
+ virtual int memory_size() const { return vect_type()->length_in_bytes(); }
- static VectorStoreNode* make(Compile* C, int opc, Node* ctl, Node* mem,
+ static StoreVectorNode* make(Compile* C, int opc, Node* ctl, Node* mem,
Node* adr, const TypePtr* atyp, Node* val,
uint vlen);
};
-//------------------------------Store16BNode--------------------------------------
-// Vector store of 16 bytes (8bits signed) to memory
-class Store16BNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Store16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 16; }
-};
-//------------------------------Store8BNode--------------------------------------
-// Vector store of 8 bytes (8bits signed) to memory
-class Store8BNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Store8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 8; }
-};
-
-//------------------------------Store4BNode--------------------------------------
-// Vector store of 4 bytes (8bits signed) to memory
-class Store4BNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Store4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Store8CNode--------------------------------------
-// Vector store of 8 chars (16bits signed/unsigned) to memory
-class Store8CNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Store8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 8; }
-};
-
-//------------------------------Store4CNode--------------------------------------
-// Vector store of 4 chars (16bits signed/unsigned) to memory
-class Store4CNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Store4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Store2CNode--------------------------------------
-// Vector store of 2 chars (16bits signed/unsigned) to memory
-class Store2CNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Store2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 2; }
-};
-
-//------------------------------Store4INode--------------------------------------
-// Vector store of 4 integers (32bits signed) to memory
-class Store4INode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Store4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Store2INode--------------------------------------
-// Vector store of 2 integers (32bits signed) to memory
-class Store2INode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Store2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 2; }
-};
+//=========================Promote_Scalar_to_Vector============================
-//------------------------------Store2LNode--------------------------------------
-// Vector store of 2 longs (64bits signed) to memory
-class Store2LNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
+//------------------------------ReplicateBNode---------------------------------
+// Replicate byte scalar to be vector
+class ReplicateBNode : public VectorNode {
public:
- Store2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
+ ReplicateBNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
- virtual uint length() const { return 2; }
};
-//------------------------------Store4FNode--------------------------------------
-// Vector store of 4 floats (32bits) to memory
-class Store4FNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
+//------------------------------ReplicateSNode---------------------------------
+// Replicate short scalar to be vector
+class ReplicateSNode : public VectorNode {
public:
- Store4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
+ ReplicateSNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
- virtual uint length() const { return 4; }
};
-//------------------------------Store2FNode--------------------------------------
-// Vector store of 2 floats (32bits) to memory
-class Store2FNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
+//------------------------------ReplicateINode---------------------------------
+// Replicate int scalar to be vector
+class ReplicateINode : public VectorNode {
public:
- Store2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
+ ReplicateINode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
- virtual uint length() const { return 2; }
};
-//------------------------------Store2DNode--------------------------------------
-// Vector store of 2 doubles (64bits) to memory
-class Store2DNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+//------------------------------ReplicateLNode---------------------------------
+// Replicate long scalar to be vector
+class ReplicateLNode : public VectorNode {
public:
- Store2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
+ ReplicateLNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
- virtual uint length() const { return 2; }
};
-//=========================Promote_Scalar_to_Vector====================================
-
-//------------------------------Replicate16BNode---------------------------------------
-// Replicate byte scalar to be vector of 16 bytes
-class Replicate16BNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
+//------------------------------ReplicateFNode---------------------------------
+// Replicate float scalar to be vector
+class ReplicateFNode : public VectorNode {
public:
- Replicate16BNode(Node* in1) : VectorNode(in1, 16) {}
+ ReplicateFNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
};
-//------------------------------Replicate8BNode---------------------------------------
-// Replicate byte scalar to be vector of 8 bytes
-class Replicate8BNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
+//------------------------------ReplicateDNode---------------------------------
+// Replicate double scalar to be vector
+class ReplicateDNode : public VectorNode {
public:
- Replicate8BNode(Node* in1) : VectorNode(in1, 8) {}
+ ReplicateDNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
};
-//------------------------------Replicate4BNode---------------------------------------
-// Replicate byte scalar to be vector of 4 bytes
-class Replicate4BNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Replicate4BNode(Node* in1) : VectorNode(in1, 4) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate8CNode---------------------------------------
-// Replicate char scalar to be vector of 8 chars
-class Replicate8CNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Replicate8CNode(Node* in1) : VectorNode(in1, 8) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate4CNode---------------------------------------
-// Replicate char scalar to be vector of 4 chars
-class Replicate4CNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Replicate4CNode(Node* in1) : VectorNode(in1, 4) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2CNode---------------------------------------
-// Replicate char scalar to be vector of 2 chars
-class Replicate2CNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Replicate2CNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate8SNode---------------------------------------
-// Replicate short scalar to be vector of 8 shorts
-class Replicate8SNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Replicate8SNode(Node* in1) : VectorNode(in1, 8) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate4SNode---------------------------------------
-// Replicate short scalar to be vector of 4 shorts
-class Replicate4SNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Replicate4SNode(Node* in1) : VectorNode(in1, 4) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2SNode---------------------------------------
-// Replicate short scalar to be vector of 2 shorts
-class Replicate2SNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Replicate2SNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate4INode---------------------------------------
-// Replicate int scalar to be vector of 4 ints
-class Replicate4INode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Replicate4INode(Node* in1) : VectorNode(in1, 4) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2INode---------------------------------------
-// Replicate int scalar to be vector of 2 ints
-class Replicate2INode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Replicate2INode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2LNode---------------------------------------
-// Replicate long scalar to be vector of 2 longs
-class Replicate2LNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
- public:
- Replicate2LNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate4FNode---------------------------------------
-// Replicate float scalar to be vector of 4 floats
-class Replicate4FNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Replicate4FNode(Node* in1) : VectorNode(in1, 4) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2FNode---------------------------------------
-// Replicate float scalar to be vector of 2 floats
-class Replicate2FNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Replicate2FNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2DNode---------------------------------------
-// Replicate double scalar to be vector of 2 doubles
-class Replicate2DNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
- public:
- Replicate2DNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
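Every removed Replicate class above followed the same pattern: the element type came from an elt_basic_type() override and the lane count was hard-wired into the constructor, so each new vector width would have required another family of classes. In the reworked scheme a single class per element kind remains and the shape travels in the TypeVect. A hedged sketch of how a replicate might now be created (TypeVect::make and the scalar_d input are assumptions for illustration; the actual factory lives on the vectornode.cpp side of this patch):

    // The TypeVect carries both the element type and the lane count, so the
    // same ReplicateDNode covers 2 x double (128-bit XMM) and 4 x double
    // (256-bit YMM) vectors without adding subclasses.
    const TypeVect* vt  = TypeVect::make(T_DOUBLE, 4);             // assumed factory
    ReplicateDNode* rep = new (C, 2) ReplicateDNode(scalar_d, vt); // 2 edges: control + scalar
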
-//========================Pack_Scalars_into_a_Vector==============================
+//========================Pack_Scalars_into_a_Vector===========================
//------------------------------PackNode---------------------------------------
// Pack parent class (not for code generation).
class PackNode : public VectorNode {
public:
- PackNode(Node* in1) : VectorNode(in1, 1) {}
- PackNode(Node* in1, Node* n2) : VectorNode(in1, n2, 2) {}
+ PackNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
+ PackNode(Node* in1, Node* n2, const TypeVect* vt) : VectorNode(in1, n2, vt) {}
virtual int Opcode() const;
- void add_opd(Node* n) {
- add_req(n);
- _length++;
- assert(_length == req() - 1, "vector length matches edge count");
+ void add_opd(uint i, Node* n) {
+ init_req(i+1, n);
}
// Create a binary tree form for Packs. [lo, hi) (half-open) range
Node* binaryTreePack(Compile* C, int lo, int hi);
- static PackNode* make(Compile* C, Node* s, const Type* elt_t);
+ static PackNode* make(Compile* C, Node* s, uint vlen, BasicType bt);
};
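PackNode likewise takes its shape from the TypeVect, and operands are now installed by index rather than by appending edges. Roughly how a caller such as the superword pass might assemble a pack with this interface (a sketch only; C, vlen, the element type T_INT and the scalar[] array are assumed to be in scope):

    // Lane 0 goes in through the factory; the remaining lanes are set by
    // index. Per the declaration above, add_opd(j, n) stores lane j at
    // edge j+1 (edge 0 is the control input).
    PackNode* p = PackNode::make(C, scalar[0], vlen, T_INT);
    for (uint j = 1; j < vlen; j++) {
      p->add_opd(j, scalar[j]);
    }
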
//------------------------------PackBNode---------------------------------------
// Pack byte scalars into vector
class PackBNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- PackBNode(Node* in1) : PackNode(in1) {}
- virtual int Opcode() const;
-};
-
-//------------------------------PackCNode---------------------------------------
-// Pack char scalars into vector
-class PackCNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
public:
- PackCNode(Node* in1) : PackNode(in1) {}
+ PackBNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
virtual int Opcode() const;
};
//------------------------------PackSNode---------------------------------------
// Pack short scalars into a vector
class PackSNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
public:
- PackSNode(Node* in1) : PackNode(in1) {}
+ PackSNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackSNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------PackINode---------------------------------------
// Pack integer scalars into a vector
class PackINode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
public:
- PackINode(Node* in1) : PackNode(in1) {}
- PackINode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ PackINode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackINode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------PackLNode---------------------------------------
// Pack long scalars into a vector
class PackLNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
public:
- PackLNode(Node* in1) : PackNode(in1) {}
- PackLNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ PackLNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackLNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Pack2LNode--------------------------------------
+// Pack 2 long scalars into a vector
+class Pack2LNode : public PackNode {
+ public:
+ Pack2LNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------PackFNode---------------------------------------
// Pack float scalars into vector
class PackFNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- PackFNode(Node* in1) : PackNode(in1) {}
- PackFNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ PackFNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackFNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------PackDNode---------------------------------------
// Pack double scalars into a vector
class PackDNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- PackDNode(Node* in1) : PackNode(in1) {}
- PackDNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ PackDNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackDNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
-// The Pack2xN nodes assist code generation. They are created from
-// Pack4C, etc. nodes in final_graph_reshape in the form of a
-// balanced, binary tree.
-
-//------------------------------Pack2x1BNode-----------------------------------------
-// Pack 2 1-byte integers into vector of 2 bytes
-class Pack2x1BNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
+//------------------------------Pack2DNode--------------------------------------
+// Pack 2 double scalars into a vector
+class Pack2DNode : public PackNode {
public:
- Pack2x1BNode(Node *in1, Node* in2) : PackNode(in1, in2) {}
+ Pack2DNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
- virtual uint ideal_reg() const { return Op_RegI; }
};
-//------------------------------Pack2x2BNode---------------------------------------
-// Pack 2 2-byte integers into vector of 4 bytes
-class Pack2x2BNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Pack2x2BNode(Node *in1, Node* in2) : PackNode(in1, in2) {}
- virtual int Opcode() const;
- virtual uint ideal_reg() const { return Op_RegI; }
-};
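The two-operand Pack2LNode/Pack2DNode declared above take over the role the removed note ("The Pack2xN nodes assist code generation...") describes for the old Pack2x1B/Pack2x2B classes: a wide pack is reshaped into a balanced binary tree whose innermost nodes each combine a pair of inputs, which is what the [lo, hi) half-open range in binaryTreePack is for. A standalone model of that recursion (plain C++, not the HotSpot implementation; whether the real code bottoms out at pairs exactly this way is an assumption):

    #include <cstdio>

    // Pack operands in the half-open range [lo, hi) into a balanced binary
    // tree: pairs of scalars first, then pairs of partial vectors.
    static void binary_tree_pack(const char* const* opd, int lo, int hi, int depth) {
      int ct = hi - lo;                            // assumed to be a power of two
      if (ct == 2) {
        std::printf("%*spack2(%s, %s)\n", depth * 2, "", opd[lo], opd[lo + 1]);
        return;
      }
      int mid = lo + ct / 2;
      binary_tree_pack(opd, lo, mid, depth + 1);   // left half
      binary_tree_pack(opd, mid, hi, depth + 1);   // right half
      std::printf("%*scombine [%d, %d)\n", depth * 2, "", lo, hi);
    }

    int main() {
      const char* lanes[] = { "l0", "l1", "l2", "l3" };
      binary_tree_pack(lanes, 0, 4, 0);            // 4 lanes -> two pack2 leaves + one combine
      return 0;
    }
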
//========================Extract_Scalar_from_Vector===============================
@@ -1069,7 +465,7 @@ class ExtractNode : public Node {
virtual int Opcode() const;
uint pos() const { return in(2)->get_int(); }
- static Node* make(Compile* C, Node* v, uint position, const Type* opd_t);
+ static Node* make(Compile* C, Node* v, uint position, BasicType bt);
};
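Since the factory now takes a BasicType rather than a Type*, a caller only needs the element kind and the lane index. A usage sketch (assuming a Compile* C and a byte-vector-valued Node* vec are in scope; illustrative only):

    // make() is expected to pick the matching Extract{UB,B,C,S,I,L,F,D}
    // subclass from the BasicType; the lane index becomes the ConI input
    // that pos() reads back via in(2)->get_int().
    Node* lane3 = ExtractNode::make(C, vec, 3, T_BYTE);
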
//------------------------------ExtractBNode---------------------------------------
@@ -1082,6 +478,16 @@ class ExtractBNode : public ExtractNode {
virtual uint ideal_reg() const { return Op_RegI; }
};
+//------------------------------ExtractUBNode--------------------------------------
+// Extract a boolean from a vector at position "pos"
+class ExtractUBNode : public ExtractNode {
+ public:
+ ExtractUBNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
//------------------------------ExtractCNode---------------------------------------
// Extract a char from a vector at position "pos"
class ExtractCNode : public ExtractNode {